;   Divide-by-16 triple Quadrature State Machine for ATMEGA8
;   Copyright 2006 Jeff Epler <jepler@unpythonic.net>
;
;   This program is free software; you can redistribute it and/or modify
;   it under the terms of the GNU General Public License as published by
;   the Free Software Foundation; either version 2 of the License, or
;   (at your option) any later version.
;
;   This program is distributed in the hope that it will be useful,
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;   GNU General Public License for more details.
;
;   You should have received a copy of the GNU General Public License
;   along with this program; if not, write to the Free Software
;   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


; compile with: avr-gcc qq2.S -mmcu=atmega8

; Ideas to trim a few more cycles:
; * don't move both bits of the spindle and use "count" instead of "encoder"
;   in emc (save 2 cycles); use a different table and save another bld/bst
;   pair (2 more cycles)
; * make one pair of output bits match up with the result bits in the table,
;   so you can mov / andi instead of bst/bld (save 2 cycles)
; all of these optimizations -> 28 / 35 cycles depending on STRETCH_INDEX
; (571kHz / 457kHz)

; The code enabled by STRETCH_INDEX is untested and may not work

#include <avr/io.h>

; STRETCH_INDEX is left undefined here, so the index-pulse stretching code
; guarded by it in main is not assembled.
#undef STRETCH_INDEX

; RAM address of the working copy of the transition data.  Only its high
; byte is ever loaded (into the high half of X, Y and Z); the low half of
; each pointer is then free to act as the lookup index.
#define table 256
; I/O register addresses in the form used by the in and out instructions:
;   PIN    port D input pins, sampled once per polling pass
;   PORT   port C output latch, written once per polling pass
;   DDRO   port C data direction register, set to 0xff at startup
;   PORTI  port D output latch, set to 0xff at startup
;          (presumably to enable the pull-ups on the inputs -- confirm
;          against the ATmega8 datasheet and the board wiring)
#define PIN _SFR_IO_ADDR(PIND)
#define PORT _SFR_IO_ADDR(PORTC)
#define DDRO _SFR_IO_ADDR(DDRC)
#define PORTI _SFR_IO_ADDR(PORTD)

; Low and high halves of the X, Y and Z pointer register pairs.
#define XLO r26
#define XHI r27
#define YLO r28
#define YHI r29
#define ZLO r30
#define ZHI r31

; main -- program entry point; never returns.
;
; Configures the ports, copies the 256-byte transition data at "states"
; from flash into RAM, then polls the input port forever and runs three
; independent lookup-driven quadrature dividers (channels A, B and C).
;
; Register roles inside the polling loop:
;   r16      latest sample of the input port
;   r17      image of the output port; two bits per channel are refreshed
;            on every pass and the whole byte is written out at the end
;   r18      index stretch countdown (STRETCH_INDEX builds only)
;   X, Y, Z  high byte: RAM page holding the transition data
;            low byte:  state of channel A, B, C in bits 7..2, with the
;                       two fresh input bits of that channel in bits 1..0
.globl main
main:
    ; set up I/O

    ldi r16, 0xff
    out DDRO, r16               ; all pins of the output port are outputs
    out PORTI, r16              ; all ones to the input port latch
    out PORT, r16               ; outputs read high until the first pass

    ; move table from ROM to RAM

    ldi ZLO, lo8(states)
    ldi ZHI, hi8(states)
    ldi XLO, 0
    ldi XHI, hi8(table)

.Lcopy_table:
    lpm r0, z+
    st x+, r0
    tst XLO                     ; low byte is 0 again after 256 bytes
    brne .Lcopy_table

    ; set up registers: all three pointers at entry 0 of the RAM copy
    ldi XHI, hi8(table)
    ldi YHI, hi8(table)
    ldi ZHI, hi8(table)

    ldi XLO, 0
    ldi YLO, 0
    ldi ZLO, 0

    ldi r17, 0
#ifdef STRETCH_INDEX
    ldi r18, 0
#endif


.Lpoll:
    in r16, PIN                 ; 1 cycle

#ifdef STRETCH_INDEX
    ; dealing with index pulse  ; 7 cycles
    ; SUB has no immediate form, so the countdown must use SUBI.
    subi r18, 1
    brne .Lindex_sample
    andi r17, lo8(~(1<<6))      ; countdown expired: drop the stretched index
.Lindex_sample:
    sbrs r16, 6                 ; index input high?
    rjmp .Lindex_done
    ldi r18, 32                 ; 32 polling cycles ~ 82 microseconds
    ori r17, (1<<6)
.Lindex_done:
#endif

    ; Dealing with channel A    ; 10 cycles
    bst r16, 0                  ; merge the two input bits into the index
    bld XLO, 0
    bst r16, 1
    bld XLO, 1
    ld XLO, x                   ; next state replaces the index

    bst XLO, 6                  ; state bits 6 and 7 are the divided output
    bld r17, 0
    bst XLO, 7
    bld r17, 1

    ; Dealing with channel B    ; 10 cycles
    bst r16, 2
    bld YLO, 0
    bst r16, 3
    bld YLO, 1
    ld YLO, y

    bst YLO, 6
    bld r17, 2
    bst YLO, 7
    bld r17, 3

    ; Dealing with channel C    ; 10 cycles
    bst r16, 4
    bld ZLO, 0
    bst r16, 5
    bld ZLO, 1
    ld ZLO, z

    bst ZLO, 6
    bld r17, 4
    bst ZLO, 7
    bld r17, 5

    out PORT, r17               ; 1 cycle

    rjmp .Lpoll                 ; 2 cycles
                                ; total: 1+10+10+10+1+2 = 34 cycles (470kHz)
                                ;        stretch index:   41 cycles (390kHz)

; Quadrature division table N=16
; Divide-by-16, quadrature output in bits 6 and 7 (mega8 triple divider)
;
; Layout: 256 one-byte entries.  The lookup index is the current state in
; bits 7..2 combined with the two-bit input sample in bits 1..0, and the
; entry found there is the next state.  Within every entry:
;   bits 7..6  divided quadrature output (main copies them to the port)
;   bits 7..2  position of the divider, 64 steps in total
;   bits 1..0  always zero; main overwrites them with the next sample
; Each row below covers two states with four input values each; the
; trailing comment gives the index range of the row.
;
; Transition rule: a single-bit input change in the order
; 0 -> 1 -> 3 -> 2 -> 0 moves one step forward, the reverse order moves one
; step back, an unchanged input keeps the state, and a change of both input
; bits at once also keeps the state.
;
; Step order: 0x00..0x3c, 0x40..0x7c, 0xc0..0xfc, 0x80..0xbc, then back to
; 0x00.  Bits 7..6 therefore run through 00, 01, 11, 10 and change once
; per 16 steps.
states:
.byte  0x00, 0x04, 0xbc, 0x00,   0x00, 0x04, 0x04, 0x08        ; 0x00..0x07
.byte  0x08, 0x04, 0x0c, 0x08,   0x10, 0x0c, 0x0c, 0x08        ; 0x08..0x0f
.byte  0x10, 0x14, 0x0c, 0x10,   0x10, 0x14, 0x14, 0x18        ; 0x10..0x17
.byte  0x18, 0x14, 0x1c, 0x18,   0x20, 0x1c, 0x1c, 0x18        ; 0x18..0x1f
.byte  0x20, 0x24, 0x1c, 0x20,   0x20, 0x24, 0x24, 0x28        ; 0x20..0x27
.byte  0x28, 0x24, 0x2c, 0x28,   0x30, 0x2c, 0x2c, 0x28        ; 0x28..0x2f
.byte  0x30, 0x34, 0x2c, 0x30,   0x30, 0x34, 0x34, 0x38        ; 0x30..0x37
.byte  0x38, 0x34, 0x3c, 0x38,   0x40, 0x3c, 0x3c, 0x38        ; 0x38..0x3f
.byte  0x40, 0x44, 0x3c, 0x40,   0x40, 0x44, 0x44, 0x48        ; 0x40..0x47
.byte  0x48, 0x44, 0x4c, 0x48,   0x50, 0x4c, 0x4c, 0x48        ; 0x48..0x4f
.byte  0x50, 0x54, 0x4c, 0x50,   0x50, 0x54, 0x54, 0x58        ; 0x50..0x57
.byte  0x58, 0x54, 0x5c, 0x58,   0x60, 0x5c, 0x5c, 0x58        ; 0x58..0x5f
.byte  0x60, 0x64, 0x5c, 0x60,   0x60, 0x64, 0x64, 0x68        ; 0x60..0x67
.byte  0x68, 0x64, 0x6c, 0x68,   0x70, 0x6c, 0x6c, 0x68        ; 0x68..0x6f
.byte  0x70, 0x74, 0x6c, 0x70,   0x70, 0x74, 0x74, 0x78        ; 0x70..0x77
.byte  0x78, 0x74, 0x7c, 0x78,   0xc0, 0x7c, 0x7c, 0x78        ; 0x78..0x7f
.byte  0x80, 0x84, 0xfc, 0x80,   0x80, 0x84, 0x84, 0x88        ; 0x80..0x87
.byte  0x88, 0x84, 0x8c, 0x88,   0x90, 0x8c, 0x8c, 0x88        ; 0x88..0x8f
.byte  0x90, 0x94, 0x8c, 0x90,   0x90, 0x94, 0x94, 0x98        ; 0x90..0x97
.byte  0x98, 0x94, 0x9c, 0x98,   0xa0, 0x9c, 0x9c, 0x98        ; 0x98..0x9f
.byte  0xa0, 0xa4, 0x9c, 0xa0,   0xa0, 0xa4, 0xa4, 0xa8        ; 0xa0..0xa7
.byte  0xa8, 0xa4, 0xac, 0xa8,   0xb0, 0xac, 0xac, 0xa8        ; 0xa8..0xaf
.byte  0xb0, 0xb4, 0xac, 0xb0,   0xb0, 0xb4, 0xb4, 0xb8        ; 0xb0..0xb7
.byte  0xb8, 0xb4, 0xbc, 0xb8,   0x00, 0xbc, 0xbc, 0xb8        ; 0xb8..0xbf
.byte  0xc0, 0xc4, 0x7c, 0xc0,   0xc0, 0xc4, 0xc4, 0xc8        ; 0xc0..0xc7
.byte  0xc8, 0xc4, 0xcc, 0xc8,   0xd0, 0xcc, 0xcc, 0xc8        ; 0xc8..0xcf
.byte  0xd0, 0xd4, 0xcc, 0xd0,   0xd0, 0xd4, 0xd4, 0xd8        ; 0xd0..0xd7
.byte  0xd8, 0xd4, 0xdc, 0xd8,   0xe0, 0xdc, 0xdc, 0xd8        ; 0xd8..0xdf
.byte  0xe0, 0xe4, 0xdc, 0xe0,   0xe0, 0xe4, 0xe4, 0xe8        ; 0xe0..0xe7
.byte  0xe8, 0xe4, 0xec, 0xe8,   0xf0, 0xec, 0xec, 0xe8        ; 0xe8..0xef
.byte  0xf0, 0xf4, 0xec, 0xf0,   0xf0, 0xf4, 0xf4, 0xf8        ; 0xf0..0xf7
.byte  0xf8, 0xf4, 0xfc, 0xf8,   0x80, 0xfc, 0xfc, 0xf8        ; 0xf8..0xff

