; ------------------------------------------------------------------------
;
; Title:
;
;   PD30 -- PIC "4-pin" frequency divider (10 MHz to 32768 Hz)
;
; Function:
;
;   This PIC program implements a digital frequency divider: the external
;   10 MHz input clock is divided by two carefully calculated factors in
;   order to generate a 32768 Hz output, exactly, on average.
;
;   This allows ubiquitous, low-power (tuning fork-based) watch or clock
;   display circuits to be driven via higher precision 10 MHz frequency
;   references, such as quartz OCXO, rubidium, cesium, or GPSDO.
;
; Diagram:
;                                ---__---
;                5V (Vdd)  +++++|1      8|=====  Ground (Vss)
;            10 MHz input  ---->|2  pD  7|---->  32768 Hz output
;                              -|3  30  6|-
;                              o|4      5|-
;                                --------
; or
;                                ---__---
;                5V (Vdd)  +++++|1      8|=====  Ground (Vss)
;            10 MHz input  ---->|2  pD  7|---->  32768 Hz output
;           32 kHz output  <----|3  30  6|---->     32 Hz output
;                              o|4      5|---->      1 Hz output
;                                --------
; Notes:
;
;   Only 4 pins are required: power (2.0-5.5V), ground, input and output.
;   o Tie input pin4/GP3 to Vdd or Vss.
;   For TADD-2 compatibility: pin3/GP4 has same 32 kHz output as pin7/GP0.
;   Zero jitter outputs: pin6/GP1 is 32 Hz, pin5/GP2 is 1 Hz.
;   Output frequency accuracy is the same as clock input accuracy.
;   Output drive current is 25 mA maximum per pin.
;   Coded for Microchip 12F675 but any '609 '615 '629 '635 '675 '683 works.
;
; Design:
;
;   Using a 10 MHz external clock (2.5 MIPS instruction rate) an output
;   pin is toggled 65536 times a second resulting in a 32 kHz square wave.
;
;   The design ensures there is no accumulated phase or frequency error:
;   every second of 10 MHz clock input cycles corresponds to 32768 output
;   cycles, exactly. Thus the accuracy of the 32 kHz output is equal to the
;   accuracy of the 10 MHz input.
;
;   Note that 2,500,000 is not integer divisible by 65,536 so the program
;   generates the output square wave with slightly varying duty cycle or
;   period. A unique design keeps output jitter to the minimum possible:
;   each 32 kHz edge is within one PIC instruction time (400 ns) of ideal.
;
;   For more information see:
;       http://leapsecond.com/pic/
;       http://leapsecond.com/tools/10m32k.c
;
; Version:
;
;   25-Jul-2008  Tom Van Baak (tvb)  www.LeapSecond.com/pic
;
; ------------------------------------------------------------------------

; Microchip MPLAB IDE assembler code (mpasm).

        ; Assembler setup: select the target device, pull in the device
        ; symbol definitions (STATUS, GPIO, CMCON, ANSEL, TRISIO, ... used
        ; below), and set the configuration word.
        ;
        ; NOTE(review): going by the symbol names, the configuration selects
        ; an external clock input (EC), disables the MCLR function on GP3 and
        ; turns the watchdog timer off -- confirm against p12f675.inc and the
        ; device data sheet. All other configuration bits are left at the
        ; assembler/device defaults.

        list        p=pic12f675
        include     p12f675.inc
        __config    _EC_OSC & _MCLRE_OFF & _WDT_OFF

; Register definitions.

        ; File registers used by the program, allocated from 0x20 upward.
        ; r1:r0 form the 16-bit pass counter that drives both the outputs and
        ; the leap-cycle decision; r0 is the LOW byte (bumped once per loop
        ; pass) and r1 the HIGH byte (bumped each time r0 wraps to zero).
        ; r2 is only cleared and incremented, never read by the code; it is
        ; marked "[test]" in the loop and serves as a once-per-second
        ; breakpoint location.

        cblock  0x20            ; define register base
            r2                  ; increments at     1 Hz (on r1 wrap; test only)
            r1                  ; increments at   256 Hz (on r0 wrap; high byte)
            r0                  ; increments at 65536 Hz (every pass; low byte)
            gpcopy              ; shadow GPIO: built in one pass, output in next
        endc

; One-time PIC 12F675 initialization.

        org     0               ; reset vector: execution begins here

        ; --- bank 0: quiet the port and disable the comparator ---
        bcf     STATUS,RP0      ; select bank 0
        clrf    GPIO            ; output latches low before pins are enabled
        movlw   b'00000111'     ; CMCON = 7
        movwf   CMCON           ;   -> comparator off

        ; --- bank 1: digital pins, pin directions ---
        bsf     STATUS,RP0      ; select bank 1
        clrf    ANSEL-0x80      ; all pins digital (no analog A/D)
        movlw   b'00101000'     ; 1 = input:  GP5, GP3
        movwf   TRISIO-0x80     ; 0 = output: GP4, GP2, GP1, GP0

        ; --- bank 0: zero the working registers ---
        bcf     STATUS,RP0      ; back to bank 0 for the main loop
        clrf    gpcopy          ; first pass drives all outputs low
        clrf    r2              ; 1 Hz test counter
        clrf    r1              ; pass counter, high byte
        clrf    r0              ; pass counter, low byte
                                ; (falls through into Loop1 below)

; ----------------------------------------------------------------------

; The loop below is isochronous, i.e., under all conditions each code
; path in the loop uses exactly the same number of instruction cycles.
; Loop is 38 instruction cycles. Loop1 is one more, 39 cycles.
;
; Each pass (1) drives the pins from the shadow byte computed by the
; previous pass, (2) increments the pass counter r1:r0, (3) rebuilds the
; shadow byte from the new counter value, and (4) dispatches on the number
; of trailing zero bits in the counter to decide whether the NEXT pass is
; entered at Loop (38 cycles) or Loop1 (39 cycles, a "leap cycle").
; Writing GPIO from the shadow at the very top of the loop means the pin
; update always happens at the same point of a pass, whichever branch path
; the previous pass took.
;
; Cycle budget for one pass entered at Loop (assuming the usual mid-range
; PIC timing: 1 cycle per instruction, 2 for goto/call/retlw, a write to
; PCL, or a btfsc that skips):
;
;     2   copy shadow to GPIO
;     5   counter increment (each btfsc/incf pair costs 2, skip or not)
;     9   rebuild shadow (clrf + four 2-cycle btfsc/bsf pairs)
;    11   nop + dispatch + padding in case4/case8 + nibble load (movf/swapf)
;     7   call Nzeros, table lookup, retlw
;     4   addwf PCL + goto Loop/Loop1
;    --
;    38
;
; Do not add, remove or reorder instructions here without re-balancing
; every path.
;
Loop1   nop                     ; waste one extra cycle
Loop    movf    gpcopy,W        ; shadow -> W
        movwf   GPIO            ; W -> outside world

        ; Increment 16-bit loop counter.
        ; Z is set by incf only when the byte wraps to zero. If r0 did not
        ; wrap, incf r1 is skipped and Z is still clear, so the second test
        ; skips incf r2 as well.

        incf    r0,F            ; lower byte of 16-bit counter
        btfsc   STATUS,Z        ; byte carry?
          incf  r1,F            ; upper byte of 16-bit counter
        btfsc   STATUS,Z        ; [test] byte carry?
          incf  r2,F            ; [test] ** BREAKPOINT **

        ; Set new output bits for this pass.
        ; The shadow is rebuilt from scratch; it reaches the pins at the
        ; top of the next pass.

        clrf    gpcopy          ; clear shadow output
        btfsc   r0,0            ; check 32768 Hz bit
          bsf   gpcopy,0        ;   set GP0
        btfsc   r1,2            ; check 32 Hz bit
          bsf   gpcopy,1        ;   set GP1
        btfsc   r1,7            ; check 1 Hz bit
          bsf   gpcopy,2        ;   set GP2
        btfsc   r0,0            ; check 32768 Hz bit
          bsf   gpcopy,4        ;   set GP4 (duplicate of GP0)

        ; Count trailing zeros in the counter, groups of 4 bits at a time.
        ; Only the low 12 bits (r0 and the low nibble of r1) are examined.
        ; The three targets are reached after different numbers of cycles;
        ; case4 and case8 pad themselves so all three are level again by
        ; the time Nzeros is called.

        nop                     ; (for loop timing)
        movf    r0, W           ; r0 -> W
        andlw   b'11111111'     ; mask (value unchanged; Z = r0 is zero)
        btfsc   STATUS, Z       ; all 8 zero?
          goto  case8           ; yes, count zeros in r1 (low nibble)

        andlw   b'00001111'     ; mask (keep low nibble of r0)
        btfsc   STATUS, Z       ; low 4 zero?
          goto  case4           ; yes, count bits in high nibble
        goto    case0           ; no, count bits in low nibble

        ; Determine if a "leap cycle" is needed this time:
        ;
        ; every 4th (leap)
        ;   except every 8th (NO leap)
        ;     except every 32nd (leap)
        ;       except every 64th (NO leap)
        ;         except every 128th (leap)
        ;           except every 512th (NO leap)
        ;             except every 2048th (leap)

        ; Case 0 to 3 trailing zeros.
        ;
        ; Reached only when the low nibble of r0 is non-zero, so Nzeros
        ; returns 0..3 and the fifth table entry is never selected.
        ; The returned count indexes the goto table below: the entry picks
        ; a normal 38-cycle pass (Loop) or a 39-cycle leap pass (Loop1).
        ; Each row shows: trailing zeros : counter bit pattern : 2^zeros.

case0   movf    r0, W           ; r0.low -> W (Nzeros masks the nibble)
        call    Nzeros          ; count 0-4 trailing zeros in this nibble
        addwf   PCL,F           ; jump PCL+W
          goto  Loop            ;  0 : 0000 0000 0000 0001 : 1
          goto  Loop            ;  1 : 0000 0000 0000 0010 : 2
          goto  Loop1           ;  2 : 0000 0000 0000 0100 : 4    (leap)
          goto  Loop            ;  3 : 0000 0000 0000 1000 : 8
          goto  $               ; can't happen (would hang here forever)

        ; Case 4 to 7 trailing zeros.
        ;
        ; Reached only when r0 is non-zero but its low nibble is zero, so
        ; the high nibble is non-zero and Nzeros returns 0..3 (i.e. 4..7
        ; trailing zeros overall). The dispatch code gets here one cycle
        ; sooner than it gets to case0, hence the single nop.

case4   nop                     ; (for equal timing)
        swapf   r0, W           ; r0.high -> W (nibbles swapped into W)
        call    Nzeros          ; count 0-4 trailing zeros in this nibble
        addwf   PCL,F           ; jump PCL+W
          goto  Loop            ;  4 : 0000 0000 0001 0000 : 16
          goto  Loop1           ;  5 : 0000 0000 0010 0000 : 32   (leap)
          goto  Loop            ;  6 : 0000 0000 0100 0000 : 64
          goto  Loop1           ;  7 : 0000 0000 1000 0000 : 128  (leap)
          goto  $               ; can't happen (would hang here forever)

        ; Case 8 to 12 trailing zeros.
        ;
        ; Reached only when r0 is zero; the low nibble of r1 is examined.
        ; Unlike case0/case4, all five Nzeros results are possible here:
        ; a result of 4 means the low nibble of r1 is zero as well, i.e.
        ; the counter has 12 OR MORE trailing zeros (including the full
        ; 16-bit wrap to zero), and all of those take a leap cycle.
        ; The dispatch code gets here four cycles sooner than it gets to
        ; case0, hence the two 2-cycle "goto $+1" fillers.

case8   goto    $+1             ; (for equal timing)
        goto    $+1             ; (for equal timing)
        movf    r1, W           ; r1.low -> W (Nzeros masks the nibble)
        call    Nzeros          ; count 0-4 trailing zeros in this nibble
        addwf   PCL,F           ; jump PCL+W
          goto  Loop1           ;  8 : 0000 0001 0000 0000 : 256  (leap)
          goto  Loop            ;  9 : 0000 0010 0000 0000 : 512
          goto  Loop            ; 10 : 0000 0100 0000 0000 : 1024
          goto  Loop1           ; 11 : 0000 1000 0000 0000 : 2048 (leap)
          goto  Loop1           ; 12 : 0001 0000 0000 0000 : 4096 (leap)

; ----------------------------------------------------------------------
;
; Find number (0 to 4) of trailing zero bits in lower nibble of W.
;
;   In:   W  = value; only bits 3..0 are used (upper bits are masked off)
;   Out:  W  = 0..3 trailing zero bits, or 4 if the nibble is all zero
;
; Implemented as a 16-entry retlw table indexed by the nibble, so every
; call executes the same three instructions (andlw, addwf PCL, retlw) and
; takes the same time regardless of the input; the main loop's cycle
; count depends on this. The andlw/addwf also modify the STATUS flags.
;
; NOTE(review): the computed jump takes its upper address bits from
; PCLATH, which this program never writes; this table (and the goto
; tables in case0/case4/case8) presumably must stay within the first 256
; program words -- confirm against the data sheet before moving code.
;
Nzeros  andlw   b'1111'         ; isolate nibble
        addwf   PCL,F           ; jump PCL+W
          retlw 4               ; [ 0] : 0000
          retlw 0               ; [ 1] : 0001
          retlw 1               ; [ 2] : 0010
          retlw 0               ; [ 3] : 0011
          retlw 2               ; [ 4] : 0100
          retlw 0               ; [ 5] : 0101
          retlw 1               ; [ 6] : 0110
          retlw 0               ; [ 7] : 0111
          retlw 3               ; [ 8] : 1000
          retlw 0               ; [ 9] : 1001
          retlw 1               ; [10] : 1010
          retlw 0               ; [11] : 1011
          retlw 2               ; [12] : 1100
          retlw 0               ; [13] : 1101
          retlw 1               ; [14] : 1110
          retlw 0               ; [15] : 1111

        end

; ------------------------------------------------------------------------
;
; Theory:
;
; A PIC with a 10 MHz clock executes 2.5 million instructions per second
; and the instruction cycle time is 400 ns.
;
; To generate a 32 kHz square wave we toggle the output at 64 kHz.
; But note that 65,536 Hz doesn't divide evenly into 2,500,000 Hz.
; To be precise, 2500000 / 65536 = 38.14697265625.
;
; So the idea is to use a 38 instruction loop most of the time and then
; compensate with a 39 instruction loop the rest of the time such that
; exactly 32768 cycles occur over exactly 1 second.
;
; Note 2500000 % 65536 = 9632, and 65536 - 9632 = 55904, so that
; (55904 * 38) + (9632 * 39) = 2500000.
;
; We make the loop 38 cycles long, but 9632 of the 65536 loop times
; need to have one extra cycle. And to keep jitter as low as possible
; we spread out these extra cycles as evenly as possible.
;
; This makes me think of leap years. The year is about 365.242 days long.
; We can't have calendar day fractions so we add full (leap) days instead.
; So we define the calendar year to be 365 days but every 4th year, except
; every 100th year, except every 400th year, we insert an extra day. The
; mean calendar year is:
;
;    365.0000
;     +0.2500 ( + 1/4 )
;     -0.0100 ( - 1/100 )
;     +0.0025 ( + 1/400 )
;     -------
;    365.2425
;
;
; We can do something similar here and keeping real-time PIC code in mind
; we'll use alternating binary steps. Let's insert some "leap cycles".
;
; 2500000 / 65536 = 38.14697265625 is 38 cycles plus a "leap cycle" every
; 4th time, except every 8th time, except every 32nd time, except every
; 64th time, except every 128th time, except every 512th time, except
; every 2048th time. With a calculator or table of negative powers of 2,
; it's easy to see how to get the "coefficients" to use:
;
;     38.00000000000
;     +0.25000000000 ( + 1/4 )
;     -0.12500000000 ( - 1/8 )
;     +0.03125000000 ( + 1/32 )
;     -0.01562500000 ( - 1/64 )
;     +0.00781250000 ( + 1/128 )
;     -0.00195312500 ( - 1/512 )
;     +0.00048828125 ( + 1/2048 )
;     --------------
;     38.14697265625
;
; It adds up perfectly: the 10 MHz input creates a 32.768 kHz output.
; Moreover, the output waveform will be as close as possible to the ideal
; 32 kHz square wave. The output timing will be perfect for any averaging
; times that are integer multiples of 1 second (actually, 1/32 second).
;
; The half-period of a perfect 32768 Hz square wave is 15.2587890625 us.
; A 10 MHz PIC can generate an interval of 15.2 us or 15.6 us but nothing
; in between. So some jitter will exist on every cycle, but it will be within
; +/- 200 ns. This is smaller than any of my 32 kHz applications require.
;
; /tvb
