Author Topic: CALLBCA1, firmware-compatible tape loader  (Read 4069 times)

0 Members and 1 Guest are viewing this topic.

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
CALLBCA1, firmware-compatible tape loader
« on: 16:49, 06 December 13 »
One month ago I was chatting with Devilmarkus about tape encodings, and out of curiosity I tried writing my own firmware-compatible tape loader. I wanted it to be small yet complete, with Spectrum-like multicolour borders to keep impatient users entertained and to let them see whether the tape was loading the data or waiting for the pilot tone, and able to handle high transfer rates. Here's the outcome:

Grabación a 4000 baudios: "Goody" para Amstrad CPC
Grabación a 4000 baudios: "Goody" para Amstrad CPC

; **********************************************************************

; CNG 20131102-20131104 - just for fun, and following Devilmarkus' idea:
; Spectrum-coloured loader fully compatible with the CPC's built-in one.
; ESCAPE key handler added on 20131112 plus "(A+1)/2" patch on 20131113:
; with all optional flags set to 0 CALLBCA1 fits in exactly 254 bytes :)

; Set the optional flag SAFEBCA1 to 1 to push-n-pop IY + HL' (+7 bytes).
; Similarly, set the flag DEAFBCA1 to 1 to disable escaping (-32 bytes).

; **********************************************************************

; ----------------------------------------------------------------------
; CALLBCA1 -- entry point. Sets up the I/O ports and the block counters,
; then falls through into CALLBCA1INIT (pilot tone detection).
; In:    HL = destination address, DE = size in bytes, A = expected ID.
; Out:   CF = 1 on success, CF = 0 on read error or escape;
;        IX = destination pointer, advanced past the last stored byte.
; Clobb: A, BC, DE, HL; also IY and HL' unless SAFEBCA1 is set.
; Note:  interrupts are disabled here and are NOT re-enabled on exit
;        (the EI before the final RET is commented out on purpose).
; NOTE(review): DE = 0 is not special-cased; H' then starts at 0 and the
;        page countdown (DEC H in the page loop) wraps -- confirm that
;        callers never pass a zero size.
; ----------------------------------------------------------------------
callbca1 di ; HL=^ADDR,DE=SIZE,A=ID; CF=OK,IX=^LAST.ADDR,ABCDEHL[HL'IY]!

; Port setup. NOTE(review): the meaning of the $F4xx/$F6xx/$F7xx/$7Fxx
; writes (presumably PPI ports A/C/control and the Gate Array on the
; CPC: sound chip parked, tape motor on, border pen selected) is not
; shown by this file -- confirm against the CPC hardware documentation.
 if DEAFBCA1
  ld bc,$F610
 else
  ld bc,$F40E
  out (c),c
  ld bc,$F6D0
  out (c),c
  ld c,$10 ; 20131121: WE SAVE A BYTE :-)
 endif

; Both variants reach this point with B=$F6 and C=$10; the same C value
; is then written again through port $7Fxx.
 out (c),c
 ld b,$7F
 out (c),c

; Escape support only: prepare the keyboard scan so that CALLBCA1INIT
; can poll the ESCAPE key (line 8, per the comment below).
 if DEAFBCA1
 else
  ld bc,$F792
  out (c),c
  ld bc,$F650+8 ; ESCAPE: KEYBOARD LINE 8
  out (c),c
 endif

; IX = destination pointer; the size (DE) travels through the stack
; into the alternate register set.
 push hl
 pop ix
 push de
 exx

 if SAFEBCA1
; HL' = size while the caller's HL' is parked on the stack, followed by
; the caller's IY; both are restored in CALLBCA1EXIT.
  ex hl,(sp)
  push iy
 else
; HL' = size; the caller's HL' and IY are simply overwritten.
  pop hl
 endif

; YL = expected ID byte (A is untouched by the port writes above).
; YL/YH are presumably the two halves of IY, which is why SAFEBCA1
; preserves IY.
 ld yl,a
; DEC L + INC L leaves L' unchanged but sets Z when L' = 0.
; H' becomes the number of 256-byte pages to read: size/256, plus one
; when the size is not an exact multiple of 256 (partial last page).
 dec l
 inc l
 jr z,$+3 ; SPECIAL CASE: EXTRA LAST PAGE
 inc h
 exx

; C holds the current border colour in its low bits; CALLBCA1EDGE also
; keeps the last seen tape level in bit 7 of C.
 if SAFEBCA1
  ld c,"F" ; CYAN/RED, AS IN THE SPECTRUM
 else
  ld c,"S" ; BRIGHT CYAN/RED LOOK BETTER!
 endif

; DETECT PILOT TONE AS 256 SIMILAR PULSES
; CALLBCA1INIT restarts the detection: E = edge counter (wraps to 0
; after 256 edges), HL = running sum of the edge lengths, YH = length of
; the last edge measured before the restart, used as the reference that
; every following edge is compared against.
callbca1init ld e,0
 ld h,e
 ld l,e
 ld yh,d ; THE NEXT OPCODES SET ITS VALUE

; Escape support: poll the keyboard once per restart. AND $04 clears the
; carry flag, so leaving through CALLBCA1EXIT reports CF = 0 (failure).
; The exit is taken when bit 2 reads as zero (presumably key pressed).
 if DEAFBCA1
 else
  ld a,$F4
  in a,(0) ; A BIT BETTER THAN "IN A,(C)"
  and $04
  jr z,callbca1exit ; ESCAPE: KEYB. BIT 2
 endif

; Measure one edge (D = its length) and restart on overflow (CF = 0).
callbca1tone call callbca1edge
 jr nc,callbca1init
; Accept the edge only when D-3 < YH <= D+3; otherwise restart, which
; makes this edge the new reference.
 ld a,d ; WE NEED TO KEEP D AS SEEN ABOVE
 add 3 ; *!* A RELATIVELY LOW THRESHOLD..
 cp yh
 jr c,callbca1init ; NOT SHORT ENOUGH :-(
 sub 3*2 ; *!* TWICE THE PAST THRESHOLD..
 cp yh
 jr nc,callbca1init ; NOT LONG ENOUGH :-(
; HL = HL + D, done with 8-bit operations: L += D, then H += carry.
 ld a,d
 add l
 ld l,a
 adc h
 sub l
 ld h,a
 inc e
 jr nz,callbca1tone ; WAIT FOR 256 PULSES
; After 256 accepted edges H is the average edge length (sum / 256) and
; A still equals H. The threshold is H/2 + H, i.e. 1.5 times the average
; pilot edge (SHR A is presumably this assembler's logical shift right).
 shr a
 add h
; adc e; 20131121: BAD IDEA, LESS PRECISE
 ld yh,a ; THIS WILL BE OUR BIT THRESHOLD

; READ PILOT UNTIL FINDING THE SYNC.PULSE
; E counts the short edges still needed (2 = one full sync pulse).
; CALLBCA1EDGE returns A = YH (the bit threshold) and D = edge length.
callbca1wait ld e,2
 call callbca1edge
 jr nc,callbca1init ; OVERFLOW, TRY AGAIN
; An edge is "long" when D exceeds half the threshold; a long edge is
; still pilot tone, so the short-edge count is reset to 2.
 shr a
 cp d
 jr c,callbca1wait ; LONG EDGE, WAIT MORE
 dec e
; CALLBCA1WAIT+2 skips the two-byte "LD E,2" so that E keeps counting.
 jr nz,callbca1wait+2 ; PULSE = TWO EDGES

; READ AND CHECK THE ID BYTE, STILL IN YL
; Any read error or ID mismatch goes back to pilot detection rather
; than aborting the whole load.
 call callbca1byte
 jr nc,callbca1init
 sub yl
 jr nz,callbca1init ; WRONG BYTE, GO BACK
; A is zero here (the ID matched), so YL starts counting from 0.
 ld yl,a ; YL IS NOW OUR 256-BYTE COUNTER
; Switch the border to the second colour pair for the data phase. Only
; low bits of C are toggled; bit 7 (last tape level) is left alone.
 ld a,c

 if SAFEBCA1
  xor $18 ; YELLOW/BLUE FROM THE SPECTRUM
 else
  xor $19 ; BRIGHT YELLOW/BLUE ARE BETTER
 endif

 ld c,a

; READ ONE PAGE OF BYTES AND ITS CHECKSUM
; Each page on tape is 256 data bytes followed by two checksum bytes.
; Registers: HL = running CRC16 (updated inside CALLBCA1BYTE),
; YL = byte index inside the page, IX = destination pointer,
; H' = pages left, L' = low byte of the requested size.
callbca1next ld hl,-1 ; RESET CRC16 VALUE
callbca1page call callbca1byte
 jr nc,callbca1exit ; ABORT ON READ ERROR
 exx
; Not the last page (H' <> 1): XOR leaves A non-zero and CF = 0, so the
; byte is always stored.
 ld a,1 ; THE LAST PAGE IS A SPECIAL CASE
 xor h
 jr nz,callbca1skip
; Last page: CF = 1 when L'-1 < YL, i.e. once L' bytes have been stored.
; With L' = 0 the compare is against $FF and never sets CF, so the whole
; page is stored.
 ld a,l
 dec a
 cp yl
; EXX does not touch the flags, so CF survives into the JR below.
callbca1skip exx
 jr c,callbca1skip_ ; DROP TRAILING BYTES
; E still holds the byte just returned by CALLBCA1BYTE.
 ld (ix),e
 inc ix ; "DEC IX" FOR LDDR-LIKE LOADING!
; Skipped bytes are still read from tape and still feed the CRC; they
; are just not written to memory.
callbca1skip_ inc yl
 jr nz,callbca1page
; Checksum, first byte: HL is saved around the call because
; CALLBCA1BYTE would otherwise keep folding bits into it. POP does not
; change CF, so the error check after it is still valid.
 push hl ; KEEP THE CRC16 FROM CHANGING!!
 call callbca1byte
 pop hl
 jr nc,callbca1exit
 xor h
 ld h,a
; Checksum, second byte.
 push hl
 call callbca1byte
 pop hl
 jr nc,callbca1exit
; The page is good when both (H xor byte1) and (L xor byte2) are $FF:
; AND-ing them gives $FF and INC A wraps to zero. AND clears CF and
; INC A leaves it alone, so a mismatch exits with CF = 0 (failure).
 xor l
 and h
 inc a
 jr nz,callbca1exit ; IS THE CRC16 RIGHT?
; One page done; Z from DEC H survives the second EXX.
 exx
 dec h
 exx
 jr nz,callbca1next ; ARE ALL PAGES DONE?
; Every page was read and verified: report success.
 scf

; CALLBCA1EXIT -- common exit for success (CF = 1) and failure (CF = 0).
; None of the instructions below change the carry flag, so the result
; set up by the code that jumped here reaches the caller intact.
; The border is deliberately left as the loader last set it.
callbca1exit ; ld bc,$7F54
; out (c),c ; LET THE USER SET THE BORDER

 if SAFEBCA1
; Restore the caller's IY and HL', pushed in reverse order at entry.
  pop iy
  exx
  pop hl
  exx
 endif
 if DEAFBCA1
 else
; Undo the keyboard-scan port setup done at entry (presumably switches
; the port read by the escape check back to output -- confirm against
; the CPC hardware documentation).
  ld bc,$F782
  out (c),c
 endif

; Write zero to port $F6xx (presumably stops the tape motor -- confirm).
; The opcode bytes ED 71 are emitted with DW because the instruction is
; undocumented and may not be known to the assembler.
 ld b,$F6
 dw $71ED ; THE UNDOCUMENTED "OUT (C),0"!
; ei ; IT'S UP TO THE USER TO ENABLE INTS
 ret

; CALLBCA1BYTE -- read eight bits from tape, most significant first.
; In:    YH = bit threshold, HL = running CRC16, C = border/tape state.
; Out:   CF = 1 and A = E = byte read, HL = updated CRC16;
;        CF = 0 if an edge overflowed (A and E are then meaningless).
; Clobb: A, B, C, D, E, flags.
; E starts at 1: that marker bit is shifted left once per data bit and
; falls into the carry flag after the eighth bit, ending the loop.
callbca1byte ld e,1
; One bit = one pulse = two edges. The first call resets D, the second
; (CALLBCA1EDGE_) keeps counting, so D ends as the full pulse length.
callbca1bits call callbca1edge
 ret nc
 call callbca1edge_
 ret nc
; A = YH - D: bit 7 is set when the pulse is longer than the threshold,
; and that bit is the data bit.
 sub d
 ld d,a ;*!* WE WILL NEED THE BIT 7 BELOW
; COPIED...
; Bit 7 of A = data bit XOR top bit of the CRC. XOR also clears CF.
 xor h
; $+12 lands on ADC HL,HL: the JP (3 bytes) plus the 9 bytes that follow
; it. When taken, CF = 0 and HL is simply shifted left. Otherwise HL is
; XORed with $0810 and shifted left with a 1 entering bit 0, which
; equals shifting first and then XORing with $1021.
 jp p,$+12 ; IT'S THE CRC-16-CCITT METHOD
 ld a,h
 xor $08
 ld h,a
 ld a,l
 xor $10
 ld l,a
 scf
 adc hl,hl
; ...PASTED
; RL D moves the data bit (bit 7 of D) into CF; RL E shifts it into the
; byte being assembled and pushes the old bit 7 of E out into CF.
 rl d ;*!* AS SEEN ABOVE WE USE THE BIT 7
 rl e
 jr nc,callbca1bits
; The marker bit has just been shifted out, so CF = 1 on this return.
 ld a,e
 ret

; CALLBCA1EDGE  -- wait for the next tape edge, counting from D = 0.
; CALLBCA1EDGE_ -- same, but keeps counting from the current D, so that
;                  two consecutive edges can be timed as one pulse.
; In:    C = border colour (low bits) + last tape level (bit 7),
;        YH = value handed back in A on success.
; Out:   CF = 1: edge found, D = loop iterations counted, A = YH,
;        B = $7F, C updated (bit 7 and the colour bits toggled);
;        CF = 0: D wrapped to zero before any edge was seen.
; Clobb: A, B, flags.
; NOTE(review): on the overflow return CF is only cleared by the AND $80
; inside the polling loop. If CALLBCA1EDGE_ is entered with D = $FF and
; CF = 1 (as CALLBCA1BYTE does right after a successful first edge), the
; very first INC D wraps and RET Z returns with CF still set, so the
; overflow would go unnoticed -- confirm whether this can matter.
callbca1edge ld d,0 ; EASIER MATHEMATICS!
; Fixed delay before polling: the loop below runs seven times ($-1 is
; the one-byte DEC A just before the JR).
callbca1edge_ ld a,8-1 ; DELAY (~32 NOPs)
 dec a
 jr nz,$-1
; B = $F5 selects the input port that carries the tape level in bit 7
; (presumably PPI port B -- confirm).
 ld b,$F5
; Polling loop: "JR Z,$-7" jumps back to this INC D.
 inc d
 ret z ; OVERFLOW, THE EDGE WAS TOO LONG!
; Keep looping while bit 7 of the port equals bit 7 of C (no change).
 in a,(c)
 xor c
 and $80
 jr z,$-7
; A = $80 here, so XOR C gives C with bit 7 flipped to the new level.
 xor c

; Flip the border between the two colours of the current pair.
 if SAFEBCA1
  xor $1A ; ZX SPECTRUM-LIKE DARK BORDERS
 else
  xor $1F ; BETTER-LOOKING BRIGHT BORDERS
 endif

; Remember the new state in C, then send it to port $7Fxx with bit 7
; masked off (AND B with B = $7F) to change the border colour.
 ld c,a
 ld b,$7F
 and b
 out (c),a
; Hand the threshold back to the caller and flag success.
 ld a,yh
 scf
 ret

; **********************************************************************

The blocks can be made with the firmware call &BC9E, or with tools such as 2CDT, for example "2CDT -n -b 4000 -m 1 FILENAME TAPE.CDT".

Trivia: the CPC built-in loader can't stand more than 3472 baud, i.e. blocks where the bits 0 and 1 have pulses of 336+672T; in comparison, CALLBCA1 can handle (so far) up to 4050 baud, 288+576T. Higher rates would require better mathematics in the routine, as well as balancing edge lengths and CPU consumption: DEAFBCA1 and SAFEBCA1 push the maximum rate up or down two dozen baud! :-(

EDIT: attaching the sample tape. Also, 3500000/(3*288) ~ 4050.9259, not 4034.
« Last Edit: 18:36, 06 December 13 by cngsoft »
(if you can't see the banner right now my server is currently offline)

Offline Gryzor

  • Administrator
  • 6128 Plus
  • *****
  • Posts: 16.017
  • Country: gr
  • CPC-Wiki maintainer
    • CPCWiki
  • Liked: 3452
  • Likes Given: 6205
Re: CALLBCA1, firmware-compatible tape loader
« Reply #1 on: 21:08, 06 December 13 »
Ideal for Speccy ports! :D

So what's its max speed?

Offline arnoldemu

  • Supporter
  • 6128 Plus
  • *
  • Posts: 5.336
  • Country: gb
    • Unofficial Amstrad WWW Resource
  • Liked: 2274
  • Likes Given: 3478
Re: CALLBCA1, firmware-compatible tape loader
« Reply #2 on: 21:14, 06 December 13 »
Ideal for Speccy ports! :D

So what's its max speed?
older loader would take the time of grinding the coffee beans, then boiling water in a saucepan, then making coffee.

new loader is instant coffee speed with a fast boil kettle.



My games. My Games
My website with coding examples: Unofficial Amstrad WWW Resource

Offline arnoldemu

  • Supporter
  • 6128 Plus
  • *
  • Posts: 5.336
  • Country: gb
    • Unofficial Amstrad WWW Resource
  • Liked: 2274
  • Likes Given: 3478
Re: CALLBCA1, firmware-compatible tape loader
« Reply #3 on: 21:15, 06 December 13 »
btw, I like this loader a lot! :)
My games. My Games
My website with coding examples: Unofficial Amstrad WWW Resource

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #4 on: 22:39, 06 December 13 »
Ideal for Speccy ports! :D

So what's its max speed?
Arnoldemu's comparison is fitting :-) Thanks for liking it!

Now seriously, this routine autodetects the baud rate, just like the firmware's original loader, so you can feed it any speed the CPC ROM could handle. It accepts higher rates because it does fewer CALLs and RETs but there's still a lot of overhead between reading each bit from tape and even more overhead between each byte. This overhead is also more difficult to precalculate and compensate for than in the ZX Spectrum's built-in encoding, where operations are much more predictable and fewer conditional jumps are involved.

The lowest baud rate that CALLBCA1 tolerates is 3500000/3/2883 ~ 404.67 baud (i.e. a BIT0 edge is 2883 T long, where T=1/3500000 s). Lower rates cause overflows in the edge reader, which interprets them as interruptions in the signal. Conversely, and as explained in the original post, the highest accepted baud rate is slightly above 4k baud; at higher rates the edge reader is too slow to handle the shortest pulses and skips them, leading to data loss and ultimately a CRC failure.

I'm not too sure of certain parts of the routine. The transfer rate detector assumes an error margin of 3*12=36 microseconds (the "threshold" in the CALLBCA1TONE paragraph) and the edge reader operates with a built-in delay of 2+7*4+2=32 microseconds (see CALLBCA1EDGE). In theory this is acceptable as long as these values are at least twice as long as 12 microseconds, the edge measurement "unit" of CALLBCA1EDGE. In practice I doubt real tapes can stay consistently inside these margins.
(if you can't see the banner right now my server is currently offline)

Offline ralferoo

  • Supporter
  • 6128 Plus
  • *
  • Posts: 969
  • Country: gb
  • Liked: 583
  • Likes Given: 222
Re: CALLBCA1, firmware-compatible tape loader
« Reply #5 on: 21:49, 07 December 13 »
This looks like good work. As you know I was doing some work on a loader a little while ago too, but that failed when I actually tried it on a real tape deck (none of the 464s could even load things they recorded themselves, I've bought new drive bands but not fitted them yet) and so I had to do my testing with a 6128 connected to an old hi-fi.

As part of that, it was interesting the massive variation in the drive speeds - admittedly all the bands were old and broken, but there was about a 20% speed variance. With that in mind, the adaptive baud rate system in the CPC really was a great idea. :)

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #6 on: 18:41, 08 December 13 »
I've just realised a couple of things:

1.- We can save one more byte: this paragraph

 ld yl,a
 dec l
 inc l
 jr z,$+3 ; SPECIAL CASE: EXTRA LAST PAGE
 inc h
 exx

should turn into

 ld yl,a
 xor a
 cp l
 adc h
 ld h,a
 exx

2.- There's no point in using the flag SAFEBCA1 in the three conditional snippets of code that define the border colours (ld c,"F"/"S"; xor $18/$19; xor $1A/$1F); it should be its own flag, that we might call DARKBCA1 because it sets the dark colours when the condition is true.
(if you can't see the banner right now my server is currently offline)

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #7 on: 18:14, 28 December 13 »
Two more size optimisations that save 7 bytes:

1.- Calculating the average length of the pilot tone edges can be done with fewer operations. This piece of code

 jr nc,callbca1init
 ld a,d ; WE NEED TO KEEP D AS SEEN ABOVE
 add 3 ; *!* A RELATIVELY LOW THRESHOLD..
 cp yh
 jr c,callbca1init ; NOT SHORT ENOUGH :-(
 sub 3*2 ; *!* TWICE THE PAST THRESHOLD..
 cp yh
 jr nc,callbca1init ; NOT LONG ENOUGH :-(

should become

 jr nc,callbca1init
 sub d ; WE NEED TO KEEP D AS SEEN ABOVE!
 add 8 ; *!* A RELATIVELY LOW THRESHOLD..
 sub 8*2+1 ; *!* TWICE THE PAST THRESHOLD
 jr nc,callbca1init ; D IS OUT OF BOUNDS!

Notice also that I raised the threshold from 3 to 8. With values below 8 my real CPC had trouble reading 4000 baud blocks.

2.- Detecting the sync pulse edges can be done with two SUB D rather than

 jr nc,callbca1init ; OVERFLOW, TRY AGAIN
 shr a
 cp d
 jr c,callbca1wait ; LONG EDGE, WAIT MORE

thus resulting in

 jr nc,callbca1init ; OVERFLOW, TRY AGAIN
 sub d
 sub d
 jr c,callbca1wait ; LONG EDGE, WAIT MORE

that is one byte shorter, albeit also SLIGHTLY less tolerant to noise.
(if you can't see the banner right now my server is currently offline)

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #8 on: 12:48, 14 January 14 »
I just came up with yet another size optimisation: we can save one more byte by turning

callbca1bits call callbca1edge
 ret nc
 call callbca1edge_
 ret nc

into

callbca1bits call callbca1edge
 call c,callbca1edge_
 ret nc

because I always forget that the Z80 is equipped with conditional calls and returns.
« Last Edit: 12:51, 14 January 14 by cngsoft »
(if you can't see the banner right now my server is currently offline)

Offline sigh

  • 6128 Plus
  • ******
  • Posts: 1.283
  • Liked: 576
  • Likes Given: 59
Re: CALLBCA1, firmware-compatible tape loader
« Reply #9 on: 03:22, 15 January 14 »
Silly question:

Is this how fast it would load on a real CPC? Would the tape deck be able to handle such speeds? I'm asking because the speed of how that picture was loading is incredibly fast!

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #10 on: 19:10, 15 January 14 »
Silly question:

Is this how fast it would load on a real CPC? Would the tape deck be able to handle such speeds? I'm asking because the speed of how that picture was loading is incredibly fast!
The video was recorded with CPCE's "tape speedup" disabled, so yes, you can load sixty-four kilobytes in a couple of minutes (and indeed, if a baud is a bit per second, 4000 baud = half a kilobyte per second). However, the relatively low fidelity of actual cassettes made homemade recordings at 2000 baud unreliable, and commercial distribution could afford somewhat higher rates (for example, the Speedlock encoding runs at ~2080 baud) at the expense of using more expensive materials and making read-only tapes. For my own tests, I plugged the tape input of my CPC 6128 into the audio output of my PC, and played the virtual tapes back.

Speaking of these tests, after noticing that the bit reader of CALLBCA1 had a slight bias, I turned

 ret nc
 sub d
 ld d,a ;*!* WE WILL NEED THE BIT 7 BELOW

into

 ret nc
 sbc d ; 20140115: BETTER AVERAGE? *SUB D
 ld d,a ;*!* WE WILL NEED THE BIT 7 BELOW

and got slightly more balanced measurements in the sampling of edges and pulses: a 4000 baud sample went from

+0       0   0%
+1       0   0%
+2       0   0%
+3       0   0%
+4    1530   0%
+5   17268   5%
+6    8482   2%
+7   43653  13%
+8  102523  32%

-8   10030   3%
-7   30192   9%
-6    7636   2%
-5   50403  15%
-4   30574   9%
-3    3205   1%
-2    8190   2%
-1    2185   0%

to

+0       0   0%
+1       0   0%
+2       0   0%
+3    1583   0%
+4   17296   5%
+5    8464   2%
+6   43419  13%
+7  102694  32%

-9   10068   3%
-8   30233   9%
-7    7505   2%
-6   50393  15%
-5   30657   9%
-4    3214   1%
-3    8085   2%
-2    2261   0%
-1       0   0%

whose averages of +6,38 and -6,29 are far more balanced than the previous values of +7,38 and -5,29.
« Last Edit: 12:31, 02 January 17 by cngsoft »
(if you can't see the banner right now my server is currently offline)

Offline pelrun

  • Supporter
  • 6128 Plus
  • *
  • Posts: 661
  • Country: au
    • index.php?action=treasury
  • Liked: 349
  • Likes Given: 217
Re: CALLBCA1, firmware-compatible tape loader
« Reply #11 on: 22:05, 04 April 14 »
I just used this in my new DSK2CDT2DISC tool, converted into pog form rsx form. Works a treat!

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #12 on: 18:11, 27 April 14 »
I just used this in my new DSK2CDT2DISC tool, converted into pog form rsx form. Works a treat!
Well, that's good to know, that my silly project could be useful to someone :-)

That being said, I developed two extra size optimisations and a couple of conceptual improvements while preparing yesterday's new release of BB4CPC (in time for RetroMadrid 2014, no less). The first one turns

 if DEAFBCA1
 else
  ld bc,$F782
  out (c),c
 endif
 ld b,$F6
 dw $71ED ; THE UNDOCUMENTED "OUT (C),0"!


 into

 if DEAFBCA1
  ld b,$F6
 else
  ld bc,$F782
  out (c),c
  dec b ; PATCH 20140419: SAVES ONE BYTE!
 endif
 dw $71ED ; THE UNDOCUMENTED "OUT (C),0"!


to save one byte when DEAFBCA1 is disabled on compile time. There's also

 ld a,8-1 ; DELAY (~32 NOPs)
 dec a
 jr nz,$-1


that becomes

 ld b,8-1 ; SAFETY DELAY! (1/2 SCANLINE)
 djnz $ ; 20140426: -1B, DEC A; JR NZ,$-1


that is one byte shorter.

As for the conceptual changes,

callbca1init ld e,0
 ld h,e
 ld l,e
 ld yh,d ; THE NEXT OPCODES SET ITS VALUE


can be turned into

callbca1init ld hl,0
 ld e,l
 ld yh,d ; THE NEXT OPCODES SET ITS VALUE


that does the same job and is one line shorter (same size in compiled bytes, though) and more readable to the human programmer.

Finally, if someone thinks that finding the wrong ID should be grounds for exiting the function with the carry flag reset (i.e. an error happened) we can change

 jr nc,callbca1init
 sub yl
 jr nz,callbca1init ; WRONG BYTE, GO BACK


into

 jr nc,callbca1init
 xor yl ; BETTER THAN "SUB YL": CARRY = 0
 jr nz,callbca1exit ; WRONG ID, I GIVE UP


because XOR YL will always reset the carry flag regardless of whether the ID is right or wrong while SUB YL might set it if the detected ID is lower than the expected one, an undesired behavior if we want wrong IDs to immediately abort the operation.
(if you can't see the banner right now my server is currently offline)

Offline cngsoft

  • CPC6128
  • ****
  • Posts: 159
  • Country: es
  • Liked: 634
  • Likes Given: 360
Re: CALLBCA1, firmware-compatible tape loader
« Reply #13 on: 13:32, 24 July 14 »
After a couple of experiments performed together with Dlfrsilver I devised one more improvement: the built-in delay in the edge reader (originally  ld a,8-1: dec a: jr nz,$-1 and later  ld b,8-1: djnz $) can be safely removed. Not only does the code become several bytes lighter, but the transfer rate ceiling rises too: we reached 6000 baud, i.e. 750 bytes per second. Needless to say, this is only going to work with a crystal-clear signal...
(if you can't see the banner right now my server is currently offline)

Offline cpcitor

  • The user previously known as FindYWay
  • CPC6128
  • ****
  • Posts: 298
  • Country: fr
  • My heart still runs on traditional CPC.
    • My code for the CPC.
  • Liked: 148
  • Likes Given: 370
Re: CALLBCA1, firmware-compatible tape loader
« Reply #14 on: 22:57, 14 October 20 »
Congrats @cngsoft! After 6 years I discover this thread and all the improvements you made.

6000 bauds! Not for actual tapes, but in practice this can be very useful in a machine-to-machine-via-audio-cable setup.

Thanks for sharing all the bits of improvement, too.

Did you post some "final" source code somewhere?
Had a CPC since 1985, currently software dev professional, including embedded systems.

I made the first CPC cross-dev environment that auto-installs C compiler and tools: cpc-dev-tool-chain: a portable toolchain for C/ASM development targeting CPC.