init files

This commit is contained in:
AArt1256 2025-11-13 19:07:39 +03:00
commit 8197a022bd
1409 changed files with 139317 additions and 0 deletions

193
loader/src/decompress/b2decomp.s Executable file
View file

@ -0,0 +1,193 @@
; ByteBoozer Decruncher /HCL May.2003
; B2 Decruncher December 2014
; with slight modifications by Krill
;
; Streaming B2 decruncher (6502, ca65 syntax). Entry point is Decrunch
; (exported as "decompress"). Compressed bytes are fetched with the three
; self-modified absolute,X loads at Get1/Get2/Get3; when X wraps to 0,
; GnbInc bumps all three high bytes and Gnb pulls in the next buffer page
; via the external GETBLOCK macro.
; External symbols (provided by the surrounding loader, contracts assumed):
; DECOMPVARS, decdestlo, loadaddrlo/hi, LOADCOMPD_TO,
; loadaddroffslo/hi, POLLBLOCK, GETBLOCK - TODO confirm against resident.s
decompress = Decrunch
decompsrc = Get1+1
.FEATURE labels_without_colons, leading_dot_in_identifiers
;Variables.. #Bytes
zp_base = DECOMPVARS ; -
bits = zp_base ;1 ; bit shift register, refilled by GetNewBits
put = decdestlo ; zeropage destination pointer (2 bytes)
; Shift one bit out of the "bits" register into carry, refilling the
; register from the stream when it runs empty ($00 after the asl).
.macro .GetNextBit
.local DgEnd
asl bits
bne DgEnd
jsr GetNewBits
DgEnd
.endmacro
; Read a gamma-style length code into A: stop bit 0 terminates,
; stop bit 1 shifts another data bit in; loop while A stays < $80.
.macro .GetLen
.local GlEnd
.local GlLoop
lda #1
GlLoop
.GetNextBit
bcc GlEnd
.GetNextBit
rol
bpl GlLoop
GlEnd
.endmacro
Decrunch
jsr Gnb ; fetch first buffer page, X := 0
lda loadaddrhi ; point all three stream readers at the buffer
sta Get2+2
sta Get3+2
ldx loadaddrlo
jsr GetNewBits ; Y = first stream byte = dest address low
tya
.if LOADCOMPD_TO
clc
adc loadaddroffslo ; apply load-address offset (carry kept in P)
php
.endif
storedadrl:
sta put
jsr GetNewBits ; Y = second stream byte = dest address high
tya
.if LOADCOMPD_TO
plp ; restore carry from low-byte addition
adc loadaddroffshi
.endif
storedadrh:
sta put + 1
lda #$80 ; prime shift register with marker bit only
sta bits
DLoop
POLLBLOCK ; loader hook: keep fetching blocks while decrunching
.GetNextBit
bcs Match
Literal
; Literal run.. get length.
.GetLen
sta LLen+1 ; self-modify loop terminator with run length
ldy #0
LLoop
Get3 lda $ff00,x ; stream read (operand patched at init/page wrap)
inx
bne *+5
jsr GnbInc
sta (put),y
iny
LLen cpy #0 ; compare against patched run length
bne LLoop
clc
tya ; advance destination pointer by run length
adc put
sta put
bcc *+4
inc put+1
iny ; Y was run length; $ff (max run) wraps to 0 here
beq DLoop ; max-length literal: fetch a fresh type bit
; Has to continue with a match..
Match
; Match.. get length.
.GetLen
sta MLen+1 ; self-modify copy-loop terminator
; Length 255 -> EOF
cmp #$ff
beq End
; Get num bits
cmp #2 ; carry = "long match" flag, combined with
lda #0 ; two more prefix bits into table index Y
rol
.GetNextBit
rol
.GetNextBit
rol
tay
lda Tab,y ; initial shift-register value for offset bits
beq MByte ; $00 entry: full byte offset, read directly
; Get bits < 8
MLoop1 .GetNextBit
rol
bcs MLoop1 ; loop until the table's marker bit falls out
bmi MShort ; sign bit set: offset fits in < 8 bits
MByte
; Get byte
eor #$ff ; A = offset high byte (one's complement)
tay
Get2 lda $ff00,x ; offset low byte straight from the stream
inx
bne MLong
jsr GnbInc
jmp MLong
MShort
ldy #$ff ; short offset: high byte is always $ff
MLong
;clc
adc put ; source = put - offset (negative offset add)
sta MLda+1 ; self-modify the copy source address
tya
adc put+1
sta MLda+2
ldy #$ff ; copy runs forward so overlapping
MLoop2 iny ; RLE-style matches work
MLda lda $b00b,y ; operand patched above
sta (put),y
MLen cpy #0 ; compare against patched match length
bne MLoop2
;sec
tya ; advance destination by match length
adc put
sta put
bcc *+4
inc put+1
jmp DLoop
; Refill the bit shift register: Y = next stream byte, shift register
; gets the byte with a marker bit rotated in from carry.
GetNewBits
Get1 ldy $ff00,x ; operand patched at init/page wrap
sty bits
rol bits
inx
beq GnbInc ; crossed page boundary: fetch next page
End rts
; Bump all three stream-reader high bytes, then fetch a new block.
GnbInc
inc Get1+2
inc Get2+2
inc Get3+2
; Fetch next buffer page. Preserves A, Y and flags; resets X to 0.
Gnb
php
pha
tya
pha
GETBLOCK Get1+2
pla
tay
pla
ldx #0
plp
rts
; Initial shift-register values per offset-class index
; (bit count each entry produces is noted on the right).
Tab
; Short offsets
.byte %11011111 ; 3
.byte %11111011 ; 6
.byte %00000000 ; 8
.byte %10000000 ; 10
; Long offsets
.byte %11101111 ; 4
.byte %11111101 ; 7
.byte %10000000 ; 10
.byte %11110000 ; 13

View file

@ -0,0 +1,266 @@
; BITNAX/bitfire LZ decompressor (6502, ca65 syntax), integrated with the
; loader. Entry point "decompress". Stream bytes are read through the
; self-modified absolute,X loads at bitfire_lz_sector_ptr1/2; page wraps
; go through .lz_next_page which refills via the external GETBLOCK macro.
; External symbols assumed from the loader: DECOMPVARS, loadaddrlo/hi,
; LOADCOMPD_TO, loadaddroffslo/hi, POLLBLOCK, GETBLOCK, SKIPWORD,
; MEM_DECOMP_TO_API - TODO confirm contracts against resident.s
decompsrc = bitfire_lz_sector_ptr1
.if MEM_DECOMP_TO_API
; cannot copy remaining uncompressed blob of unknown size
.error "***** MEM_DECOMP_TO_API is not supported for BITNAX. Copy compressed data to original location, then use MEM_DECOMP_API to decompress in-place. *****"
.endif
BITFIRE_DECOMP_ZERO_OVERLAP = 1
.FEATURE labels_without_colons, leading_dot_in_identifiers
.lz_bits = DECOMPVARS + 0 ;bit shift register
.lz_dst = DECOMPVARS + 1 ;destination pointer (2 bytes)
.lz_end = DECOMPVARS + 3 ;end address (2 bytes, zero-overlap mode)
.lz_tmp = DECOMPVARS + 5 ;scratch for Y across refills
decompress:
jsr .lz_next_page_ ;fetch first page, X := 0
lda loadaddrhi
sta bitfire_lz_sector_ptr2 + 1
ldx loadaddrlo
.if BITFIRE_DECOMP_ZERO_OVERLAP
ldy #$03; destination and end addresses
.else
ldy #$01; destination address
.endif
: lda (loadaddrlo),y ;copy stream header into .lz_dst/.lz_end
sta .lz_dst,y
inx
bne :+
jsr .lz_next_page
: dey
bpl :--
.if LOADCOMPD_TO
clc ;apply load-address offset to destination
lda loadaddroffslo
adc .lz_dst
sta .lz_dst
lda loadaddroffshi
adc .lz_dst+1
sta .lz_dst+1
.if BITFIRE_DECOMP_ZERO_OVERLAP
clc ;and to the end address as well
lda loadaddroffslo
adc .lz_end
sta .lz_end
lda loadaddroffshi
adc .lz_end+1
sta .lz_end+1
.endif
.endif
sec ;carry set -> refill stores .lz_bits
.lz_type_refill
jsr .lz_refill_bits ;refill bit buffer .lz_bits
;******** Start the next match/literal run ********
.lz_type_check
bcc .lz_do_match
beq .lz_type_refill ;we will fall through on entry
;******** Process literal run ********
lda #$00
:
rol ;-> a = $01 after first round
asl .lz_bits
bne *+5
jsr .lz_refill_bits ;kills y
bcc .lz_lrun_gotten
asl .lz_bits
bne :-
jsr .lz_refill_bits
bne :-
.lz_lrun_gotten
sta .lz_lcopy_len ;Store LSB of run-length
ldy #$00
.lz_lcopy
bitfire_lz_sector_ptr2 = * + 1 ;Copy the literal data, forward or overlap is getting a pain in the ass.
lda $ff00,x
sta (.lz_dst),y
inx
bne :+
clc ;carry clear -> refill must not clobber .lz_bits
jsr .lz_next_page
:
iny
.lz_lcopy_len = * + 1
cpy #$00
bne .lz_lcopy
tya
;.if LOAD_VIA_KERNAL_FALLBACK | (LOAD_UNDER_D000_DFFF & (PLATFORM <> diskio::platform::COMMODORE_16))
bne *+5
jmp .lz_maximum ;maximum literal run, bump sector pointers and so on and force new type bit
;.else
; beq .lz_maximum ;maximum literal run, bump sector pointers and so on and force new type bit
;.endif
;XXX TODO can we reuse the same code? In one case continue with match, in other case redecide
clc ;advance destination by literal run length
adc .lz_dst
sta .lz_dst
bcc *+4
inc .lz_dst+1
POLLBLOCK
;no need for a type bit, after each literal a match follows, except for maximum runlength literals
;******** Process match ********
.lz_do_match
lda #$01 ;this could be made shorter by using the last bitfetch of the upcoming loop and restoring the carry again by a cmp #$02. Saves bytes, but makes things slower, as eof check is also done with all short matches then
asl .lz_bits ;first length bit (where a one identifies
bne *+5 ;a two-byte match)
jsr .lz_refill_bits
bcc .lz_get_offs ;all done, length is 2, skip further bitfetches (and eof check)
:
asl .lz_bits ;gamma decode remaining length bits
bne *+5
jsr .lz_refill_bits
rol
asl .lz_bits
bne *+5
jsr .lz_refill_bits
bcc :-
.lz_got_len
tay ;XXX TODO could this be placed elsewhere to make the tay obsolete?
beq .lz_end_of_file ;A 257-byte (=>$00) run serves as a sentinel, but not with zero-overlap, except when depacking from a non inplace address, then it is still appended
.lz_get_offs
sta .lz_mcopy_len ;store length at final destination
lda #%11000000 ;fetch 2 more prefix bits
rol ;previous bit is still in carry \o/
:
asl .lz_bits
bne *+5
jsr .lz_refill_bits
rol
bcs :-
beq .lz_8_and_more ;0 + 8 bits to fetch, branch out before table lookup to save a few cycles and one byte in the table, also save complexity on the bitfetcher
tay
lda .lz_lentab,y ;initial shift value for this offset class
: ;same as above
asl .lz_bits ;XXX same code as above, so annoying :-(
bne *+5
jsr .lz_refill_bits
rol
bcs :-
bmi .lz_less_than_8 ;either 3,4,6 or 7 bits fetched -> highbyte will be $ff
.lz_8_and_more
jsr .lz_refill_bits
eor #$ff ;5 of 13, 2 of 10, 0 of 8 bits fetched as highbyte, lowbyte still to be fetched
sta .lz_tmp ;XXX this is a pain in the arse that A and Y need to be swapped :-(
tya
ldy .lz_tmp
SKIPWORD
.lz_less_than_8
ldy #$ff ;XXX TODO silly, y is set twice in short case
adc .lz_dst ;subtract offset from lz_dst
sta .lz_m+1 ;self-modify the copy source address
tya ;hibyte
adc .lz_dst+1
sta .lz_m+2
ldy #$ff ;The copy loop. This needs to be run
;forwards since RLE-style matches can overlap the destination
.lz_mcopy
iny
.lz_m lda $face,y ;copy one byte (operand patched above)
sta (.lz_dst),y
.lz_mcopy_len = * + 1
cpy #$ff
bne .lz_mcopy
tya ;advance destination pointer
; sec ;XXX TODO carry set = type check needed, cleared (literal) = match follows anyway
adc .lz_dst
sta .lz_dst
.if BITFIRE_DECOMP_ZERO_OVERLAP = 0
.lz_skip_poll bcc :+
.lz_maximum inc .lz_dst+1 ;this is also used by maximum length
bcs .lz_skip_end
:
.else
bcc :+ ;proceed to check
.lz_maximum
inc .lz_dst+1 ;advance hi byte
; lda .lz_dst ;if entering via .lz_maximum, a = 0, so we would pass the following check only if the endadress is @ $xx00
: ;if so, the endaddress can't be $xx00 and the highbyte check will fail, as we just successfully wrote a literal with type bit, so the end address must be greater than the current lz_dst, as either another literal or match must follow. Can you still follow me?! :-D
eor .lz_end ;check end address
.lz_skip_poll beq .lz_check_end ;all okay, poll for a new block
.endif ; BITFIRE_DECOMP_ZERO_OVERLAP
POLLBLOCK
.lz_skip_end
;literals needing an explicit type bit
asl .lz_bits ;fetch next type bit
jmp .lz_type_check
.if BITFIRE_DECOMP_ZERO_OVERLAP
.lz_check_end
lda .lz_dst+1 ;check highbyte
eor .lz_end+1
bne .lz_skip_end ;skip poll, so that only one branch needs to be manipulated
;sta .barrier ;clear barrier and force to load until EOF, XXX does not work, but will at least force one additional block before leaving as barrier will be set again upon next block being fetched. Will overlap be > than 2 blocks? most likely not? CRAP, tony taught me that there is /o\
;lda #$ff
;sta bitfire_load_addr_hi ;needed if the barrier method will not work out, plain jump to poll loop will fail on stand alone depack?
;jmp .lz_next_page ;load any remaining literal blob if there, or exit with rts in case of plain decomp (rts there instead of php). So we are forced until either the sector_ptr reaches $00xx or EOF happens, so nothing can go wrong
; fetching any remaining final literals uncompressed blob is performed by the caller (loadcompd in resident.s)
.endif ; BITFIRE_DECOMP_ZERO_OVERLAP
.lz_end_of_file
rts
; Fetch the next stream byte into Y; on carry set also reload the bit
; shift register from it. Falls through to the page-wrap handler when X
; wraps. Preserves A and flags across the block fetch.
.lz_refill_bits
bitfire_lz_sector_ptr1 = * + 1
bitfire_load_addr_hi = * + 2
ldy $ff00,x ;operand patched at init/page wrap
inx
bne .lz_same_page
.lz_next_page
inc bitfire_load_addr_hi ;bump both stream-reader high bytes
inc bitfire_lz_sector_ptr2 + 1
.lz_next_page_
php
pha
sty .lz_tmp
GETBLOCK bitfire_load_addr_hi
ldx #0
ldy .lz_tmp
pla
plp
.lz_same_page
;store bits? happens on all calls, except when a whole literal is fetched
bcc :+ ;only store lz_bits if carry is set (in all cases, except when literal is fetched for offset)
sty .lz_bits
rol .lz_bits ;rotate marker bit in from carry
: rts
; Initial shift-register values per offset-class index (indexed from 1,
; hence the "* - 1" base).
.lz_lentab = * - 1
;short offset init values
;.byte %00000000 ;2
.byte %11011111 ;0
.byte %11111011 ;1
.byte %10000000 ;3
;long offset init values
.byte %11101111 ;offset 0
.byte %11111101 ;offset 1
.byte %10000000 ;offset 2
.byte %11110000 ;offset 3

View file

@ -0,0 +1,305 @@
; Regular (doynax-style) LZ decompressor (6502, ca65 syntax), integrated
; with the loader: the sector-fetch hook is wired to the external
; GETBLOCK macro, and "decompress" reads the destination address from the
; stream before falling into lz_decrunch.
; External symbols assumed from the loader: DECOMPVARS, decdestlo,
; loadaddrlo/hi, LOADCOMPD_TO, loadaddroffslo/hi, POLLBLOCK, GETBLOCK
; - TODO confirm contracts against resident.s
decompsrc = lz_sector_ptr1
;-------------------------------------------------------------------------------
;Regular version of the Lempel-Ziv decompressor
;-------------------------------------------------------------------------------
lz_dst = decdestlo ;Decompression destination pointer.
;Initialize this to whatever address
;you want to decompress to
lz_bits = DECOMPVARS + $00 ;Shift register. Initialized to $80
;for a new file
lz_scratch = DECOMPVARS + $01 ;Temporary zeropage storage
lz_ybuffer = DECOMPVARS + $02 ;Temporary register storage when fetching sector
lz_sector = $ff00 ;The one-page buffer from which the
;compressed data is actually read,
;and which gets refilled by
;lz_fetch_sector.
;-------------------------------------------------------------------------------
;This is the user's hook to replenish the sector buffer with some new bytes.
;
;A and Y are expected to be preserved while carry must remain set on exit.
;X should point to the first byte of the new data, e.g. zero for a full 256-byte
;page of data or two to skip past the sector and track links.
;
;When fetching from a larger in-memory array rather than a single sector buffer
;the lz_sector_ptr1..3 pointers will need to be patched up
;-------------------------------------------------------------------------------
lz_fetch_sector:
inc lz_sector_ptr1 + 1 ;bump all three self-modified stream readers
inc lz_sector_ptr2 + 1
inc lz_sector_ptr3 + 1
lz_fetch_sector_:
pha ;preserve A and Y around the block fetch
sty lz_ybuffer
GETBLOCK lz_sector_ptr1 + 1
ldx #0
ldy lz_ybuffer
pla
sec ;carry must be set on exit (hook contract)
rts
decompress:
jsr lz_fetch_sector_ ;fetch first page, X := 0
lda loadaddrhi ;point remaining stream readers at the buffer
sta lz_sector_ptr2 + 1
sta lz_sector_ptr3 + 1
ldx loadaddrlo
jsr _lz_refill_bits ;Y = first stream byte = dest address low
tya
storedadrl:
sta lz_dst + $00
jsr _lz_refill_bits ;Y = second stream byte = dest address high
tya
storedadrh:
sta lz_dst + $01
.if LOADCOMPD_TO
clc ;apply load-address offset to destination
lda loadaddroffslo
adc lz_dst + $00
sta lz_dst + $00
lda loadaddroffshi
adc lz_dst + $01
sta lz_dst + $01
.endif
;-------------------------------------------------------------------------------
;This is the main lz_decrunch function which may be called to decompress an
;entire file.
;
;On entry and exit the X register points to the next available byte in the
;sector buffer, in ascending order from $00 to $ff.
;This implies that the initial sector must have already been fetched, and that a
;file ending with X wrapped to $00 will have needlessly fetched an extra sector
;(which may be taken advantage of when decoding a contiguous set of files.)
;-------------------------------------------------------------------------------
;******** Start the next match/literal run ********
lz_decrunch: sec ;This is the main entry point. Forcibly
_lz_type_refill:
jsr _lz_refill_bits ;fill up the the bit buffer on entry
bne _lz_type_cont ;(BRA)
;Wrap the high-byte of the destination pointer.
_lz_mfinish: bcc *+4
_lz_maximum: inc lz_dst+1 ;This is also used by maximum length
;literals needing an explicit type bit
POLLBLOCK
;Literal or match to follow?
asl lz_bits
_lz_type_cont: bcc _lz_do_match
beq lz_decrunch
;******** Process literal run ********
lda #%00000000 ;Decode run length
_lz_lrun_loop: rol
asl lz_bits
bcs _lz_lrun_test
_lz_lrun_back: asl lz_bits
bne _lz_lrun_loop
jsr _lz_refill_bits
bne _lz_lrun_loop ;(BRA)
_lz_lrun_test: bne _lz_lrun_gotten
jsr _lz_refill_bits
bcc _lz_lrun_back
_lz_lrun_gotten:
sta _lz_copy_cnt+1 ;Store LSB of run-length
ldy #$00
_lz_lcopy:
lz_sector_ptr2 = *+1 ;Copy the literal data. Note the
lda lz_sector,x
inx
bne *+5
jsr lz_fetch_sector ;Grab a new sector for the literal loop
sta (lz_dst),y
iny
_lz_copy_cnt: cpy #$00 ;operand patched with run length above
bne _lz_lcopy
;Time to advance the destination pointer.
;Maximum run length literals exit here as a type-bit needs
;to be fetched afterwards
tya
beq _lz_maximum
clc
adc lz_dst+0
sta lz_dst+0
bcc *+4
inc lz_dst+1
POLLBLOCK
;One literal run following another only makes sense if the
;first run is of maximum length and had to be split. As that
;case has been taken care of we can safely omit the type bit
;here
;******** Process match ********
_lz_do_match: lda #%00100000 ;Determine offset length by a two-bit
_lz_moff_range: asl lz_bits ;prefix combined with the first run
bne *+5 ;length bit (where a one identifies
jsr _lz_refill_bits ;a two-byte match).
rol ;The rest of the length bits will
bcc _lz_moff_range ;then follow *after* the offset data
tay
lda _lz_moff_length,y ;initial shift value for this offset class
beq _lz_moff_far ;$00 entry: full 8-bit low byte from stream
_lz_moff_loop: asl lz_bits ;Load partial offset byte
bne *+9
sty lz_scratch ;refill clobbers Y; save/restore it
jsr _lz_refill_bits
ldy lz_scratch
rol
bcc _lz_moff_loop
bmi _lz_moff_near
_lz_moff_far: sta lz_scratch ;Save the bits we just read as the
;high-byte
lz_sector_ptr3 = *+1
lda lz_sector,x ;For large offsets we can load the
inx ;low-byte straight from the stream
bne *+5 ;without going through the shift
jsr lz_fetch_sector ;register
; sec
adc _lz_moff_adjust_lo,y
bcs _lz_moff_pageok
dec lz_scratch ;propagate borrow into the high byte
sec
_lz_moff_pageok:
adc lz_dst+0 ;source = destination - offset
sta _lz_match+0
lda lz_scratch
adc _lz_moff_adjust_hi,y
sec
bcs _lz_moff_join ;(BRA)
_lz_moff_near:
; sec ;Special case handling of <8 bit offsets.
adc _lz_moff_adjust_lo,y;We can safely ignore the MSB from
; sec ;the base adjustment table as the
adc lz_dst+0 ;maximum base (for a 4/5/6/7 bit
sta _lz_match+0 ;length sequence) is 113
lda #$ff
_lz_moff_join: adc lz_dst+1
sta _lz_match+1
cpy #$04 ;Get any remaning run length bits
lda #%00000001
bcs _lz_mrun_gotten ;two-byte matches carry no extra length bits
_lz_mrun_loop: asl lz_bits
bne *+5
jsr _lz_refill_bits
rol
asl lz_bits
bcc _lz_mrun_loop
bne _lz_mrun_gotten
jsr _lz_refill_bits
bcc _lz_mrun_loop
_lz_mrun_gotten:
tay ;A 257-byte (=>$00) run serves as a
beq _lz_end_of_file ;sentinel
sta _lz_mcopy_len
ldy #$ff ;The copy loop. This needs to be run
_lz_mcopy: iny ;forwards since RLE-style matches can
_lz_match = *+1 ;overlap the destination
lda $ffff,y ;operand patched above with the source addr
sta (lz_dst),y
_lz_mcopy_len = *+1
cpy #$ff
bne _lz_mcopy
tya ;Advance destination pointer
; sec
adc lz_dst+0
sta lz_dst+0
jmp _lz_mfinish
;******** Fetch some more bits to work with ********
;Y = next stream byte; shift register reloaded with a marker bit rotated
;in from carry (the sector hook guarantees carry set).
lz_sector_ptr1 = *+1
_lz_refill_bits:
ldy lz_sector,x ;operand patched at init/page wrap
sty lz_bits
inx
bne *+5
jsr lz_fetch_sector
; sec
rol lz_bits
_lz_end_of_file:
rts
;******** Offset coding tables ********
;This length table is a bit funky. The idea here is to use the
;value as the initial value of the shift register instead of
;keeping a separate counter.
;In other words we iterate until the leading one is shifted out.
;Then afterwards the bit just below it (our new sign bit) is set
;if the offset is shorter than 8-bits, and conversely it's
;cleared if we need to fetch a separate low-byte
;as well.
;The fact that the sign bit is cleared as a flag is compensated
;for in the lz_moff_adjust_hi table
_lz_moff_length:
;Long (>2 byte matches)
.byte %00011111 ;4 bits
.byte %00000011 ;7 bits
.byte %01011111 ;10 bits
.byte %00001011 ;13 bits
;Short (2 byte matches)
.byte %01011111 ;10 bits
.byte %00000000 ;8 bits
.byte %00000111 ;6 bits
.byte %00111111 ;3 bits
_lz_moff_adjust_lo:
;Long (>2 byte matches)
.byte %11111110 ;1-16
.byte %11101110 ;17-144
.byte %01101110 ;145-1168
.byte %01101110 ;1169-9360
;Short (2 byte matches)
.byte %10110110 ;329-1352
.byte %10110110 ;73-328
.byte %11110110 ;9-72
.byte %11111110 ;1-8
_lz_moff_adjust_hi = *-2
;Long (>2 byte matches)
; .byte %11111111 ;1-16 (unreferenced)
; .byte %11111111 ;17-144 (unreferenced)
.byte %01111111 ;145-1168
.byte %01111011 ;1169-9360
;Short (2 byte matches)
.byte %01111110 ;329-1352
.byte %11111110 ;73-328
; .byte %11111111 ;9-72 (unreferenced)
; .byte %11111111 ;1-8 (unreferenced)

611
loader/src/decompress/exodecomp.s Executable file
View file

@ -0,0 +1,611 @@
; slightly modified by Krill/Plush for loader integration
;
; Exomizer decruncher (6502, ca65 syntax). Entry point is "decrunch"
; (exported as "decompress"). Crunched bytes are pulled one at a time via
; get_crunched_byte, which wraps the loader's external getbyte routine
; while preserving the flags. External symbols assumed from the loader:
; DECOMPVARS, decdestlo/hi, getbyte, LOADCOMPD_TO, loadaddroffslo/hi,
; MEM_DECOMP_TO_API - TODO confirm contracts against resident.s
;
; Copyright (c) 2002 - 2020 Magnus Lind.
;
; This software is provided 'as-is', without any express or implied warranty.
; In no event will the authors be held liable for any damages arising from
; the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software in a
; product, an acknowledgment in the product documentation would be
; appreciated but is not required.
;
; 2. Altered source versions must be plainly marked as such, and must not
; be misrepresented as being the original software.
;
; 3. This notice may not be removed or altered from any distribution.
;
; 4. The names of this software and/or it's copyright holders may not be
; used to endorse or promote products derived from this software without
; specific prior written permission.
;
; -------------------------------------------------------------------
; Known quirks:
; Can't handle a sequence reference that ends at $ffff. It is left in
; since it is a corner case and fixing it impacts negatively on
; performance or backwards compatibility.
; A simple way to work around this is to not decrunch to address $ffff.
; -------------------------------------------------------------------
; Controls if the shared get_bits routines should be inlined or not.
;INLINE_GET_BITS=1
.IFNDEF INLINE_GET_BITS
INLINE_GET_BITS = 0
.ENDIF
; -------------------------------------------------------------------
; if literal sequences is not used (the data was crunched with the -c
; flag) then the following line can be uncommented for shorter and
; slightly faster code.
;LITERAL_SEQUENCES_NOT_USED = 1
.IFNDEF LITERAL_SEQUENCES_NOT_USED
LITERAL_SEQUENCES_NOT_USED = 0
.ENDIF
; -------------------------------------------------------------------
; if the sequence length is limited to 256 (the data was crunched with
; the -M256 flag) then the following line can be uncommented for
; shorter and slightly faster code.
;MAX_SEQUENCE_LENGTH_256 = 1
.IFNDEF MAX_SEQUENCE_LENGTH_256
MAX_SEQUENCE_LENGTH_256 = 0
.ENDIF
; -------------------------------------------------------------------
; if the sequence length 3 has its own offset table (the data was
; crunched with the -P+16 flag) then the following
; line must be uncommented.
;EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE = 1
.IFNDEF EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE
EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE = 0
.ENDIF
; -------------------------------------------------------------------
; if sequence offsets are not reused (the data was crunched with the
; -P-32 flag) then the following line must be uncommented. Uncommenting the
; line will also result in shorter and slightly faster code.
;DONT_REUSE_OFFSET = 1
.IFNDEF DONT_REUSE_OFFSET
DONT_REUSE_OFFSET = 0
.ENDIF
; -------------------------------------------------------------------
; if decrunching forwards then the following line must be uncommented.
DECRUNCH_FORWARDS = 1
.IFNDEF DECRUNCH_FORWARDS
DECRUNCH_FORWARDS = 0
.ENDIF
; -------------------------------------------------------------------
; if split encoding is used (the data is crunched with the -E flag)
; then the following line must be uncommented.
;ENABLE_SPLIT_ENCODING = 1
.IFNDEF ENABLE_SPLIT_ENCODING
ENABLE_SPLIT_ENCODING = 0
.ENDIF
; -------------------------------------------------------------------
; The decruncher jsr:s to the get_crunched_byte address when it wants to
; read a crunched byte into A. This subroutine has to preserve X and Y
; register and must not modify the state of the carry nor the overflow flag.
; -------------------------------------------------------------------
;.import get_crunched_byte
; -------------------------------------------------------------------
; This function is the heart of the decruncher. (for non split crunched files)
; It initializes the decruncher zeropage locations and precalculates the
; decrunch tables and decrunches the data
; This function will not change the interrupt status bit and it will not
; modify the memory configuration.
; -------------------------------------------------------------------
;.export decrunch
.IF ENABLE_SPLIT_ENCODING <> 0
; -------------------------------------------------------------------
; To decrunch files crunched with the split feature (-E) you can't use the
; decrunch function. Instead you call the split_decrunch function. But you
; can only do this if the decrunch table contains the encoding used by the
; file you are decrunching. To generate the correct content for the decrunch
; table call set the get_crunched_byte function to point to the encoding data
; and then call the split_gentable function.
; -------------------------------------------------------------------
.export split_gentable
.export split_decrunch
.ENDIF
; -------------------------------------------------------------------
; zero page addresses used
; -------------------------------------------------------------------
zp_len_lo = DECOMPVARS + 0; $9e
zp_len_hi = DECOMPVARS + 1; $9f
zp_src_lo = DECOMPVARS + 2; $ae
zp_src_hi = zp_src_lo + 1
zp_bits_hi = DECOMPVARS + 4; $a7
.IF DONT_REUSE_OFFSET = 0
zp_ro_state = DECOMPVARS + 5; $a8
.ENDIF
zp_bitbuf = DECOMPVARS + 6; $fd
.if MEM_DECOMP_TO_API
zp_dest_lo = decdestlo; zp_bitbuf + 1 ; dest addr lo
zp_dest_hi = decdesthi; zp_bitbuf + 2 ; dest addr hi
.else
zp_dest_lo = zp_bitbuf + 1 ; dest addr lo
zp_dest_hi = zp_bitbuf + 2 ; dest addr hi
.endif
.IF EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE <> 0
encoded_entries = 68
.ELSE
encoded_entries = 52
.ENDIF
tabl_bi = decrunch_table
tabl_lo = decrunch_table + encoded_entries
tabl_hi = decrunch_table + encoded_entries * 2
decompress = decrunch
; Wrap the loader's getbyte so flags (carry/overflow) are preserved, as
; the decruncher's get_crunched_byte contract requires.
get_crunched_byte:
php
jsr getbyte
plp
rts
;; refill bits is always inlined
.MACRO mac_refill_bits
pha
jsr get_crunched_byte
rol
sta zp_bitbuf
pla
.ENDMACRO
.MACRO mac_get_bits
.IF INLINE_GET_BITS <> 0
.SCOPE
adc #$80 ; needs c=0, affects v
asl
bpl gb_skip
gb_next:
asl zp_bitbuf
bne gb_ok
mac_refill_bits
gb_ok:
rol
bmi gb_next
gb_skip:
bvc skip
gb_get_hi:
sec
sta zp_bits_hi
jsr get_crunched_byte
skip:
.ENDSCOPE
.ELSE
jsr get_bits
.ENDIF
.ENDMACRO
.MACRO mac_init_zp
;.SCOPE
; -------------------------------------------------------------------
; init zeropage and x reg. (8 bytes)
;
init_zp:
.if MEM_DECOMP_TO_API
jsr get_crunched_byte
storedadrh:
sta zp_dest_hi
jsr get_crunched_byte
storedadrl:
sta zp_dest_lo
jsr get_crunched_byte
sta zp_bitbuf
.else
jsr get_crunched_byte
sta zp_bitbuf - 1,x
dex
bne init_zp
.endif
.if LOADCOMPD_TO
clc ; apply load-address offset to destination
lda loadaddroffslo
adc zp_dest_lo
sta zp_dest_lo
lda loadaddroffshi
adc zp_dest_hi
sta zp_dest_hi
.endif
;.ENDSCOPE
.ENDMACRO
.IF INLINE_GET_BITS = 0
get_bits:
adc #$80 ; needs c=0, affects v
asl
bpl gb_skip
gb_next:
asl zp_bitbuf
bne gb_ok
mac_refill_bits
gb_ok:
rol
bmi gb_next
gb_skip:
bvs gb_get_hi
rts
gb_get_hi:
sec
sta zp_bits_hi
jmp get_crunched_byte
.ENDIF
; -------------------------------------------------------------------
; no code below this comment has to be modified in order to generate
; a working decruncher of this source file.
; However, you may want to relocate the tables last in the file to a
; more suitable address.
; -------------------------------------------------------------------
; -------------------------------------------------------------------
; jsr this label to decrunch, it will in turn init the tables and
; call the decruncher
; no constraints on register content, however the
; decimal flag has to be cleared (it almost always is, otherwise do a cld)
decrunch:
.IF ENABLE_SPLIT_ENCODING <> 0
ldx #3
jsr internal_gentable
jmp normal_decrunch
split_gentable:
ldx #1
internal_gentable:
jsr split_init_zp
.ELSE
ldx #3
mac_init_zp
.ENDIF
; -------------------------------------------------------------------
; calculate tables (64 bytes) + get_bits macro
; x must be #0 when entering
;
ldy #0
clc
table_gen:
tax
tya
and #$0f
sta tabl_lo,y
beq shortcut ; start a new sequence
; -------------------------------------------------------------------
txa
adc tabl_lo - 1,y
sta tabl_lo,y
lda zp_len_hi
adc tabl_hi - 1,y
shortcut:
sta tabl_hi,y
; -------------------------------------------------------------------
lda #$01
sta <zp_len_hi
lda #$78 ; %01111000
mac_get_bits
; -------------------------------------------------------------------
lsr
tax
beq rolled
php
rolle:
asl zp_len_hi
sec
ror
dex
bne rolle
plp
rolled:
ror
sta tabl_bi,y
bmi no_fixup_lohi
lda zp_len_hi
stx zp_len_hi
.BYTE $24 ; bit zp opcode: skips the following txa
no_fixup_lohi:
txa
; -------------------------------------------------------------------
iny
cpy #encoded_entries
bne table_gen
; -------------------------------------------------------------------
.IF ENABLE_SPLIT_ENCODING <> 0
rts
split_decrunch:
ldx #3
jsr split_init_zp
; X reg must be 0 here
sec
normal_decrunch:
.ENDIF
; -------------------------------------------------------------------
; prepare for main decruncher
.IF DONT_REUSE_OFFSET = 0
ror zp_ro_state
sec
.ENDIF
ldy zp_dest_lo
stx zp_dest_lo
stx zp_bits_hi
; -------------------------------------------------------------------
; copy one literal byte to destination (11 bytes)
;
literal_start1:
.IF DECRUNCH_FORWARDS = 0
tya
bne no_hi_decr
dec zp_dest_hi
.IF DONT_REUSE_OFFSET = 0
dec zp_src_hi
.ENDIF
no_hi_decr:
dey
.ENDIF
jsr get_crunched_byte
sta (zp_dest_lo),y
.IF DECRUNCH_FORWARDS <> 0
iny
bne skip_hi_incr
inc zp_dest_hi
.IF DONT_REUSE_OFFSET = 0
inc zp_src_hi
.ENDIF
skip_hi_incr:
.ENDIF
; -------------------------------------------------------------------
; fetch sequence length index (15 bytes)
; x must be #0 when entering and contains the length index + 1
; when exiting or 0 for literal byte
next_round:
.IF DONT_REUSE_OFFSET = 0
ror zp_ro_state
.ENDIF
dex
lda zp_bitbuf
no_literal1:
asl
bne nofetch8
jsr get_crunched_byte
rol
nofetch8:
inx
bcc no_literal1
sta zp_bitbuf
; -------------------------------------------------------------------
; check for literal byte (2 bytes)
;
beq literal_start1
; -------------------------------------------------------------------
; check for decrunch done and literal sequences (4 bytes)
;
cpx #$11
.IF INLINE_GET_BITS <> 0
bcc skip_jmp
jmp exit_or_lit_seq
skip_jmp:
.ELSE
bcs exit_or_lit_seq
.ENDIF
; -------------------------------------------------------------------
; calulate length of sequence (zp_len) (18(11) bytes) + get_bits macro
;
lda tabl_bi - 1,x
mac_get_bits
adc tabl_lo - 1,x ; we have now calculated zp_len_lo
sta zp_len_lo
.IF MAX_SEQUENCE_LENGTH_256 = 0
lda zp_bits_hi
adc tabl_hi - 1,x ; c = 0 after this.
sta zp_len_hi
; -------------------------------------------------------------------
; here we decide what offset table to use (27(26) bytes) + get_bits_nc macro
; z-flag reflects zp_len_hi here
;
ldx zp_len_lo
.ELSE
tax
.ENDIF
.IF MAX_SEQUENCE_LENGTH_256 = 0
lda #0
.ENDIF
.IF DONT_REUSE_OFFSET = 0
; -------------------------------------------------------------------
; here we decide to reuse latest offset or not (13(15) bytes)
;
bit <zp_ro_state
bmi test_reuse
no_reuse:
.ENDIF
; -------------------------------------------------------------------
; here we decide what offset table to use (17(15) bytes)
;
.IF MAX_SEQUENCE_LENGTH_256 = 0
sta <zp_bits_hi
.ENDIF
lda #$e1
.IF EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE <> 0
cpx #$04
.ELSE
cpx #$03
.ENDIF
bcs gbnc2_next
lda tabl_bit - 1,x
gbnc2_next:
asl zp_bitbuf
bne gbnc2_ok
tax
jsr get_crunched_byte
rol
sta zp_bitbuf
txa
gbnc2_ok:
rol
bcs gbnc2_next
tax
; -------------------------------------------------------------------
; calulate absolute offset (zp_src) (17 bytes) + get_bits macro
;
lda tabl_bi,x
mac_get_bits
.IF DECRUNCH_FORWARDS = 0
adc tabl_lo,x
sta zp_src_lo
lda zp_bits_hi
adc tabl_hi,x
adc zp_dest_hi
sta zp_src_hi
.ELSE
clc
adc tabl_lo,x
eor #$ff
sta zp_src_lo
lda zp_bits_hi
adc tabl_hi,x
eor #$ff
adc zp_dest_hi
sta zp_src_hi
clc
.ENDIF
; -------------------------------------------------------------------
; prepare for copy loop (2 bytes)
;
ldx zp_len_lo
; -------------------------------------------------------------------
; main copy loop (30 bytes)
;
copy_next:
.IF DECRUNCH_FORWARDS = 0
tya
bne copy_skip_hi
dec zp_dest_hi
dec zp_src_hi
copy_skip_hi:
dey
.ENDIF
.IF LITERAL_SEQUENCES_NOT_USED = 0
bcs get_literal_byte
.ENDIF
lda (zp_src_lo),y
literal_byte_gotten:
sta (zp_dest_lo),y
.IF DECRUNCH_FORWARDS <> 0
iny
bne copy_skip_hi
inc zp_dest_hi
inc zp_src_hi
copy_skip_hi:
.ENDIF
dex
bne copy_next
.IF MAX_SEQUENCE_LENGTH_256 = 0
lda zp_len_hi
.IF INLINE_GET_BITS <> 0
bne copy_next_hi
.ENDIF
.ENDIF
stx zp_bits_hi
.IF INLINE_GET_BITS = 0
beq next_round
.ELSE
jmp next_round
.ENDIF
.IF MAX_SEQUENCE_LENGTH_256 = 0
copy_next_hi:
dec zp_len_hi
jmp copy_next
.ENDIF
.IF DONT_REUSE_OFFSET = 0
; -------------------------------------------------------------------
; test for offset reuse (11 bytes)
;
test_reuse:
bvs no_reuse
.IF MAX_SEQUENCE_LENGTH_256 <> 0
lda #$00 ; fetch one bit
.ENDIF
asl zp_bitbuf
bne gbnc1_ok
pha
jsr get_crunched_byte
rol
sta zp_bitbuf
pla
gbnc1_ok:
rol
beq no_reuse ; bit == 0 => C=0, no reuse
bne copy_next ; bit != 0 => C=0, reuse previous offset
.ENDIF
; -------------------------------------------------------------------
; exit or literal sequence handling (16(12) bytes)
;
exit_or_lit_seq:
.IF LITERAL_SEQUENCES_NOT_USED = 0
beq decr_exit
jsr get_crunched_byte
.IF MAX_SEQUENCE_LENGTH_256 = 0
sta zp_len_hi
.ENDIF
jsr get_crunched_byte
tax
bcs copy_next
decr_exit:
.ENDIF
rts
.IF LITERAL_SEQUENCES_NOT_USED = 0
get_literal_byte:
jsr get_crunched_byte
bcs literal_byte_gotten
.ENDIF
.IF EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE <> 0
; -------------------------------------------------------------------
; the static table used for bits+offset for lengths 1, 2 and 3 (3 bytes)
; bits 2, 4, 4 and offsets 64, 48, 32 corresponding to
; %10010000, %11100011, %11100010
tabl_bit:
.BYTE $90, $e3, $e2
.ELSE
; -------------------------------------------------------------------
; the static table used for bits+offset for lengths 1 and 2 (2 bytes)
; bits 2, 4 and offsets 48, 32 corresponding to %10001100, %11100010
tabl_bit:
.BYTE $8c, $e2
.ENDIF
.IF ENABLE_SPLIT_ENCODING <> 0
split_init_zp:
mac_init_zp
rts
.ENDIF
; -------------------------------------------------------------------
; end of decruncher
; -------------------------------------------------------------------
; -------------------------------------------------------------------
; this 156 (204) byte table area may be relocated. It may also be
; clobbered by other data between decrunches.
; -------------------------------------------------------------------
decrunch_table:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.IF EXTRA_TABLE_ENTRY_FOR_LENGTH_THREE <> 0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.ENDIF
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0,0,0,0,0
; -------------------------------------------------------------------
; end of decruncher
; -------------------------------------------------------------------

180
loader/src/decompress/lcdecomp.s Executable file
View file

@ -0,0 +1,180 @@
; the following depack code is
; written by Marek Matula (MMS/Taboo)
; in its original form and has been
; slightly modified
; compress using taboo levelcrush or
; crush.exe by taboo,
; don't forget to convert the
; compressed files (not needed for mem-decompressing)
; Level-crusher v1.0/v1.1 depacker
; (c)1998 Taboo Productions!
; All rights reserved
; Taboo Level-crusher depacker: streams crunched bytes via getbyte
; (provided by the loader) and writes depacked output to dest/desth.
LC_SPEED = 6
; zero-page work variables
byte = DECOMPVARS+$00
hi = DECOMPVARS+$01
dest = decdestlo
desth = decdesthi
decompress:
; first two stream bytes = output start address (lo, hi)
jsr getbyte
.if LOADCOMPD_TO
; optionally relocate the destination by a load-address offset
clc
adc loadaddroffslo
php
.endif
storedadrl: sta dest
jsr getbyte
.if LOADCOMPD_TO
plp
adc loadaddroffshi
.endif
storedadrh: sta desth
ldx #$00
stx byte ; empty bit buffer -> first asl forces a refill via getbit
jp18: stx hi
; read a variable-length literal-run length into A (lo) / hi
lda #$01
asl byte
bne *+$05 ; bit buffer empty? then refill (skip over the jsr otherwise)
jsr getbit
bcs jp2
jp4: asl byte
bne *+$05
jsr getbit
bcs jp3
asl byte
bne *+$05
jsr getbit
rol a
rol hi
bpl jp4
jp3: tax
beq jp5
; literal run
ldy #$00
literalrun: jsr getbyte
sta (dest),y
inc dest
bne :+
inc desth
: dex
bne literalrun
jp5: cpx hi
dec hi
bcc literalrun
stx hi
; read a match (back-reference) length
jp2: lda #$01
asl byte
bne *+$05
jsr getbit
bcc jp9
jp8: asl byte
bne *+$05
jsr getbit
bcs jp10
asl byte
bne *+$05
jsr getbit
rol a
bcc jp8
; decompression finished
rts
jp9: inx
jp10: adc #$01
sta depseqle ; self-modifying: patch the copy loop's end compare value
txa
; read 2 bits selecting one of four entries in tab
asl byte
bne *+$05
jsr getbit
rol a
asl byte
bne *+$05
jsr getbit
rol a
tay
lda #$00
; accumulate the match offset; per-group bit widths come from tab
jp12: ldx tab,y
jp11: asl byte
bne *+$05
jsr getbit
rol a
rol hi
dex
bne jp11
dey
bmi jp14
cpy #$03
clc
beq jp14
adc #$01
bcc jp12
inc hi
bcs jp12
jp14: adc depseqle
bcc jp15
inc hi
; copy sequence
jp15: clc
sbc dest
eor #$ff
sta depseqcp+$01 ; self-modifying: copy-loop source address, lo byte
lda hi
sbc desth
eor #$ff
sta depseqcp+$02 ; self-modifying: copy-loop source address, hi byte
ldy #$00
depseqcp: lda $00,y ; operand patched above
sta (dest),y
iny
depseqle = *+$01
cpy #$00 ; operand = sequence length, patched at jp10
bne depseqcp
; dest += number of bytes copied
tya
clc
adc dest
sta dest
bcc jp17
inc desth
jp17: jmp jp18
; refill the bit buffer: fetch a stream byte, shift left, and roll
; in C=1 at bit 0 as the end-of-byte marker; C returns the top bit
getbit: pha
jsr getbyte
sec
rol a
sta byte
pla
jp19: rts
; offset bit-width table, one variant per LC_SPEED setting
tab:
.if LC_SPEED = 6
.byte 4,3,3,3,4,2,2,2
.endif
.if LC_SPEED = 5
.byte 4,2,3,3,4,2,2,2
.endif
.if LC_SPEED = 4
.byte 4,2,2,3,4,2,2,2
.endif
.if LC_SPEED = 3
.byte 4,2,2,2,4,2,2,2
.endif
.if LC_SPEED = 2
.byte 3,2,2,2,3,2,2,2
.endif
.if LC_SPEED = 1
.byte 3,1,2,2,3,1,2,2
.endif
.if LC_SPEED = 0
.byte 2,2,1,1,2,2,1,1
.endif

View file

@ -0,0 +1,334 @@
; with slight modifications by Krill/Plush
.FEATURE labels_without_colons, leading_dot_in_identifiers
decompress = .loadcomp_entry
decompsrc = lzsa_srcptr
.if MEM_DECOMP_TO_API
; cannot copy remaining uncompressed blob of unknown size
.error "***** MEM_DECOMP_TO_API is not supported for LZSA2. Copy compressed data to original location, then use MEM_DECOMP_API to decompress in-place. *****"
.endif
BITFIRE_ZP_ADDR = DECOMPVARS - 1
.define asr alr
; Fetch the next compressed byte into A (expects Y = 0), advancing
; lzsa_srcptr; crossing a page falls into .lz_next_page to pull in
; the next compressed block. X is saved around the loader call.
; NOTE(review): assumes the GETBLOCK macro preserves Y — confirm.
lzsa2_get_byte:
lda (lzsa_srcptr),y ;Subroutine version for when
inc <lzsa_srcptr + 0 ;inlining isn't advantageous.
beq .lz_next_page
rts
.lz_next_page
inc <lzsa_srcptr + 1 ;Inc & test for bank overflow.
.lz_next_page_
; page boundary crossed: save A/X, fetch next block, restore
pha
txa
pha
GETBLOCK <lzsa_srcptr + 1
pla
tax
pla
rts
.loadcomp_entry
jsr .lz_next_page_ ;shuffle in data first
; ldy #$00 ;Initialize source index.
; read the 4 header bytes into lzsa_dstptr/lzsa_endptr
; (stored high index first: X counts 3..0; dstptr+2/+3 = endptr)
ldx #$04
:
jsr lzsa2_get_byte
dex
sta <lzsa_dstptr + 0,x
bne :-
.if LOADCOMPD_TO
; apply the load-address offset to both words:
; x=2 adjusts lzsa_endptr, x=0 adjusts lzsa_dstptr
ldx #2
:
clc
lda loadaddroffslo
adc <lzsa_dstptr,x
sta <lzsa_dstptr,x
lda loadaddroffshi
adc <lzsa_dstptr + 1,x
sta <lzsa_dstptr + 1,x
dex
dex
bpl :-
ldx #0
.endif
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; Original software by Emmanuel Marty, altered by Tobias Bindhammer
; to adopt it to bitfire, includes optimization in size and speed and
; changes to the encoded format
; -----------------------------------------------------------------------------
lzsa_cmdbuf = BITFIRE_ZP_ADDR + 1 ;1 byte.
lzsa_nibflg = BITFIRE_ZP_ADDR + 2 ;1 byte.
lzsa_nibble = BITFIRE_ZP_ADDR + 3 ;1 byte.
lzsa_offset = BITFIRE_ZP_ADDR + 4 ;1 word.
lzsa_winptr = BITFIRE_ZP_ADDR + 6 ;1 word.
lzsa_srcptr = BITFIRE_ZP_ADDR + 8 ;1 word.
lzsa_dstptr = BITFIRE_ZP_ADDR + 10 ;1 word.
lzsa_endptr = BITFIRE_ZP_ADDR + 12 ;1 word.
lzsa_length = lzsa_winptr + 0
lzsa_tmp = lzsa_winptr + 1
;command byte:
;765 432 10
;ttt mmm ll
;this saves an lsr in .asm - with xyzllmmm we would need 3 lsr, now we need two lsr
;ll literals length
;mmm match length
.depacker_start
;XXX TODO can be saved when used with filename? or if it exits cleared
lzsa2_depack
sty <lzsa_nibflg ;Initialize nibble buffer.
;
;Copy bytes from compressed source data.
;
;Hi-byte of length or offset.
.cp_length
jsr lzsa2_get_byte
sta <lzsa_cmdbuf ;Preserve this for later.
and #$03 ;Extract literal length.
beq .lz_offset ;size = 0 -> no literal, continue with match
cmp #$03 ;Extended length?
bne .got_cp_len ;1..2
jsr .get_length ;x is set by .get_length
beq .put_cp_len
.got_cp_len
inx ;Increment # of pages to copy.
.put_cp_len
stx <lzsa_length
tax ;low byte
.cp_page
;.lz_ptr1
; lda $face,x
; sta (dst),y
; inx
; bne +
; jsr .lz_next_page
;+
; iny
; cpy #$00
; bne .cp_page
; tya
; clc
; adc <lzsa_dstptr + 0
; sta <lzsa_dstptr + 0
; bcc +
; inc <lzsa_dstptr + 1
;+
; dec <lzsa_length
; bne .cp_page
lda (lzsa_srcptr),y
;saves another 6 bytes if used here, then single point of change
;jsr lzsa2_get_byte
sta (lzsa_dstptr),y
inc <lzsa_srcptr + 0
bne .skip1
jsr .lz_next_page
.skip1
inc <lzsa_dstptr + 0
bne .skip2
inc <lzsa_dstptr + 1
.skip2
dex
bne .cp_page
dec <lzsa_length ;Any full pages left to copy?
bne .cp_page
;ttt token for match offset len
;codepath 1
;0m0 5-bit offset - m goes into first bit of offset + one nibble is read ;11111111 111mnnnn
;0m1 13-bit offset - m goes into first bit of offset + one nibble and on byte is read ;111mnnnn bbbbbbbb
;codepath 2
;10m 9-bit offset - m goes into first bit of offset + one byte is read ;1111111m bbbbbbbb
;110 16-bit offset - two bytes are read ;bbbbbbbb bbbbbbbb
;111 repeat offset
;fetch a nibble
;13,9,16 -> fetch a lowbyte
;9 bit fetch a bit
;16 -> fetch a highbyte
;rep, skip all
.lz_offset
lda <lzsa_cmdbuf
asl
bcc .get_5_13_bits ;prefer path with 3 options over path with 2 options
.get_9_16_rep
asl
bcc .get_9_bits
.get_16_rep
bmi .lz_length ;Repeat previous offset.
.get_16_bits
jsr lzsa2_get_byte ;Get hi-byte of offset.
bne .get_low8
.get_9_bits
asl
lda #$ff ;-> $ff/$fe
rol
bne .get_low8 ;BRA
.get_5_13_bits
sta <lzsa_tmp
jsr lzsa2_get_nibble
asl <lzsa_tmp ;shift in bit 5/13
rol
asl <lzsa_tmp ;shift in token bit to decide 5/13
dex ;x = $ff
bcc .get_low0 ;all done for 5-bit offset
sbc #2 ;Subtract 512 because 13-bit - this extends the range for 13 bits offsets, as 0..511 is covered by 9 bit matches
;offset starts at $FE00.
.get_low8
tax
jsr lzsa2_get_byte
.get_low0
sta <lzsa_offset + 0
stx <lzsa_offset + 1 ;Save new offset.
;
;Copy bytes from decompressed window.
;
;N.B. X=0 is expected and guaranteed when we get here.
;
.lz_length
ldx #$00 ;Hi-byte of length.
lda <lzsa_cmdbuf
lsr
asr #$0e ;extract match len and clear C
adc #$02 ;correct length
cmp #$09 ;Extended length?
bne .got_lz_len ;a = 2 .. 8, C = 0
jsr .get_length ;x is set by .get_length, y is still 0
clc
beq .calc_lz_addr ;only need to check for zero here
.got_lz_len ;C = 0
eor #$ff ;Negate the lo-byte of length
tay
iny
eor #$ff ;restore A, a bit ugly
inx ;Increment # of pages to copy.
;clc ;Calc destination for partial page
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .calc_lz_addr_
dec <lzsa_dstptr + 1
.calc_lz_addr
lda <lzsa_dstptr + 0 ;N.B. Offset is negative!
.calc_lz_addr_
clc ;Calc address of match.
adc <lzsa_offset + 0
sta <lzsa_winptr + 0
lda <lzsa_dstptr + 1
adc <lzsa_offset + 1
sta <lzsa_winptr + 1
.lz_page
lda (lzsa_winptr),y
sta (lzsa_dstptr),y
iny
bne .lz_page
inc <lzsa_winptr + 1
inc <lzsa_dstptr + 1
dex ;Any full pages left to copy?
bne .lz_page
lda <lzsa_srcptr + 0
eor <lzsa_endptr + 0
bne .lz_poll
eor <lzsa_srcptr + 1
eor <lzsa_endptr + 1
beq .finished2
.lz_skip_end
jmp .cp_length ;Loop around to the beginning.
.lz_poll
POLLBLOCK
; ldy #0
ldx #$00 ;clear x afterwards, yet a bit annoying, y stays 0
beq .lz_skip_end
.get_length
;entered with x = 0
adc #$0e ;C = 1 -> adc #$0f
sta <lzsa_tmp
jsr lzsa2_get_nibble
adc #$00 ;C = 1
beq .byte_length ;Extended length?
adc <lzsa_tmp ;C = 0 from addition above -> $10 + lzsa_tmp
.got_length ;Z-flag is set
rts ;lengths.
.byte_length
jsr lzsa2_get_byte ;So rare, this can be slow!
clc ;adc #$0f + 0
adc <lzsa_tmp
bcc .got_length
beq .finished
.word_length
;this is fetched big-endian, to avoid pha/pla, saves code
jsr lzsa2_get_byte ;So rare, this can be slow!
tax
jsr lzsa2_get_byte ;So rare, this can be slow!
and #$ff ;restore z-flag
rts
.finished
pla ;Decompression completed, pop
pla ;return address.
.finished2
rts
;
;Get a nibble value from compressed data in A.
;
;XXX TODO pack nibbles in another way? -> 10101010 -> first nibble = and #$55, second nibble = asr #$aa?
; Return the next 4-bit value in the low nibble of A with the high
; nibble forced to $f ($f0..$ff); C = 1 on exit on both paths.
; Nibbles are buffered pairwise: lzsa_nibflg toggles between the
; cached lo-nibble and fetching a fresh byte.
lzsa2_get_nibble
lsr <lzsa_nibflg ;Is there a nibble waiting?
lda <lzsa_nibble ;Extract the lo-nibble.
bcs .got_nibble
inc <lzsa_nibflg ;Reset the flag.
jsr lzsa2_get_byte
sta <lzsa_nibble ;Preserve for next time.
lsr ;Extract the hi-nibble.
lsr
lsr
lsr
sec
.got_nibble
ora #$f0
rts

344
loader/src/decompress/ncdecomp.s Executable file
View file

@ -0,0 +1,344 @@
;
; NuCrunch 1.0
; Christopher Jam
; May 2018
; with slight modifications by Krill
;
decompress = decrunch
decompsrc = read+1
NC_BLOCK_INTERFACE = 1
; fetch one compressed byte (thin wrapper so the bit macros stay short)
.macro getByte1
jsr get_byte
.endmacro
; Pull one bit from bitstream 1 into C.
; The bit buffers use the marker-bit scheme: the buffer byte is shifted
; left so C receives the data bit; when the buffer becomes zero the
; marker bit has been shifted out, so a fresh byte is fetched and
; rolled left with C=1 inserted at bit 0 as the new marker.
; NOTE: the refill path clobbers A (see getBit2hpa for an A-preserving
; variant defined below in this file).
.macro getBit1
.local nomore
asl zbs1
bne nomore
getByte1
sec
rol
sta zbs1
nomore:
.endmacro
; get head of a pair of bits from the bitpair stream
; (must getBit2t precisely once before invoking again)
.macro getBit2h
.local nomore
asl zbs2
bne nomore
getByte1
sec
rol
sta zbs2
nomore:
.endmacro
.if NC_BLOCK_INTERFACE
; same, but preserving A
.macro getBit2hpa
.local nomore
asl zbs2
bne nomore
pha
getByte1
sec
rol
sta zbs2
pla
nomore:
.endmacro
.else
; same, but preserving A/ trashing X.
.macro getBit2hpa
.local nomore
asl zbs2
bne nomore
tax
getByte1
sec
rol
sta zbs2
txa
nomore:
.endmacro
.endif
; get tail of a pair of bits from the bitpair stream
.macro getBit2t
asl zbs2
.endmacro
; get head of a quad of bits from the quad stream
; (must getBit4t precisely three times before invoking again)
.macro getBit4h
.local nomore
asl zbs4
bne nomore
getByte1
sec
rol
sta zbs4
nomore:
.endmacro
; get tail of a quad of bits from the quad stream
.macro getBit4t
asl zbs4
.endmacro
; note, trashes X. Also, carry is clear when done
.macro getExpGoulombTail
.local ndone
ndone:
getBit2hpa
rol
getBit2t
bcs ndone
.endmacro
.macro getExpGoulombTail_odd_aligned
.local ndone
ndone:
getBit2t
rol
getBit2hpa
bcs ndone
.endmacro
.ifdef NUCRUNCH_ALIGN_FOR_SPEED
.byte <-$64-*,0 ; place decode_copy on a page boundary
.endif
decrunch_zpa=DECOMPVARS ;5 bytes required
zbs1 = decrunch_zpa+$00 ; 1 byte
zbs2 = decrunch_zpa+$01 ; 1 byte
zbs4 = decrunch_zpa+$02 ; 1 byte
zpc = decrunch_zpa+$03 ; 2 bytes
zpd = decdestlo
offsetm1 = zpc ; these are aliased, as never need both
decrunch:
; ldy #0
sty zbs1
sty zbs2
sty zbs4
decrunch_next_group:
.if NC_BLOCK_INTERFACE
jsr read_init
lda loadaddrhi
sta literal_read+2
ldx loadaddrlo
.endif
next_segment:
jsr get_byte
.if LOADCOMPD_TO
clc
adc loadaddroffslo
php
.endif
storedadrl:
sta zpd+0
jsr get_byte
.if LOADCOMPD_TO
plp
adc loadaddroffshi
.endif
storedadrh:
sta zpd+1
decode_literal:
; get count [ExpGoulomb0+1] in x
.if NC_BLOCK_INTERFACE
getBit1
lda#1
bcc ret1
getExpGoulombTail
ret1:
sta literal_len + 1
.else
ldx#1
getBit1
bcc ret1
lda#1
getExpGoulombTail
tax
ret1:
.endif
literal_loop:
.if NC_BLOCK_INTERFACE
literal_read:
lda $ff00,x
inx
bne *+5
jsr read_inc
.else
decompgetbyte:
jsr getcmem
.endif
sta (zpd),y
iny
.if NC_BLOCK_INTERFACE
literal_len:
cpy #0
.else
dex
.endif
bne literal_loop
clc
tya
adc zpd
sta zpd
bcc *+4
inc zpd+1
ldy#0
; literal is always followed by copy
decode_copy:
getBit2h
bcc short_offset
lda#1
getExpGoulombTail_odd_aligned
adc#255
sta offsetm1+1
getByte1
sta offsetm1
jmp got_high
short_offset:
lda#0
sta offsetm1+1
;ExpGoulomb k=3
getBit4h
lda#1
bcc no_tail
getExpGoulombTail_odd_aligned
no_tail:
adc#255
getBit4t
rol
getBit4t
rol
getBit4t
rol
sta offsetm1
got_high:
.if NC_BLOCK_INTERFACE
lda#1
getBit2t
bcc length_two
getExpGoulombTail
cmp#255
beq end_of_segment ; copy length of 256 marks end of segment
length_two:
sta copy_len+1
.else
ldx#1
getBit2t
bcc length_two
lda#1
getExpGoulombTail
tax
cpx#255
beq end_of_segment ; copy length of 256 marks end of segment
length_two:
.endif
; note carry is clear at this point; good as we want to subtract (offsetm1+1)
lda zpd
sbc offsetm1
sta zpc
lda zpd+1
sbc offsetm1+1
sta zpc+1
lda (zpc),y
sta (zpd),y
copy_loop:
iny
lda (zpc),y
sta (zpd),y
.if NC_BLOCK_INTERFACE
copy_len:
cpy #0
.else
dex
.endif
bne copy_loop
tya
; carry will be set from SBC above
adc zpd
sta zpd
bcc *+4
inc zpd+1
.if NC_BLOCK_INTERFACE
POLLBLOCK
.endif
ldy#0
getBit1
bcs jmp_decode_copy
jmp decode_literal
jmp_decode_copy:
jmp decode_copy
get_byte:
.if NC_BLOCK_INTERFACE
; stream read via self-modifying hi-byte (read+2 / literal_read+2);
; X is the running low index. Crossing a page falls into read_inc,
; which bumps both patched hi-bytes and fetches the next block.
read:
lda $ff00,x ; hi-byte of operand patched at read+2
inx
beq read_inc
rts
read_inc:
inc literal_read+2
inc read+2
read_init:
; fetch the next compressed block, preserving A/Y around GETBLOCK
pha
tya
pha
GETBLOCK read+2
pla
tay
pla
ldx#0
end_of_segment:
rts
.else
; memory-to-memory variant: delegate to getcmem
decompgetbyte1:
jmp getcmem
.endif
.if NC_BLOCK_INTERFACE
.else
end_of_file:
rts
end_of_segment:
lda offsetm1
cmp#0
beq end_of_file
jmp next_segment
.endif
decrunch_end:

261
loader/src/decompress/pudecomp.s Executable file
View file

@ -0,0 +1,261 @@
; The following depack code is
; written by Pasi Ojala (Albert/PU239)
; in its original form and has been
; slightly modified.
; The original routine was located at
; http://www.cs.tut.fi/~albert/Dev/pucrunch/sa_uncrunch.asm
; at the time of writing.
; crunch using the -c0 switch
.define OLD_VERSION 0; pre 2004/3/24, this includes http://www.cs.tut.fi/~albert/Dev/pucrunch/pucrunch_x86.zip as of 2013/4/9
LZPOS = DECOMPVARS + $00
bitstr = DECOMPVARS + $02
decompress: ldx #5
@222: jsr getbyt ; skip 'p', 'u', endAddr HI&LO, leave starting escape in A
dex
bne @222
sta esc+1 ; starting escape
jsr getbyt ; read startAddr
.if MEM_DECOMP_TO_API
ldx storedadrl
cpx #OPC_LDA_ZP
bne :+
storedadrl: lda decdestlo
:
.endif
.if LOADCOMPD_TO
clc
adc loadaddroffslo
php
.endif
sta OUTPOS
jsr getbyt
.if MEM_DECOMP_TO_API
ldx storedadrh
cpx #OPC_LDA_ZP
bne :+
storedadrh: lda decdesthi
:
.endif
.if LOADCOMPD_TO
plp
adc loadaddroffshi
.endif
sta OUTPOS+1
jsr getbyt ; read # of escape bits
sta escB0+1
sta escB1+1
lda #8
sec
sbc escB1+1
sta noesc+1 ; 8-escBits
jsr getbyt
sta mg+1 ; maxGamma + 1
lda #9
sec
sbc mg+1 ; 8 - maxGamma == (8 + 1) - (maxGamma + 1)
sta longrle+1
jsr getbyt
sta mg1+1 ; (1<<maxGamma)
asl a
clc
sbc #0
sta mg21+1 ; (2<<maxGamma) - 1
jsr getbyt
sta elzpb+1
jsr getbyt ; exec address
sta lo+1 ; lo
jsr getbyt
sta hi+1 ; hi
jsr getbyt ; rleUsed
ldx #0
tay
beq @1 ; Y == 0 ?
@0: jsr getbyt
sta table,x
inx
dey
bne @0
@1: ; setup bit store - $80 means empty
lda #$80
sta bitstr
bne main
getbyt: jsr getnew
lda bitstr
ror
rts
newesc: ldy esc+1 ; remember the old code (top bits for escaped byte)
escB0: ldx #2 ; ** PARAMETER 0..8
jsr getchkf ; get & save the new escape code
sta esc+1
tya ; pre-set the bits
; Fall through and get the rest of the bits.
noesc: ldx #6 ; ** PARAMETER 8..0
jsr getchkf
jsr putch ; output the escaped/normal byte
; Fall through and check the escape bits again
main: ldy #0 ; Reset to a defined state
tya ; A = 0
escB1: ldx #2 ; ** PARAMETER 0..8
jsr getchkf ; X = 0
esc: cmp #0
bne noesc
; Fall through to packed code
jsr getval ; X = 0
sta LZPOS ; xstore - save the length for a later time
lsr ; cmp #1 ; LEN == 2 ? (A is never 0)
bne lz77 ; LEN != 2 -> LZ77
;tya ; A = 0
jsr get1bit ; X = 0
lsr ; bit -> C, A = 0
bcc lz77_2 ; A=0 -> LZPOS+1
;***FALL THRU***
; e..e01
jsr get1bit ; X = 0
lsr ; bit -> C, A = 0
bcc newesc ; e..e010
;***FALL THRU***
; e..e011
srle: iny ; Y is 1 bigger than MSB loops
jsr getval ; Y is 1, get len, X = 0
sta LZPOS ; xstore - Save length LSB
mg1: cmp #64 ; ** PARAMETER 63-64 -> C clear, 64-64 -> C set..
bcc chrcode ; short RLE, get bytecode
longrle: ldx #2 ; ** PARAMETER 111111xxxxxx
jsr getbits ; get 3/2/1 more bits to get a full byte, X = 0
sta LZPOS ; xstore - Save length LSB
jsr getval ; length MSB, X = 0
tay ; Y is 1 bigger than MSB loops
chrcode: jsr getval ; Byte Code, X = 0
tax ; this is executed most of the time anyway
lda table-1,x; Saves one jump if done here (loses one txa)
.if OLD_VERSION
cpx #32 ; 31-32 -> C clear, 32-32 -> C set..
.else
cpx #16 ; 15-16 -> C clear, 16-16 -> C set..
.endif
bcc @1 ; 1..31, we got the right byte from the table
; Ranks 32..64 (11111°xxxxx), get byte..
txa ; get back the value (5 valid bits)
.if OLD_VERSION
ldx #3
.else
ldx #4
.endif
jsr getbits ; get 3 more bits to get a full byte, X = 0
@1: ldx LZPOS ; xstore - get length LSB
inx ; adjust for cpx#$ff;bne -> bne
dorle: jsr putch
dex
bne dorle ; xstore 0..255 -> 1..256
dey
bne dorle ; Y was 1 bigger than wanted originally
mainbeq: beq main ; reverse condition -> jump always
lz77: jsr getval ; X = 0
mg21: cmp #127 ; ** PARAMETER Clears carry (is maximum value)
bne noeof
; EOF
eof:
hi: ldx #0
lo: ldy #0
rts
noeof: sbc #0 ; C is clear -> subtract 1 (1..126 -> 0..125)
elzpb: ldx #0 ; ** PARAMETER (more bits to get)
jsr getchkf ; clears Carry, X = 0
lz77_2: sta LZPOS+1 ; offset MSB
jsr get8bit ; clears Carry, X = 0
; Note: Already eored in the compressor..
;eor #255 ; offset LSB 2's complement -1 (i.e., -X = ~X+1)
adc OUTPOS ; -offset -1 + curpos (C is clear)
ldx LZPOS ; xstore = LZLEN (read before it's overwritten)
sta LZPOS
lda OUTPOS+1
sbc LZPOS+1 ; takes C into account
sta LZPOS+1 ; copy X+1 number of chars from LZPOS to OUTPOS
;ldy #0 ; Y was 0 originally, we don't change it
inx ; adjust for cpx#$ff;bne -> bne
lzloop: lda (LZPOS),y; using abs,y is 3 bytes longer, only 1 cycle/byte faster
jsr putch ; Note: must be copied forwards!
iny ; Y does not wrap because X=0..255 and Y initially 0
dex
bne lzloop ; X loops, (256,1..255)
beq mainbeq ; jump through another beq (-1 byte, +3 cycles)
; refill the bit buffer: fetch a stream byte, shift its top bit into C
; and insert C=1 at bit 0 as the end-of-byte marker; A is preserved
getnew: pha ; 1 Byte/3 cycles
jsr getbyte
sec
rol ; Shift out the next bit and
; shift in C=1 (last bit marker)
sta bitstr ; bitstr initial value = $80 == empty
pla ; 1 Byte/4 cycles
rts
; 25+12 = 37
; getval : Gets a 'static huffman coded' value
; ** Scratches X, returns the value in A **
getval: inx ; X <- 1
txa ; set the top bit (value is 1..255)
gv0: asl bitstr
bne @1
jsr getnew
@1: bcc getchk ; got 0-bit
inx
mg: cpx #7 ; ** PARAMETER unary code maximum length + 1
bne gv0
beq getchk ; inverse condition -> jump always
; getval: 18 bytes
; 15 + 17*n + 6+15*n+12 + 36*n/8 = 33 + 32*n + 36*n/8 cycles
; getbits: Gets X bits from the stream
; ** Scratches X, returns the value in A **
get8bit: ldx #7
get1bit: inx ;2
getbits: asl bitstr
bne @1
jsr getnew
@1: rol ;2
getchk: dex ;2 more bits to get ?
getchkf: bne getbits ;2/3
clc ;2 return carry cleared
rts ;6+6
; output one byte: the sta operand ($aaaa placeholder) doubles as the
; output pointer OUTPOS, patched at decompress start (self-modifying)
OUTPOS = *+$01
putch: sta $aaaa ; ** parameter
inc OUTPOS ; ZP
bne @0
inc OUTPOS+1 ; ZP
@0: rts
; rleUsed byte-code table, filled from the stream header at decompress
.if OLD_VERSION
table: .res 31,0
.else
table: .res 15,0
.endif

View file

@ -0,0 +1,491 @@
;**************************************************************************
;*
;* Copyright (c) 2015, 2017 Daniel Kahlin <daniel@kahlin.net>
;* Written by Daniel Kahlin <daniel@kahlin.net>
;* Slightly modified by Gunnar Ruthenberg <krill@plush.de>
;*
;* DESCRIPTION
;* subsizer 0.6 decruncher - stand alone version
;*
;* usage:
;* You need to provide a function to get a byte from the input
;* stream. (must preserve X,Y and C)
;*
;******
decompress = decrunch
.FEATURE leading_dot_in_identifiers
;**************************************************************************
;*
;* Configuration options
;*
;******
FORWARD_DECRUNCHING = 1
HAVE_LONG_PARTS = 1
.if HAVE_LONG_PARTS
PART_MASK = %00001111
N_PARTS = 16
.else
PART_MASK = %00000111
N_PARTS = 8
.endif
len_zp = DECOMPVARS
copy_zp = DECOMPVARS + 1
hibits_zp = DECOMPVARS + 3
buf_zp = DECOMPVARS + 4
.if MEM_DECOMP_TO_API
dest_zp = decdestlo
.else
dest_zp = DECOMPVARS + 5
.endif
endm_zp = DECOMPVARS + 7
;**************************************************************************
;*
;* NAME fast macros
;*
;******
;******
;* get bit macro
.macro get_bit
.local .gb_skp1
asl buf_zp
bne .gb_skp1
; C=1 (because the marker bit was just shifted out)
jsr dc_get_byte
rol
sta buf_zp
.gb_skp1:
.endmacro
;******
;* get bits max8 macro
.macro get_bits_max8
.local .gb_lp1
.local .gb_skp1
.gb_lp1:
asl buf_zp
bne .gb_skp1
; C=1 (because the marker bit was just shifted out)
pha
jsr dc_get_byte
rol
sta buf_zp
pla
.gb_skp1:
rol
dey
bne .gb_lp1
.endmacro
;******
;* get bits max8 masked macro
.macro get_bits_max8_masked
.local .gb_lp1
.local .gb_skp1
.gb_lp1:
asl buf_zp
bne .gb_skp1
; C=1 (because the marker bit was just shifted out)
tay
jsr dc_get_byte
rol
sta buf_zp
tya
.gb_skp1:
rol
bcs .gb_lp1
.endmacro
;******
;* get bits max16 macro
.macro get_bits_max16
.local .gb_lp1
.local .gb_skp1
.gb_lp1:
asl buf_zp
bne .gb_skp1
; C=1 (because the marker bit was just shifted out)
pha
jsr dc_get_byte
rol
sta buf_zp
pla
.gb_skp1:
rol
rol hibits_zp
dey
bne .gb_lp1 ; C=0 for all Y!=0
.endmacro
;**************************************************************************
;*
;* NAME decrunch
;*
;******
decrunch:
.if MEM_DECOMP_TO_API
; Get endm_zp, dest_zp, and buf_zp
dc_lp00:
jsr dc_get_byte
sta endm_zp
jsr dc_get_byte
storedadrh:
sta dest_zp+1
jsr dc_get_byte
storedadrl:
sta dest_zp+0
jsr dc_get_byte
sta buf_zp
ldx #0
.else
ldx #4
dc_lp00:
jsr dc_get_byte
sta buf_zp-1,x
dex
bne dc_lp00
.endif
; X = 0
.if LOADCOMPD_TO
clc
lda loadaddroffslo
adc dest_zp+0
sta dest_zp+0
lda loadaddroffshi
adc dest_zp+1
sta dest_zp+1
.endif
; ldx #0
dc_lp01:
;******
;* get 4 bits
lda #%11100000
dcg_lp1:
asl buf_zp
bne dcg_skp1
; C=1 (because the marker bit was just shifted out)
tay
jsr dc_get_byte
rol
sta buf_zp
tya
dcg_skp1:
rol
bcs dcg_lp1
; Acc = 4 bits.
sta bits,x
txa
and #PART_MASK
tay
beq dc_skp01
lda #0
sta hibits_zp
ldy bits-1,x
sec
dc_lp02:
rol
rol hibits_zp
dey
bpl dc_lp02
; C = 0
; clc
adc base_l-1,x
tay
lda hibits_zp
adc base_h-1,x
dc_skp01:
sta base_h,x
tya
sta base_l,x
inx
cpx #N_PARTS*4+4
bne dc_lp01
; perform decrunch
ldy #0
; fall through
;**************************************************************************
;*
;* NAME decruncher
;*
;* DESCRIPTION
;* decruncher
;*
;******
decrunch_entry:
;******
;* single literal byte
;*
dc_literal:
.if FORWARD_DECRUNCHING
jsr dc_get_byte
; ldy #0
sta (dest_zp),y
inc dest_zp
bne dc_skp5
inc dest_zp+1
dc_skp5:
dc_common:
.else
lda dest_zp
bne dc_skp5
dec dest_zp+1
dc_skp5:
dec dest_zp
jsr dc_get_byte
; ldy #0
dc_common:
sta (dest_zp),y
.endif
; fall through
decrunch_main:
;------
; perform actual decrunch
dc_lp1:
get_bit
bcs dc_literal
; get length as bits/base.
ldx #$80-N_PARTS
dc_lp2:
inx
bmi dc_skp0
get_bit
bcc dc_lp2
clc
dc_skp0:
; C = 0, Y = 0
; lda #0
tya
ldy bits_len-$80+N_PARTS-1,x
beq dcb1_skp2
get_bits_max8
dcb1_skp2:
; C = 0
adc base_len-$80+N_PARTS-1,x
sta len_zp
; C = 0
.if FORWARD_DECRUNCHING
tax
.else
;******
;* IN: len = $01..$100 (Acc = $00..$ff)
;* OUT: dest_zp = dest_zp - len, X = len-1
;*
tax
; clc
eor #$ff
adc dest_zp
sta dest_zp
bcs dc_skp22
dec dest_zp+1
dc_skp22:
.endif
; check end marker here to avoid thrashing carry earlier
cpx endm_zp
beq done
;******
;* Get selector bits depending on length.
;*
;* IN: len = $01..$100 (X = $00..$ff)
;* OUT:
;*
cpx #4
bcc dc_skp2
ldx #3
dc_skp2:
; get offset as bits/base.
lda tabb,x
get_bits_max8_masked
tax
; C = 0
lda #0
sta hibits_zp
ldy bits_offs,x
beq dcb3_skp2
get_bits_max16
dcb3_skp2:
; C = 0, Acc/hibits_zp + base_offs,x = offset - 1
.if FORWARD_DECRUNCHING
adc base_offs_l,x
bcc dcb3_skp3
inc hibits_zp
clc
dcb3_skp3:
eor #$ff
adc dest_zp
sta copy_zp
lda dest_zp+1
sbc hibits_zp
sbc base_offs_h,x
sta copy_zp+1
.else
; perform: copy_zp = Acc/hibits_zp + base_offs,x + 1 + dest_zp
; result: copy_zp = dest_zp + offset
adc base_offs_l,x
bcc dcb3_skp3
inc hibits_zp
dcb3_skp3:
sec
adc dest_zp
sta copy_zp
lda hibits_zp
adc base_offs_h,x
; C = 0
adc dest_zp+1
sta copy_zp+1
.endif
.if FORWARD_DECRUNCHING
copy:
.if 1
; this is shorter
ldx len_zp
inx
ldy #$ff
dc_lp4:
iny
lda (copy_zp),y
sta (dest_zp),y
dex
bne dc_lp4
.else
; and this might be faster on average, as there are many 1-byte sequences
ldy len_zp
beq dc_skp4
ldx len_zp
ldy #$00
dc_lp4:
lda (copy_zp),y
sta (dest_zp),y
iny
dex
bne dc_lp4
dc_skp4:
lda (copy_zp),y
sta (dest_zp),y
.endif
; C = 1
tya
adc dest_zp
sta dest_zp
bcc dc_skp22
inc dest_zp+1
dc_skp22:
ldy #$00
.else
;******
;* Reverse fast copy
;*
;* IN: len = $01..$100 (len_zp = $00..$ff), C = 0
;*
copy:
ldy len_zp
beq dc_skp4
dc_lp4:
lda (copy_zp),y
sta (dest_zp),y
dey
bne dc_lp4
dc_skp4:
lda (copy_zp),y
; sta (dest_zp),y
.endif
jmp dc_common
; bcc dc_common ; always taken
;******
;* exit out
done:
rts
; offset-selector bit templates, indexed by the clamped match length
; (0..3); each entry is consumed by get_bits_max8_masked, which shifts
; bits in until the leading marker bits have been shifted out
.if HAVE_LONG_PARTS
tabb:
.byte %10000000 | (48 >> 2) ; 2 bits
.byte %11100000 | (0 >> 4) ; 4 bits
.byte %11100000 | (16 >> 4) ; 4 bits
.byte %11100000 | (32 >> 4) ; 4 bits
.else
tabb:
.byte %10000000 | (24 >> 2) ; 2 bits
.byte %11000000 | (0 >> 3) ; 3 bits
.byte %11000000 | (8 >> 3) ; 3 bits
.byte %11000000 | (16 >> 3) ; 3 bits
.endif
;**************************************************************************
;*
;* NAME dc_get_byte
;*
;* DESCRIPTION
;* Get byte from the packed stream.
;*
;******
dc_get_byte = getbyte ; stream input is provided by the loader
end_decruncher:
begin_tables:
;**************************************************************************
;*
;* NAME base_l, base_h, bits
;*
;* DESCRIPTION
;* Data for bits/base decoding.
;*
;******
base_l:
base_len:
.res N_PARTS,0
base_offs_l:
.res N_PARTS*3+4,0
; NOTE(review): base_h aliases the last N_PARTS bytes of base_offs_l
; (its own .res is commented out) — appears to be an intentional
; space-saving overlap; confirm against the table-build loop dc_lp01
base_h = * - N_PARTS
; .res N_PARTS,0
base_offs_h:
.res N_PARTS*3+4,0
bits:
bits_len:
.res N_PARTS,0
bits_offs:
.res N_PARTS*3+4,0
end_tables:
; eof

View file

@ -0,0 +1,190 @@
decompress = decrunch
decompsrc = sp
TC_BLOCK_INTERFACE = 1
.if (PLATFORM = diskio::platform::COMMODORE_64) | (PLATFORM = diskio::platform::COMMODORE_128)
USE_UNINTENDED_OPCODES = 1
.else
USE_UNINTENDED_OPCODES = 0
.endif
;.export decrunch
.if USE_UNINTENDED_OPCODES
.define sbx axs
.endif
.if MEM_DECOMP_TO_API
dp=decdestlo
.else
dp=DECOMPVARS + 0 ;4
.endif
sp=DECOMPVARS + 2 ;6 ; sp must follow dp, cf init code
cs=DECOMPVARS + 4 ;8
decrunch:
.if TC_BLOCK_INTERFACE = 0
stx sp+1
.endif
ldy#2
init_loop:
.if USE_UNINTENDED_OPCODES
stx sp-2,y ;first iter stores sp-low :D
.else
sta sp-2,y ;first iter stores sp-low :D
.endif
.if TC_BLOCK_INTERFACE
; read three blocks ahead,
; - one because literal strings read up to 128 bytes past sp
; - two more to absorb up to 256 blocks worth of read 254 bytes/use 256 bytes
tya
pha
jsr tc_getblock
pla
tay
.endif
.if USE_UNINTENDED_OPCODES
lax(sp),y
.else
lda(sp),y
.endif
dey
bpl init_loop
pha
.if MEM_DECOMP_TO_API
storedadrl = * + 1
storedadrh = * + 1
lda #0
cmp #OPC_STA_ZP
beq :+
lda dp; override destination address
bne *+4
dec dp+1
dec dp
jmp :++
: lda sp-2; destination address as stored in header
sta dp
lda sp-1
sta dp+1
:
.endif
.if LOADCOMPD_TO
clc
lda loadaddroffslo
adc dp
sta dp
lda loadaddroffshi
adc dp+1
sta dp+1
.endif
lda#$02
bne update_sp
literal_run:
literal_loop:
iny
lda(sp),y
sta(dp),y
dex
bmi literal_loop
tya
pha
clc
increase_dp_by_a_and_sp_by_tos_plus_one:
adc dp
sta dp
bcc :+
inc dp+1
:
pla
update_sp:
sec
adc sp
sta sp
bcc :+
inc sp+1
.if TC_BLOCK_INTERFACE
jsr tc_getblock
.endif
:
next_command:
POLLBLOCK
ldy#0
.if USE_UNINTENDED_OPCODES
lax(sp),y
.else
lda(sp),y
tax
.endif
beq decrunch_done
; literal: x = 128+length-1
; near copy: a = %11xxxxxx
; far copy: a|0xf8 = >(~(offset-1)), x = 8*(length-2) | (some low bits)
asl
bcc far_copy
bpl literal_run
near_copy:
ldx#$07 ; clear high byte of -ve offset. Also ensures copy_loop doesn't loop.
.byt $f0 ; beq (not taken) to skip over the iny
far_copy:
iny
; carry is set for near_copy, clear for far_copy
lda(sp),y ;fetch second byte (or for near copy, refetch first). This is low 8 bits of offset.
adc dp
sta cs
txa
ora#$f8
adc dp+1
sta cs+1
.if USE_UNINTENDED_OPCODES = 0
txa
lsr
lsr
lsr
tax
.endif
tya
pha ; save opcode length to stack
ldy#1
lda(cs),y
sta(dp),y
copy_loop:
iny
lda(cs),y
sta(dp),y
.if USE_UNINTENDED_OPCODES
txa ; spend an extra 2 cycles per byte here to save 10 in the bitfield extraction. A win on average
sbx#8
bpl copy_loop
.else
dex
bpl copy_loop
clc
.endif
tya
bcc increase_dp_by_a_and_sp_by_tos_plus_one ; always taken.
decrunch_done:
pla
iny
sta(dp),y
rts
.if TC_BLOCK_INTERFACE
; fetch the next compressed block; sp+1 holds the current source page
tc_getblock:
GETBLOCK sp+1
rts
.endif
edecrunch:

View file

@ -0,0 +1,392 @@
decompsrc = tsget
decompress = tsdecrunch
.feature c_comments, leading_dot_in_identifiers
.define .label
/*
decrunch_extreme.asm
NMOS 6502 decompressor for data stored in TSCrunch format.
Copyright Antonio Savona 2022.
*/
.define INPLACE 1 ; Enables inplace decrunching. Use -i switch when crunching.
.label tsget = DECOMPVARS + 0 ; 2 bytes
.label tstemp = DECOMPVARS + 2
.label tsput = decdestlo ; 2 bytes
.label lzput = DECOMPVARS + 3 ; 2 bytes
.if INPLACE
.macro TS_DECRUNCH src
lda #<src
sta.zp tsget
lda #>src
sta.zp tsget + 1
jsr tsdecrunch
.endmacro
.else
.macro TS_DECRUNCH(src,dst)
{
lda #<src
sta.zp tsget
lda #>src
sta.zp tsget + 1
lda #<dst
sta.zp tsput
lda #>dst
sta.zp tsput + 1
jsr tsdecrunch
}
.endif
tsdecrunch:
jsr getfirstblock
decrunch:
.if INPLACE
;ldy #0
dey
: iny
sta optRun + 1
.if PREFER_SPEED_OVER_SIZE
ldx #$d0 ; bne opcode
and #1
bne skp
ldx #$29 ; and immediate opcode
skp:
stx optOdd
.endif
.if MEM_DECOMP_TO_API
storedadrl = * + 1
storedadrh = * + 1
lda #0
cmp #OPC_LDA_ZP
lda (tsget),y
bcs :+
sta tsput , y ; last iteration trashes lzput, with no effect.
:
cpy #3
bne :--
.else
lda (tsget),y
sta tsput , y ; last iteration trashes lzput, with no effect.
cpy #3
bne :-
.endif
pha
.if LOADCOMPD_TO
clc
lda loadaddroffslo
adc tsput
sta tsput
lda loadaddroffshi
adc tsput + 1
sta tsput + 1
sec
.endif
tya
ldy #0
beq update_getonly
.else
ldy #0
lda (tsget),y
sta optRun + 1
ldx #$d0 //bne opcode
and #1
bne !skp+
ldx #$29 //and immediate opcode
!skp:
stx optOdd
inc tsget
bne entry2
inc tsget + 1
.endif
entry2:
POLLBLOCK
lax (tsget),y
bmi rleorlz
cmp #$20
bcs lz2
; literal
.if INPLACE
inc tsget
beq updatelit_hi
return_from_updatelit:
and #1
bne odd_lit
ts_delit_loop:
lda (tsget),y
sta (tsput),y
iny
dex
odd_lit:
lda (tsget),y
sta (tsput),y
iny
dex
bne ts_delit_loop
tya
tax
; carry is clear
ldy #0
.else ; not inplace
tay
and #1
bne !odd+
ts_delit_loop:
lda (tsget),y
dey
sta (tsput),y
!odd:
lda (tsget),y
dey
sta (tsput),y
bne ts_delit_loop
txa
inx
.endif
updatezp_noclc:
adc tsput
sta tsput
bcs updateput_hi
putnoof:
txa
update_getonly:
adc tsget
sta tsget
bcc entry2
jsr getblock
bcc entry2
.if INPLACE
updatelit_hi:
pha
jsr getblock
pla
tax
bcc return_from_updatelit
.endif
updateput_hi:
inc tsput+1
clc
bcc putnoof
; LZ2
lz2:
beq done
ora #$80
adc tsput
sta lzput
lda tsput + 1
sbc #$00
sta lzput + 1
; y already zero
lda (lzput),y
sta (tsput),y
iny
lda (lzput),y
sta (tsput),y
.if PREFER_SPEED_OVER_SIZE
tya
dey
adc tsput
sta tsput
bcs lz2_put_hi
skp_lz2:
inc tsget
bne entry2
jsr getblock
bcc entry2
lz2_put_hi:
inc tsput + 1
bcs skp_lz2
.else
tya ; y = a = 1.
tax ; y = a = x = 1. a + carry = 2
dey ; ldy #0
beq updatezp_noclc
.endif
rleorlz:
alr #$7f
bcc ts_delz
; RLE
beq zeroRun
plain:
ldx #2
iny
sta tstemp ; number of bytes to de-rle
lsr ; c = test parity
lda (tsget),y ; fetch rle byte
ldy tstemp
runStart:
sta (tsput),y
bcs odd_rle
sec
ts_derle_loop:
dey
sta (tsput),y
odd_rle:
dey
sta (tsput),y
bne ts_derle_loop
; update zero page with a = runlen, x = 2 , y = 0
lda tstemp
bcs updatezp_noclc
done:
.if INPLACE
pla
sta (tsput),y
.endif
rts
; LZ
ts_delz:
lsr
sta lzto + 1
iny
lda tsput
bcc long
sbc (tsget),y
sta lzput
lda tsput+1
sbc #$00
ldx #2
; lz MUST decrunch forward
lz_put:
sta lzput+1
ldy #0
lda lzto + 1
lsr
bcs odd_lz
lda (lzput),y
sta (tsput),y
ts_delz_loop:
iny
odd_lz:
lda (lzput),y
sta (tsput),y
iny
lda (lzput),y
sta (tsput),y
lzto: cpy #0
bne ts_delz_loop
tya
; update zero page with a = runlen, x = 2, y = 0
ldy #0
; clc not needed as we have len - 1 in A (from the encoder) and C = 1
jmp updatezp_noclc
zeroRun:
optRun: ldy #255
.if PREFER_SPEED_OVER_SIZE
sta (tsput),y
optOdd: bne odd_zero
ts_dezero_loop:
dey
sta (tsput),y
odd_zero:
dey
sta (tsput),y
bne ts_dezero_loop
lda optRun + 1
ldx #1
jmp updatezp_noclc
.else
sty tstemp
tya
alr #$01
ldx #1
bne runStart
.endif
long:
; carry is clear and compensated for from the encoder
adc (tsget),y
sta lzput
iny
lax (tsget),y
ora #$80
adc tsput + 1
cpx #$80
rol lzto + 1
ldx #3
bne lz_put
; advance the compressed-source pointer to the next page and fetch the
; corresponding block; getfirstblock skips the page increment (used for
; the initial block only). Returns with C = 0 for the callers' adds.
getblock:
inc tsget + 1
getfirstblock:
GETBLOCK tsget + 1
clc
rts

; (git web-UI diff residue, neutralized as comments so the file assembles)
; View file
; @ -0,0 +1,672 @@
;
; (c) Copyright 2021 by Tobias Bindhammer. All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
; * The name of its author may not be used to endorse or promote products
; derived from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;
.FEATURE labels_without_colons, leading_dot_in_identifiers
; ca65 dialect: labels may omit ':' and identifiers may start with '.'
.if MEM_DECOMP_TO_API
; Compile-time guard: this ZX0 depacker only supports in-place decompression.
; cannot perform src == dst check when not depacking in-place,
; as offsets (arbitrary src - original src) or (arbitrary dst - original dst) are not known,
; also cannot copy remaining uncompressed blob of unknown size
.error "***** MEM_DECOMP_TO_API is not supported for ZX0. Copy compressed data to original location, then use MEM_DECOMP_API to decompress in-place. *****"
.endif
.if PREFER_SPEED_OVER_SIZE
; faster but bigger decruncher
decompsrc = lz_src
lz_bits = BLOCKINDEX + 1 ;bit buffer for the Elias-gamma bit stream
lz_src = DECOMPVARS + 0 ;zeropage pointer: compressed source
lz_dst = decdestlo ;zeropage pointer: decompression destination
lz_len_hi = DECOMPVARS + 4 ;high byte of 16-bit literal/match lengths
; USE_DALI is defined in resident.s
.if USE_DALI
; dali-style stream: bits shift out of the LOW end of the bit buffer
.define set_lz_bit_marker ror
.define get_lz_bit lsr <lz_bits
.else
; plain zx0-style stream: bits shift out of the HIGH end
.define set_lz_bit_marker rol
.define get_lz_bit asl <lz_bits
.endif
.define inc_src_ptr jsr lz_next_page ;sets X = 0, so all sane
;------------------
;ELIAS FETCH
;------------------
; Refill the bit buffer from the stream and continue reading an
; interlaced Elias-gamma number into A (lowbyte; overflow -> lz_len_hi).
.lz_refill_bits
tax ;save bits fetched so far
lda (lz_src),y ;fetch another lz_bits byte from stream
set_lz_bit_marker
sta <lz_bits
inc <lz_src + 0 ;postponed pointer increment, so no need to save A on next_page call
beq .lz_inc_src2
.lz_inc_src2_
txa ;restore fetched bits, also postponed, so A can be trashed on lz_inc_src above
bcs .lz_lend ;last bit fetched?
.lz_get_loop
get_lz_bit ;fetch payload bit
.lz_length_16_
rol ;shift in new payload bit
bcs .lz_length_16 ;first 1 drops out from lowbyte, need to extend to 16 bit, unfortunately this does not work with inverted numbers
get_lz_bit ;fetch control bit
bcc .lz_get_loop ;need more bits
beq .lz_refill_bits ;buffer is empty, fetch new bits
.lz_lend ;was the last bit
rts
.lz_length_16 ;this routine happens very rarely, so we can waste cycles
pha ;save so far received lobyte
tya ;was lda #$01, but A = 0 + upcoming rol makes this also start with A = 1
jsr .lz_length_16_ ;get up to 7 more bits
sta <lz_len_hi ;and save hibyte
pla ;restore lobyte
; bne .lz_not_zero ;was the lobyte zero?
; pla ;pull lowbyte of last jsr-call from stack
; pha ;restore
; ;eor #<.lz_jsr_addr
; bmi + ;eof and match case, no need to decrement lz_len_hi
; dec <lz_len_hi ;decrement lz_len_hi
;+
; tya
bne .lz_not_zero ;was the lobyte zero?
dec <lz_len_hi ;yes, so decrement hibyte beforehand to avoid expensive checks later on, except for one case
tya ;keep Z = 0, (but not needed in case of eof, the dec also results in 0 there)
.lz_not_zero
rts
;------------------
;DECOMP INIT
;------------------
; Entry point: read destination address (and end position) from the stream
; header, reset offset/length state, then fall into the main loop.
decompress
;copy over end_pos and lz_dst from stream
; ldy #$00 ;needs to be set in any case, also plain decomp enters here
ldx #$02
.if USE_DALI
stx <lz_bits
.endif
:
lda (lz_src),y ;header bytes land in lz_dst .. lz_dst+2 (X counts down)
sta <lz_dst,x
inc <lz_src
jsr .lz_init ;fetch a new block when the source page wraps
dex
bpl :-
sty .lz_offset_lo + 1 ;initialize offset with $0000
sty .lz_offset_hi + 1
sty <lz_len_hi ;reset len - XXX TODO could also be cleared upon installer, as the depacker leaves that value clean again
.if LOADCOMPD_TO
; apply load-address offset to the destination pointer
clc
lda loadaddroffslo
adc <lz_dst
sta <lz_dst
lda loadaddroffshi
adc <lz_dst + 1
sta <lz_dst + 1
jmp .lz_poll
.else
bmi .lz_poll; jmp - X = $ff after the init loop, so N is set and this always branches
.endif
;------------------
;SELDOM STUFF
;------------------
; Page-crossing helpers: advance the source/destination high byte, then
; return to the spot that detected the wrap (flags are preserved by lz_next_page).
.lz_inc_src2
inc_src_ptr
bne .lz_inc_src2_
.lz_inc_src3
inc_src_ptr
bcs .lz_inc_src3_
.lz_dst_inc
inc <lz_dst + 1
bcs .lz_dst_inc_
;------------------
;SELDOM STUFF
;------------------
.lz_clc
clc
bcc .lz_clc_back ;branch always
; More than one page left to copy: decrement the high count and re-enter
; either the literal loop (X = 0) or the match loop.
.lz_cp_page
dec <lz_len_hi
txa ;much shorter this way. if we recalculate m_src and dst, endcheck also hits in if we end with an multipage match, else maybe buggy?
beq .lz_l_page ;if entered from a literal, x == 0
tya ;if entered from a match, x is anything between $01 and $ff due to inx stx <lz_dst + 1, except if we would depack to zp?
bcs .lz_m_page ;as Y = 0, we can skip the part that does Y = A xor $ff
;------------------
;POLLING
;------------------
; Top of the main loop: give the loader a chance to fetch blocks, then
; read the next type bit (literal vs. match).
.lz_poll
.lz_start_over
POLLBLOCK
;------------------
;ENTRY POINT DEPACKER
;------------------
lda #$01 ;we fall through this check on entry and start with literal
get_lz_bit
bcs .lz_match ;after each match check for another match or literal?
;------------------
;LITERAL
;------------------
; Read an Elias-gamma literal length into A/X, then copy that many bytes
; straight from the compressed stream to the destination.
.lz_literal
get_lz_bit
bcs :++
:
get_lz_bit ;fetch payload bit
rol ;can also moved to front and executed once on start
get_lz_bit ;fetch payload bit
bcc :-
:
bne :+
.lz_start_depack
jsr .lz_refill_bits
:
tax ;X = literal length lowbyte (loop counter)
.lz_l_page
.lz_cp_lit ;XXX TODO copy with Y += 1 but have lz_src + 0 eor #$ff in x and countdown x, so that lz_src + 1 can be incremented in time?
lda (lz_src),y ;/!\ Need to copy this way, or we run into danger to copy from an area that is yet blocked by barrier, this totally sucks, loading in order reveals that
sta (lz_dst),y
inc <lz_src + 0
beq .lz_inc_src3
.lz_inc_src3_
inc <lz_dst + 0
beq .lz_dst_inc
.lz_dst_inc_
dex
bne .lz_cp_lit
;XXX TODO we could also just manipulate a branch and make it go to either page handling or fall through? enable branch if 16 bits are fetched and lz_len > 0? dop or bcs to disable and enable, 80 or b0
lda <lz_len_hi ;more pages to copy?
bne .lz_cp_page ;happens very seldom
;------------------
;NEW OR OLD OFFSET
;------------------
;XXX TODO fetch length first and then decide if literal, match, repeat? But brings our checks for last bit to the end? need to check then on typebit? therefore entry for fetch is straight?
;in case of type bit == 0 we can always receive length (not length - 1), can this used for an optimization? can we fetch length beforehand? and then fetch offset? would make length fetch simpler? place some other bit with offset?
rol ;was A = 0, C = 1 -> A = 1 with rol, but not if we copy literal this way
get_lz_bit
bcs .lz_match ;either match with new offset or old offset
;------------------
;REPEAT LAST OFFSET
;------------------
; Match reusing the previous offset: fetch length, then share the copy
; code below (offset operands are still patched from last time).
.lz_repeat
get_lz_bit
bcs :++
:
get_lz_bit ;fetch payload bit
rol ;can also moved to front and executed once on start
get_lz_bit ;cheaper with 2 branches, as initial branch to .lz_literal therefore is removed
bcc :-
:
bne :+
jsr .lz_refill_bits ;fetch more bits
beq .lz_m_page ;XXX TODO sec after sbc #1 is also sufficient, but slower
:
sbc #$01 ;subtract 1, will be added again on adc as C = 1
.lz_match_big ;we enter with length - 1 here from normal match
eor #$ff ;Y = length ^ $ff, used as an up-counting index below
tay
.lz_m_page
eor #$ff ;restore A
adc <lz_dst + 0 ;add length
sta <lz_dst + 0
bcs .lz_clc ;/!\ branch happens very seldom, if so, clear carry
dec <lz_dst + 1 ;subtract one more in this case
.lz_clc_back
.lz_offset_lo sbc #$00 ;carry is cleared, subtract (offset + 1); operand patched with offset lowbyte
sta .lz_msrcr + 0
lax <lz_dst + 1 ;undocumented LAX: A = X = dst highbyte
.lz_offset_hi sbc #$00 ;operand patched with offset highbyte
sta .lz_msrcr + 1
.lz_cp_match ;XXX TODO if repeated offset: add literal size to .lz_msrcr and done?
.lz_msrcr = * + 1 ;self-modified absolute source address of the match
lda $beef,y
sta (lz_dst),y
iny
bne .lz_cp_match
inx
stx <lz_dst + 1 ;cheaper to get lz_dst + 1 into x than lz_dst + 0 for upcoming compare
lda <lz_len_hi ;check for more loop runs
.if LOAD_TO_RAM_UNDER_IO | LOAD_VIA_KERNAL_FALLBACK
beq :+
jmp .lz_cp_page ;do more page runs? Yes? Fall through
:
.else
bne .lz_cp_page ;do more page runs? Yes? Fall through
.endif
; End-of-data check for in-place depacking: done when dst catches up with src.
.lz_check_poll
cpx <lz_src + 1 ;check for end condition when depacking inplace, lz_dst + 0 still in X
.if LOAD_TO_RAM_UNDER_IO | LOAD_VIA_KERNAL_FALLBACK
bne :+
lda <lz_dst + 0
eor <lz_src + 0
beq lz_next_page
: jmp .lz_start_over
.else
bne .lz_start_over
lda <lz_dst + 0
eor <lz_src + 0
bne .lz_start_over
beq lz_next_page ;pointers equal -> fetch the final block, then rts there
.endif
;XXX TODO, save one byte above and the beq lz_next_page can be omitted and lz_next_page copied here again
;jmp .ld_load_raw ;but should be able to skip fetch, so does not work this way
;top ;if lz_src + 1 gets incremented, the barrier check hits in even later, so at least one block is loaded, if it was $ff, we at least load the last block @ $ffxx, it must be the last block being loaded anyway
;as last block is forced, we would always wait for last block to be loaded if we enter this loop, no matter how :-)
;------------------
;MATCH
;------------------
; Match with a new offset: read Elias-gamma offset (0 = EOF), patch the
; offset operands above, then read the match length.
:
get_lz_bit ;fetch payload bit
rol ;add bit to number
.lz_match
get_lz_bit ;fetch control bit
bcc :- ;not yet done, fetch more bits
bne :+ ;last bit or bitbuffer empty? fetched 1 to 4 bits now
jsr .lz_refill_bits ;refill bitbuffer
beq .lz_eof ;so offset was $100 as lowbyte is $00, only here 4-8 bits are fetched
:
sbc #$01 ;subtract 1, elias numbers range from 1..256, we need 0..255
lsr ;set bit 15 to 0 while shifting hibyte
sta .lz_offset_hi + 1 ;hibyte of offset
lda (lz_src),y ;fetch another byte directly, same as refill_bits...
ror ;and shift -> first bit for length is in carry, and we have %0xxxxxxx xxxxxxxx as offset
sta .lz_offset_lo + 1 ;lobyte of offset
inc <lz_src + 0 ;postponed, so no need to save A on next_page call
beq .lz_inc_src1
.lz_inc_src1_
lda #$01 ;fetch new number, start with 1
bcs .lz_match_big ;length = 1, do it the very short way
:
get_lz_bit ;fetch more bits
rol
get_lz_bit
bcc :-
bne .lz_match_big
jsr .lz_refill_bits ;fetch remaining bits
;.lz_jsr_addr = * - 1
; bcs .lz_match_big ;lobyte != 0?
bne .lz_match_big ;lobyte != 0?
;------------------
;SELDOM STUFF
;------------------
inc <lz_len_hi ;need to correct <lz_len_hi
bcs .lz_match_big ;and enter match copy loop
.lz_inc_src1
inc_src_ptr
bne .lz_inc_src1_
;------------------
;NEXT PAGE IN STREAM
;------------------
; .lz_init skips the high-byte increment on the first call (Z set);
; lz_next_page fetches the next block, preserving A, X and the flags.
.lz_init
bne .lz_next_page_
lz_next_page
inc <lz_src + 1
.lz_next_page_
php
txa
pha
GETBLOCK <lz_src + 1
pla
tax
plp
.lz_eof
rts
;XXX TODO
;decide upon 2 bits with bit <lz_bits? bmi + bvs + bvc? bpl/bmi decides if repeat or not, bvs = length 2/check for new bits and redecide, other lengths do not need to check, this can also be used on other occasions?
;do a jmp ($00xx) to determine branch?
.else
; Size-optimized variant of the same ZX0 depacker.
decompsrc = .lz_src
OFFSET_OPT = 0; +8 bytes when enabled: offset kept in self-modified operands
.lz_bits = BLOCKINDEX + 1 ;bit buffer for the Elias-gamma bit stream
.lz_src = DECOMPVARS + 0 ;zeropage pointer: compressed source
.lz_dst = decdestlo ;zeropage pointer: decompression destination
.if OFFSET_OPT
.else
.lz_offset = DECOMPVARS + 2 ;last match offset (lo/hi)
.endif
.lz_len_hi = DECOMPVARS + 4 ;high byte of 16-bit literal/match lengths
; USE_DALI is defined in resident.s
.if USE_DALI
; dali-style stream: bits shift out of the LOW end of the bit buffer
.define rot ror
.define shf lsr
.else
.define rot rol
.define shf asl
.endif
;------------------
;SELDOM STUFF
;------------------
; Multi-page copy: decrement high count, then re-enter literal or match loop.
.lz_l_page
.lz_dcp
dec <.lz_len_hi
bcs .lz_cp_lit
sec
bcs .lz_match_big
;------------------
;POINTER HANDLING LITERAL COPY
;------------------
; Page-crossing helpers for the literal copy loop.
.lz_dst_inc
inc <.lz_dst + 1
bcs .lz_dst_inc_
.lz_src_inc
jsr .lz_next_page ;sets X = 0, so all sane
bcs .lz_src_inc_
; Entry point: read the destination address from the stream header,
; reset offset/length state and the bit buffer.
decompress
;copy over end_pos and lz_dst from stream
ldx #2
.if USE_DALI
stx <.lz_bits
.endif
:
lda (.lz_src),y ;copy over first two bytes
sta <.lz_dst,x
inc <.lz_src + 0
jsr .lz_init ;fetch a new block when the source page wraps
.if OFFSET_OPT
.else
sty .lz_offset,x; reset offset and length-hi
.endif
dex
bpl :-
.if LOADCOMPD_TO
; apply load-address offset to the destination pointer
clc
lda loadaddroffslo
adc <.lz_dst
sta <.lz_dst
lda loadaddroffshi
adc <.lz_dst + 1
sta <.lz_dst + 1
.endif
.if OFFSET_OPT
sty .lz_offset_lo + 1 ;initialize offset with $0000
sty .lz_offset_hi + 1
sty <.lz_len_hi ;reset len - XXX TODO could also be cleared upon installer, as the depacker leaves that value clean again
.endif
;start with an empty lz_bits, first shf <.lz_bits leads to literal this way and bits are refilled upon next shift
.if USE_DALI
.elseif LOAD_VIA_KERNAL_FALLBACK | MEM_DECOMP_API
lda #$40
sta <.lz_bits
.endif
;------------------
;POLLING
;------------------
; Top of the main loop: let the loader fetch blocks, then read the type bit.
.lz_start_over POLLBLOCK
;------------------
;LITERAL
;------------------
lda #$01 ;we fall through this check on entry and start with literal
shf <.lz_bits
bcs .lz_match ;after each match check for another match or literal?
; Copy a literal run of X bytes straight from the stream to the destination.
.lz_literal
jsr .lz_length
tax
beq .lz_l_page ;happens very seldom, so let's do that with lz_l_page that also decrements lz_len_hi, it returns on c = 1, what is always true after jsr .lz_length
.lz_cp_lit
lda (.lz_src),y ;Need to copy this way, or we copy from an area that is blocked by barrier
sta (.lz_dst),y
inc <.lz_src + 0
beq .lz_src_inc
.lz_src_inc_
inc <.lz_dst + 0
beq .lz_dst_inc
.lz_dst_inc_
dex
bne .lz_cp_lit
lda <.lz_len_hi ;more pages to copy?
bne .lz_l_page ;happens very seldom
;------------------
;NEW OR OLD OFFSET
;------------------
;in case of type bit == 0 we can always receive length (not length - 1), can this used for an optimization? can we fetch length beforehand? and then fetch offset? would make length fetch simpler? place some other bit with offset?
rol ;A = 0, C = 1 -> A = 1
shf <.lz_bits
;rol
;bne .lz_match
;else A = 0
;but only for lowbyte?!
bcs .lz_match ;either match with new offset or old offset
;------------------
;DO MATCH
;------------------
; Match reusing the previous offset: fetch length, share the copy loop below.
.lz_repeat
jsr .lz_length
sbc #$01
bcc .lz_dcp ;fix highbyte of length in case and set carry again (a = $ff -> compare delivers carry = 1)
;sec ;XXX TODO in fact we could save on the sbc #$01 as the sec and adc later on corrects that again, but y would turn out one too less
.lz_match_big ;we enter with length - 1 here from normal match
eor #$ff ;Y = length ^ $ff, used as an up-counting index below
tay
;XXX TODO save on eor #$ff and do sbc lz_dst + 0?
eor #$ff ;restore A
.lz_match_len2 ;entry from new_offset handling
adc <.lz_dst + 0
sta <.lz_dst + 0
tax ;remember for later end check, cheaper this way
bcs .lz_clc ;/!\ branch happens very seldom, if so, clear carry
dec <.lz_dst + 1 ;subtract one more in this case
.lz_clc_back
.if OFFSET_OPT
.lz_offset_lo sbc #$00 ;carry is cleared, subtract (offset + 1) in fact we could use sbx here, but would not respect carry, but a and x are same, but need x later anyway for other purpose
.else
sbc <.lz_offset
.endif
sta .lz_msrcr + 0
lda <.lz_dst + 1
.if OFFSET_OPT
.lz_offset_hi sbc #$00
.else
sbc <.lz_offset + 1
.endif
sta .lz_msrcr + 1
; ;XXX TODO would have dst + 0 and + 1 in X and A here, of any use?
.lz_cp_match
;XXX TODO if repeated offset: add literal size to .lz_msrcr and done?
.lz_msrcr = * + 1 ;self-modified absolute source address of the match
lda $beef,y
sta (.lz_dst),y
iny
bne .lz_cp_match
inc <.lz_dst + 1
lda <.lz_len_hi ;check for more loop runs
bne .lz_m_page ;do more page runs? Yes? Fall through
; End-of-data check for in-place depacking: done when dst catches up with src.
.lz_check_poll
cpx <.lz_src + 0 ;check for end condition when depacking inplace, .lz_dst + 0 still in X
.lz_skip_poll bne .lz_start_over ;-> can be changed to .lz_poll, depending on decomp/loadcomp
lda <.lz_dst + 1
sbc <.lz_src + 1
bne .lz_start_over
;jmp .ld_load_raw ;but should be able to skip fetch, so does not work this way
;top ;if lz_src + 1 gets incremented, the barrier check hits in even later, so at least one block is loaded, if it was $ff, we at least load the last block @ $ffxx, it must be the last block being loaded anyway
;as last block is forced, we would always wait for last block to be loaded if we enter this loop, no matter how :-)
;------------------
;NEXT PAGE IN STREAM
;------------------
; .lz_init skips the high-byte increment on the first call (Z set);
; .lz_next_page fetches the next block, preserving A, X and the flags.
.lz_init
bne :+
.lz_next_page
inc <.lz_src + 1
:
.if LOAD_TO_RAM_UNDER_IO
jmp getblock
.else
php
txa
pha
GETBLOCK <.lz_src + 1
pla
tax
plp
rts
.endif
;------------------
;FETCH A NEW OFFSET
;------------------
; Match with a new offset: read the Elias-gamma offset (underflow = EOF),
; store it (self-modified operands or zeropage), then read the length.
: ;lz_length as inline
shf <.lz_bits ;fetch payload bit
rol ;can also moved to front and executed once on start
.lz_match
shf <.lz_bits
bcc :-
bne :+
jsr .lz_refill_bits
:
sbc #$01 ;XXX TODO can be omitted if just endposition is checked, but 0 does not exist as value?
bcc .lz_eof ;underflow. must have been 0
lsr ;set bit 15 to 0 while shifting the hibyte
.if OFFSET_OPT
sta .lz_offset_hi + 1 ;hibyte of offset
.else
sta .lz_offset + 1 ;hibyte of offset
.endif
lda (.lz_src),y ;fetch another byte directly
ror ;first length bit -> C, offset is %0xxxxxxx xxxxxxxx
.if OFFSET_OPT
sta .lz_offset_lo + 1
.else
sta .lz_offset
.endif
inc <.lz_src + 0 ;postponed, so no need to save A on next_page call
bne :+
jsr .lz_next_page ;preserves carry, all sane
:
lda #$01
ldy #$fe
bcs .lz_match_len2 ;length = 1 ^ $ff, do it the very short way :-)
:
shf <.lz_bits ;fetch first payload bit
;XXX TODO we could check bit 7 before further shf?
rol ;can also moved to front and executed once on start
shf <.lz_bits
bcc :-
bne .lz_match_big
ldy #$00 ;only now y = 0 is needed
jsr .lz_refill_bits ;fetch remaining bits
; bcs .lz_match_big
jmp .lz_match_big
;------------------
;SELDOM STUFF
;------------------
.lz_clc
clc
bcc .lz_clc_back ;branch always
.lz_m_page
dec <.lz_len_hi
inc .lz_msrcr + 1 ;XXX TODO only needed if more pages follow
bne .lz_cp_match
;------------------
;ELIAS FETCH
;------------------
; Refill the bit buffer and continue reading an interlaced Elias-gamma
; number into A (lowbyte; 16-bit overflow goes to .lz_len_hi).
.lz_refill_bits
tax ;save bits fetched so far
lda (.lz_src),y ;fetch a fresh bit-buffer byte from the stream
rot ;shift in the end marker
sta <.lz_bits
inc <.lz_src + 0 ;postponed, so no need to save A on next_page call
bne :+ ;XXX TODO if we would prefer beq, 0,2% saving
jsr .lz_next_page ;preserves carry and A, clears X, Y, all sane
:
txa ;restore the bits fetched so far
bcs .lz_lend ;was that the last bit?
;lda #$00
;slo <.lz_bits
.lz_get_loop
shf <.lz_bits ;fetch payload bit
.lz_length_16_
rol ;can also moved to front and executed once on start
bcs .lz_length_16 ;first 1 drops out from lowbyte, need to extend to 16 bit, unfortunately this does not work with inverted numbers
.lz_length
shf <.lz_bits ;fetch control bit
bcc .lz_get_loop ;need more bits
beq .lz_refill_bits ;buffer empty, fetch new bits
.lz_lend
.lz_eof
rts
.lz_length_16 ;happens very rarely
pha ;save LSB
tya ;was lda #$01, but A = 0 + rol makes this also start with MSB = 1
jsr .lz_length_16_ ;get up to 7 more bits
sta <.lz_len_hi ;save MSB
pla ;restore LSB
rts
.if LOAD_TO_RAM_UNDER_IO
; Block fetch preserving A, X and the processor flags.
getblock php
txa
pha
GETBLOCK <.lz_src + 1
pla
tax
plp
rts
.endif
.endif