init files

2025-11-13 19:07:39 +03:00 · 2025-11-13 19:07:39 +03:00 · 8197a022bd
commit 8197a022bd
1409 changed files with 139317 additions and 0 deletions
--- a/loader/tools/b2/Decruncher.inc
+++ b/loader/tools/b2/Decruncher.inc
@ -0,0 +1,161 @@
+; ByteBoozer Decruncher    /HCL May.2003
+; B2 Decruncher            December 2014
+
+; call: Y = AddrLo
+;       X = AddrHi
+
+;Variables..        #Bytes
+; Zero-page work area of the decruncher. `bits` is the bit buffer that
+; GetNextBit shifts; `put` is the 16-bit write pointer used as (put),y.
+zp_base	= $02       ; -
+bits	= zp_base   ;1
+put	= zp_base+2 ;2
+
+; GetNextBit: shift the next stream bit out of `bits` into the carry flag.
+; When the shift leaves `bits` equal to zero the buffer is used up and
+; GetNewBits is called to reload it; that routine loads Y and increments X,
+; and its `rol bits` leaves the first bit of the new byte in carry.
+#macro	GetNextBit() {.(
+	asl bits
+	bne DgEnd
+	jsr GetNewBits
+DgEnd
+.)}
+
+; GetLen: decode a length code into A.
+; A starts at 1. Each round reads one bit: 0 ends the code, 1 means another
+; data bit follows, which is rotated into A. The loop also ends, without a
+; terminating bit, as soon as bit 7 of A becomes set.
+#macro	GetLen() {.(
+	lda #1
+GlLoop
+	.GetNextBit()
+	bcc GlEnd
+	.GetNextBit()
+	rol
+	bpl GlLoop
+GlEnd
+.)}
+
+; Decrunch: unpack a B2 stream.
+; In: Y = low byte, X = high byte of the packed data address.
+; The routine is self-modifying: it patches the operands of Get1/Get2/Get3,
+; LLen, MLen and MLda while running.
+Decrunch
+	; Patch the source address into the three absolute,x stream reads.
+	sty Get1+1
+	sty Get2+1
+	sty Get3+1
+	stx Get1+2
+	stx Get2+2
+	stx Get3+2
+
+	; Read the first two stream bytes into put / put+1 (GetNewBits returns
+	; the byte in Y and increments X, so put-1,x hits put then put+1).
+	; NOTE(review): `bcc *-7` is assumed to land on the jsr, which requires
+	; the 2-byte zero-page form of `sty put-1,x` - confirm with the assembler.
+	ldx #0
+	jsr GetNewBits
+	sty put-1,x
+	cpx #2
+	bcc *-7
+	; Start with only the top bit set so the first GetNextBit reloads `bits`.
+	lda #$80
+	sta bits
+DLoop
+	; Type bit: carry set -> match, carry clear -> literal run.
+	.GetNextBit()
+	bcs Match
+Literal
+	; Literal run.. get length.
+	.GetLen()
+	sta LLen+1		; patch the cpy # operand below with the run length
+
+	ldy #0
+LLoop
+Get3	lda $feed,x
+	inx
+	bne *+5			; skip the jsr unless X wrapped to 0
+	jsr GnbInc		; X wrapped: bump the high byte of all three reads
+L1	sta (put),y
+	iny
+LLen	cpy #0
+	bne LLoop
+
+	; put += run length (Y)
+	clc
+	tya
+	adc put
+	sta put
+	bcc *+4
+	inc put+1
+
+	; Y wraps to 0 only after a 255-byte run; only then is a type bit read
+	; again. Any shorter run falls through into Match without a type bit.
+	iny
+	beq DLoop
+
+	; Has to continue with a match..
+
+Match
+	; Match.. get length.
+	.GetLen()
+	sta MLen+1		; patch the cpy # operand of the copy loop
+
+	; Length 255 -> EOF
+	cmp #$ff
+	beq End
+
+	; Get num bits
+	; Builds the Tab index in A: carry from `cmp #2` (set when A >= 2)
+	; followed by two selector bits from the stream.
+	cmp #2
+	lda #0
+	rol
+	.GetNextBit()
+	rol
+	.GetNextBit()
+	rol
+	tay
+	lda Tab,y
+	beq M8			; table entry 0: offset is just one stream byte
+
+	; Get bits < 8
+	; Rotate stream bits into A for as long as a 1 is shifted out of A's
+	; top bit (the leading 1s of the table entry act as the counter).
+M_1	.GetNextBit()
+	rol
+	bcs M_1
+	bmi MShort
+M8
+	; Get byte
+	eor #$ff
+	tay			; Y = inverted high part of the offset
+Get2	lda $feed,x
+	inx
+	bne *+5
+	jsr GnbInc
+	jmp Mdone
+MShort
+	ldy #$ff
+Mdone
+	; Source address = put + offset (A = low part, Y = high part), patched
+	; into MLda. NOTE(review): with the clc commented out this appears to
+	; rely on carry being clear on both paths into Mdone - confirm.
+	;clc
+	adc put
+	sta MLda+1
+	tya
+	adc put+1
+	sta MLda+2
+
+	; Copy loop: Y runs from 0 up to and including the value patched into MLen.
+	ldy #$ff
+MLoop	iny
+MLda	lda $beef,y
+	sta (put),y
+MLen	cpy #0
+	bne MLoop
+
+	; put += Y + carry. NOTE(review): the commented-out sec suggests carry
+	; is expected to be set here by the final equal cpy - confirm.
+	;sec
+	tya
+	adc put
+	sta put
+	bcc *+4
+	inc put+1
+
+	jmp DLoop
+
+End	rts
+
+; GetNewBits: load the next stream byte into Y and into `bits`, rotate
+; `bits` left through carry, then advance X. When X wraps to 0 execution
+; falls into GnbInc.
+; GnbInc (also called directly by the byte readers): increment the patched
+; high address byte of Get1, Get2 and Get3.
+GetNewBits
+Get1	ldy $feed,x
+	sty bits
+	rol bits
+	inx
+	bne GnbEnd
+GnbInc	inc Get1+2
+	inc Get2+2
+	inc Get3+2
+GnbEnd
+	rts
+
+; Tab: start value for A in the offset decoder, indexed by the 3-bit value
+; built under "Get num bits" in Decrunch (entries 0-3 are used when that
+; cmp #2 left carry clear, entries 4-7 when it left carry set).
+; An entry of 0 makes Decrunch branch straight to M8. The trailing comments
+; give the number of offset bits for each entry.
+Tab
+	; Short offsets
+	.byte %11011111 ; 3
+	.byte %11111011 ; 6
+	.byte %00000000 ; 8
+	.byte %10000000 ; 10
+	; Long offsets
+	.byte %11101111 ; 4
+	.byte %11111101 ; 7
+	.byte %10000000 ; 10
+	.byte %11110000 ; 13
--- a/loader/tools/b2/Makefile
+++ b/loader/tools/b2/Makefile
@ -0,0 +1,9 @@
+# Build script for the B2 cruncher command-line tool.
+CC = gcc
+CFLAGS = -O3
+
+# NOTE: despite the name, OBJECTS lists the C sources; they are compiled and
+# linked in a single compiler invocation.
+OBJECTS = \
+	file.c \
+	cruncher.c \
+	bb.c
+
+# Headers are prerequisites too, so editing one triggers a rebuild.
+HEADERS = bb.h cruncher.h file.h
+
+# `all` is a command name, not a file.
+.PHONY: all
+all: b2.exe
+
+# The real output file is the target, so make can tell when it is up to date.
+b2.exe: $(OBJECTS) $(HEADERS)
+	$(CC) $(CFLAGS) $(OBJECTS) -o $@
--- a/loader/tools/b2/bb.c
+++ b/loader/tools/b2/bb.c
@ -0,0 +1,77 @@
+#include "bb.h"
+#include "file.h"
+#include "cruncher.h"
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Command-line entry point of the B2 cruncher.
+ *
+ * Usage: b2 [-[c|e|r] xxxx] <filename>
+ *   -c / -e xxxx  make an executable that starts at hex address xxxx
+ *   -r xxxx       relocate the packed file to hex address xxxx
+ *
+ * Returns 0 on success (and after printing the usage text), -1 on any error.
+ */
+int main(int argc, char * argv[])
+{
+  File myFile;
+  File myBBFile;
+  char* fileName;
+  bool isExecutable = false;
+  bool isRelocated = false;
+  uint address = 0;
+
+  /* argv[1] is only inspected when argc is 2 or 4 (short-circuit ||). */
+  if((argc != 2 && argc != 4) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "-help") == 0)){
+    printf("Usage: b2 [-[c|e|r] xxxx] <filename>\n");
+    printf("   -c: Make executable with start address xxxx.\n");
+    printf("   -e: Same as -c :P.\n");
+    printf("   -r: Relocate file to hex address xxxx.\n");
+    return 0;
+  }
+
+  if(argc == 2) {
+    fileName = argv[1];
+  } else {
+    int i;
+    char *s = argv[2];
+    fileName = argv[3];
+
+    if((strcmp(argv[1], "-c") == 0) ||
+       (strcmp(argv[1], "-e") == 0))
+      isExecutable = true;
+    else if(strcmp(argv[1], "-r") == 0)
+      isRelocated = true;
+    else {
+      printf("Don't understand, aborting.\n");
+      return -1;
+    }
+    if(strlen(s) != 4){
+      printf("Don't understand, aborting.\n");
+      return -1;
+    }
+
+    /* Parse exactly four hex digits; reject anything else instead of
+       adding an uninitialised digit value to the address. */
+    for(i = 0; i < 4; ++i){
+      byte c;
+      if(s[i] >= '0' && s[i] <= '9') c = s[i] - '0';
+      else if(s[i] >= 'a' && s[i] <= 'f') c = s[i] - 'a' + 10;
+      else if(s[i] >= 'A' && s[i] <= 'F') c = s[i] - 'A' + 10;
+      else {
+        printf("Don't understand, aborting.\n");
+        return -1;
+      }
+      address *= 16;
+      address += c;
+    }
+  }
+
+  if(!readFile(&myFile, fileName)) {
+    printf("Error (B-1): Open file \"%s\", aborting.\n", fileName);
+    return -1;
+  }
+
+  if(!crunch(&myFile, &myBBFile, address, isExecutable, isRelocated)) {
+    freeFile(&myFile);
+    return -1;
+  }
+
+  if(!writeFile(&myBBFile, myFile.name)) {
+    /* writeFile leaves name NULL when it could not allocate it, so fall
+       back to the input name rather than passing NULL to %s. */
+    printf("Error (B-2): Write file \"%s\", aborting.\n",
+           myBBFile.name != NULL ? myBBFile.name : myFile.name);
+    freeFile(&myFile);
+    freeFile(&myBBFile);
+    return -1; /* do not report success for a failed write */
+  }
+
+  printf("B2: \"%s\" -> \"%s\"\n", myFile.name, myBBFile.name);
+
+  freeFile(&myFile);
+  freeFile(&myBBFile);
+
+  return 0;
+}
--- a/loader/tools/b2/bb.h
+++ b/loader/tools/b2/bb.h
@ -0,0 +1,19 @@
+/* Common basic types and constants shared by the B2 cruncher sources. */
+#ifndef _bb_h_
+#define _bb_h_
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/* NOTE(review): #ifndef only tests preprocessor macros, so these guards do
+   not detect an existing typedef named byte or uint. */
+#ifndef byte
+typedef unsigned char byte;
+#endif
+#ifndef uint
+typedef unsigned int uint;
+#endif
+
+/* Project-local boolean type (an enum, not <stdbool.h>). */
+typedef enum { false = 0, true = 1 } bool;
+
+/* Used by cruncher.c as the byte size of its output buffer. */
+#define memSize 65536
+
+#endif // _bb_h_
--- a/loader/tools/b2/cruncher.c
+++ b/loader/tools/b2/cruncher.c
@ -0,0 +1,747 @@
+#include "cruncher.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+// Debug logging: expands to nothing. Swap in the commented-out definition
+// below to print to stderr instead.
+#define log(format, ...)
+//#define log(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+// Number of offset bits for each of the four offset classes. The SHORT set
+// is used when the length value passed alongside the offset is 1, the LONG
+// set otherwise (see wOffset / costOfOffset).
+#define NUM_BITS_SHORT_0 3
+#define NUM_BITS_SHORT_1 6
+#define NUM_BITS_SHORT_2 8
+#define NUM_BITS_SHORT_3 10
+#define NUM_BITS_LONG_0 4
+#define NUM_BITS_LONG_1 7
+#define NUM_BITS_LONG_2 10
+#define NUM_BITS_LONG_3 13
+
+// Exclusive upper bound of the offsets representable in each class.
+#define LEN_SHORT_0 (1 << NUM_BITS_SHORT_0)
+#define LEN_SHORT_1 (1 << NUM_BITS_SHORT_1)
+#define LEN_SHORT_2 (1 << NUM_BITS_SHORT_2)
+#define LEN_SHORT_3 (1 << NUM_BITS_SHORT_3)
+#define LEN_LONG_0 (1 << NUM_BITS_LONG_0)
+#define LEN_LONG_1 (1 << NUM_BITS_LONG_1)
+#define LEN_LONG_2 (1 << NUM_BITS_LONG_2)
+#define LEN_LONG_3 (1 << NUM_BITS_LONG_3)
+
+// True when offset o falls into the given class. Each class starts where
+// the previous one ends, so the ranges do not overlap.
+#define COND_SHORT_0(o) ((o) >= 0 && (o) < LEN_SHORT_0)
+#define COND_SHORT_1(o) ((o) >= LEN_SHORT_0 && (o) < LEN_SHORT_1)
+#define COND_SHORT_2(o) ((o) >= LEN_SHORT_1 && (o) < LEN_SHORT_2)
+#define COND_SHORT_3(o) ((o) >= LEN_SHORT_2 && (o) < LEN_SHORT_3)
+#define COND_LONG_0(o) ((o) >= 0 && (o) < LEN_LONG_0)
+#define COND_LONG_1(o) ((o) >= LEN_LONG_0 && (o) < LEN_LONG_1)
+#define COND_LONG_2(o) ((o) >= LEN_LONG_1 && (o) < LEN_LONG_2)
+#define COND_LONG_3(o) ((o) >= LEN_LONG_2 && (o) < LEN_LONG_3)
+
+// Largest match distance the search accepts, and the tighter limit applied
+// to matches of length 2.
+#define MAX_OFFSET LEN_LONG_3
+#define MAX_OFFSET_SHORT LEN_SHORT_3
+
+// Prebuilt decruncher stub that crunch() places in front of the packed data
+// when an executable is requested. crunch() patches these index pairs before
+// copying it: 0x1f/0x20 (transfer from), 0x85/0x86 (depack to),
+// 0xbc/0xbd (depack from) and 0xca/0xcb (jump to).
+#define DECRUNCHER_LENGTH 0xd5
+byte decrCode[DECRUNCHER_LENGTH] = {
+  0x0b, 0x08, 0x00, 0x00, 0x9e, 0x32, 0x30, 0x36, 0x31, 0x00, 0x00, 0x00, 0x78, 0xa9, 0x34, 0x85,
+  0x01, 0xa2, 0xb7, 0xbd, 0x1e, 0x08, 0x95, 0x0f, 0xca, 0xd0, 0xf8, 0x4c, 0x10, 0x00, 0xbd, 0xd6,
+  0x07, 0x9d, 0x00, 0xff, 0xe8, 0xd0, 0xf7, 0xc6, 0x12, 0xc6, 0x15, 0xa5, 0x12, 0xc9, 0x07, 0xb0,
+  0xed, 0x20, 0xa0, 0x00, 0xb0, 0x17, 0x20, 0x8e, 0x00, 0x85, 0x36, 0xa0, 0x00, 0x20, 0xad, 0x00,
+  0x91, 0x77, 0xc8, 0xc0, 0x00, 0xd0, 0xf6, 0x20, 0x83, 0x00, 0xc8, 0xf0, 0xe4, 0x20, 0x8e, 0x00,
+  0xaa, 0xe8, 0xf0, 0x71, 0x86, 0x7b, 0xa9, 0x00, 0xe0, 0x03, 0x2a, 0x20, 0x9b, 0x00, 0x20, 0x9b,
+  0x00, 0xaa, 0xb5, 0xbf, 0xf0, 0x07, 0x20, 0x9b, 0x00, 0xb0, 0xfb, 0x30, 0x07, 0x49, 0xff, 0xa8,
+  0x20, 0xad, 0x00, 0xae, 0xa0, 0xff, 0x65, 0x77, 0x85, 0x74, 0x98, 0x65, 0x78, 0x85, 0x75, 0xa0,
+  0x00, 0xb9, 0xad, 0xde, 0x99, 0x00, 0x00, 0xc8, 0xc0, 0x00, 0xd0, 0xf5, 0x20, 0x83, 0x00, 0xd0,
+  0xa0, 0x18, 0x98, 0x65, 0x77, 0x85, 0x77, 0x90, 0x02, 0xe6, 0x78, 0x60, 0xa9, 0x01, 0x20, 0xa0,
+  0x00, 0x90, 0x05, 0x20, 0x9b, 0x00, 0x10, 0xf6, 0x60, 0x20, 0xa0, 0x00, 0x2a, 0x60, 0x06, 0xbe,
+  0xd0, 0x08, 0x48, 0x20, 0xad, 0x00, 0x2a, 0x85, 0xbe, 0x68, 0x60, 0xad, 0xed, 0xfe, 0xe6, 0xae,
+  0xd0, 0x02, 0xe6, 0xaf, 0x60, 0xa9, 0x37, 0x85, 0x01, 0x4c, 0x00, 0x00, 0x80, 0xdf, 0xfb, 0x00,
+  0x80, 0xef, 0xfd, 0x80, 0xf0
+};
+
+
+// Working state shared by all routines in this file. The buffers are
+// allocated in crunch().
+byte *ibuf;    // input bytes (source file without its first two bytes)
+byte *obuf;    // packed output stream
+uint ibufSize; // number of bytes in ibuf
+int get; //points to ibuf[]
+uint put; //points to obuf[]
+
+// One entry per input position, filled in by findMatches() and followed by
+// writeOutput() from position 0.
+typedef struct {
+  uint cost;   // accumulated cost in bits; 0 means "not set yet"
+  uint next;   // input position where the following step starts
+  uint litLen; // 0 for a match, otherwise the length of the literal run
+  uint offset; // match distance (only written for matches)
+} node;
+
+// Run-length information for an input position; length 0 means no run.
+typedef struct {
+  byte value;
+  byte valueAfter; // byte found just beyond the run (see setupHelpStructures)
+  uint length;
+} RLEInfo;
+
+node *context;    // ibufSize entries
+uint *link;       // ibufSize entries: chain of positions sharing a byte pair
+RLEInfo *rleInfo; // ibufSize entries
+uint first[65536]; // head of each chain, indexed by a 16-bit byte pair
+uint last[65536];  // tail of each chain while it is being built
+
+// Bit writer state used by wBit() / wFlush().
+byte curByte;  // bits collected so far
+byte curCnt;   // free bit slots left in curByte
+uint curIndex; // index in obuf reserved for curByte
+
+// Appends the lowest bit of `bit` to the output bit stream.
+// Bits are gathered MSB-first in curByte. Once a byte is full it is stored
+// in its reserved slot and the next free output byte is reserved.
+void wBit(uint bit) {
+  if(!curCnt) {
+    // Current bit byte is full: store it and claim a new slot.
+    obuf[curIndex] = curByte;
+    curIndex = put++;
+    curByte = 0;
+    curCnt = 8;
+  }
+
+  curByte = (byte)((curByte << 1) | (bit & 1));
+  --curCnt;
+}
+
+// Pads the pending bit byte with zero bits on the right and stores it in
+// its reserved output slot.
+void wFlush() {
+  for(; curCnt != 0; --curCnt) {
+    curByte <<= 1;
+  }
+  obuf[curIndex] = curByte;
+}
+
+// Stores b (truncated to a byte) at the output position and advances it.
+void wByte(uint b) {
+  obuf[put++] = b;
+}
+
+// Copies len input bytes, starting at ibuf[get], to the output stream.
+// Note: the parameter `get` shadows the global of the same name.
+void wBytes(uint get, uint len) {
+  const uint stop = get + len;
+
+  while(get != stop) {
+    wByte(ibuf[get++]);
+  }
+}
+
+// Writes a length code for len (valid range 1..255) to the bit stream.
+//
+// Encoding: the top set bit of len is implicit. For every lower bit a 1
+// ("more follows") is written, followed by the bit itself. A closing 0 is
+// written unless len uses all 8 bits (len >= 0x80).
+//
+// Values outside 1..255 cannot be encoded and are ignored. Without this
+// guard the scan for the top set bit never terminates when the low byte of
+// len is zero, because the 8-bit probe shifts down to 0.
+void wLength(uint len) {
+  byte bit = 0x80;
+
+  if(len == 0 || len > 255) return; // Should never happen
+
+  // Find the top set bit.
+  while((len & bit) == 0) {
+    bit >>= 1;
+  }
+
+  // Emit each remaining bit, preceded by a continuation marker.
+  while(bit > 1) {
+    wBit(1);
+    bit >>= 1;
+    wBit(((len & bit) == 0) ? 0 : 1);
+  }
+
+  // Lengths that fill all 8 bits need no terminator.
+  if(len < 0x80) {
+    wBit(0);
+  }
+}
+
+// Writes a match offset to the output stream.
+//
+// len selects the width table: the short one when len == 1, the long one
+// otherwise. Two bits naming the offset class are written first. Offsets
+// narrower than 8 bits then follow as inverted bits. Wider offsets follow
+// as their bits above bit 7 (not inverted) plus one inverted byte holding
+// the low 8 bits.
+// An offset that fits no class is written as class 0 with no payload bits.
+void wOffset(uint offset, uint len) {
+  static const uint shortWidths[4] = {
+    NUM_BITS_SHORT_0, NUM_BITS_SHORT_1, NUM_BITS_SHORT_2, NUM_BITS_SHORT_3
+  };
+  static const uint longWidths[4] = {
+    NUM_BITS_LONG_0, NUM_BITS_LONG_1, NUM_BITS_LONG_2, NUM_BITS_LONG_3
+  };
+  const uint *widths = (len == 1) ? shortWidths : longWidths;
+  uint cls = 0;
+  uint width = 0;
+  uint lowest = 0;
+  uint k;
+  uint mask;
+
+  // Classes cover consecutive, non-overlapping ranges.
+  for(k = 0; k < 4; k++) {
+    uint limit = 1u << widths[k];
+    if(offset >= lowest && offset < limit) {
+      cls = k;
+      width = widths[k];
+    }
+    lowest = limit;
+  }
+
+  // First write number of bits
+  wBit((cls >> 1) & 1);
+  wBit(cls & 1);
+
+  if(width < 8) { // Offset is 1 byte
+    // All bits, top first, inverted(!)
+    for(mask = (1u << width) >> 1; mask != 0; mask >>= 1) {
+      wBit((offset & mask) ? 0 : 1);
+    }
+    return;
+  }
+
+  // Offset is 2 bytes: the bits above bit 7 go out one by one..
+  for(mask = (1u << width) >> 1; mask >= 0x100; mask >>= 1) {
+    wBit((offset & mask) ? 1 : 0);
+  }
+
+  // ..followed by the low byte, inverted(!)
+  wByte((offset & 255) ^ 255);
+}
+
+
+/*
+ * Cost functions
+ */
+// Returns the number of bits wLength() needs for len (1..255).
+// Any other value is reported on stdout and priced at 10000.
+uint costOfLength(uint len) {
+  uint top = 1;   // largest length covered by the current cost
+  uint bits = 1;
+
+  if(len >= 1 && len <= 255) {
+    if(len >= 128) return 14; // full 8 bits: no terminating bit
+
+    // Every extra significant bit costs two more bits.
+    while(len > top) {
+      top = top * 2 + 1;
+      bits += 2;
+    }
+    return bits;
+  }
+
+  printf("costOfLength got wrong value: %i\n", len);
+  return 10000;
+}
+
+// Returns the number of payload bits needed for offset: the width of the
+// first class it fits into, taken from the short table when len == 1 and
+// from the long table otherwise.
+// An offset too large for every class is reported on stdout and priced
+// at 10000.
+uint costOfOffset(uint offset, uint len) {
+  static const uint shortWidths[4] = {
+    NUM_BITS_SHORT_0, NUM_BITS_SHORT_1, NUM_BITS_SHORT_2, NUM_BITS_SHORT_3
+  };
+  static const uint longWidths[4] = {
+    NUM_BITS_LONG_0, NUM_BITS_LONG_1, NUM_BITS_LONG_2, NUM_BITS_LONG_3
+  };
+  const uint *widths = (len == 1) ? shortWidths : longWidths;
+  uint k;
+
+  for(k = 0; k < 4; k++) {
+    if(offset < (1u << widths[k])) {
+      return widths[k];
+    }
+  }
+
+  printf("costOfOffset got wrong offset: %i\n", offset);
+  return 10000;
+}
+
+// Total bit cost of a match: 1 copy bit, the length code for len - 1,
+// 2 offset-class bits and the offset payload for offset - 1.
+uint calculateCostOfMatch(uint len, uint offset) {
+  // Evaluated as separate statements so that any diagnostics are printed
+  // in a fixed order.
+  uint lengthBits = costOfLength(len - 1);
+  uint offsetBits = costOfOffset(offset - 1, len - 1);
+
+  return 1 + lengthBits + 2 + offsetBits;
+}
+
+// Returns oldCost plus the cost of extending a literal run to litLen bytes:
+// 8 bits for the byte itself, plus a surcharge at the run lengths listed
+// below (1 bit at 1 and 128, 2 bits at 2, 4, 8, 16, 32 and 64).
+uint calculateCostOfLiteral(uint oldCost, uint litLen) {
+  uint surcharge = 0;
+
+  // FIXME, what if litLen > 255?
+  //
+  // FIXME, cost model for literals does not work.
+  // Quick wins on short matches are prioritized before
+  // a longer literal run, which in the end results in a
+  // worse result.
+  // Most obvious on files hard to crunch.
+  if(litLen == 1 || litLen == 128) {
+    surcharge = 1;
+  } else if(litLen >= 2 && litLen <= 64 && (litLen & (litLen - 1)) == 0) {
+    // Powers of two from 2 to 64
+    surcharge = 2;
+  }
+
+  return oldCost + 8 + surcharge;
+}
+
+
+// Prepares the lookup structures used by findMatches():
+//  1. rleInfo[]: for each run of equal bytes, found while walking the input
+//     from its end, the run length and the byte beyond the run are stored at
+//     the run's highest position.
+//  2. first[] / last[] / link[]: chains of positions, keyed by the 16-bit
+//     pair (ibuf[get] << 8) | ibuf[get-1], in descending position order.
+// NOTE(review): both passes start at ibufSize - 1 and so assume a non-empty
+// input - confirm the caller guarantees this.
+void setupHelpStructures() {
+  uint i;
+
+  // Setup RLE-info
+  get = ibufSize - 1;
+  while (get > 0) {
+
+    byte cur = ibuf[get];
+    if (cur == ibuf[get-1]) {
+
+      // Extend the run towards the start of the input.
+      uint len = 2;
+      while ((get >= len) && 
+	     (cur == ibuf[get-len])) {
+	len++;
+      }
+
+      rleInfo[get].length = len;
+      if (get >= len) {
+	rleInfo[get].valueAfter = ibuf[get-len];
+      } else {
+	rleInfo[get].valueAfter = cur; // Avoid accessing ibuf[-1]
+      }
+
+      // Skip the whole run.
+      get -= len;
+    } else {
+      get--;
+    }
+  }
+
+
+  // Setup Linked list
+  // 0 doubles as the "empty" marker in first[], last[] and link[].
+  for (i = 0; i < 65536; i++) {
+    first[i] = 0;
+    last[i] = 0;
+  }
+
+  get = ibufSize - 1;
+  uint cur = ibuf[get];
+
+  while (get > 0) {
+
+    // Key = current byte in the high half, preceding byte in the low half.
+    cur = ((cur << 8) | ibuf[get-1]) & 65535;
+
+    if (first[cur] == 0) {
+      first[cur] = last[cur] = get;
+    } else {
+      link[last[cur]] = get;
+      last[cur] = get;
+    }
+
+    if (rleInfo[get].length == 0) { // No RLE-match here..
+      get--;
+    } else { // if RLE-match..
+      // Only one position per run is linked; step to the run's last pair.
+      get -= (rleInfo[get].length - 1);
+    }
+
+  }
+}
+
+
+// Fills context[] with the cheapest known way to encode the input.
+//
+// Positions are visited from the last input byte down to 0. At each position
+// the candidate matches are collected (one slot per match length), the node
+// that each match reaches is updated if the match is cheaper, and finally the
+// node for "one more literal" is updated. lastNode carries the result of the
+// previously visited (higher) position.
+// Matches are compared backwards: a match of length len at position get
+// covers get-len+1 .. get and its source ends at position scn.
+void findMatches() {
+
+  typedef struct match {
+    uint length;
+    uint offset;
+  } match;
+
+  // matches[n] holds the chosen match of length n (length 0 = none).
+  match matches[256];
+
+  node lastNode;
+  uint i;
+
+  get = ibufSize - 1;
+  uint cur = ibuf[get];
+
+  lastNode.cost = 0;
+  lastNode.next = 0;
+  lastNode.litLen = 0;
+
+  while (get >= 0) {
+
+    // Clear matches for current position
+    for (i = 0; i < 256; i++) {
+      matches[i].length = 0;
+      matches[i].offset = 0;
+    }
+
+    cur = (cur << 8) & 65535; // Table65536 lookup
+    if (get > 0) cur |= ibuf[get-1];
+    int scn = first[cur];
+    scn = link[scn];
+
+    uint longestMatch = 0;
+
+    if (rleInfo[get].length == 0) { // No RLE-match here..
+
+      // Scan until start of file, or max offset
+      while (((get - scn) <= MAX_OFFSET) &&
+	     (scn > 0) &&
+	     (longestMatch < 255)) {
+
+	// Ok, we have a match of length 2
+	// ..or longer, but max 255 or file start
+	uint len = 2;
+	while ((len < 255) &&
+	       (scn >= len) &&
+	       (ibuf[scn - len] == ibuf[get - len])) {
+	  ++len;
+	}
+
+	// Calc offset
+	uint offset = get - scn;
+
+	// Store match only if it's the longest so far
+	if(len > longestMatch) {
+	  longestMatch = len;
+
+	  // Store the match only if first (= best) of this length
+	  while(len >= 2 && matches[len].length == 0) {
+
+	    // If len == 2, check against short offset!!
+	    if ((len > 2) ||
+		((len == 2) && (offset <= MAX_OFFSET_SHORT))) {
+	      matches[len].length = len;
+	      matches[len].offset = get - scn;
+	    }
+
+	    len--;
+	  };
+	}
+
+	scn = link[scn]; // Table65535 lookup
+      };
+
+      first[cur] = link[first[cur]]; // Waste first entry
+
+    } else { // if RLE-match..
+
+      uint rleLen = rleInfo[get].length;
+      byte rleValAfter = rleInfo[get].valueAfter;
+
+
+      // First match with self-RLE, which is always
+      // one byte shorter than the RLE itself.
+      uint len = rleLen - 1;
+      if (len > 1) {
+	if (len > 255) len = 255;
+	longestMatch = len;
+
+	// Store the match
+	while(len >= 2) {
+	  matches[len].length = len;
+	  matches[len].offset = 1;
+
+	  len--;
+	};
+      }
+
+
+      // Search for more RLE-matches..
+      // Scan until start of file, or max offset
+      while (((get - scn) <= MAX_OFFSET) &&
+	     (scn > 0) &&
+	     (longestMatch < 255)) {
+
+	// Check for longer matches with same value and after..
+	// FIXME, that is not what it does, is it?!
+	if ((rleInfo[scn].length > longestMatch) &&
+	    (rleLen > longestMatch)) {
+
+	  uint offset = get - scn;
+	  len = rleInfo[scn].length;
+
+	  if (len > rleLen)
+	    len = rleLen;
+
+	  if ((len > 2) ||
+	      ((len == 2) && (offset <= MAX_OFFSET_SHORT))) {
+	    matches[len].length = len;
+	    matches[len].offset = offset;
+
+	    longestMatch = len;
+	  }
+	}
+
+
+	// Check for matches beyond the RLE..
+	if ((rleInfo[scn].length >= rleLen) &&
+	    (rleInfo[scn].valueAfter == rleValAfter)) {
+	 
+	  // Here is a match that goes beyond the RLE..
+	  // Find out correct offset to use valueAfter..
+	  // Then search further to see if more bytes equal.
+
+	  len = rleLen;
+	  uint offset = get - scn + (rleInfo[scn].length - rleLen);
+
+	  if (offset <= MAX_OFFSET) {
+	    while ((len < 255) &&
+		   (get >= (offset + len)) &&
+		   (ibuf[get - (offset + len)] == ibuf[get - len])) {
+	      ++len;
+	    }
+	    if (len > longestMatch){
+	      longestMatch = len;
+
+	      // Store the match only if first (= best) of this length
+	      while(len >= 2 && matches[len].length == 0) {
+
+		// If len == 2, check against short offset!!
+		if ((len > 2) ||
+		    ((len == 2) && (offset <= MAX_OFFSET_SHORT))) {
+		  matches[len].length = len;
+		  matches[len].offset = offset;
+		}
+
+		len--;
+	      }; //while
+	    }
+	  }
+	}
+
+	scn = link[scn]; // Table65535 lookup
+      }
+
+      
+      if (rleInfo[get].length > 2) {
+	// Expand RLE to next position
+	rleInfo[get-1].length = rleInfo[get].length - 1;
+	rleInfo[get-1].value = rleInfo[get].value;
+	rleInfo[get-1].valueAfter = rleInfo[get].valueAfter;
+      } else {
+	// End of RLE, advance link.
+	first[cur] = link[first[cur]]; // Waste first entry
+      }
+    }
+
+
+    // Now we have all matches from this position..
+    // ..visit all nodes reached by the matches.
+
+    for (i = 255; i > 0; i--) {
+
+      // Find all matches we stored
+      uint len = matches[i].length;
+      uint offset = matches[i].offset;
+
+      if (len != 0) {
+
+	// The match covers targetI .. get, so it is recorded on its first byte.
+	uint targetI = get - len + 1;
+    	node* target = &context[targetI];
+
+    	// Calculate cost for this jump
+    	uint currentCost = lastNode.cost;
+	currentCost += calculateCostOfMatch(len, offset);
+
+    	// If this match is first or cheapest way to get here
+    	// then update node
+    	if (target->cost == 0 ||
+    	    target->cost > currentCost) {
+
+	  target->cost = currentCost;
+    	  target->next = get + 1;
+    	  target->litLen = 0;
+	  target->offset = offset;
+	}
+      }
+    }
+
+
+    // Calc the cost for this node if using one more literal
+    uint litLen = lastNode.litLen + 1;
+    uint litCost = calculateCostOfLiteral(lastNode.cost, litLen);
+
+    // If literal run is first or cheapest way to get here
+    // then update node
+    // (>= : on equal cost the literal replaces a stored match)
+    node* this = &context[get];
+    if (this->cost == 0 ||
+    	this->cost >= litCost) {
+      this->cost = litCost;
+      this->next = get + 1;
+      this->litLen = litLen;
+    }
+
+    lastNode.cost = this->cost;
+    lastNode.next = this->next;
+    lastNode.litLen = this->litLen;
+
+    // Loop to the next position
+    get--;
+  };
+
+}
+
+
+// Returns margin
+int writeOutput() {
+  uint i;
+
+  put = 0;
+
+  curByte = 0;
+  curCnt = 8;
+  curIndex = put;
+  put++;
+
+  int maxDiff = 0;
+
+  bool needCopyBit = true;
+
+  for (i = 0; i < ibufSize;) {
+
+    uint link = context[i].next;
+    uint cost = context[i].cost;
+    uint litLen = context[i].litLen;
+    uint offset = context[i].offset;
+
+    if (litLen == 0) {
+      // Put Match
+      uint len = link - i;
+
+      log("$%04x: Mat(%i, %i)\n", i, len, offset);
+  
+      if(needCopyBit) {
+	wBit(1);
+      }
+      wLength(len - 1);
+      wOffset(offset - 1, len - 1);
+
+      i = link;
+
+      needCopyBit = true;
+    } else {
+      // Put LiteralRun
+      needCopyBit = false;
+
+      while(litLen > 0) {
+	uint len = litLen < 255 ? litLen : 255;
+
+	log("$%04x: Lit(%i)\n", i, len);
+
+	wBit(0);
+	wLength(len);
+	wBytes(i, len);
+
+	if (litLen == 255) {
+	  needCopyBit = true;
+	}
+
+	litLen -= len;
+	i += len;
+      };
+    }
+
+    if ((int)(i - put) > maxDiff) {
+      maxDiff = i - put;
+    }
+
+  }
+
+  wBit(1);
+  wLength(0xff);
+  wFlush();
+
+  int margin = (maxDiff - (i - put));
+
+  return margin;
+}
+
+
+// Frees the global work buffers of a crunch run and clears the pointers, so
+// calling it again is harmless.
+static void releaseWorkBuffers(void)
+{
+  free(ibuf);
+  ibuf = NULL;
+  free(obuf);
+  obuf = NULL;
+  free(context);
+  context = NULL;
+  free(link);
+  link = NULL;
+  free(rleInfo);
+  rleInfo = NULL;
+}
+
+// Packs aSource into aTarget.
+//
+//  aSource       input file; its first two bytes are the load address
+//                (low, high), the rest is the data to pack.
+//  aTarget       receives freshly allocated data and its size; name is set
+//                to NULL (writeFile() fills it in later).
+//  address       start address for an executable, or the relocation
+//                address when isRelocated is set.
+//  isExecutable  prepend the decruncher stub (decrCode).
+//  isRelocated   place a non-executable file so that it ends at address.
+//
+// Returns true on success. Returns false, after printing a message, when
+// the input holds no data beyond its load address or memory runs out; in
+// that case aTarget->data is NULL.
+bool crunch(File *aSource,
+	    File *aTarget,
+	    uint address,
+	    bool isExecutable,
+	    bool isRelocated)
+{
+  uint i;
+  byte *target;
+  size_t obufSize;
+
+  aTarget->name = NULL;
+  aTarget->data = NULL;
+  aTarget->size = 0;
+
+  // Two bytes of load address plus at least one data byte are required;
+  // otherwise size - 2 underflows or the passes below read ibuf[-1].
+  if(aSource->size < 3) {
+    printf("Error (C-1): Input file is too short, aborting.\n");
+    return false;
+  }
+
+  ibufSize = aSource->size - 2;
+
+  // Worst case a match costs 30 bits for 2 bytes and a literal 10 bits per
+  // byte, so two output bytes per input byte plus some slack is always
+  // enough. Never go below the historical fixed buffer size.
+  obufSize = 2 * (size_t)ibufSize + 16;
+  if(obufSize < memSize) {
+    obufSize = memSize;
+  }
+
+  // calloc: the passes below read fields they never wrote (node.offset,
+  // RLEInfo.value), so start from all-zero tables.
+  ibuf = (byte*)malloc(ibufSize);
+  context = (node*)calloc(ibufSize, sizeof(node));
+  link = (uint*)calloc(ibufSize, sizeof(uint));
+  rleInfo = (RLEInfo*)calloc(ibufSize, sizeof(RLEInfo));
+  obuf = (byte*)malloc(obufSize);
+
+  if(ibuf == NULL || context == NULL || link == NULL ||
+     rleInfo == NULL || obuf == NULL) {
+    printf("Error (C-2): Out of memory, aborting.\n");
+    releaseWorkBuffers();
+    return false;
+  }
+
+  // Load ibuf (the data after the two-byte load address)
+  for(i = 0; i < ibufSize; ++i) {
+    ibuf[i] = aSource->data[i + 2];
+  }
+
+  setupHelpStructures();
+  findMatches();
+  int margin = writeOutput();
+
+  uint packLen = put;
+  uint fileLen = put;
+  uint decrLen = 0;
+  if(isExecutable) {
+    decrLen = DECRUNCHER_LENGTH;
+    fileLen += decrLen + 2;
+  } else {
+    fileLen += 4;
+  }
+
+  aTarget->size = fileLen;
+  aTarget->data = (byte*)malloc(aTarget->size);
+  target = aTarget->data;
+  if(target == NULL) {
+    printf("Error (C-2): Out of memory, aborting.\n");
+    aTarget->size = 0;
+    releaseWorkBuffers();
+    return false;
+  }
+
+  if(isExecutable) {
+    uint startAddress = 0x10000 - packLen;
+    uint transfAddress = fileLen + 0x6ff;
+
+    decrCode[0x1f] = transfAddress & 0xff; // Transfer from..
+    decrCode[0x20] = transfAddress >> 8;   //
+    decrCode[0xbc] = startAddress & 0xff; // Depack from..
+    decrCode[0xbd] = startAddress >> 8;   //
+    decrCode[0x85] = aSource->data[0]; // Depack to..
+    decrCode[0x86] = aSource->data[1]; //
+    decrCode[0xca] = address & 0xff; // Jump to..
+    decrCode[0xcb] = address >> 8;   //
+
+    // Load address $0801
+    target[0] = 0x01;
+    target[1] = 0x08;
+
+    for(i = 0; i < decrLen; ++i) {
+      target[i + 2] = decrCode[i];
+    }
+
+    for(i = 0; i < put; ++i) {
+      target[i + 2 + decrLen] = obuf[i];
+    }
+
+  } else { // Not executable..
+
+    // Experimental decision of start address
+//    uint startAddress = 0xfffa - packLen - 2;
+    uint startAddress = (aSource->data[1] << 8) | aSource->data[0];
+    startAddress += (ibufSize - packLen - 2 + margin);
+
+    if (isRelocated) {
+      startAddress = address - packLen - 2;
+    }
+
+    target[0] = startAddress & 0xff; // Load address
+    target[1] = startAddress >> 8;
+    target[2] = aSource->data[0]; // Depack to address
+    target[3] = aSource->data[1];
+
+    for(i = 0; i < put; ++i) {
+      target[i + 4] = obuf[i];
+    }
+  }
+
+  // Also frees obuf, which used to leak.
+  releaseWorkBuffers();
+
+  return true;
+}
--- a/loader/tools/b2/cruncher.h
+++ b/loader/tools/b2/cruncher.h
@ -0,0 +1,9 @@
+#ifndef _cruncher_h_
+#define _cruncher_h_
+
+#include "bb.h"
+#include "file.h"
+
+/* Packs aSource into aTarget; see cruncher.c. The parameter names and types
+   match the definition there (the fourth parameter is a bool flag). */
+bool crunch(File *aSource, File *aTarget, uint address, bool isExecutable, bool isRelocated);
+
+#endif // _cruncher_h_
--- a/loader/tools/b2/file.c
+++ b/loader/tools/b2/file.c
@ -0,0 +1,72 @@
+#include "file.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Releases the name and data buffers owned by aFile. The File struct
+   itself is not freed and its pointers are left unchanged. */
+void freeFile(File *aFile)
+{
+  char *label = aFile->name;
+  byte *payload = aFile->data;
+
+  free(label);
+  free(payload);
+}
+
+/*
+ * Reads the whole file fileName into aFile.
+ *
+ * On success aFile owns a copy of the name and the file contents (release
+ * them with freeFile) and true is returned.
+ * On failure false is returned, nothing is left allocated, and name and
+ * data are NULL, so no clean-up is needed.
+ */
+bool readFile(File *aFile, const char *fileName)
+{
+  FILE *fp = NULL;
+  struct stat fileStatus;
+
+  aFile->data = NULL;
+  aFile->size = 0;
+
+  aFile->name = (char *)strdup(fileName);
+  if(aFile->name == NULL) {
+    return false;
+  }
+
+  if(stat(aFile->name, &fileStatus) == -1) {
+    goto fail;
+  }
+  aFile->size = fileStatus.st_size;
+
+  fp = fopen(aFile->name, "rb");
+  if(fp == NULL) {
+    goto fail;
+  }
+
+  aFile->data = (byte *)malloc(aFile->size);
+  if(aFile->data == NULL) {
+    goto fail;
+  }
+
+  if(fread(aFile->data, 1, aFile->size, fp) != aFile->size) {
+    goto fail;
+  }
+
+  fclose(fp);
+  return true;
+
+fail:
+  /* Single clean-up path: close the stream and release what was allocated,
+     leaving no dangling pointers behind. */
+  if(fp != NULL) {
+    fclose(fp);
+  }
+  free(aFile->data);
+  aFile->data = NULL;
+  free(aFile->name);
+  aFile->name = NULL;
+  aFile->size = 0;
+  return false;
+}
+
+/*
+ * Writes aFile->data (aFile->size bytes) to a file named fileName + ".b2".
+ *
+ * aFile->name is set to the newly allocated output name (NULL if that
+ * allocation fails); any previous value is overwritten without being freed.
+ * The name stays allocated on failure so the caller can report it.
+ * Returns true only if every byte was written and the file closed cleanly.
+ */
+bool writeFile(File *aFile, const char *fileName)
+{
+  static const char suffix[] = ".b2";
+  FILE *fp = NULL;
+  size_t length;
+  bool ok;
+
+  length = strlen(fileName);
+  aFile->name = (char *)malloc(length + sizeof(suffix));
+
+  if(aFile->name == NULL){
+    return false;
+  }
+
+  memcpy(aFile->name, fileName, length);
+  memcpy(aFile->name + length, suffix, sizeof(suffix)); /* includes the NUL */
+
+  fp = fopen(aFile->name, "wb");
+  if(fp == NULL) {
+    return false;
+  }
+
+  ok = (fwrite(aFile->data, 1, aFile->size, fp) == aFile->size) ? true : false;
+
+  /* Buffered data is only flushed at fclose, so a write error can surface
+     here even when fwrite reported success. */
+  if(fclose(fp) != 0) {
+    ok = false;
+  }
+
+  return ok;
+}
--- a/loader/tools/b2/file.h
+++ b/loader/tools/b2/file.h
@ -0,0 +1,19 @@
+#ifndef _file_h_
+#define _file_h_
+
+#include "bb.h"
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+/* An in-memory file: its name and its complete contents. Both buffers are
+   heap-allocated and released together by freeFile(). */
+typedef struct {
+  char *name;  /* file name */
+  size_t size; /* number of bytes in data */
+  byte *data;  /* file contents */
+} File;
+
+/* Frees aFile->name and aFile->data. */
+void freeFile(File *aFile);
+/* Loads fileName into aFile; returns false on failure. */
+bool readFile(File *aFile, const char *fileName);
+/* Writes aFile->data to fileName with ".b2" appended; returns false on failure. */
+bool writeFile(File *aFile, const char *fileName);
+
+#endif // _file_h_
--- a/loader/tools/bitnax-07a8c67/lz.c
+++ b/loader/tools/bitnax-07a8c67/lz.c
--- a/loader/tools/cc1541/LICENSE.txt
+++ b/loader/tools/cc1541/LICENSE.txt
@ -0,0 +1,21 @@
+Copyright (c) 2008-2022 JackAsser, Krill, Claus, Björn Esser
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/loader/tools/cc1541/Makefile
+++ b/loader/tools/cc1541/Makefile
@ -0,0 +1,102 @@
+# Build, test, install and packaging rules for cc1541.
+
+# Flags this project always wants; they are prepended to the user's CFLAGS
+# below.
+CC1541_CFLAGS = -std=c99 -pipe -O2 -Wall -Wextra -pedantic
+
+ALL_TARGETS = cc1541
+
+# Set ENABLE_MAN to any non-empty value to also build and install the man page.
+ifneq ($(ENABLE_MAN),)
+ALL_TARGETS += man
+endif
+
+# Set ENABLE_WERROR to any non-empty value to turn warnings into errors.
+ifneq ($(ENABLE_WERROR),)
+CC1541_CFLAGS += -Werror
+endif
+
+# `override` keeps the project flags even when CFLAGS is given on the command
+# line; the user's flags come last.
+override CFLAGS := $(CC1541_CFLAGS) $(CFLAGS)
+
+# Installation directories (overridable).
+prefix ?= /usr/local
+bindir ?= $(prefix)/bin
+mandir ?= $(prefix)/share/man
+
+INSTALL ?= install
+
+# Version string taken from the quoted text on the 'define VERSION' line of
+# cc1541.c.
+VERSION := $(shell grep 'define VERSION' cc1541.c | cut -d\" -f2)
+
+# Files that go into the distribution archives.
+CC1541_SRC := Makefile transwarp\ v0.84.prg transwarp\ v0.86.prg $(wildcard *.c *.h *.in *.sln *.vcxproj* LICENSE* README*)
+
+all: $(ALL_TARGETS)
+
+# No recipe: built by make's built-in rule for making a program from a .c file.
+cc1541: cc1541.c
+
+# Man page: substitute the version into the template, then convert with a2x.
+cc1541.1.txt: cc1541.1.txt.in
+	sed -e "s/@@VERSION@@/$(VERSION)/g" < cc1541.1.txt.in > cc1541.1.txt
+
+cc1541.1: cc1541.1.txt
+	a2x -d manpage -f manpage cc1541.1.txt
+
+man: cc1541.1
+
+# No recipe: built by the same built-in rule as cc1541.
+test_cc1541: test_cc1541.c
+
+# Run the test program against the freshly built binary.
+check: cc1541 test_cc1541
+	./test_cc1541 ./cc1541
+
+test: check
+
+install: all
+	$(INSTALL) -Dpm 0755 ./cc1541 $(DESTDIR)$(bindir)/cc1541
+ifneq ($(ENABLE_MAN),)
+	$(INSTALL) -Dpm 0644 ./cc1541.1 $(DESTDIR)$(mandir)/man1/cc1541.1
+endif
+
+# Distribution archives: copy the sources into a versioned staging directory,
+# archive it, then remove the directory again.
+cc1541-$(VERSION).tar: $(CC1541_SRC)
+	rm -rf cc1541-$(VERSION)/ *~ README.md.T
+	mkdir -p cc1541-$(VERSION)
+	cp -a $(CC1541_SRC) cc1541-$(VERSION)/
+	chmod 0644 cc1541-$(VERSION)/*
+	tar cf cc1541-$(VERSION).tar cc1541-$(VERSION)/
+	rm -rf cc1541-$(VERSION)/
+
+cc1541-$(VERSION).tar.bz2: cc1541-$(VERSION).tar
+	bzip2 -9cz < cc1541-$(VERSION).tar > cc1541-$(VERSION).tar.bz2
+
+cc1541-$(VERSION).tar.gz: cc1541-$(VERSION).tar
+	gzip -9c < cc1541-$(VERSION).tar > cc1541-$(VERSION).tar.gz
+
+cc1541-$(VERSION).tar.xz: cc1541-$(VERSION).tar
+	xz -ce < cc1541-$(VERSION).tar > cc1541-$(VERSION).tar.xz
+
+cc1541-$(VERSION).zip: $(CC1541_SRC)
+	rm -rf cc1541-$(VERSION)/ *~ README.md.T
+	mkdir -p cc1541-$(VERSION)
+	cp -a $(CC1541_SRC) cc1541-$(VERSION)/
+	chmod 0644 cc1541-$(VERSION)/*
+	zip -9r cc1541-$(VERSION).zip cc1541-$(VERSION)/
+	rm -rf cc1541-$(VERSION)/
+
+dist-bz2: cc1541-$(VERSION).tar.bz2
+dist-gz:  cc1541-$(VERSION).tar.gz
+dist-xz:  cc1541-$(VERSION).tar.xz
+dist-zip: cc1541-$(VERSION).zip
+dist-all: dist-bz2 dist-gz dist-xz dist-zip
+
+dist: dist-gz dist-zip
+
+# Unpack the .tar.gz and make sure it builds and passes its tests.
+dist-check: dist
+	tar xf cc1541-$(VERSION).tar.gz
+	$(MAKE) -C cc1541-$(VERSION)/ all check
+	rm -rf cc1541-$(VERSION)/
+
+# Reformat the C sources in place with astyle.
+codestyle: cc1541.c test_cc1541.c
+	astyle --style=kr -n -s -z2 cc1541.c test_cc1541.c
+
+# Re-wrap the text files to 70 columns and strip trailing blanks, in place.
+wrap: cc1541.1.txt.in LICENSE.txt README.md
+	for f in $^; do \
+	  fold -s -w 70 < $$f | perl -pe 's/[\t\040]+$$//' > $$f.T; \
+	  mv -f $$f.T $$f; \
+	done
+
+clean:
+	rm -rf cc1541-$(VERSION)/ *~ README.md.T *.o *.orig cc1541 test_cc1541 cc1541-$(VERSION).* cc1541.1 cc1541.1.txt
+
+.PHONY: all check clean codestyle dist dist-all dist-bz2 dist-check dist-gz dist-xz dist-zip install man test wrap
+
+# The tar and zip rules share the same staging directory, so they must not
+# run at the same time.
+# NOTE(review): prerequisites on .NOTPARALLEL are presumably only honoured by
+# newer GNU make releases - confirm the minimum version this relies on.
+.NOTPARALLEL: cc1541-$(VERSION).tar cc1541-$(VERSION).zip
--- a/loader/tools/cc1541/README.md
+++ b/loader/tools/cc1541/README.md
@ -0,0 +1,147 @@
+# README #
+
+This is cc1541, a tool for creating Commodore 1541 Floppy disk
+images in D64, D71 or D81 format with custom sector interleaving
+etc.  Also supports extended tracks 35-40 using either SPEED DOS or
+DOLPHIN DOS BAM-formatting. Furthermore supports writing Transwarp
+disk images for the fantastically fast loader by Krill.
+
+Originally written by JackAsser, with improvements and fixes by
+Krill, some more improvements by Claus and further improvements by
+Björn Esser.
+
+The program is provided under the terms of the MIT license, please
+refer to the included LICENSE.txt file for its terms and conditions.
+
+The public source code repository can be found here:
+https://bitbucket.org/PTV_Claus/cc1541/src/master/
+
+## Usage examples ##
+
+* "cc1541 -h" shows the command line help
+* "cc1541 image.d64" lists the content of image.d64
+* "cc1541 -f program -w program.prg image.d64" adds the file
+  program.prg to image.d64 (or creates the image if it does not
+  exist yet) using the name "program"
+* "cc1541 -f program1 -w program1.prg -f program2 -w program2.prg
+  image.d64" adds two files under the names program1 and program2
+* "cc1541 -s 4 -f program -w program.prg image.d64" writes a file
+  with a dedicated sector interleave for a fastloader (the best
+  value depends on the used fastloader and its configuration)
+* "cc1541 -f game -W game.prg -f 'transwarp v0.86' -w transwarp.prg
+  Transwarp-Game.d64" creates a Transwarp disk image with the
+  required boot file.
+* "cc1541 -T DEL -f ---------------- -L image.d64" creates a DEL 
+  entry as separator.
+
+## Version history ##
+
+v4.0
+
+* The default handling for large tail gaps has been removed, as it
+  does not provide any advantage and was only there to be as close
+  as possible to CBM DOS behaviour. In fact, it actually slows down 
+  fast loaders. This warrants a major version number increase.
+* -H switch added to set BAM messages at BAM offset $ab
+* -F with negative values now specifies a track skew
+* -B cannot be used with transwarp files anymore, as the loader
+  relies on correct block sizes in the directory
+* Bugfix: the BAM allocation for SPEED DOS and DOLPHIN DOS was 
+  wrong (mixed up between the two and also shifted by 4 bytes)
+* Bugfix: Data from unused blocks could leak into the last block 
+  of a new file behind the actual file data
+
+v3.4
+
+* Support for Transwarp 0.86 and later
+* Commandline option -R added for restoring deleted or formatted
+  files
+* -L switch added to create directory entries without writing a 
+  file
+* -a switch added to print a cc1541 command line that re-creates
+  the directory art of the given disk image
+* -T now also allows a number as parameter
+* Files with file type DEL will now actually be written, use
+  -L if you do not want that
+* Improved verbose allocation printout
+* More robustness against invalid t/s links
+* Added support for underscore in PETSCII filenames
+* Bugfix: no endless loops for cyclic directory chains anymore
+
+v3.3
+
+* Transwarp encoding added
+* Verbose mode displays file allocation on unmodified images
+* Optimised non-standard interleave scheme
+* Added unicode printout option for PETSCII, special thanks to
+  Jonas Minnberg for helping with Linux support and inverse
+  characters!
+* Output file is not touched when no change is requested
+* Bugfix: Fixed crash on write protected output file
+* Bugfix: Spacing in directory printout was not correct when
+  file size is more than 999 blocks
+
+v3.2
+
+* -N switch added to allow duplicate file names
+* Trying to create a loop file with the same name as the referenced
+  file now throws an error, unless -N is also specified
+* Changed default next file first sector on new track to 0 to align
+  with Commodore DOS
+* Interleave violations are now shown in block allocation printout
+* Block allocation for loop files is not displayed anymore when
+  referenced file was also written
+* Bugfix: Fixed file index printout for block allocation
+* Bugfix: Correct interleave is now displayed for loop files when
+  referenced file was also written
+
+v3.1
+
+* Bugfix: overwriting files did not work correctly when there was a
+  free slot in the directory before the file to overwrite
+* Bugfix: Filetype, open and protected flags were not reset after
+  writing a file
+* Bugfix: default disk name and id were wrong in command line help
+
+v3.0
+
+* ASCII to PETSCII conversion added, this breaks backward
+  compatibility (and therefore warrants a major version increase)!
+* Support for D81 images
+* Default printout is now a full directory similar to how it would be
+  displayed on a Commodore machine
+* -v switch added for verbose output of file and block allocation
+* -M switch added to specify number of characters for filename hash
+  calculation for latest Krill loader
+* -m switch added to skip filename collision check
+* -B switch added to allow setting the displayed file size
+* -o switch added to prevent overwriting of existing files on an image
+* -V switch added to validate images before editing them
+* -T switch added to allow setting the file type
+* -O switch added to allow setting the open flag
+* -P switch added to allow setting the protected flag
+* Hex escapes are now also allowed for disk name and ID
+* When no disk file name is provided, only the base name of the input
+  file is used as disk file name instead of the full path
+* Bugfix: fixed memory access issue for filenames with 16 letters
+* Bugfix: fixed endless loop when reaching track 53 on a D71
+* Bugfix: fixed that (shadow) directory track would be used for data
+  when the shadow directory is a neighbour of the actual directory
+* Bugfix: G64 output is now an optional additional output using -g,
+  avoiding the utterly broken reading of G64 files
+* Bugfix: loop files have actual file size per default instead of 0
+* Bugfix: printouts to stderr and stdout are more consistent now
+
+v2.0
+
+* The first version with a release number
+* All existing modifications consolidated (hopefully)
+* G64 output dependent on output file name instead of a source code
+  define
+* Converted to ANSI C99
+* MSVC build files added
+* getopt removed
+* Simple test suite added
+* Bugfix: hex escape was not considered for file overwrite detection
+* Bugfix: first sector per track was ignored for track 1
+* Bugfix: default sector interleave was ignored for first file
--- a/loader/tools/cc1541/cc1541
+++ b/loader/tools/cc1541/cc1541
--- a/loader/tools/cc1541/cc1541.1.txt.in
+++ b/loader/tools/cc1541/cc1541.1.txt.in
@ -0,0 +1,209 @@
+= cc1541(1)
+Björn Esser
+:doctype:       manpage
+:Author:        JackAsser, Krill, Claus, Björn Esser
+:Email:         besser82@fedoraproject.org
+:manvolnum:     1
+:manversion:    v@@VERSION@@
+:manmanual:     cc1541 manual
+:mansource:     cc1541
+:man-linkstyle: pass:[blue R < >]
+
+== Name
+
+cc1541 - A tool for creating Commodore 1541 Floppy disk images
+in D64, D71 or D81 format with custom sector interleaving etc.
+Also supports extended tracks 35-40 using either SPEED DOS or
+DOLPHIN DOS BAM-formatting.
+
+== Synopsis
+
+*cc1541* [_options_] image.[_d64|d71|d81_]
+
+== Options
+
+*-n diskname*::
+  Disk name, default='cc1541'.
+
+*-i id*::
+  Disk ID, default='00 2a'.
+
+*-H message*::
+  Hidden BAM message. Only for D64 (up to 85 chars) or SPEED DOS
+(up to 20 chars).
+
+*-w localname*::
+  Write local file to disk. If no filename is set, the local
+name is used.  After the file is written, the filename is unset.
+
+*-W localname*::
+  Like -w, but encode file in Transwarp format.
+
+*-K key*::
+  Set an encryption key for Transwarp files, a string of up to 29
+  characters.
+
+*-f filename*::
+  Use filename as name when writing next file, use prefix _#_ to
+include arbitrary PETSCII characters (e.g. -f "START#a0,8,1").
+
+*-o*::
+  Do not overwrite if file with same name exists already.
+
+*-V*::
+  Do not modify image unless it is in valid CBM DOS format.
+
+*-T filetype*::
+  Filetype for next file, allowed parameters are PRG, SEQ, USR, REL
+and DEL, or a decimal number between 0 and 255. Default is PRG.
+
+*-P*::
+  Set write protect flag for next file.
+
+*-O*::
+  Set open flag for next file.
+
+*-N*::
+  Force creation of a new directory entry, even if a file with the
+  same name exists already.
+
+*-l filename*::
+  Write loop file (an additional dir entry) to existing file to disk,
+set filename with -f.
+
+*-L*::
+  Add dir entry without writing file (track and sector will be 0),
+requires a filename given with -f.
+
+*-B numblocks*::
+  Write the given value as file size in blocks to the directory for
+the next file.
+
+*-M numchars*::
+  Hash computation maximum filename length, this must match loader
+option FILENAME_MAXLENGTH in Krill's loader.  Default is 16.
+
+*-m*::
+  Ignore filename hash collisions, without this switch a collision
+results in an error.
+
+*-d track*::
+  Maintain a shadow directory (copy of the actual directory without a
+valid BAM).
+
+*-t*::
+  Use directory track to also store files (makes -x useless) (default no).
+
+*-u numblocks*::
+  When using -t, amount of dir blocks to leave free (default=2).
+
+*-x*::
+  Don't split files over directory track hole (default split files).
+
+*-F value*::
+  Next file first sector on a new track (default=0).  Any negative
+value assumes aligned tracks and uses current sector + interleave - value.
+After each file, the value falls back to the default.  Not applicable
+for D81.
+
+*-S value*::
+  Default sector interleave, default=10. Not applicable for D81.
+
+*-s value*::
+  Next file sector interleave, valid after each file. The interleave value
+falls back to the default value set by -S after the first sector of the
+next file. Not applicable for D81.
+
+*-e*::
+  Start next file on an empty track (default start sector is current
+sector plus interleave).
+
+*-E*::
+  Try to fit file on a single track.
+
+*-r track*::
+  Restrict next file blocks to the specified track or higher.
+
+*-b sector*::
+  Set next file beginning sector to the specified value.  Not
+applicable for D81.
+
+*-c*::
+  Save next file cluster-optimized (d71 only).
+
+*-4*::
+  Use tracks 35-40 with SPEED DOS BAM formatting.
+
+*-5*::
+  Use tracks 35-40 with DOLPHIN DOS BAM formatting.
+
+*-R level*::
+  Try to restore deleted and formatted files. +
+level 0: Only restore all dir entries without touching any t/s links. +
+level 1: Fix dir entries for files with valid t/s chains. +
+level 2: Also add wild sector chains with valid t/s chains. +
+level 3: Also fix dir entries with invalid t/s chains. +
+level 4: Also add and fix wild invalid t/s chains. +
+level 5: Also add reasonable wild single blocks.
+
+*-g filename*::
+  Write additional g64 output file with given name.
+
+*-a*::
+Print command line options that would create the same directory as the
+one in the given image (for directory art import).
+
+*-U mapping*::
+  Print PETSCII as Unicode (requires Unicode 13.0 font, e.g. UNSCII).
+Use mapping 0 for ASCII output, 1 for upper case, 2 for lower case,
+default is 0.
+
+*-q*::
+  Be quiet.
+
+*-v*::
+  Be verbose.
+
+*-h*::
+  Print command line help.
+
+== Exit status
+
+*0*::
+  Success.
+
+*-1*::
+  Failure.
+
+== Resources
+
+Project web site: https://bitbucket.org/PTV_Claus/cc1541/src
+
+== Bug Reports
+
+Please report bugs to:
+https://bitbucket.org/PTV_Claus/cc1541/issues/new
+
+== License
+
+Copyright (C) 2008-2021 {author}. +
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/loader/tools/cc1541/cc1541.c
+++ b/loader/tools/cc1541/cc1541.c
--- a/loader/tools/cc1541/cc1541.sln
+++ b/loader/tools/cc1541/cc1541.sln
@ -0,0 +1,38 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cc1541", "cc1541.vcxproj", "{230D43EC-B2E8-447B-8605-D166B69FD94B}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_cc1541", "test_cc1541.vcxproj", "{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Debug|x64.ActiveCfg = Debug|x64
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Debug|x64.Build.0 = Debug|x64
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Debug|x86.ActiveCfg = Debug|Win32
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Debug|x86.Build.0 = Debug|Win32
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Release|x64.ActiveCfg = Release|x64
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Release|x64.Build.0 = Release|x64
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Release|x86.ActiveCfg = Release|Win32
+		{230D43EC-B2E8-447B-8605-D166B69FD94B}.Release|x86.Build.0 = Release|Win32
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Debug|x64.ActiveCfg = Debug|x64
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Debug|x64.Build.0 = Debug|x64
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Debug|x86.ActiveCfg = Debug|Win32
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Debug|x86.Build.0 = Debug|Win32
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Release|x64.ActiveCfg = Release|x64
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Release|x64.Build.0 = Release|x64
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Release|x86.ActiveCfg = Release|Win32
+		{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
--- a/loader/tools/cc1541/cc1541.vcxproj
+++ b/loader/tools/cc1541/cc1541.vcxproj
@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{230D43EC-B2E8-447B-8605-D166B69FD94B}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>cc1541</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IntDir>$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IntDir>$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="cc1541.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
--- a/loader/tools/cc1541/cc1541.vcxproj.filters
+++ b/loader/tools/cc1541/cc1541.vcxproj.filters
@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="cc1541.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
--- a/loader/tools/cc1541/test_cc1541.c
+++ b/loader/tools/cc1541/test_cc1541.c
--- a/loader/tools/cc1541/test_cc1541.vcxproj
+++ b/loader/tools/cc1541/test_cc1541.vcxproj
@ -0,0 +1,153 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{4206D44B-8628-4E98-9F6F-775C8AC5E1B9}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>test_cc1541</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IntDir>$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="test_cc1541.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
--- a/loader/tools/cc1541/test_cc1541.vcxproj.filters
+++ b/loader/tools/cc1541/test_cc1541.vcxproj.filters
@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="test_cc1541.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
--- a/loader/tools/cc1541/transwarp
+++ b/loader/tools/cc1541/transwarp
--- a/loader/tools/cc1541/transwarp
+++ b/loader/tools/cc1541/transwarp
--- a/loader/tools/compressedfileconverter.pl
+++ b/loader/tools/compressedfileconverter.pl
@ -0,0 +1,85 @@
+#!/usr/bin/env perl
+
+=head1 NAME
+
+compressedfileconverter.pl
+
+=head1 DESCRIPTION
+
+This script converts files compressed by some crunchers
+so they can be loaded and decompressed in-place, on the fly.
+
+=head1 SYNOPSIS
+
+  compressedfileconverter.pl [dnx|lc] uncompressed_infile compressed_infile outfile
+
+=cut
+
+use strict;
+use warnings;
+
+
+# Safety margin in bytes that is added to the computed load address, keyed
+# by cruncher type; the empty key applies when no type argument is given.
+my %margins = (
+    ''    => 0,
+    'dnx' => 4,
+    'lc'  => 3
+);
+
+my $type = '';
+if (scalar @ARGV == 4) {
+    $type = shift @ARGV;
+}
+
+# exists() replaces the smartmatch operator (~~), which is deprecated and
+# warns or fails on current perls; the check itself is unchanged.
+if (!exists $margins{$type} or (scalar @ARGV != 3)) {
+   die "Usage: $0 [dnx|lc] uncompressed_infile compressed_infile outfile\n";
+}
+
+my $uncompressed_infile = shift @ARGV;
+my $compressed_infile   = shift @ARGV;
+my $converted_outfile   = shift @ARGV;
+
+
+# Uncompressed file: a 2-byte load address followed by the data.
+open(my $uncompressed_fh, '<', $uncompressed_infile)
+    or die "\nCan't open uncompressed file $uncompressed_infile for reading: $!\n";
+binmode $uncompressed_fh;
+
+my $uncompressed_address;
+(read($uncompressed_fh, $uncompressed_address, 2) // 0) == 2
+    or die "\nCan't read load address from uncompressed file $uncompressed_infile\n";
+my $uncompressed_data;
+my $uncompressed_size = read($uncompressed_fh, $uncompressed_data, 65536);
+defined $uncompressed_size
+    or die "\nCan't read uncompressed file $uncompressed_infile: $!\n";
+# "v" = unsigned 16-bit little-endian. The former "S" used the host's native
+# byte order and would give a wrong address on big-endian machines.
+$uncompressed_address = unpack("v", $uncompressed_address);
+close $uncompressed_fh;
+
+
+open(my $compressed_fh, '<', $compressed_infile)
+    or die "\nCan't open compressed file $compressed_infile for reading: $!\n";
+binmode $compressed_fh;
+
+my $compressed_data;
+if (!($type eq 'lc')) {
+    # Every type except 'lc' has a 2-byte address in front of the
+    # compressed data; it is read and discarded.
+    my $compressed_address;
+    read($compressed_fh, $compressed_address, 2);
+}
+my $compressed_size = read($compressed_fh, $compressed_data, 65536);
+defined $compressed_size
+    or die "\nCan't read compressed file $compressed_infile: $!\n";
+close $compressed_fh;
+
+open(my $converted_fh, '>', $converted_outfile)
+    or die "\nCan't open converted file $converted_outfile for writing: $!\n";
+binmode $converted_fh;
+
+my $offset = 0;
+my $safety_margin = $margins{$type};
+$compressed_size += $offset;
+# New load address: the compressed data ends $safety_margin bytes after the
+# end of the uncompressed data.
+my $converted_address = $uncompressed_address + $uncompressed_size + $safety_margin - $compressed_size;
+
+
+$converted_address = pack("v", $converted_address);
+print $converted_fh $converted_address;
+if ($offset) {
+    $uncompressed_address = pack("v", $uncompressed_address);
+    print $converted_fh $uncompressed_address;
+}
+print $converted_fh $compressed_data;
+close($converted_fh)
+    or die "\nCan't write converted file $converted_outfile: $!\n";
+
+exit 0;
--- a/loader/tools/dali/Makefile
+++ b/loader/tools/dali/Makefile
@ -0,0 +1,55 @@
+# Tools and flags. Values assigned with ?= are only defaults and can be
+# overridden from the environment or the make command line.
+ACME ?= acme
+ACME_OPTS ?= -f cbm
+DALI_CFLAGS ?= $(CFLAGS) -Os -Wall
+# NOTE(review): GNU make predefines CC, so this ?= fallback normally never
+# applies -- confirm whether gcc is actually required here.
+CC ?= gcc
+# Settings handed to the salvador sub-build (see the salvador.a rule).
+SALVADOR_PATH = salvador
+SALVADOR_CFLAGS = -O3 -g -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc -fPIC
+SALVADOR_CC = gcc
+# Shell glob of the object files archived into salvador.a.
+SALVADOR_OBJS = $(SALVADOR_PATH)/obj/src/*.o $(SALVADOR_PATH)/obj/src/libdivsufsort/lib/*.o
+
+# Verbosity: with V=1 the recipe commands are echoed; otherwise VR expands
+# to @ and the commands it prefixes are hidden.
+V ?= 0
+ifeq ($(V),1)
+VR:=
+else
+VR:=@
+endif
+
+# all and clean name actions, not files; declaring them phony keeps a stray
+# file called "all" or "clean" from silently disabling the rule.
+.PHONY: all clean
+all: dali
+
+# Build the dali executable. The generated headers stay prerequisites so
+# they are (re)created first, but only the C source and the library are
+# handed to the compiler: dali.c pulls the headers in via #include, and
+# header files do not belong on a compile-and-link command line.
+dali: dali.c sfx_small.h sfx_fast.h salvador.a
+	@echo "Building dali..."
+	$(VR)$(CC) $(DALI_CFLAGS) -o $@ $(filter-out %.h,$^)
+	$(VR)strip $@
+
+# Build salvador through its own Makefile, archive its objects into a static
+# library, and rename its main symbol to salvador_main inside the archive.
+# $(MAKE) instead of a literal `make` propagates -j/-n and the
+# jobserver to the sub-build and uses the same make binary.
+salvador.a:
+	@echo "Creating salvador library..."
+	$(VR)$(MAKE) CFLAGS='$(SALVADOR_CFLAGS)' CC='$(SALVADOR_CC)' -C $(SALVADOR_PATH)
+	$(VR)ar rcs $@ $(SALVADOR_OBJS)
+	$(VR)objcopy --redefine-sym main=salvador_main $@
+
+# Assemble depack.asm (the first prerequisite, $<) with ACME. The remaining
+# prerequisites only trigger a rebuild -- presumably depack.asm includes
+# them; confirm against the source.
+depack.prg: depack.asm dzx0.asm ../../benchmark/files/a.lz
+	$(ACME) $(ACME_OPTS) -o $@ $<
+
+# Generate sfx_small.h from sfx.asm:
+#  1. assemble to the temporary binary "sfx_small" plus listing "sfx_small.lst"
+#  2. turn the listing lines containing DALI into "#define NAME 0x..." lines
+#     (strip ';' comments and whitespace, rewrite "=$" to " 0x")
+#  3. append the binary as the C array decruncher_small[]
+#  4. delete the temporary binary and listing
+sfx_small.h: sfx.asm
+	@echo "Compiling sfx code for dali..."
+	$(VR)$(ACME) $(ACME_OPTS) -l $(basename $@).lst -o $(basename $@) $<
+	$(VR)grep 'DALI' $(basename $@).lst | sed -e 's/[[:space:]]*;[[:space:]]*.*//g' -e 's/[[:space:]]*//g' -e 's/\=\$$/ 0x/g' -e 's/^/#define /' > $@
+	$(VR)echo 'static const char decruncher_small[] = {' >> $@
+	@#create a hexdump, add a marker (+) where lines are truncated (each 50 chars = 8 bytes per line), substitute marker (+) with newline (use tr here, as bsd-sed fails on \n), add identation to each line
+	$(VR)hexdump -ve '1/1 "0x%.2x,"' $(basename $@) | sed -e 's/,$$/+/g' -e 's/.\{50\}/&+/g' | tr -s '+' '\n' | sed 's/^/&    /g' >> $@
+	$(VR)echo '};' >> $@
+	$(VR)rm $(basename $@).lst $(basename $@)
+
+# Generate sfx_fast.h from sfx.asm, assembled with -DSFX_FAST=1:
+#  1. assemble to the temporary binary "sfx_fast" plus listing "sfx_fast.lst"
+#  2. turn the listing lines containing DALI into "#define NAME 0x..." lines
+#     (strip ';' comments and whitespace, rewrite "=$" to " 0x")
+#  3. append the binary as the C array decruncher[]
+#  4. delete the temporary binary and listing
+sfx_fast.h: sfx.asm
+	@echo "Compiling sfx code for dali..."
+	$(VR)$(ACME) $(ACME_OPTS) -DSFX_FAST=1 -l $(basename $@).lst -o $(basename $@) $<
+	$(VR)grep 'DALI' $(basename $@).lst | sed -e 's/[[:space:]]*;[[:space:]]*.*//g' -e 's/[[:space:]]*//g' -e 's/\=\$$/ 0x/g' -e 's/^/#define /' > $@
+	$(VR)echo 'static const char decruncher[] = {' >> $@
+	@#create a hexdump, add a marker (+) where lines are truncated (each 50 chars = 8 bytes per line), substitute marker (+) with newline (use tr here, as bsd-sed fails on \n), add identation to each line
+	$(VR)hexdump -ve '1/1 "0x%.2x,"' $(basename $@) | sed -e 's/,$$/+/g' -e 's/.\{50\}/&+/g' | tr -s '+' '\n' | sed 's/^/&    /g' >> $@
+	$(VR)echo '};' >> $@
+	$(VR)rm $(basename $@).lst $(basename $@)
+
+# Remove everything this Makefile builds, then clean the salvador sub-build.
+# $(RM) (rm -f) stays quiet about files that do not exist; $(MAKE) keeps the
+# sub-make tied to the parent's flags and jobserver.
+clean:
+	$(VR)$(RM) dali sfx_small.h sfx_fast.h depack.prg salvador.a
+	$(VR)$(MAKE) -C $(SALVADOR_PATH) clean
--- a/loader/tools/dali/dali.c
+++ b/loader/tools/dali/dali.c
@ -0,0 +1,731 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sfx_small.h"
+#include "sfx_fast.h"
+
+/* Size in bytes of the packed and re-encoded work buffers (the unpacked buffer gets 2 more). */
+#define BUFFER_SIZE 65536  /* must be > MAX_OFFSET */
+/* Value last_offset starts with in reencode_packed_stream(). */
+#define INITIAL_OFFSET 1
+
+#define FALSE 0
+#define TRUE 1
+/* Bit order of re-encoded control bytes: 1 = mask runs from 0x80 downwards, otherwise from 0x01 upwards. */
+#define DALI_BITS_LEFT 0
+/* Non-zero: Elias-gamma values with 8 or more payload bits are emitted with their low 8 bits first. */
+#define DALI_ELIAS_LE 1
+
+/* All state of one dali run: work buffers, bit reader/writer state and command line settings. */
+typedef struct ctx {
+    /* work buffers, allocated in do_reencode() */
+    unsigned char *packed_data;     /* stream produced by salvador, read back from the output file */
+    unsigned char *reencoded_data;  /* stream in dali's own bit layout, this is what gets written */
+    unsigned char *unpacked_data;   /* input file contents; pointer is advanced past skipped bytes */
+    /* reader state for packed_data */
+    size_t packed_index;
+    size_t packed_size;
+    int packed_bit_mask;
+    int packed_bit_value;
+    /* writer state for reencoded_data */
+    size_t reencoded_index;
+    int reencoded_bit_mask;
+    int reencoded_bit_value;        /* only ever reset to 0 in this file */
+    int reencoded_bit_index;        /* position of the control byte currently being filled */
+    size_t unpacked_index;
+    size_t unpacked_size;
+    int inplace;                    /* TRUE unless --no-inplace, --relocate-packed or --sfx is given */
+
+    /* file names */
+    char *output_name;
+    char *input_name;
+    char *prefix_name;              /* dictionary file (--prefix-file or a temporary file) */
+    char *clamped_name;             /* temporary file holding the byte range to be compressed */
+
+    /* load-address handling; the -1 defaults set in main() mean "not given" */
+    int cbm;                        /* FALSE when --binfile is given */
+    int cbm_orig_addr;
+    int cbm_packed_addr;
+    int cbm_range_from;             /* --from */
+    int cbm_range_to;               /* --to */
+    int cbm_relocate_packed_addr;   /* --relocate-packed */
+    int cbm_relocate_origin_addr;   /* --relocate-origin */
+    int cbm_prefix_from;            /* --prefix-from */
+
+    /* self-extracting output */
+    int sfx;                        /* TRUE when --sfx is given */
+    int sfx_addr;                   /* address given with --sfx */
+    int sfx_01;                     /* --01 value, -1 if not given */
+    int sfx_cli;                    /* --cli */
+    int sfx_small;                  /* --small */
+    int sfx_size;                   /* size of the decruncher copy in sfx_code */
+    char *sfx_code;                 /* patched copy of the decruncher */
+    int lz_bits;                    /* number of control bytes opened by write_reencoded_bit() */
+} ctx;
+
+/*
+ * Entry point of the bundled salvador packer. It is salvador's own main(), renamed when
+ * salvador.a is built, so it is declared with main()'s signature instead of an
+ * unprototyped void function (which is also an error with a "no parameters" reading of "()").
+ */
+int salvador_main(int argc, char *argv[]);
+
+/*
+ * Parse a non-negative number given on the command line.
+ *
+ * Accepted notations: "$<hex>", "0x<hex>" and plain decimal.
+ *   arg     - text to parse; NULL when the option was the last argument
+ *   argname - name of the option, used in the error message only
+ *   limit   - largest accepted value (inclusive)
+ *
+ * Returns the parsed value. A missing value, an empty digit string, trailing
+ * characters or a value above limit print an error and exit with status 1.
+ */
+static int read_number(char* arg, char* argname, int limit) {
+    unsigned long number = 0;
+    const char *digits = NULL;
+    char *end = NULL;
+    int base = 10;
+
+    if (arg != NULL) {
+        if (arg[0] == '$') {
+            digits = arg + 1;
+            base = 16;
+        } else if (arg[0] == '0' && arg[1] == 'x') {
+            digits = arg + 2;
+            base = 16;
+        } else if (arg[0] >= '0' && arg[0] <= '9') {
+            digits = arg;
+        }
+    }
+    if (digits != NULL) {
+        errno = 0;
+        number = strtoul(digits, &end, base);
+    }
+    /* nothing to parse, no digit consumed, or garbage after the number */
+    if (digits == NULL || end == digits || *end != '\0') {
+        /* never hand a NULL pointer to %s */
+        fprintf(stderr, "Error: no valid number given for argument %s (given value is: '%s')\n", argname, arg != NULL ? arg : "");
+        exit(1);
+    }
+    /* compare before narrowing to int so that huge values cannot wrap into range */
+    if (errno == ERANGE || limit < 0 || number > (unsigned long)limit) {
+        fprintf(stderr, "Error: Number '%s' out of range (0 - %d)\n", arg, limit);
+        exit(1);
+    }
+    return (int)number;
+}
+
+/*
+ * Write one byte to ofp. On failure an error is printed and the program exits with status 1.
+ * Only the low 8 bits of byte are written, as fputc() converts its argument to unsigned char.
+ */
+static void file_write_byte(int byte, FILE *ofp) {
+    /* fputc() returns the character as unsigned char, so test for EOF instead of comparing
+       with byte: values outside 0..255 would otherwise be reported as write errors */
+    if (fputc(byte, ofp) == EOF) {
+        fprintf(stderr, "Error: Cannot write output file\n");
+        perror("fputc");
+        exit(1);
+    }
+}
+
+/*
+ * Return the index of the highest set bit of value (0 for value 1, 7 for value 255, ...).
+ * Returns 0 for value 0.
+ */
+static inline unsigned bit_size(unsigned value) {
+    /* __builtin_clz(0) is undefined, while the portable loop yields 0: make both paths agree */
+    if (value == 0) return 0;
+#   ifdef __GNUC__
+    return ((sizeof(unsigned) * 8 - 1) ^ __builtin_clz(value));
+#   else
+    unsigned bits = 0;
+
+    while (value >>= 1)
+        ++bits;
+
+    return bits;
+#   endif
+}
+
+/*
+ * Append one byte to the re-encoded output and advance the write position.
+ * reencoded_data is allocated with BUFFER_SIZE bytes in do_reencode(); the program
+ * stops with an error instead of writing behind the end of that buffer.
+ */
+void write_reencoded_byte(ctx* ctx, int value) {
+    if (ctx->reencoded_index >= BUFFER_SIZE) {
+        fprintf(stderr, "Error: Reencoded data exceeds buffer size\n");
+        exit(1);
+    }
+    ctx->reencoded_data[ctx->reencoded_index++] = value;
+}
+
+/*
+ * Append one bit to the re-encoded stream.
+ * Bits are gathered in a control byte that sits inside the output buffer. As soon as the
+ * bit mask has left the low 8 bits, a new zero byte is reserved at the current write
+ * position and becomes the control byte that is filled next.
+ */
+void write_reencoded_bit(ctx* ctx, int value) {
+    const int msb_first = (DALI_BITS_LEFT == 1);
+
+    if ((ctx->reencoded_bit_mask & 255) == 0) {
+        /* no control byte open (or the last one is full): start a fresh one */
+        ctx->reencoded_bit_mask = msb_first ? 0x80 : 0x1;
+        /* remember where the control byte lives, bits are or-ed in later */
+        ctx->reencoded_bit_index = ctx->reencoded_index;
+        ctx->lz_bits++;
+        write_reencoded_byte(ctx, 0);
+    }
+
+    if (value) {
+        ctx->reencoded_data[ctx->reencoded_bit_index] |= ctx->reencoded_bit_mask;
+    }
+
+    /* step to the next bit position */
+    if (msb_first) ctx->reencoded_bit_mask >>= 1;
+    else ctx->reencoded_bit_mask <<= 1;
+}
+
+/*
+ * Write value as interlaced Elias-gamma code to the re-encoded stream.
+ * Every payload bit below the leading 1 is preceded by a 0 control bit; a final 1 control
+ * bit ends the number. If skip is non-zero, the very first control bit is not written.
+ * With DALI_ELIAS_LE set, values with 8 or more payload bits get their payload rearranged
+ * so that the low 8 bits are sent before the remaining high bits.
+ */
+void write_reencoded_interlaced_elias_gamma(ctx* ctx, int value, int skip) {
+    int bits = bit_size(value);
+    int i;
+
+    /* find the highest power of two that is <= value, i.e. the position of the leading 1 */
+    for (i = 2; i <= value; i <<= 1);
+    i >>= 1;
+
+    if (DALI_ELIAS_LE) {
+        if (bits >= 8) {
+            /* change bit-order, send LSB first */
+            /* remove preceding 1 first */
+            value = value & ((0xffff ^ i));
+            /* move LSB bits to the beginning */
+            value = (value >> 8) | ((value & 0xff) << (bits - 8));
+        }
+    }
+
+    /* payload bits from the one below the leading 1 down to bit 0 */
+    while ((i >>= 1) > 0) {
+        if (!skip) write_reencoded_bit(ctx, 0);
+        skip = 0;
+        write_reencoded_bit(ctx, ((value & i) > 0));
+    }
+    /* stop bit; suppressed only if skip is still pending (no payload bit was written) */
+    if (!skip) write_reencoded_bit(ctx, 1);
+}
+
+/* Fetch the byte at the current read position of the packed stream and step past it. */
+int read_byte(ctx* ctx) {
+    const size_t pos = ctx->packed_index;
+
+    ctx->packed_index = pos + 1;
+    return ctx->packed_data[pos];
+}
+
+/*
+ * Return the next bit (0 or 1) of the packed stream.
+ * Bits are taken from a buffered byte starting at bit 7; when the mask runs out,
+ * the next byte of the stream is loaded.
+ */
+int read_bit(ctx* ctx) {
+    ctx->packed_bit_mask >>= 1;
+    if (ctx->packed_bit_mask == 0) {
+        /* bit buffer exhausted: refill and restart at the top bit */
+        ctx->packed_bit_value = read_byte(ctx);
+        ctx->packed_bit_mask = 0x80;
+    }
+    return (ctx->packed_bit_value & ctx->packed_bit_mask) ? 1 : 0;
+}
+
+/*
+ * Read an interlaced Elias-gamma number from the packed stream.
+ * A 0 control bit announces another payload bit, a 1 control bit ends the number.
+ * Payload bits are xor-ed with inverted. If skip is non-zero, the first control bit
+ * is not read and treated as "payload follows".
+ */
+int read_interlaced_elias_gamma(ctx* ctx, int inverted, int skip) {
+    int result = 1;
+
+    for (;;) {
+        if (skip) {
+            /* first control bit is implied, nothing to consume */
+            skip = 0;
+        } else if (read_bit(ctx)) {
+            break;
+        }
+        result = (result << 1) | (read_bit(ctx) ^ inverted);
+    }
+    return result;
+}
+
+/* Empty stub: does nothing and is not called anywhere in this file. */
+void save_reencoded_stream(ctx* ctx) {
+}
+
+/*
+ * Copy all remaining unpacked bytes (from unpacked_index up to unpacked_size) verbatim
+ * into the re-encoded buffer, starting at packed_index, which is advanced per byte.
+ */
+void copy_inplace_literal(ctx* ctx) {
+    int src = ctx->unpacked_index;
+
+    while (src < ctx->unpacked_size) {
+        ctx->reencoded_data[ctx->packed_index++] = ctx->unpacked_data[src++];
+    }
+}
+
+/*
+ * Emit a literal run: a 0 type bit (left out for the very first run), the run length
+ * as Elias-gamma number and then length bytes taken from the unpacked data at
+ * unpacked_index. unpacked_index itself is not changed here.
+ */
+void encode_literal(ctx* ctx, int length, int first) {
+    size_t pos = ctx->unpacked_index;
+    int remaining = length;
+
+    if (!first) write_reencoded_bit(ctx, 0);
+    write_reencoded_interlaced_elias_gamma(ctx, length, 0);
+    while (remaining-- > 0) {
+        write_reencoded_byte(ctx, ctx->unpacked_data[pos++]);
+    }
+}
+
+/* Emit a match that reuses the last offset: a 0 bit followed by the length as Elias-gamma number. */
+void encode_rep(ctx* ctx, int length) {
+    write_reencoded_bit(ctx, 0);
+    write_reencoded_interlaced_elias_gamma(ctx, length, 0);
+}
+
+/*
+ * Emit a match with a new offset:
+ *   - a 1 type bit
+ *   - the bits of (offset - 1) above the low 7, plus 1, as Elias-gamma number
+ *   - one byte holding the low 7 bits of (offset - 1) shifted left by one,
+ *     with bit 0 set when length is 2
+ *   - length - 1 as Elias-gamma number whose first control bit is left out
+ */
+void encode_match(ctx* ctx, int length, int offset) {
+    const int off = offset - 1;
+    const int high_part = (off >> 7) + 1;
+    const int low_byte = ((off & 0x7f) << 1) | (length == 2);
+
+    write_reencoded_bit(ctx, 1);
+    write_reencoded_interlaced_elias_gamma(ctx, high_part, 0);
+    write_reencoded_byte(ctx, low_byte);
+    write_reencoded_interlaced_elias_gamma(ctx, length - 1, 1);
+}
+
+/*
+ * Walk through the packed stream in packed_data token by token (literal run, match with
+ * last offset, match with new offset) and write every token in dali's layout to
+ * reencoded_data via the encode_* helpers.
+ *
+ * After each token, with inplace set, the function checks whether the unpacked position
+ * has reached the position of the packed data (packed data placed so that it ends where
+ * the unpacked data ends). If so, the remaining unpacked bytes are appended verbatim with
+ * copy_inplace_literal() and the function returns.
+ *
+ * The function also returns when a new offset of 256 is read, which is treated as end
+ * marker; without inplace the marker is written to the output as well.
+ */
+void reencode_packed_stream(ctx* ctx) {
+    int last_offset = INITIAL_OFFSET;
+    int length;
+    int overwrite;
+
+    int bit, byte;
+    int i;
+
+    /* positions after the last token that did not collide with the packed data */
+    int safe_input_index = 0;
+    int safe_output_index = 0;
+
+    /* the first literal run is written without its type bit */
+    int first = 1;
+
+    ctx->packed_index = 0;
+    ctx->packed_bit_mask = 0;
+
+    ctx->reencoded_index = 0;
+    ctx->reencoded_bit_mask = 0;
+    ctx->reencoded_bit_value = 0;
+    ctx->reencoded_bit_index = 0;
+
+    ctx->packed_index = 0;
+    ctx->unpacked_index = 0;
+
+    ctx->packed_bit_mask = 0;
+    ctx->lz_bits = 0;
+
+    while (1) {
+        /* literal */
+        length = read_interlaced_elias_gamma(ctx, FALSE, 0);
+        /* skip the literal bytes in the packed stream, encode_literal takes them from unpacked_data */
+        for (i = 0; i < length; i++) read_byte(ctx);
+        encode_literal(ctx, length, first);
+        first = 0;
+
+        ctx->unpacked_index += length;
+
+        overwrite = (ctx->unpacked_index) - (ctx->unpacked_size - ctx->packed_size + ctx->packed_index);
+        /* literal would overwrite packed src */
+        if (ctx->inplace && overwrite >= 0) {
+            /* go back to previous index */
+            ctx->unpacked_index = safe_input_index;
+            ctx->packed_index = safe_output_index;
+            copy_inplace_literal(ctx);
+            return;
+        }
+        /* do remember last safe position */
+        safe_input_index = ctx->unpacked_index;
+        safe_output_index = ctx->packed_index;
+
+        /* copy from new or last offset? */
+        bit = read_bit(ctx);
+        if (!bit) {
+            /* copy from last_offset */
+            length = read_interlaced_elias_gamma(ctx, FALSE, 0);
+            encode_rep(ctx, length);
+
+            ctx->unpacked_index += length;
+
+            overwrite = (ctx->unpacked_index) - (ctx->unpacked_size - ctx->packed_size + ctx->packed_index);
+            /* rep would overwrite packed src */
+            /* NOTE(review): unlike the literal case, the indexes are not reset to the safe position here - confirm intended */
+            if (ctx->inplace && overwrite >= 0) {
+                copy_inplace_literal(ctx);
+                return;
+            }
+            safe_input_index = ctx->unpacked_index;
+            safe_output_index = ctx->packed_index;
+
+            bit = read_bit(ctx);
+        }
+
+        while (bit) {
+            /* copy from new_offset */
+            last_offset = read_interlaced_elias_gamma(ctx, TRUE, 0);
+            /* an offset value of 256 marks the end of the stream */
+            if (last_offset == 256) {
+                if (!ctx->inplace) {
+                    write_reencoded_bit(ctx, 1);
+                    write_reencoded_interlaced_elias_gamma(ctx, last_offset, 0);
+                }
+                return;
+            }
+            /* bit 0 of the offset byte flags a match of length 2, the upper 7 bits belong to the offset */
+            byte = read_byte(ctx);
+            if (byte & 1) length = 2;
+            else length = read_interlaced_elias_gamma(ctx, FALSE, 1) + 1;
+            last_offset = (last_offset << 7) - (byte >> 1);
+            encode_match(ctx, length, last_offset);
+
+            ctx->unpacked_index += length;
+
+            overwrite = (ctx->unpacked_index) - (ctx->unpacked_size - ctx->packed_size + ctx->packed_index);
+            /* match would overwrite packed src */
+            if (ctx->inplace && overwrite >= 0) {
+                copy_inplace_literal(ctx);
+                return;
+            }
+            safe_input_index = ctx->unpacked_index;
+            safe_output_index = ctx->packed_index;
+
+            bit = read_bit(ctx);
+        }
+    }
+}
+
+/* Store the low 16 bits of value in little-endian order at code[pos] and code[pos + 1]. */
+static void sfx_patch_word(char *code, int pos, int value) {
+    code[pos + 0] = value & 0xff;
+    code[pos + 1] = (value >> 8) & 0xff;
+}
+
+/*
+ * Write the result to ctx->output_name.
+ *
+ * With ctx->sfx set, a copy of the small or fast decruncher is patched (start address,
+ * depack destination, position and size of the packed data, and for the fast variant
+ * the $01 value and the optional CLI opcode) and written in front of the data.
+ * Otherwise, for cbm files, the load address of the packed data (little endian) and the
+ * depack address (big endian) are written in front of the data; raw files get no header.
+ *
+ * ctx->packed_index bytes of ctx->reencoded_data are written as payload.
+ * Any error prints a message and exits with status 1.
+ */
+void write_reencoded_stream(ctx* ctx) {
+    FILE *fp = NULL;
+    /* write reencoded output file */
+    fp = fopen(ctx->output_name, "wb");
+    if (!fp) {
+        fprintf(stderr, "Error: Cannot create reencoded file (%s)\n", ctx->output_name);
+        exit(1);
+    }
+
+    /* as sfx */
+    if (ctx->sfx) {
+        /* values that are patched in the same way into both decruncher variants */
+        int data_src = (int)(0x10000 - ctx->reencoded_index);
+        int data_size_hi = (int)(0xff - (((ctx->reencoded_index + 0x100) >> 8) & 0xff));
+        int data_end;
+
+        printf("Creating sfx with start-address $%04x\n", ctx->sfx_addr);
+
+        ctx->sfx_size = ctx->sfx_small ? sizeof(decruncher_small) : sizeof(decruncher);
+        /* copy over to change values in code */
+        ctx->sfx_code = (char *)malloc(ctx->sfx_size);
+        if (!ctx->sfx_code) {
+            fprintf(stderr, "Error: Insufficient memory\n");
+            exit(1);
+        }
+        memcpy(ctx->sfx_code, ctx->sfx_small ? decruncher_small : decruncher, ctx->sfx_size);
+
+        data_end = (int)(0x0801 + ctx->sfx_size - 2 + ctx->reencoded_index - 0x100);
+
+        if (ctx->sfx_small) {
+            /* setup jmp target after decompression */
+            sfx_patch_word(ctx->sfx_code, DALI_SMALL_SFX_ADDR, ctx->sfx_addr);
+            /* setup decompression destination */
+            sfx_patch_word(ctx->sfx_code, DALI_SMALL_DST, ctx->cbm_orig_addr);
+            /* setup compressed data src */
+            sfx_patch_word(ctx->sfx_code, DALI_SMALL_SRC, data_src);
+            /* setup compressed data end */
+            sfx_patch_word(ctx->sfx_code, DALI_SMALL_DATA_END, data_end);
+
+            ctx->sfx_code[DALI_SMALL_DATA_SIZE_HI] = data_size_hi;
+        } else {
+            if (ctx->sfx_01 < 0) ctx->sfx_01 = 0x37;
+
+            /* setup jmp target after decompression */
+            sfx_patch_word(ctx->sfx_code, DALI_FAST_SFX_ADDR, ctx->sfx_addr);
+            /* setup decompression destination */
+            sfx_patch_word(ctx->sfx_code, DALI_FAST_DST, ctx->cbm_orig_addr);
+            /* setup compressed data src */
+            sfx_patch_word(ctx->sfx_code, DALI_FAST_SRC, data_src);
+            /* setup compressed data end */
+            sfx_patch_word(ctx->sfx_code, DALI_FAST_DATA_END, data_end);
+
+            ctx->sfx_code[DALI_FAST_DATA_SIZE_HI] = data_size_hi;
+
+            ctx->sfx_code[DALI_FAST_01] = ctx->sfx_01;
+            if (ctx->sfx_cli) ctx->sfx_code[DALI_FAST_CLI] = 0x58;
+        }
+        /* size_t values are cast to match the %lx conversions on every platform */
+        printf("original: $%04x-$%04lx ($%04lx) 100%%\n", ctx->cbm_orig_addr, (unsigned long)(ctx->cbm_orig_addr + ctx->unpacked_size), (unsigned long)ctx->unpacked_size);
+        printf("packed:   $%04x-$%04lx ($%04lx) %3.2f%%\n", 0x0801, (unsigned long)(0x0801 + (int)ctx->sfx_size + ctx->packed_index), (unsigned long)((int)ctx->sfx_size + ctx->packed_index), ((float)(ctx->packed_index + (int)ctx->sfx_size) / (float)(ctx->unpacked_size) * 100.0));
+
+        if (fwrite(ctx->sfx_code, sizeof(char), ctx->sfx_size, fp) != (size_t)ctx->sfx_size) {
+            fprintf(stderr, "Error: Cannot write output file %s\n", ctx->output_name);
+            exit(1);
+        }
+        /* the patched copy is not needed any more */
+        free(ctx->sfx_code);
+        ctx->sfx_code = NULL;
+    /* or standard compressed */
+    } else {
+        if (ctx->cbm_relocate_origin_addr >= 0) {
+            ctx->cbm_orig_addr = ctx->cbm_relocate_origin_addr;
+            ctx->cbm = TRUE;
+        }
+
+        if (ctx->inplace) {
+            /* packed data (plus the 2 depack address bytes) ends where the unpacked range ends */
+            ctx->cbm_packed_addr = ctx->cbm_range_to - ctx->packed_index - 2;
+        } else {
+            if (ctx->cbm_relocate_packed_addr >= 0) {
+                ctx->cbm_packed_addr = ctx->cbm_relocate_packed_addr;
+            } else {
+                ctx->cbm_packed_addr = ctx->cbm_orig_addr;
+            }
+        }
+
+        if (ctx->cbm) {
+            printf("original: $%04x-$%04lx ($%04lx) 100%%\n", ctx->cbm_orig_addr, (unsigned long)(ctx->cbm_orig_addr + ctx->unpacked_size), (unsigned long)ctx->unpacked_size);
+            printf("packed:   $%04x-$%04lx ($%04lx) %3.2f%%\n", ctx->cbm_packed_addr, (unsigned long)(ctx->cbm_packed_addr + ctx->packed_index + 2), (unsigned long)(ctx->packed_index + 2), ((float)(ctx->packed_index) / (float)(ctx->unpacked_size) * 100.0));
+            if ((ctx->cbm_packed_addr >= 0xd000 && ctx->cbm_packed_addr < 0xe000) || (ctx->cbm_packed_addr < 0xd000 && ctx->cbm_packed_addr + ctx->packed_index + 2 > 0xd000)) {
+                fprintf(stderr, "Warning: Packed file lies in I/O-range from $d000-$dfff\n");
+            }
+
+            /* little endian */
+            file_write_byte(ctx->cbm_packed_addr & 255, fp);
+            file_write_byte((ctx->cbm_packed_addr >> 8) & 255, fp);
+
+            /* big endian, as read backwards by depacker */
+            file_write_byte((ctx->cbm_orig_addr >> 8) & 255, fp);
+            file_write_byte(ctx->cbm_orig_addr & 255, fp);
+        } else {
+            printf("original: $%04x-$%04lx ($%04lx) 100%%\n", 0, (unsigned long)ctx->unpacked_size, (unsigned long)ctx->unpacked_size);
+            printf("packed:   $%04x-$%04lx ($%04lx) %3.2f%%\n", 0, (unsigned long)ctx->packed_index, (unsigned long)ctx->packed_index, ((float)(ctx->packed_index) / (float)(ctx->unpacked_size) * 100.0));
+        }
+    }
+
+    if (fwrite(ctx->reencoded_data, sizeof(char), ctx->packed_index, fp) != ctx->packed_index) {
+        fprintf(stderr, "Error: Cannot write output file\n");
+        exit(1);
+    }
+    /* buffered data is flushed on close, so a failure here means an incomplete file */
+    if (fclose(fp) != 0) {
+        fprintf(stderr, "Error: Cannot write output file\n");
+        exit(1);
+    }
+}
+
+/*
+ * Run the complete packing job described by ctx:
+ *   1. load the input file and work out load address and the range to compress
+ *   2. write that range (and the optional dictionary) to temporary files
+ *   3. let salvador compress it into ctx->output_name
+ *   4. read salvador's output back, re-encode it and overwrite ctx->output_name
+ *
+ * If no output name is set, "<input>.lz" is used. Any error prints a message and
+ * exits with status 1.
+ *
+ * NOTE(review): without --relocate-origin the origin address is taken from the first two
+ * bytes of the input even when --binfile is given - confirm this is intended.
+ */
+void do_reencode(ctx* ctx) {
+    char tmp_name[] = "dict-XXXXXX";
+    char src_name[] = "src-XXXXXX";
+    unsigned char *dict_data = NULL;
+    int dict_size = 0;
+    char *salvador_argv[5];
+    int salvador_argc = 0;
+    int dict_temp = FALSE;
+    FILE *dfp = NULL;
+    FILE *sfp = NULL;
+    FILE* ufp = NULL;
+    FILE* pfp = NULL;
+
+    /* determine output filename */
+    if (ctx->output_name == NULL) {
+        ctx->output_name = (char *)malloc(strlen(ctx->input_name) + 4);
+        if (!ctx->output_name) {
+            fprintf(stderr, "Error: Insufficient memory\n");
+            exit(1);
+        }
+        strcpy(ctx->output_name, ctx->input_name);
+        strcat(ctx->output_name, ".lz");
+        printf("output name: %s\n", ctx->output_name);
+    }
+
+    /* allocate buffers */
+    ctx->packed_data = (unsigned char *)malloc(BUFFER_SIZE);
+    ctx->unpacked_data = (unsigned char *)malloc(BUFFER_SIZE + 2);
+    ctx->reencoded_data = (unsigned char *)malloc(BUFFER_SIZE);
+
+    if (!ctx->packed_data || !ctx->unpacked_data || !ctx->reencoded_data) {
+        fprintf(stderr, "Error: Insufficient memory\n");
+        exit(1);
+    }
+
+    /* load unpacked file */
+    ufp = fopen(ctx->input_name, "rb");
+    if (!ufp) {
+        fprintf(stderr, "Error: Cannot access input file\n");
+        exit(1);
+    }
+    ctx->unpacked_size = fread(ctx->unpacked_data, sizeof(char), BUFFER_SIZE + 2, ufp);
+    fclose(ufp);
+
+    /* the first two bytes are read below and, for cbm files, subtracted from the size:
+       make sure they exist (such a file could not be packed smaller anyway) */
+    if (ctx->unpacked_size < 2) {
+        fprintf(stderr, "Error: Input too small\n");
+        exit(1);
+    }
+
+    /* cbm address handling */
+    if (ctx->cbm_relocate_origin_addr >= 0) {
+        ctx->cbm_orig_addr = ctx->cbm_relocate_origin_addr;
+    } else {
+        ctx->cbm_orig_addr = ctx->unpacked_data[0] + (ctx->unpacked_data[1] << 8);
+    }
+
+    if (ctx->cbm) {
+      ctx->unpacked_data += 2;
+      ctx->unpacked_size -= 2;
+    }
+
+    /* take care of range (--from --to) */
+    if (ctx->cbm_range_from < 0) ctx->cbm_range_from = ctx->cbm_orig_addr;
+    if (ctx->cbm_range_to < 0) ctx->cbm_range_to = ctx->cbm_orig_addr + ctx->unpacked_size;
+
+    if ((ctx->cbm_range_to - ctx->cbm_orig_addr) > ctx->unpacked_size) {
+        ctx->cbm_range_to = ctx->unpacked_size + ctx->cbm_orig_addr;
+        fprintf(stderr, "Warning: File ends at $%04x, adopting --to value\n", ctx->cbm_range_to);
+    }
+    ctx->unpacked_size = (ctx->cbm_range_to - ctx->cbm_orig_addr);
+
+    /* if range is below start_address, adopt range */
+    if (ctx->cbm_range_from < ctx->cbm_orig_addr) {
+        ctx->cbm_range_from = ctx->cbm_orig_addr;
+        fprintf(stderr, "Warning: File starts at $%04x, adopting --from value\n", ctx->cbm_range_from);
+    }
+    if (ctx->cbm_range_from > ctx->cbm_range_to) {
+        fprintf(stderr, "Error: --from beyond fileend ($%04x - $%04x)\n", ctx->cbm_range_from, ctx->cbm_range_to);
+        exit(1);
+    }
+
+    /* setup dict lengths and position */
+    if (ctx->cbm_prefix_from >= 0) {
+        if (ctx->cbm_range_from < 0) {
+            fprintf(stderr, "Error: Dict is zero size (use --from)\n");
+            exit(1);
+        }
+        else if (ctx->cbm_prefix_from >= ctx->cbm_range_from) {
+            fprintf(stderr, "Error: --from must be greater than --prefix-from\n");
+            exit(1);
+        }
+        if (ctx->cbm_range_from >= 0 && ctx->cbm_range_from - ctx->cbm_prefix_from > 32640) {
+            fprintf(stderr, "Info: --prefix-from  exceeds max offset, not all bytes can be used\n");
+        }
+        /* if range is below start_address, adopt range */
+        if (ctx->cbm_prefix_from < ctx->cbm_orig_addr) {
+            ctx->cbm_prefix_from = ctx->cbm_orig_addr;
+            fprintf(stderr, "Warning: File starts at $%04x, adopting --prefix-from value\n", ctx->cbm_prefix_from);
+        }
+        dict_data = ctx->unpacked_data + ctx->cbm_prefix_from - ctx->cbm_orig_addr;
+        dict_size = ctx->cbm_range_from - ctx->cbm_prefix_from;
+    }
+
+    /* load file from start_pos on only, so skip bytes on input */
+    ctx->unpacked_data += (ctx->cbm_range_from - ctx->cbm_orig_addr);
+    /* also adopt ctx->unpacked_size */
+    ctx->unpacked_size -= (ctx->cbm_range_from - ctx->cbm_orig_addr);
+    /* and set up new load-address */
+    ctx->cbm_orig_addr = ctx->cbm_range_from;
+
+    if (ctx->unpacked_size <= 0) {
+        fprintf(stderr, "Error: Input too small\n");
+        exit(1);
+    }
+
+    printf("Compressing from $%04x to $%04x = $%04lx bytes\n", ctx->cbm_range_from, ctx->cbm_range_to, (unsigned long)ctx->unpacked_size);
+
+    if (ctx->cbm_relocate_packed_addr >= 0 || ctx->sfx) {
+        ctx->inplace = FALSE;
+    }
+
+    /* write clamped raw data */
+    ctx->clamped_name = (char*)malloc(sizeof(src_name));
+    if (!ctx->clamped_name) {
+        fprintf(stderr, "Error: Insufficient memory\n");
+        exit(1);
+    }
+    strcpy(ctx->clamped_name, src_name);
+    /* if mkstemp() fails it returns -1, fdopen() then fails as well and is caught below */
+    sfp = fdopen(mkstemp(ctx->clamped_name),"wb");
+    if (!sfp) {
+        fprintf(stderr, "Error: Cannot create clamped file %s\n", ctx->clamped_name);
+        exit(1);
+    }
+    if (ctx->unpacked_size != 0) {
+        if (fwrite(ctx->unpacked_data, sizeof(char), ctx->unpacked_size, sfp) != ctx->unpacked_size) {
+            fprintf(stderr, "Error: Cannot write clamped file\n");
+            perror("fwrite");
+            fclose(sfp);
+            remove(ctx->clamped_name);
+            exit(1);
+        }
+    }
+    fclose(sfp);
+
+    /* create temp file for dict */
+    if (ctx->cbm_prefix_from >= 0) {
+        if (ctx->prefix_name == NULL) {
+            ctx->prefix_name = (char*)malloc(sizeof(tmp_name));
+            if (!ctx->prefix_name) {
+                fprintf(stderr, "Error: Insufficient memory\n");
+                remove(ctx->clamped_name);
+                exit(1);
+            }
+            strcpy(ctx->prefix_name, tmp_name);
+            dfp = fdopen(mkstemp(ctx->prefix_name),"wb");
+            printf("using prefix: $%04x - $%04x\n", ctx->cbm_prefix_from, ctx->cbm_prefix_from + dict_size);
+            if (!dfp) {
+                fprintf(stderr, "Error: Cannot create dict file %s\n", ctx->prefix_name);
+                remove(ctx->clamped_name);
+                exit(1);
+            }
+            if (!dict_data || fwrite(dict_data, sizeof(char), dict_size, dfp) != (size_t)dict_size) {
+                fprintf(stderr, "Error: Cannot write dict file %s\n", ctx->prefix_name);
+                fclose(dfp);
+                remove(ctx->prefix_name);
+                /* do not leave the clamped temp file behind either */
+                remove(ctx->clamped_name);
+                exit(1);
+            }
+            dict_temp = TRUE;
+            fclose(dfp);
+        }
+    }
+
+    /* compress data with salvador */
+    salvador_argv[salvador_argc++] = "salvador";
+    if (ctx->prefix_name) {
+        salvador_argv[salvador_argc++] = "-D";
+        salvador_argv[salvador_argc++] = ctx->prefix_name;
+    }
+    salvador_argv[salvador_argc++] = ctx->clamped_name;
+    salvador_argv[salvador_argc++] = ctx->output_name;
+    salvador_main(salvador_argc, salvador_argv);
+
+    /* delete dict */
+    if (dict_temp) remove(ctx->prefix_name);
+    /* remove clamped */
+    remove(ctx->clamped_name);
+
+    /* read packed data */
+    pfp = fopen(ctx->output_name, "rb");
+    if (!pfp) {
+        fprintf(stderr, "Error: Cannot read packed file (%s)\n", ctx->output_name);
+        exit(1);
+    }
+    ctx->packed_size = fread(ctx->packed_data, sizeof(char), BUFFER_SIZE, pfp);
+    fclose(pfp);
+
+    /* the last byte is inspected and up to 3 bytes are cut off below */
+    if (ctx->packed_size < 3) {
+        fprintf(stderr, "Error: Packed data is truncated or empty (%s)\n", ctx->output_name);
+        exit(1);
+    }
+
+    /* determine size without eof-marker -> remove 18 bits, either 2 byte or three byte depending on position of last bitpair */
+    if (ctx->packed_data[ctx->packed_size - 1] & 0x80) ctx->packed_size -= 3;
+    else ctx->packed_size -= 2;
+
+    reencode_packed_stream(ctx);
+
+    if (ctx->packed_index + 2 > ctx->unpacked_size) {
+        fprintf(stderr, "Error: Packed file larger than original\n");
+        exit(1);
+    }
+
+    write_reencoded_stream(ctx);
+    return;
+}
+
+/*
+ * Command line front end of dali.
+ * Options may appear in any order, the input filename has to be the last argument.
+ * Called without arguments, the usage text is printed. Invalid options, missing option
+ * values or a missing input file end the program with status 1.
+ */
+int main(int argc, char *argv[]) {
+    int i;
+
+    ctx ctx = { 0 };
+
+    ctx.output_name = NULL;
+    ctx.input_name = NULL;
+    ctx.prefix_name = NULL;
+
+    ctx.inplace = TRUE;
+
+    /* -1 marks "not given on the command line" for all address options */
+    ctx.cbm = TRUE;
+    ctx.cbm_orig_addr = 0;
+    ctx.cbm_packed_addr = 0;
+    ctx.cbm_range_from = -1;
+    ctx.cbm_range_to = -1;
+    ctx.cbm_relocate_packed_addr = -1;
+    ctx.cbm_relocate_origin_addr = -1;
+    ctx.cbm_prefix_from = -1;
+
+    ctx.sfx = FALSE;
+    ctx.sfx_addr = -1;
+    ctx.sfx_01 = -1;
+    ctx.sfx_cli = FALSE;
+    ctx.sfx_small = FALSE;
+    ctx.sfx_code = NULL;
+
+    for (i = 1; i < argc; i++) {
+        if (argv[i][0] == '-') {
+            /* for numeric options a missing value (argv[argc] is NULL) is reported by read_number() */
+            if (!strcmp(argv[i], "--binfile")) {
+                ctx.cbm = FALSE;
+            } else if (!strcmp(argv[i], "--prefix-from")) {
+                ctx.cbm_prefix_from = read_number(argv[i + 1], argv[i], 65536);
+                i++;
+            } else if (!strcmp(argv[i], "--prefix-file")) {
+                if (i + 1 >= argc) {
+                    fprintf(stderr, "Error: Missing filename for option %s\n", argv[i]);
+                    exit(1);
+                }
+                i++;
+                ctx.prefix_name = argv[i];
+            } else if (!strcmp(argv[i], "--no-inplace")) {
+                ctx.inplace = FALSE;
+            } else if (!strcmp(argv[i], "--small")) {
+                ctx.sfx_small = TRUE;
+            } else if (!strcmp(argv[i], "--relocate-packed")) {
+                ctx.cbm_relocate_packed_addr = read_number(argv[i + 1], argv[i], 65536);
+                i++;
+            } else if (!strcmp(argv[i], "--relocate-origin")) {
+                ctx.cbm_relocate_origin_addr = read_number(argv[i + 1], argv[i], 65536);
+                i++;
+            } else if (!strcmp(argv[i], "--from")) {
+                ctx.cbm_range_from = read_number(argv[i + 1], argv[i], 65536);
+                i++;
+            } else if (!strcmp(argv[i], "--to")) {
+                ctx.cbm_range_to = read_number(argv[i + 1], argv[i], 65536);
+                i++;
+            } else if (!strcmp(argv[i], "--01")) {
+                ctx.sfx_01 = read_number(argv[i + 1], argv[i], 256);
+                i++;
+            } else if (!strcmp(argv[i], "--cli")) {
+                ctx.sfx_cli = TRUE;
+            } else if (!strcmp(argv[i], "--sfx")) {
+                ctx.sfx_addr = read_number(argv[i + 1], argv[i], 65536);
+                i++;
+                ctx.sfx = TRUE;
+                ctx.inplace = FALSE;
+            } else if (!strcmp(argv[i], "-o")) {
+                if (i + 1 >= argc) {
+                    fprintf(stderr, "Error: Missing filename for option %s\n", argv[i]);
+                    exit(1);
+                }
+                i++;
+                ctx.output_name = argv[i];
+            } else {
+                fprintf(stderr, "Error: Unknown option %s\n", argv[i]);
+                exit(1);
+            }
+        } else if (i == argc - 1) {
+            /* only the last argument is taken as input filename */
+            ctx.input_name = argv[i];
+        } else {
+            fprintf(stderr, "Error: Unknown option %s\n", argv[i]);
+            exit(1);
+        }
+    }
+
+    printf("dali v0.3 - a zx0-reencoder for bitfire by Tobias Bindhammer\n");
+    printf("underlying zx0-packer salvador by Emmanuel Marty\n");
+
+    if (argc == 1) {
+        /* --cli is a plain switch, so it is listed without a value */
+        fprintf(stderr, "Usage: %s [options] input\n"
+                        "  -o [filename]               Set output filename.\n"
+                        "  --sfx [num]                 Create a c64 compatible sfx-executable.\n"
+                        "  --01 [num]                  Set 01 to [num] after sfx.\n"
+                        "  --cli                       Do a CLI after sfx, default is SEI.\n"
+                        "  --small                     Use a very small depacker that fits into zeropage, but --01 and --cli are ignored and it trashes zeropage (!)\n"
+                        "  --no-inplace                Disable inplace-decompression.\n"
+                        "  --binfile                   Input file is a raw binary without load-address.\n"
+                        "  --from [num]                Compress file from [num] on.\n"
+                        "  --to [num]                  Compress file until position [num].\n"
+                        "  --prefix-from [num]         Use preceeding data from [num] on as dictionary (in combination with --from).\n"
+                        "  --prefix-file [file]        Use preceeding data from [file] as dictionary.\n"
+                        "  --relocate-packed [num]     Relocate packed data to desired address [num] (resulting file can't de decompressed inplace!)\n"
+                        "  --relocate-origin [num]     Set load-address of source file to [num] prior to compression. If used on bin-files, load-address and depack-target is prepended on output.\n"
+                        ,argv[0]);
+        exit(1);
+    }
+
+    if (!ctx.sfx && ctx.sfx_small) {
+        fprintf(stderr, "Info: No sfx, ignoring --small option\n");
+    }
+    if (!ctx.sfx && ctx.sfx_01 >= 0) {
+        fprintf(stderr, "Info: No sfx, ignoring --01 option\n");
+    }
+    if (!ctx.sfx && ctx.sfx_cli) {
+        fprintf(stderr, "Info: No sfx, ignoring --cli option\n");
+    }
+
+    if (ctx.input_name == NULL) {
+        fprintf(stderr, "Error: No input-filename given\n");
+        exit(1);
+    }
+
+    do_reencode(&ctx);
+    return 0;
+}
--- a/loader/tools/dali/depack.asm
+++ b/loader/tools/dali/depack.asm
@ -0,0 +1,106 @@
+; Test program: depacks the data at data_start and shows a timer based cycle count.
+!cpu 6510
+		* = $1000
+
+		; interrupts off, $35 to $01 (presumably RAM with I/O visible on a C64 - confirm)
+		sei
+		lda #$35
+		sta $01
+		; wait until bit 7 of $d011 is set, then write $0b to $d011
+		; (presumably blanks the screen while depacking - confirm)
+		lda $d011
+		bpl *-3
+		lda #$0b
+		sta $d011
+
+		; fill $0400-$07ff with $20
+		ldx #$00
+		lda #$20
+-
+		sta $0400,x
+		sta $0500,x
+		sta $0600,x
+		sta $0700,x
+		dex
+		bne -
+
+		jsr .timer_start
+
+		; address of packed data: low byte in X, high byte in A
+		ldx #<data_start
+		lda #>data_start
+
+		jsr depack
+
+		jsr .timer_stop
+
+		; write $1b to $d011 (presumably re-enables the screen - confirm) and loop forever
+		lda #$1b
+		sta $d011
+		jmp *
+
+; Load all four counter bytes $dc04-$dc07 with $ff and start counting.
+; NOTE(review): presumably CIA #1 timer A and B of the C64, with timer B counting
+; underflows of timer A to form one 32 bit down counter - confirm against the CIA docs.
+.timer_start
+                ; stop both timers before loading new values
+                lda #$00
+                sta $dc0e
+                lda #$40
+                sta $dc0f
+                lda #$ff
+                sta $dc04
+                sta $dc05
+                sta $dc06
+                sta $dc07
+                ; start: $41 to $dc0f first, then $01 to $dc0e
+                lda #$41
+                sta $dc0f
+                lda #$01
+                sta $dc0e
+                rts
+; Stop the timers, copy the 8 characters at .cycles to $0400 and print the four counter
+; bytes $dc07..$dc04 (highest first) as hex digits behind it.
+.timer_stop
+                ; same values as written at the beginning of .timer_start
+                lda #$00
+                sta $dc0e
+                lda #$40
+                sta $dc0f
+
+		ldy #$00
+-
+		lda .cycles,y
+		sta $0400,y
+		iny
+		cpy #$08
+		bne -
+		; Y is 8 now and is used as output position by .print_hex
+		; push $dc04-$dc06, so that they come back from the stack in the order $dc06, $dc05, $dc04
+                lda $dc04
+		pha
+                lda $dc05
+		pha
+                lda $dc06
+		pha
+                lda $dc07
+		jsr .print_hex
+		pla
+		jsr .print_hex
+		pla
+		jsr .print_hex
+		pla
+		; falls through into .print_hex for the last byte, its rts ends .timer_stop
+; Print the inverted value of A as two hex digits at $0400,y and advance Y by 2.
+; The counters were loaded with $ff, so the inverted value presumably is the elapsed count.
+; Changes A and X.
+.print_hex
+		eor #$ff
+		pha
+		; high nibble
+		lsr
+		lsr
+		lsr
+		lsr
+		tax
+		lda .hextab,x
+		sta $0400,y
+		iny
+		pla
+		; low nibble: sbx is an undocumented 6510 opcode, presumably X = (A and X) - operand - confirm
+		ldx #$0f
+		sbx #$00
+		lda .hextab,x
+		sta $0400,y
+		iny
+		rts
+; 8 character label that .timer_stop copies to $0400
+.cycles
+		!scr "cycles: "
+; digit characters used by .print_hex, indexed by nibble value
+.hextab
+		!scr "0123456789abcdef"
+
+; the depacker is included at the next address selected by !align and called via "depack"
+!align 255,0
+depack
+!src "dzx0_dali.asm"
+!warn "depacker size: ", * - depack
+
+;		* = $6a61
+; packed test data, included without its first 4 bytes
+; (presumably the two addresses dali writes in front of cbm files - confirm)
+data_start
+!bin "testfile.lz",,4
--- a/loader/tools/dali/dzx0_dali.asm
+++ b/loader/tools/dali/dzx0_dali.asm
@ -0,0 +1,282 @@
+!cpu 6510
+
+;Build-time configuration
+CONFIG_ZP_ADDR		= $f0			;base of the 6 zero-page bytes used below
+LZ_BITS_LEFT            = 0			;1 = bit buffer is shifted left (asl/rol), 0 = shifted right (lsr/ror)
+INPLACE			= 0			;1 = assemble the end check used for in-place depacking
+SETUP_LZ_DST		= 0			;1 = read lz_dst from the first two bytes of the packed stream
+
+;Zero-page workspace
+lz_bits			= CONFIG_ZP_ADDR + 0	;bit buffer
+lz_dst			= CONFIG_ZP_ADDR + 1	;16-bit destination pointer
+lz_src			= CONFIG_ZP_ADDR + 3	;16-bit source pointer
+lz_len_hi		= CONFIG_ZP_ADDR + 5	;high byte of a 16-bit length
+
+;Shift the next bit out of lz_bits into carry. Z is set when lz_bits becomes 0.
+!macro get_lz_bit {
+        !if LZ_BITS_LEFT = 1 {
+                asl <lz_bits
+        } else {
+                lsr <lz_bits
+        }
+}
+
+;Rotate carry into the freshly fetched byte in A (the end marker) and the first data bit out into carry.
+!macro set_lz_bit_marker {
+        !if LZ_BITS_LEFT = 1{
+                rol
+        } else {
+                ror
+        }
+}
+
+;Preset lz_bits so that the first bit read selects a literal and the second one triggers a refill.
+!macro init_lz_bits {
+        !if LZ_BITS_LEFT = 1 {
+                        lda #$40
+                        sta <lz_bits                    ;start with an empty lz_bits, first +get_lz_bit leads to literal this way and bits are refilled upon next shift
+        } else {
+                        stx <lz_bits			;caller has X = $02 at this point, the right-shift counterpart of $40
+        }
+}
+
+;Advance the source pointer to the next page.
+!macro inc_lz_src {
+			inc <lz_src + 1
+}
+
+;---------------------------------------------------------------------------------
+;DEPACKER STUFF
+;---------------------------------------------------------------------------------
+
+			;Entry point. A = high byte, X = low byte of the packed data address.
+			;The destination is preset to $a000; with SETUP_LZ_DST = 1 it is
+			;overwritten by the first two bytes of the stream (high byte first).
+			sta <lz_src + 1
+			stx <lz_src + 0
+
+			lda #$00
+			sta <lz_dst + 0
+			lda #$a0
+			sta <lz_dst + 1
+
+                        ldx #$02			;initial lz_bits for LZ_BITS_LEFT = 0 and byte counter for the loop below
+			+init_lz_bits
+                        ldy #$00                        ;needs to be set in any case, also plain decomp enters here
+!if SETUP_LZ_DST = 1 {
+-
+                        lda (lz_src),y
+                        sta <lz_dst + 0 - 1, x
+                        inc <lz_src + 0
+                        bne +
+                        +inc_lz_src
++							;anonymous label restored: target of the bne + above
+                        dex
+                        bne -
+}
+                        sty .lz_offset_lo + 1           ;initialize offset with $0000
+                        sty .lz_offset_hi + 1
+                        sty <lz_len_hi
+!if INPLACE = 1 {
+			beq .lz_start_over		;Z is still set from ldy #$00 / dex
+.lz_end_check_
+			ldx <lz_dst + 0			;check for end condition when depacking inplace, lz_dst + 0 still in X
+			cpx <lz_src + 0
+			bne .lz_start_over
+.lz_eof
+			rts				;if lz_src + 1 gets incremented, the barrier check hits in even later, so at least one block is loaded, if it was $ff, we at least load the last block @ $ffxx, it must be the last block being loaded anyway
+.lz_end_check
+			cpx <lz_src + 1
+			beq .lz_end_check_		;we could check against src >= dst XXX TODO
+} else {
+.lz_end_check
+}
+			;Main loop head: one bit selects match (1) or literal run (0).
+			;The literal length is an interlaced Elias-gamma number collected in A
+			;(starting at 1); each payload bit is followed by a continuation bit,
+			;a set continuation bit ends the number.
+.lz_start_over
+			lda #$01			;we fall through this check on entry and start with literal
+			+get_lz_bit
+			bcs .lz_match			;after each match check for another match or literal?
+
+			;------------------
+			;LITERAL
+			;------------------
+.lz_literal
+			+get_lz_bit
+			bcs +				;length is 1, no payload bits
+-							;lz_length as inline
+			+get_lz_bit			;fetch payload bit
+			rol				;can also moved to front and executed once on start
+			+get_lz_bit
+			bcc -
++							;anonymous label restored: target of the bcs + above
+			bne +				;lz_bits not empty -> the 1 just read was a real stop bit
+			jsr .lz_refill_bits		;lz_bits ran empty -> that 1 was the marker, fetch a new byte and continue
++							;anonymous label restored: target of the bne + above
+			tax				;X = number of bytes to copy in this page run
+.lz_l_page_
+.lz_cp_lit
+			lda (lz_src),y			;/!\ Need to copy this way, or we run into danger to copy from an area that is yet blocked by barrier, this totally sucks, loading in order reveals that
+			sta (lz_dst),y
+
+			inc <lz_src + 0
+			beq .lz_inc_src3
+.lz_inc_src3_
+			inc <lz_dst + 0
+			beq .lz_dst_inc
+.lz_dst_inc_
+			dex
+			bne .lz_cp_lit
+
+			lda <lz_len_hi			;more pages to copy?
+			bne .lz_l_page			;happens very seldom
+
+			;After a literal run: one bit selects a match with a new offset (1)
+			;or a match that reuses the last offset (0). The match copy below is
+			;shared by both paths; it runs Y from a negative start index up to 0,
+			;with lz_dst and the self-modified source address adjusted to fit.
+			;------------------
+			;NEW OR OLD OFFSET
+			;------------------
+							;in case of type bit == 0 we can always receive length (not length - 1), can this used for an optimization? can we fetch length beforehand? and then fetch offset? would make length fetch simpler? place some other bit with offset?
+			rol				;was A = 0, C = 1 -> A = 1 with rol, but not if we copy literal this way
+			+get_lz_bit
+			bcs .lz_match			;either match with new offset or old offset
+
+			;------------------
+			;REPEAT LAST OFFSET
+			;------------------
+.lz_repeat
+			+get_lz_bit			;cheaper with 2 branches, as initial branch to .lz_literal therefore is removed
+			bcs +
+-
+			+get_lz_bit			;fetch payload bit
+			rol				;can also moved to front and executed once on start
+			+get_lz_bit			;cheaper with 2 branches, as initial branch to .lz_literal therefore is removed
+			bcc -
++							;anonymous label restored: target of the bcs + above
+			bne +
+			jsr .lz_refill_bits		;fetch more bits
+			beq .lz_m_page			;avoid underflow of A on sbc #$01 faster than forcing carry to 1 with a sec all times
++							;anonymous label restored: target of the bne + above
+			sbc #$01			;subtract 1, will be added again on adc as C = 1
+.lz_match_big						;we enter with length - 1 here from normal match
+			eor #$ff
+			tay
+.lz_m_page_
+			eor #$ff			;restore A
+.lz_match_len2						;entry from new_offset handling
+			adc <lz_dst + 0
+			sta <lz_dst + 0
+			bcs .lz_clc			;/!\ branch happens very seldom, if so, clear carry
+			dec <lz_dst + 1			;subtract one more in this case
+.lz_clc_back
+.lz_offset_lo		sbc #$00			;carry is cleared, subtract (offset + 1) in fact we could use sbx here, but would not respect carry, but a and x are same, but need x later anyway for other purpose
+			sta .lz_msrcr + 0
+			lax <lz_dst + 1			;undocumented opcode: loads A and X at once
+.lz_offset_hi		sbc #$00
+			sta .lz_msrcr + 1
+.lz_cp_match
+			;XXX TODO if repeated offset: add literal size to .lz_msrcr and done?
+.lz_msrcr = * + 1					;address of the operand of the lda below, patched above
+			lda $beef,y
+			sta (lz_dst),y
+			iny
+			bne .lz_cp_match
+			inx
+			stx <lz_dst + 1			;cheaper to get lz_dst + 1 into x than lz_dst + 0 for upcoming compare
+
+			lda <lz_len_hi			;check for more loop runs
+			beq .lz_end_check		;do more page runs? Yes? Fall through
+.lz_m_page
+.lz_l_page
+			dec <lz_len_hi
+			txa				;much shorter this way. if we recalculate m_src and dst, endcheck also hits in if we end with an multipage match, else maybe buggy?
+			beq .lz_l_page_
+			tya
+			bcs .lz_m_page_			;as Y = 0, we can skip the part that does Y = A xor $ff
+
+
+			;Out-of-line page-crossing fixups for the literal copy loop.
+			;Each bcs acts as a jump back and relies on carry being set here
+			;(presumably left set by the bit fetch that ended the length - confirm).
+			;------------------
+			;SELDOM STUFF
+			;------------------
+.lz_dst_inc
+			inc <lz_dst + 1
+			bcs .lz_dst_inc_
+.lz_inc_src3
+			+inc_lz_src
+			bcs .lz_inc_src3_
+
+			;Match with a new offset. The Elias-gamma number read first is the
+			;offset high part (a result of 0 in A after a refill ends depacking);
+			;the next stream byte supplies the low offset bits and, in its lowest
+			;bit, the first bit of the length.
+			;------------------
+			;MATCH
+			;------------------
+-							;lz_length as inline
+			+get_lz_bit			;fetch payload bit
+			rol				;can also moved to front and executed once on start
+.lz_match
+			+get_lz_bit
+			bcc -
+
+			bne +
+			jsr .lz_refill_bits
+			beq .lz_lend			;underflow, so offset was $100
++							;anonymous label restored: target of the bne + above
+			sbc #$01			;subtract 1, elias numbers range from 1..256, we need 0..255
+
+			lsr				;set bit 15 to 0 while shifting hibyte
+			sta .lz_offset_hi + 1		;hibyte of offset
+
+			lda (lz_src),y			;fetch another byte directly, same as refill_bits...
+			ror				;and shift -> first bit for length is in carry, and we have %0xxxxxxx xxxxxxxx as offset
+			sta .lz_offset_lo + 1
+
+			inc <lz_src + 0			;postponed, so no need to save A on next_page call
+			beq .lz_inc_src1
+.lz_inc_src1_
+			lda #$01
+			ldy #$fe
+			bcs .lz_match_len2		;length = 1 ^ $ff, do it the very short way :-)
+-
+			+get_lz_bit
+			rol
+			+get_lz_bit
+			bcc -
+			bne .lz_match_big
+			ldy #$00			;only now y = 0 is needed
+			jsr .lz_refill_bits		;fetch remaining bits
+			bne .lz_match_big
+			inc <lz_len_hi
+			bcs .lz_match_big		;and enter match copy loop
+
+			;Out-of-line fixups. The conditional branches act as jumps back:
+			;bcc after clc is always taken, and the bne after inc is taken unless
+			;the high byte of lz_src wraps to 0.
+			;------------------
+			;SELDOM STUFF
+			;------------------
+.lz_clc
+			clc
+			bcc .lz_clc_back
+.lz_inc_src1
+			+inc_lz_src			;preserves carry, all sane
+			bne .lz_inc_src1_
+.lz_inc_src2
+			+inc_lz_src			;preserves carry and A, clears X, Y, all sane
+			bne .lz_inc_src2_
+
+			;Called when lz_bits ran empty in the middle of an Elias-gamma number.
+			;Fetches the next stream byte into lz_bits (carry is rotated in as the
+			;new end marker) and keeps collecting bits into A until a stop bit is
+			;seen. A is parked in X during the fetch. If a 1 drops out of A, the
+			;number is extended to 16 bit via lz_len_hi.
+			;------------------
+			;ELIAS FETCH
+			;------------------
+.lz_refill_bits
+			tax
+			lda (lz_src),y
+			+set_lz_bit_marker
+			sta <lz_bits
+			inc <lz_src + 0 		;postponed, so no need to save A on next_page call
+			beq .lz_inc_src2
+.lz_inc_src2_
+			txa				;also postpone, so A can be trashed on lz_inc_src above
+			bcs .lz_lend			;first bit of the new byte is the stop bit -> done
+.lz_get_loop
+			+get_lz_bit			;fetch payload bit
+.lz_length_16_
+			rol				;can also moved to front and executed once on start
+			bcs .lz_length_16		;first 1 drops out from lowbyte, need to extend to 16 bit, unfortunately this does not work with inverted numbers
+			+get_lz_bit
+			bcc .lz_get_loop
+			beq .lz_refill_bits
+.lz_lend
+			rts
+.lz_length_16						;happens very rarely
+			pha				;save LSB
+			tya				;was lda #$01, but A = 0 + upcoming rol makes this also start with MSB = 1
+			jsr .lz_length_16_		;get up to 7 more bits
+			sta <lz_len_hi			;save MSB
+			pla				;restore LSB
+			bne +
+			dec <lz_len_hi
+			tya
++							;anonymous label restored: target of the bne + above
+			rts
--- a/loader/tools/dali/dzx0_orig_zx0_v1.asm
+++ b/loader/tools/dali/dzx0_orig_zx0_v1.asm
@ -0,0 +1,254 @@
+;/!\ Attention, this depacker only works with the original zx0 version contained within this folder as well, it does not work with the modified version coming with bitfire, as some things on the encoding got changed due to speed optimizations
+
+;Depacker for the original ZX0 v1 stream format.
+;The source is read with absolute,X loads: X is the running low-byte index and
+;the high byte is patched into the operands at .lz_src1/2/3.
+;.lz_dst is not initialised here, so the caller presumably sets it beforehand.
+!cpu 6510
+
+ZX0_INPLACE		= 0			;1 = assemble the end check for in-place depacking
+ZX0_INLINE_GET_LEN	= 0			;1 = inline the length fetch instead of calling .lz_get_len
+
+.ZP_ADDR		= $f8
+.lz_dst			= .ZP_ADDR + 0		;16-bit destination pointer
+.lz_bits		= .ZP_ADDR + 2		;bit buffer
+.lz_len_hi		= .ZP_ADDR + 4		;high byte of a 16-bit length
+
+.depacker_start
+		;------------------
+		;INIT
+		;------------------
+
+		;lowbyte of data start must be in X, highbyte in A
+		sta .lz_src1
+		sta .lz_src2
+		sta .lz_src3
+
+		ldy #$ff			;initial offset operand bytes are $ff/$ff
+		sty .lz_offset_lo + 1
+		sty .lz_offset_hi + 1
+
+		iny				;Y = 0
+		sty <.lz_len_hi
+		lda #$40
+		sta <.lz_bits			;will make us fall through on next test and force us to load a new byte into bit-buffer upon next .lz_get_len
+
+		;Main loop head and literal run. One bit selects new-offset match (1)
+		;or literal (0). The literal length low byte is patched into the cpy at
+		;.lz_y; whole pages are handled via .lz_len_hi and .lz_l_page.
+		;------------------
+		;LITERAL
+		;------------------
+.lz_start_over
+		lda #$01
+		asl <.lz_bits
+!if ZX0_INLINE_GET_LEN == 1 {
+		bcc .lz_literal
+		jmp .lz_new_offset		;after each match check for another match or literal
+-						;lz_get_len as inline
+		asl <.lz_bits			;fetch payload bit
+		rol
+.lz_literal
+		asl <.lz_bits
+		bcc -
+
+		bne +
+		jsr .lz_refill_bits
++						;anonymous label restored: target of the bne + above
+} else {
+		bcs .lz_new_offset
+.lz_literal
+		jsr .lz_get_len
+}
+		sta .lz_y + 1
+		and #$ff			;annoying, but flags are not set corresponding to A
+		beq .lz_l_page
+;		dec <.lz_len_hi			;happens very seldom, so let's do that with lz_l_page that also decrements lz_len_hi
+.lz_cp_literal
+.lz_src1 = * + 2				;high byte of the operand below, patched on init and on page crossing
+		lda $1000,x
+		inx
+		bne +
+		jsr .lz_inc_src_hi
++						;anonymous label restored: target of the bne + above
+		sta (.lz_dst),y
+		iny
+.lz_y		cpy #$00
+		bne .lz_cp_literal
+
+		dey				;this way we force increment of lz_dst + 1 if y = 0
+		tya				;carry is still set on first round
+		adc <.lz_dst + 0		;correct dst after copy loop
+		sta <.lz_dst + 0
+		bcc +
+		inc <.lz_dst + 1
++						;anonymous label restored: target of the bcc + above
+		ldy <.lz_len_hi
+		bne .lz_l_page			;happens very seldom -> move away to prefer Z = 0 case
+
+		;After a literal: one bit selects a new offset (1) or reuse of the last
+		;offset (0). The match copy runs Y from a negative start index up to 0;
+		;lz_dst and the patched source address at .lz_msrcr are adjusted to fit.
+		;The offset is kept as the immediate operands of .lz_offset_lo/hi.
+		;------------------
+		;NEW OR OLD OFFSET
+		;------------------
+
+		lda #$01
+		asl <.lz_bits
+		bcs .lz_new_offset		;either match with new offset or old offset
+!if ZX0_INLINE_GET_LEN == 1 {
+		bcc .lz_match_repeat
+
+		;------------------
+		;DO MATCH
+		;------------------
+-						;lz_get_len as inline
+		asl <.lz_bits			;fetch payload bit
+		rol
+.lz_match_repeat
+		asl <.lz_bits
+		bcc -
+
+		bne +
+		jsr .lz_refill_bits
++						;anonymous label restored: target of the bne + above
+} else {
+.lz_match_repeat
+		jsr .lz_get_len
+}
+		sbc #$01			;saves the iny later on
+		bcs +
+		dcp .lz_len_hi			;dec highbyte of length by one, a = $ff, so cmp will always set carry for free on top
++						;anonymous label restored: target of the bcs + above
+.lz_match_
+		eor #$ff
+		;beq .lz_calc_msrc		;just fall through on zero? $ff + sec -> addition is neutralized and carry is set, so no harm
+		tay
+		eor #$ff			;restore A
+.lz_match__					;entry from new_offset handling
+		adc <.lz_dst + 0
+		sta <.lz_dst + 0
+		bcs .lz_clc			;/!\ branch happens very seldom
+		dec <.lz_dst + 1
+.lz_clc
+		clc
+.lz_offset_lo	adc #$00			;carry is cleared, subtract (offset + 1)
+		sta .lz_msrcr + 0
+		lda <.lz_dst + 1
+.lz_offset_hi	adc #$ff
+		sta .lz_msrcr + 1
+.lz_cp_match
+.lz_msrcr = * + 1				;address of the operand of the lda below, patched above
+		lda $beef,y
+		sta (.lz_dst),y
+		iny
+		bne .lz_cp_match
+		inc <.lz_dst + 1
+
+		lda <.lz_len_hi			;check for more loop runs
+!if ZX0_INPLACE == 1 {
+		bne .lz_m_page			;do more page runs
+
+		cpx <.lz_dst + 0		;check for end condition when depacking inplace
+		bne .lz_start_over
+		lda <.lz_dst + 1
+		sbc <.lz_src1			;NOTE(review): .lz_src1 is a code address, '<' takes only its low byte, so this reads a zero-page cell instead of the patched source high byte - looks unintended, confirm before enabling ZX0_INPLACE
+		bne .lz_start_over
+		rts
+} else {
+		beq .lz_start_over		;do more page runs
+}
+		;Multi-page handling: each entry consumes one unit of .lz_len_hi and
+		;re-enters the matching copy loop for another run.
+		;------------------
+		;SELDOM STUFF
+		;------------------
+.lz_m_page
+		dec <.lz_len_hi
+		inc .lz_msrcr + 1		;advance the patched match source by one page
+		jmp .lz_cp_match
+.lz_l_page
+		dec <.lz_len_hi
+		sec				;only needs to be set for consecutive rounds of literals, happens very seldom
+		ldy #$00
+		beq .lz_cp_literal		;always taken, Z is set by ldy #$00
+
+		;Match with a new offset. The Elias-gamma number read first gives the
+		;offset high part (an underflow on the sbc ends depacking); the next
+		;stream byte supplies the low offset bits and, in its lowest bit, the
+		;first bit of the length. The offset is stored as self-modified
+		;operands at .lz_offset_hi/lo.
+		;------------------
+		;FETCH A NEW OFFSET
+		;------------------
+
+!if ZX0_INLINE_GET_LEN == 1 {
+-						;lz_get_len as inline
+		asl <.lz_bits			;fetch payload bit
+		rol
+.lz_new_offset
+		asl <.lz_bits
+		bcc -
+
+		bne +
+		jsr .lz_refill_bits
++						;anonymous label restored: target of the bne + above
+} else {
+.lz_new_offset
+		jsr .lz_get_len
+}
+		sbc #$01
+		bcc .lz_eof			;underflow. must have been 0
+		eor #$ff
+
+		ror
+		sta .lz_offset_hi + 1		;hibyte of offset
+
+.lz_src2 = * + 2				;high byte of the operand below, patched on init and on page crossing
+		lda $1000,x			;looks expensive, but is cheaper than loop
+		inx
+		bne +
+		jsr .lz_inc_src_hi
++						;anonymous label restored: target of the bne + above
+		ror
+		sta .lz_offset_lo + 1
+
+		lda #$01
+		ldy #$fe
+		bcs .lz_match__			;length = 2 ^ $ff, do it the very short way :-)
+		ldy #$00
+!if ZX0_INLINE_GET_LEN == 1 {
+-
+		asl <.lz_bits			;fetch first payload bit
+		rol
+		asl <.lz_bits
+		bcc -
+		bne .lz_match_
+		jsr .lz_refill_bits		;fetch remaining bits
+} else {
+		jsr .lz_get_len_
+}
+		bcs .lz_match_
+
+;Advance the source by one page: bumps the patched high byte of all three
+;absolute,X source loads. Uses only inc, so A, X, Y and carry are preserved.
+.lz_inc_src_hi
+		inc .lz_src1
+		inc .lz_src2
+		inc .lz_src3
+		rts
+
+;Reads an interlaced Elias-gamma number into A (caller presets A), shifting
+;bits out of .lz_bits. When .lz_bits runs empty, .lz_refill_bits loads the next
+;stream byte (carry is rotated in as the new end marker) and continues with
+;16-bit collection into A / .lz_len_hi. Returns with Y = 0 after a refill.
+!if ZX0_INLINE_GET_LEN == 0 {
+.lz_get_len_
+-						;lz_get_len as inline
+		asl <.lz_bits			;fetch payload bit
+		rol
+.lz_get_len
+		asl <.lz_bits
+		bcc -
+		bne .lz_get_end
+}
+.lz_refill_bits					;refill bits, this happens after 4 payload-bits bestcase
+.lz_src3 = * + 2				;high byte of the operand below, patched on init and on page crossing
+		ldy $1000,x
+		inx
+		bne +
+		jsr .lz_inc_src_hi
++						;anonymous label restored: target of the bne + above
+		sty <.lz_bits
+		ldy #$00
+		rol <.lz_bits
+		bcs .lz_get_end
+-						;continue with 16 bit shifting
+		asl <.lz_bits			;fetch payload bit
+		rol				;can also moved to front and executed once on start
+.lz_get_len_16
+		rol <.lz_len_hi
+		asl <.lz_bits
+		bcc -
+		beq .lz_refill_bits
+.lz_get_end
+.lz_eof
+		rts
+.depacker_end
--- a/loader/tools/dali/dzx0_orig_zx0_v2.asm
+++ b/loader/tools/dali/dzx0_orig_zx0_v2.asm
@ -0,0 +1,207 @@
+;/!\ Attention, this depacker only works with the original zx0 version contained within this folder as well, it does not work with the modified version coming with bitfire, as some things on the encoding got changed due to speed optimizations
+
+;Depacker for the original ZX0 v2 stream format.
+;The source is read with absolute,X loads: X is the running low-byte index and
+;the high byte is patched into the operands at .lz_src1/2/3.
+;The destination is fixed to $a000 in the init code below.
+!cpu 6510
+
+;ZX0_INPLACE		= 0
+
+.ZP_ADDR		= $f8
+.lz_dst			= .ZP_ADDR + 0		;16-bit destination pointer
+.lz_bits		= .ZP_ADDR + 2		;bit buffer
+.lz_len_hi		= .ZP_ADDR + 4		;high byte of a 16-bit length
+
+.depacker_start
+		;------------------
+		;INIT
+		;------------------
+
+		;lowbyte of data start must be in X, highbyte in A
+		sta .lz_src1
+		sta .lz_src2
+		sta .lz_src3
+
+		lda #$00
+		sta <.lz_dst + 0
+		lda #$a0
+		sta <.lz_dst + 1
+
+		ldy #$ff			;initial offset operand bytes are $ff/$ff
+		sty .lz_offset_lo + 1
+		sty .lz_offset_hi + 1
+
+		iny				;Y = 0
+		sty <.lz_len_hi
+		lda #$40
+		sta <.lz_bits			;will make us fall through on next test and force us to load a new byte into bit-buffer upon next .lz_get_len
+
+		;Main loop head and literal run. One bit selects new-offset match (1)
+		;or literal (0). The literal length low byte is patched into the cpy at
+		;.lz_y; whole pages are handled via .lz_len_hi and .lz_l_page.
+		;------------------
+		;LITERAL
+		;------------------
+.lz_start_over
+		lda #$01
+		asl <.lz_bits
+		bcs .lz_new_offset
+.lz_literal
+		jsr .lz_get_len
+		sta .lz_y + 1
+		and #$ff                        ;annoying, but flags are not set corresponding to A
+		beq .lz_l_page_
+.lz_cp_literal
+.lz_src1 = * + 2				;high byte of the operand below, patched on init and on page crossing
+		lda $1000,x
+		inx
+		bne +
+		jsr .lz_inc_src_hi
++						;anonymous label restored: target of the bne + above
+		sta (.lz_dst),y
+		iny
+.lz_y		cpy #$00
+		bne .lz_cp_literal
+
+		dey				;this way we force increment of lz_dst + 1 if y = 0
+		tya				;carry is still set on first round
+		adc <.lz_dst + 0		;correct dst after copy loop
+		sta <.lz_dst + 0
+		bcc +
+		inc <.lz_dst + 1
++						;anonymous label restored: target of the bcc + above
+		ldy <.lz_len_hi
+		bne .lz_l_page			;happens very seldom -> move away to prefer Z = 0 case
+
+		;After a literal: one bit selects a new offset (1) or reuse of the last
+		;offset (0). The match copy runs Y from a negative start index up to 0;
+		;lz_dst and the patched source address at .lz_msrcr are adjusted to fit.
+		;When more pages remain, execution falls through the end of this
+		;section into .lz_dcp with A = $ff.
+		;------------------
+		;NEW OR OLD OFFSET
+		;------------------
+
+		lda #$01
+		asl <.lz_bits
+		bcs .lz_new_offset		;either match with new offset or old offset
+.lz_match_repeat
+		jsr .lz_get_len
+;!if ZX0_INPLACE == 0 {
+;}
+		sbc #$01			;saves the iny later on
+		bcc .lz_dcp			;dec highbyte of length by one, a = $ff, so cmp will always set carry for free on top
+.lz_match_
+		eor #$ff
+		;beq .lz_calc_msrc		;just fall through on zero? $ff + sec -> addition is neutralized and carry is set, so no harm
+		tay
+		eor #$ff			;restore A
+.lz_match__					;entry from new_offset handling
+		adc <.lz_dst + 0
+		sta <.lz_dst + 0
+		bcs .lz_clc			;/!\ branch happens very seldom
+		dec <.lz_dst + 1
+.lz_clc_back
+		clc
+.lz_offset_lo	adc #$ff			;carry is cleared, subtract (offset + 1)
+		sta .lz_msrcr + 0
+		lda <.lz_dst + 1
+.lz_offset_hi	adc #$ff
+		sta .lz_msrcr + 1
+.lz_cp_match
+.lz_msrcr = * + 1
+		lda $beef,y
+		sta (.lz_dst),y
+		iny
+		bne .lz_cp_match
+		inc <.lz_dst + 1
+
+		lda <.lz_len_hi			;check for more loop runs
+;!if ZX0_INPLACE == 1 {
+;		bne .lz_m_page			;do more page runs
+;
+;		cpx <.lz_dst + 0		;check for end condition when depacking inplace
+;		bne .lz_start_over
+;		lda <.lz_dst + 1
+;		sbc <.lz_src1
+;		bne .lz_start_over
+;		rts
+;.lz_m_page
+;		lda #$ff
+;} else {
+		beq .lz_start_over		;do more page runs
+		lda #$ff
+;}
+		;Out-of-line helpers. .lz_dcp is entered with A = $ff: dcp decrements
+		;.lz_len_hi and compares it with A, which leaves carry set, so the bcs
+		;always returns to .lz_match_. .lz_clc forces carry clear and jumps back.
+		;.lz_l_page/.lz_l_page_ consume one unit of .lz_len_hi and re-enter the
+		;literal copy loop.
+		;------------------
+		;SELDOM STUFF
+		;------------------
+.lz_dcp
+		dcp .lz_len_hi
+		bcs .lz_match_
+.lz_clc
+		clc
+		bcc .lz_clc_back
+.lz_l_page
+		sec				;only needs to be set for consecutive rounds of literals, happens very seldom
+		ldy #$00
+.lz_l_page_
+		dec <.lz_len_hi
+		bcs .lz_cp_literal
+
+		;Match with a new offset. The Elias-gamma number is collected starting
+		;from A = $fe; a result of 0 after the adc ends depacking. The next
+		;stream byte supplies the low offset bits and, in its lowest bit, the
+		;first bit of the length. The offset is stored as self-modified
+		;operands at .lz_offset_hi/lo.
+		;------------------
+		;FETCH A NEW OFFSET
+		;------------------
+
+.lz_new_offset
+		lda #$fe
+		jsr .lz_get_len
+		sty .lz_len_hi
+		adc #$00
+		beq .lz_eof			;underflow. must have been 0
+
+		sec
+		ror
+		sta .lz_offset_hi + 1		;hibyte of offset
+
+.lz_src2 = * + 2				;high byte of the operand below, patched on init and on page crossing
+		lda $1000,x			;looks expensive, but is cheaper than loop
+		inx
+		bne +
+		jsr .lz_inc_src_hi
++						;anonymous label restored: target of the bne + above
+		ror
+		sta .lz_offset_lo + 1
+
+		lda #$01
+		ldy #$fe
+		bcs .lz_match__			;length = 2 ^ $ff, do it the very short way :-)
+		ldy #$00
+		jsr .lz_get_len_
+		bcs .lz_match_
+
+;Advance the source by one page: bumps the patched high byte of all three
+;absolute,X source loads. Uses only inc, so A, X, Y and carry are preserved.
+.lz_inc_src_hi
+		inc .lz_src1
+		inc .lz_src2
+		inc .lz_src3
+		rts
+
+;Reads an interlaced Elias-gamma number into A (caller presets A), shifting
+;bits out of .lz_bits. When .lz_bits runs empty, .lz_refill_bits loads the next
+;stream byte (carry is rotated in as the new end marker) and continues with
+;16-bit collection into A / .lz_len_hi. Returns with Y = 0 after a refill.
+.lz_get_len_
+-						;lz_get_len as inline
+		asl <.lz_bits			;fetch payload bit
+		rol
+.lz_get_len
+		asl <.lz_bits
+		bcc -
+		bne .lz_get_end
+.lz_refill_bits					;refill bits, this happens after 4 payload-bits bestcase
+.lz_src3 = * + 2				;high byte of the operand below, patched on init and on page crossing
+		ldy $1000,x
+		inx
+		bne +
+		jsr .lz_inc_src_hi
++						;anonymous label restored: target of the bne + above
+		sty <.lz_bits
+		ldy #$00
+		rol <.lz_bits
+		bcs .lz_get_end
+-						;continue with 16 bit shifting
+		asl <.lz_bits			;fetch payload bit
+		rol				;can also moved to front and executed once on start
+.lz_get_len_16
+		rol <.lz_len_hi
+		asl <.lz_bits
+		bcc -
+		beq .lz_refill_bits
+.lz_get_end
+.lz_eof
+		rts
+.depacker_end
--- a/loader/tools/dali/salvador/LICENSE
+++ b/loader/tools/dali/salvador/LICENSE
@ -0,0 +1,3 @@
+The salvador code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.
+
+Please consult LICENSE.zlib.md and LICENSE.cc0.md for more information.
--- a/loader/tools/dali/salvador/LICENSE.cc0.md
+++ b/loader/tools/dali/salvador/LICENSE.cc0.md
@ -0,0 +1,43 @@
+## creative commons
+
+# CC0 1.0 Universal
+
+CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
+
+### Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+    i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+
+    ii. moral rights retained by the original author(s) and/or performer(s);
+
+    iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+
+    iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+
+    v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
+
+    vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+
+    vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. __Limitations and Disclaimers.__
+
+    a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+
+    b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+
+    c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+
+    d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
--- a/loader/tools/dali/salvador/LICENSE.zlib.md
+++ b/loader/tools/dali/salvador/LICENSE.zlib.md
@ -0,0 +1,19 @@
+Copyright (c) 2021 Emmanuel Marty
+
+This software is provided 'as-is', without any express or implied warranty. In
+no event will the authors be held liable for any damages arising from the use of
+this software.
+
+Permission is granted to anyone to use this software for any purpose, including
+commercial applications, and to alter it and redistribute it freely, subject to
+the following restrictions:
+
+1.  The origin of this software must not be misrepresented; you must not claim
+    that you wrote the original software. If you use this software in a product,
+    an acknowledgment in the product documentation would be appreciated but is
+    not required.
+
+2.  Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+3.  This notice may not be removed or altered from any source distribution.
--- a/loader/tools/dali/salvador/Makefile
+++ b/loader/tools/dali/salvador/Makefile
@ -0,0 +1,28 @@
+# Build file for the salvador ZX0 compressor.
+# CC, CFLAGS, LDFLAGS and OBJDIR may be overridden on the command line.
+CC=clang
+CFLAGS=-O3 -g -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
+OBJDIR=obj
+LDFLAGS=
+
+# Emit a .d file next to every object so that header edits trigger rebuilds;
+# -MP adds dummy targets so a deleted header does not break the build.
+DEPFLAGS=-MMD -MP
+
+# all/clean are commands, not files; never let a same-named file shadow them.
+.PHONY: all clean
+
+# Remove a half-written target when its recipe fails.
+.DELETE_ON_ERROR:
+
+# $(OBJDIR)/<path>.o is compiled from <path>.c, relative to this directory.
+$(OBJDIR)/%.o: src/../%.c
+	@mkdir -p '$(@D)'
+	$(CC) $(DEPFLAGS) $(CFLAGS) -c $< -o $@
+
+APP := salvador
+
+OBJS += $(OBJDIR)/src/salvador.o
+OBJS += $(OBJDIR)/src/expand.o
+OBJS += $(OBJDIR)/src/matchfinder.o
+OBJS += $(OBJDIR)/src/shrink.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
+
+all: $(APP)
+
+$(APP): $(OBJS)
+	$(CC) $^ $(LDFLAGS) -o $@
+
+clean:
+	@rm -rf $(APP) $(OBJDIR)
+
+# Pull in the generated header dependencies; silently absent on a fresh build.
+-include $(OBJS:.o=.d)
+
--- a/loader/tools/dali/salvador/README.md
+++ b/loader/tools/dali/salvador/README.md
@ -0,0 +1,31 @@
+salvador -- a fast, near-optimal compressor for the ZX0 format
+==============================================================
+
+salvador is a command-line tool and a library that compresses bitstreams in the ZX0 format. 
+
+The tool outputs compressed files that are within 0.02% on average, of the files produced by the zx0 packer itself. The compressor is, however, several orders of magnitude faster, with compression speed similar to [apultra](https://github.com/emmanuel-marty/apultra). 
+
+The compressor can pack files of any size, however, due to the 31.5 KB window size, files larger than 128-256 KB will get a better ratio with apultra. This will not be an issue when compressing for the main target, 8-bit micros. By default, salvador compresses for the modern (V2) format. The classic, legacy format is also supported; use the -classic flag on the command line.
+
+salvador is written in portable C. It is fully open-source under a liberal license. You can use the ZX0 decompression libraries for your target environment. As with LZSA and apultra, you can do whatever you like with it.
+
+The output is fully compatible with the [ZX0](https://github.com/einar-saukas/ZX0) compressor by Einar Saukas.
+
+Check out [Dali](https://csdb.dk/release/?id=213694&show=summary) by Bitbreaker, that uses Salvador to compress for the C64, including self, in-place decompression and proper handling of load-addresses. The tool is part of the [Bitfire](https://github.com/bboxy/bitfire) C64 loading system.
+
+Included 8-bit decompression code:
+
+ * [8088](https://github.com/emmanuel-marty/salvador/tree/main/asm/8088) by Emmanuel Marty. 
+ * [68000](https://github.com/emmanuel-marty/salvador/tree/main/asm/68000) by Emmanuel Marty. 
+ * [z80](https://github.com/emmanuel-marty/salvador/tree/main/asm/Z80) by spke and uniabis. Use the -classic flag to compress data for the Z80.
+ * [6502](https://github.com/emmanuel-marty/salvador/tree/main/asm/6502) by John Brandwood. Use the -classic flag to compress data for the 6502.
+ * [HuC6280](https://github.com/emmanuel-marty/salvador/tree/main/asm/HuC6280) by John Brandwood. Use the -classic flag for this platform as well.
+
+External decompression code:
+
+ * [6809 and 6309](https://github.com/dougmasten/zx0-6x09) by Doug Masten. Use the -classic flag to compress data for the 6809 or 6309 depackers.
+
+License:
+
+* The salvador code is available under the Zlib license.
+* The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Biggers' wimlib in the suffix array-based matchfinder.
--- a/loader/tools/dali/salvador/VS2019/salvador.sln
+++ b/loader/tools/dali/salvador/VS2019/salvador.sln
@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.31729.503
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "salvador", "salvador.vcxproj", "{F4C10DBA-8808-4418-A78F-719C6A7761EF}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x64.ActiveCfg = Debug|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x64.Build.0 = Debug|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x86.ActiveCfg = Debug|Win32
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x86.Build.0 = Debug|Win32
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x64.ActiveCfg = Release|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x64.Build.0 = Release|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x86.ActiveCfg = Release|Win32
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {6A45AD4E-9B6B-4AAB-9FEA-CA453AE1822D}
+	EndGlobalSection
+EndGlobal
--- a/loader/tools/dali/salvador/VS2019/salvador.vcxproj
+++ b/loader/tools/dali/salvador/VS2019/salvador.vcxproj
@ -0,0 +1,182 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>16.0</VCProjectVersion>
+    <Keyword>Win32Proj</Keyword>
+    <ProjectGuid>{f4c10dba-8808-4418-a78f-719c6a7761ef}</ProjectGuid>
+    <RootNamespace>salvador</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+    <TargetName>$(ProjectName)_debug</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+    <TargetName>$(ProjectName)_debug</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\expand.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\trsort.c" />
+    <ClCompile Include="..\src\matchfinder.c" />
+    <ClCompile Include="..\src\salvador.c" />
+    <ClCompile Include="..\src\shrink.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\src\expand.h" />
+    <ClInclude Include="..\src\format.h" />
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h" />
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h" />
+    <ClInclude Include="..\src\libsalvador.h" />
+    <ClInclude Include="..\src\matchfinder.h" />
+    <ClInclude Include="..\src\shrink.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
--- a/loader/tools/dali/salvador/VS2019/salvador.vcxproj.filters
+++ b/loader/tools/dali/salvador/VS2019/salvador.vcxproj.filters
@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Fichiers sources">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Fichiers d%27en-tête">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
+    </Filter>
+    <Filter Include="Fichiers de ressources">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Fichiers sources\libdivsufsort">
+      <UniqueIdentifier>{86e66d4d-937b-4037-af93-856105377549}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Fichiers sources\libdivsufsort\include">
+      <UniqueIdentifier>{c4baa29b-3731-40b1-bcc9-d5aa5673114f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Fichiers sources\libdivsufsort\lib">
+      <UniqueIdentifier>{56092ba4-e514-4de2-8528-adff2da7ac3f}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\expand.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\matchfinder.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\salvador.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\shrink.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\trsort.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\src\expand.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\format.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libsalvador.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\matchfinder.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\shrink.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h">
+      <Filter>Fichiers sources\libdivsufsort\include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h">
+      <Filter>Fichiers sources\libdivsufsort\include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h">
+      <Filter>Fichiers sources\libdivsufsort\include</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
--- a/loader/tools/dali/salvador/VS2019/salvador.vcxproj.user
+++ b/loader/tools/dali/salvador/VS2019/salvador.vcxproj.user
@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+</Project>
--- a/loader/tools/dali/salvador/asm/6502/zx0_6502.asm
+++ b/loader/tools/dali/salvador/asm/6502/zx0_6502.asm
@ -0,0 +1,251 @@
+; ***************************************************************************
+; ***************************************************************************
+;
+; zx0_6502.asm
+;
+; NMOS 6502 decompressor for data stored in Einar Saukas's ZX0 format.
+;
+; This code is written for the ACME assembler.
+;
+; The code is 196 bytes long, and is self-modifying.
+;
+; Copyright John Brandwood 2021.
+;
+; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at
+;  http://www.boost.org/LICENSE_1_0.txt)
+;
+; ***************************************************************************
+; ***************************************************************************
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; Decompression Options & Macros
+;
+
+                ;
+                ; Assume that we're decompressing from a large multi-bank
+                ; compressed data file, and that the next bank may need to
+                ; be paged in when a page-boundary is crossed.
+                ;
+
+ZX0_FROM_BANK   =       0
+
+                ;
+                ; Macro to increment the source pointer to the next page.
+                ;
+
+                !macro   ZX0_INC_PAGE {
+                !if     ZX0_FROM_BANK {
+                        jsr     zx0_next_page   ; Not defined in this file.
+                } else   {
+                        inc     <zx0_srcptr + 1 ; No banking: bump ptr hi-byte.
+                }
+                }
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; Data usage is 8 bytes of zero-page.
+;
+
+zx0_srcptr      =       $F8                     ; 1 word.
+zx0_dstptr      =       $FA                     ; 1 word.
+zx0_length      =       $FC                     ; 1 word.
+zx0_offset      =       $FE                     ; 1 word.
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; zx0_unpack - Decompress data stored in Einar Saukas's ZX0 format.
+;
+; Args: zx0_srcptr = ptr to compressed data
+; Args: zx0_dstptr = ptr to output buffer
+; Uses: lots!
+;
+
+zx0_unpack:     ldy     #$FF                    ; Initialize default offset.
+                sty     <zx0_offset+0           ; Offset = $FFFF, i.e. -1.
+                sty     <zx0_offset+1
+                iny                             ; Initialize source index (Y = 0).
+                sty     <zx0_length+1           ; Length hi-byte = 0 (lo set below).
+
+                ldx     #$40                    ; Initialize empty buffer (literals first).
+
+zx0_next_cmd:   lda     #1                      ; Initialize length back to 1.
+                sta     <zx0_length + 0
+
+                txa                             ; Restore bit-buffer.
+
+                asl                             ; Copy from literals or new offset?
+                bcc     zx0_cp_literal          ; Bit clear: literal run.
+
+                ;
+                ; Copy bytes from new offset.
+                ;
+
+zx0_new_offset: jsr     zx0_gamma_flag          ; Get offset MSB, returns CS.
+
+                tya                             ; Negate offset MSB and check
+                sbc     <zx0_length + 0         ; for zero (EOF marker).
+                bcs     zx0_got_eof
+
+                sec                             ; Force a 1 into bit 7 so the
+                ror                             ; offset stays negative; bit 0 -> C.
+                sta     <zx0_offset + 1         ; Save offset MSB.
+
+                lda     (<zx0_srcptr),y         ; Get offset LSB.
+                inc     <zx0_srcptr + 0
+                beq     zx0_inc_of_src          ; Lo-byte wrapped: next page.
+
+zx0_off_skip1:  ror                             ; Last offset bit starts gamma.
+                sta     <zx0_offset + 0         ; Save offset LSB.
+
+                lda     #-2                     ; Minimum length of 2?
+                bcs     zx0_get_lz_dst          ; Yes: Y will run $FE..$FF.
+
+                lda     #1                      ; Initialize length back to 1.
+                sta     <zx0_length + 0
+
+                txa                             ; Restore bit-buffer.
+
+                jsr     zx0_gamma_data          ; Get length, returns CS.
+
+                lda     <zx0_length + 0         ; Negate lo-byte of (length+1).
+                eor     #$FF
+
+;               bne     zx0_get_lz_dst          ; N.B. Optimized to do nothing!
+;
+;               inc     <zx0_length + 1         ; Increment from (length+1).
+;               dec     <zx0_length + 1         ; Decrement because lo-byte=0.
+
+zx0_get_lz_dst: tay                             ; Calc address of partial page.
+                eor     #$FF                    ; Always CS from previous SBC.
+                adc     <zx0_dstptr + 0
+                sta     <zx0_dstptr + 0
+                bcs     zx0_get_lz_win          ; Carry out: hi-byte already right.
+
+                dec     <zx0_dstptr + 1         ; No carry: step hi-byte back.
+
+zx0_get_lz_win: clc                             ; Calc address of match.
+                adc     <zx0_offset + 0         ; N.B. Offset is negative!
+                sta     zx0_winptr + 0
+                lda     <zx0_dstptr + 1
+                adc     <zx0_offset + 1
+                sta     zx0_winptr + 1
+
+zx0_winptr      =       *+1
+
+zx0_lz_page:    lda     $1234,y                 ; Self-modifying zx0_winptr.
+                sta     (<zx0_dstptr),y
+                iny
+                bne     zx0_lz_page             ; Until Y wraps to 0.
+                inc     <zx0_dstptr + 1         ; Move on to the next output page.
+
+                lda     <zx0_length + 1         ; Any full pages left to copy?
+                beq     zx0_next_cmd
+
+                dec     <zx0_length + 1         ; This is rare, so slower.
+                inc     zx0_winptr + 1
+                bne     zx0_lz_page             ; Always true.
+
+zx0_got_eof:    rts                             ; Finished decompression.
+
+                ;
+                ; Copy bytes from compressed source.
+                ;
+
+zx0_cp_literal: jsr     zx0_gamma_flag          ; Get length, returns CS.
+
+                pha                             ; Preserve bit-buffer.
+
+                ldx     <zx0_length + 0         ; Check the lo-byte of length
+                bne     zx0_cp_byte             ; without affecting CS.
+
+zx0_cp_page:    dec     <zx0_length + 1         ; Decrement # of pages to copy.
+
+zx0_cp_byte:    lda     (<zx0_srcptr),y         ; CS throughout the execution
+                sta     (<zx0_dstptr),y         ; of this .cp_page loop.
+
+                inc     <zx0_srcptr + 0
+                beq     zx0_inc_cp_src          ; Source lo-byte wrapped.
+
+zx0_cp_skip1:   inc     <zx0_dstptr + 0
+                beq     zx0_inc_cp_dst          ; Destination lo-byte wrapped.
+
+zx0_cp_skip2:   dex                             ; Any bytes left to copy?
+                bne     zx0_cp_byte
+
+                lda     <zx0_length + 1         ; Any full pages left to copy?
+                bne     zx0_cp_page             ; Optimized for branch-unlikely.
+
+                inx                             ; Initialize length back to 1.
+                stx     <zx0_length + 0
+
+                pla                             ; Restore bit-buffer.
+
+                asl                             ; Copy from last offset or new offset?
+                bcs     zx0_new_offset          ; Bit set: read a new offset.
+
+                ;
+                ; Copy bytes from last offset (rare so slower).
+                ;
+
+zx0_old_offset: jsr     zx0_gamma_flag          ; Get length, returns CS.
+
+                tya                             ; Negate the lo-byte of length.
+                sbc     <zx0_length + 0
+                sec                             ; Ensure CS before zx0_get_lz_dst!
+                bne     zx0_get_lz_dst
+
+                dec     <zx0_length + 1         ; Decrement because lo-byte=0.
+                bcs     zx0_get_lz_dst          ; Always true!
+
+                ;
+                ; Optimized handling of pointers crossing page-boundaries.
+                ;
+
+zx0_inc_of_src: +ZX0_INC_PAGE                   ; Src wrapped reading offset LSB.
+                bne     zx0_off_skip1           ; Always true.
+
+zx0_inc_cp_src: +ZX0_INC_PAGE                   ; Src wrapped during literal copy.
+                bcs     zx0_cp_skip1            ; Always true.
+
+zx0_inc_cp_dst: inc     <zx0_dstptr + 1         ; Dst wrapped during literal copy.
+                bcs     zx0_cp_skip2            ; Always true.
+
+zx0_inc_ga_src: +ZX0_INC_PAGE                   ; Src wrapped reloading bit-buffer.
+                bne     zx0_gamma_skip          ; Always true.
+
+                ;
+                ; Get 16-bit interlaced Elias gamma value.
+                ;
+
+zx0_gamma_data: asl                             ; Get next bit.
+                rol     <zx0_length + 0         ; Shift data bit into length.
+zx0_gamma_flag: asl                             ; Get flag bit (or end marker).
+                bcc     zx0_gamma_data          ; Loop until finished or empty.
+                bne     zx0_gamma_done          ; Bit-buffer empty?
+
+zx0_gamma_load: lda     (<zx0_srcptr),y         ; Reload the empty bit-buffer
+                inc     <zx0_srcptr + 0         ; from the compressed source.
+                beq     zx0_inc_ga_src          ; Lo-byte wrapped: next page.
+zx0_gamma_skip: rol                             ; C=1 enters as new end marker.
+                bcs     zx0_gamma_done          ; Finished?
+
+zx0_gamma_word: asl                             ; Get next bit.
+                rol     <zx0_length + 0         ; Shift it into the 16-bit
+                rol     <zx0_length + 1         ; length value.
+                asl                             ; Get flag bit (or end marker).
+                bcc     zx0_gamma_word          ; Loop until finished or empty.
+                beq     zx0_gamma_load          ; Bit-buffer empty?
+
+zx0_gamma_done: tax                             ; Preserve bit-buffer.
+                rts                             ; Carry is set on every exit path.
--- a/loader/tools/dali/salvador/asm/68000/unzx0_68000.S
+++ b/loader/tools/dali/salvador/asm/68000/unzx0_68000.S
@ -0,0 +1,76 @@
+;  unzx0_68000.s - ZX0 decompressor for 68000 - 88 bytes
+;
+;  in:  a0 = start of compressed data
+;       a1 = start of decompression buffer
+;
+;  Copyright (C) 2021 Emmanuel Marty
+;  ZX0 compression (c) 2021 Einar Saukas, https://github.com/einar-saukas/ZX0
+;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+
+zx0_decompress:                     ; d0/d1 are clobbered; a2/d2 are saved
+               movem.l a2/d2,-(sp)  ; preserve registers
+               moveq #-128,d1       ; initialize empty bit queue
+                                    ; plus bit to roll into carry
+               moveq #-1,d2         ; initialize rep-offset to -1 (distance 1)
+
+.literals:     bsr.s .get_elias     ; read number of literals to copy
+               subq.l #1,d0         ; dbf will loop until d0 is -1, not 0
+.copy_lits:    move.b (a0)+,(a1)+   ; copy literal byte
+               dbf d0,.copy_lits    ; loop for all literal bytes (uses d0.w)
+               
+               add.b d1,d1          ; read 'match or rep-match' bit
+               bcs.s .get_offset    ; if 1: read offset, if 0: rep-match
+
+.rep_match:    bsr.s .get_elias     ; read match length (starts at 1)
+.do_copy:      subq.l #1,d0         ; dbf will loop until d0 is -1, not 0
+.do_copy_offs: move.l a1,a2         ; calculate backreference address
+               add.l d2,a2          ; (dest + negative match offset)
+.copy_match:   move.b (a2)+,(a1)+   ; copy matched byte
+               dbf d0,.copy_match   ; loop for all matched bytes (uses d0.w)
+
+               add.b d1,d1          ; read 'literal or match' bit
+               bcc.s .literals      ; if 0: go copy literals
+
+.get_offset:   moveq #-2,d0         ; initialize value to $fe
+               bsr.s .elias_loop    ; read high byte of match offset
+               addq.b #1,d0         ; obtain negative offset high byte
+               beq.s .done          ; exit if EOD marker
+               move.w d0,d2         ; transfer negative high byte into d2
+               lsl.w #8,d2          ; shift it to make room for low byte
+
+               moveq #1,d0          ; initialize length value to 1
+               move.b (a0)+,d2      ; read low byte of offset + 1 bit of len
+               asr.l #1,d2          ; shift len bit into carry/offset in place
+               bcs.s .do_copy_offs  ; if len bit is set, no need for more
+               bsr.s .elias_bt      ; read rest of elias-encoded match length
+               bra.s .do_copy_offs  ; go copy match
+
+.get_elias:    moveq #1,d0          ; initialize value to 1
+.elias_loop:   add.b d1,d1          ; shift bit queue, high bit into carry
+               bne.s .got_bit       ; queue not empty, bits remain
+               move.b (a0)+,d1      ; read 8 new bits
+               addx.b d1,d1         ; shift bit queue, high bit into carry
+                                    ; and shift 1 from carry into bit queue
+
+.got_bit:      bcs.s .got_elias     ; done if control bit is 1
+.elias_bt:     add.b d1,d1          ; read data bit
+               addx.l d0,d0         ; shift data bit into value in d0
+               bra.s .elias_loop    ; keep reading
+
+.done:         movem.l (sp)+,a2/d2  ; restore preserved registers
+.got_elias:    rts                  ; shared return (elias reader and .done)
--- a/loader/tools/dali/salvador/asm/8088/unzx0_8088.S
+++ b/loader/tools/dali/salvador/asm/8088/unzx0_8088.S
@ -0,0 +1,94 @@
+;  unzx0_8088.S - ZX0 decompressor for 8088 - 81 bytes - NASM
+;
+;  inputs:
+;  * ds:si: start of compressed data
+;  * es:di: start of decompression buffer
+;
+;  Copyright (C) 2021 Emmanuel Marty
+;  ZX0 compression (c) 2021 Einar Saukas, https://github.com/einar-saukas/ZX0
+;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+
+        segment .text
+        bits 16
+
+zx0_decompress:
+        cld                     ; make string operations go forward
+        mov     al,080H         ; initialize empty bit queue
+                                ; plus bit to roll into carry
+        xor     dx,dx           ; initialize rep-offset to -1
+        dec     dx              ; (dx = FFFFh, i.e. a distance of 1)
+
+.literals:
+        call    .get_elias      ; read number of literals to copy
+        rep     movsb           ; copy literal bytes
+
+        add     al,al           ; shift bit queue, and high bit into carry
+        jc      .get_offset     ; if 1: read offset, if 0: rep-match
+
+        call    .get_elias      ; read rep-match length (starts at 1)
+
+.copy_match:
+        push    ds              ; save ds:si (current pointer to compressed data)
+        push    si
+
+        push    es              ; make ds = es so the match is read from
+        pop     ds              ; the output buffer
+        mov     si,di           ; point to destination in es:di + offset in dx
+        add     si,dx
+        rep     movsb           ; copy matched bytes
+
+        pop     si              ; restore ds:si
+        pop     ds
+
+        add     al,al           ; read 'literal or match' bit
+        jnc     .literals       ; if 0: go copy literals
+
+.get_offset:
+        mov     cl,0feh         ; initialize value to FEh
+        call    .elias_loop     ; read high byte of match offset
+        inc     cl              ; obtain negative offset high byte
+        je      .done           ; exit if EOD marker
+        
+        mov     dh,cl           ; transfer negative high byte into dh
+        mov     cx,1            ; initialize match length value to 1
+        mov     dl,[si]         ; read low byte of offset + 1 bit of len
+        inc     si
+        stc                     ; set high bit that is shifted into bit 15
+        rcr     dx,1            ; shift len bit into carry/offset in place
+        jc      .got_offs       ; if len bit is set, no need for more
+        call    .elias_bt       ; read rest of elias-encoded match length
+.got_offs:
+        inc     cx              ; fix match length
+        jmp     short .copy_match ; go copy match
+
+.get_elias:
+        mov     cx,1            ; initialize value to 1
+.elias_loop:
+        add     al,al           ; shift bit queue, and high bit into carry
+        jnz     .got_bit        ; queue not empty, bits remain
+        lodsb                   ; read 8 new bits
+        adc     al,al           ; shift bit queue, and high bit into carry
+.got_bit:
+        jc      .got_elias      ; done if control bit is 1
+.elias_bt:
+        add     al,al           ; read data bit
+        adc     cx,cx           ; shift into cx
+        jmp     short .elias_loop ; keep reading
+.got_elias:
+.done:
+        ret                     ; shared return (elias reader and end of data)
--- a/loader/tools/dali/salvador/asm/HuC6280/unpack-zx0.asm
+++ b/loader/tools/dali/salvador/asm/HuC6280/unpack-zx0.asm
@ -0,0 +1,489 @@
+; ***************************************************************************
+; ***************************************************************************
+;
+; unpack-zx0.asm
+;
+; HuC6280 decompressor for Einar Saukas's "classic" ZX0 format.
+;
+; The code length is 193 bytes for RAM, 243 bytes for direct-to-VRAM, plus
+; some generic utility code.
+;
+; Copyright John Brandwood 2021.
+;
+; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at
+;  http://www.boost.org/LICENSE_1_0.txt)
+;
+; ***************************************************************************
+; ***************************************************************************
+;
+; ZX0 "modern" format is not supported, because it costs an extra 4 bytes of
+; code in this decompressor, and it runs slower.
+;
+; ***************************************************************************
+; ***************************************************************************
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; If you decompress directly to VRAM, then you need to define a ring-buffer
+; in RAM, both sized and aligned to a power-of-two (i.e. 512, 1KB, 2KB, 4KB).
+;
+; You also need to make sure that you tell the compressor that it needs to
+; limit the window size with its "-w" option.
+;
+; Note that CD-ROM developers should really just decompress to RAM, and then
+; use a TIA to copy the data to VRAM; because that is faster, you get better
+; compression without a window, and you save code memory by not needing both
+; versions of the decompression routine.
+;
+
+	.ifndef	ZX0_WINBUF
+
+ZX0_WINBUF	=	($3800) >> 8		; Default to a 2KB window in
+ZX0_WINMSK	=	($0800 - 1) >> 8	; RAM, located at $3800.
+
+	.endif
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; Data usage is 11 bytes of zero-page, using aliases for clarity.
+;
+
+zx0_srcptr	=	__si			; 1 word.
+zx0_dstptr	=	__di			; 1 word.
+
+zx0_length	=	__ax			; 1 word.
+zx0_offset	=	__bx			; 1 word.
+zx0_winptr	=	__cx			; 1 word.
+zx0_bitbuf	=	__dl			; 1 byte.
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; zx0_to_ram - Decompress data stored in Einar Saukas's ZX0 "classic" format.
+;
+; Args: __si, __si_bank = _farptr to compressed data in MPR3.
+; Args: __di = ptr to output address in RAM.
+;
+; Uses: __si, __di, __ax, __bx, __cx, __dh !
+;
+
+zx0_to_ram	.proc
+
+		; Register roles, as used by the code below:
+		;   A = working bit-buffer while bits are being shifted out.
+		;   X = saved copy of the bit-buffer between gamma reads
+		;       (written by .gamma_done, restored with "txa").
+		;   Y = byte index / counter for the copy loops.
+
+		jsr	__si_to_mpr3		; Map zx0_srcptr to MPR3.
+
+		; With $40 the first "asl" yields carry clear (so the stream
+		; starts with literals) and the next "asl" leaves A zero, which
+		; forces a reload on the first gamma read.
+
+		ldx	#$40			; Initialize bit-buffer.
+
+		ldy	#$FF			; Initialize offset to $FFFF.
+		sty	<zx0_offset + 0
+		sty	<zx0_offset + 1
+
+		iny				; Initialize hi-byte of length
+		sty	<zx0_length + 1		; to zero.
+
+		; Y is zero on every path that arrives here, so the "iny"
+		; below always produces 1.
+
+.lz_finished:	iny				; Initialize length back to 1.
+		sty	<zx0_length + 0
+
+		txa				; Restore bit-buffer.
+
+		asl	a			; Copy from literals or new offset?
+		bcc	.cp_literals
+
+		;
+		; Copy bytes from new offset.
+		;
+
+.new_offset:	jsr	.get_gamma_flag		; Get offset MSB, returns CS.
+
+		; Carry is set here, so "cla / sbc" computes 0 - length.
+
+		cla				; Negate offset MSB and check
+		sbc	<zx0_length + 0		; for zero (EOF marker).
+		beq	.got_eof
+
+		sec				; Shift a 1 into bit 7; bit 0 of the
+		ror	a			; MSB moves to carry for the LSB.
+		sta	<zx0_offset + 1		; Save offset MSB.
+
+		lda	[zx0_srcptr]		; Get offset LSB.
+		inc	<zx0_srcptr + 0
+		beq	.inc_off_src
+
+.off_skip1:	ror	a			; Last offset bit starts gamma.
+		sta	<zx0_offset + 0		; Save offset LSB.
+
+		lda	#-2			; Minimum length of 2?
+		bcs	.get_lz_dst
+
+		sty	<zx0_length + 0		; Initialize length back to 1.
+
+		txa				; Restore bit-buffer.
+
+		bsr	.get_gamma_data		; Get length, returns CS.
+
+		lda	<zx0_length + 0		; Negate lo-byte of (length+1).
+		eor	#$FF
+
+;		bne	.get_lz_dst		; N.B. Optimized to do nothing!
+;
+;		inc	<zx0_length + 1		; Increment from (length+1).
+;		dec	<zx0_length + 1		; Decrement because lo-byte=0.
+
+		; On entry A holds the negated lo-byte of the copy length and
+		; carry is set. Y becomes the (negative) start index, and
+		; zx0_dstptr is advanced so that [zx0_dstptr],y addresses the
+		; first byte to write and Y reaches zero at the end of the page.
+
+.get_lz_dst:	tay				; Calc address of partial page.
+		eor	#$FF
+		adc	<zx0_dstptr + 0		; Always CS from .get_gamma_data.
+		sta	<zx0_dstptr + 0
+		bcs	.get_lz_win
+
+		dec	<zx0_dstptr + 1
+
+.get_lz_win:	clc				; Calc address of match.
+		adc	<zx0_offset + 0		; N.B. Offset is negative!
+		sta	<zx0_winptr + 0
+		lda	<zx0_dstptr + 1
+		adc	<zx0_offset + 1
+		sta	<zx0_winptr + 1
+
+.lz_byte:	lda	[zx0_winptr], y		; Copy bytes from window into
+		sta	[zx0_dstptr], y		; decompressed data.
+		iny
+		bne	.lz_byte
+		inc	<zx0_dstptr + 1		; Y wrapped: step to next page.
+
+		lda	<zx0_length + 1		; Any full pages left to copy?
+		beq	.lz_finished
+
+		dec	<zx0_length + 1		; This is rare, so slower.
+		inc	<zx0_winptr + 1
+		bra	.lz_byte
+
+.got_eof:	leave				; Finished decompression!
+
+		;
+		; Copy bytes from compressed source.
+		;
+
+.cp_literals:	bsr	.get_gamma_flag		; Get length, returns CS.
+
+		ldy	<zx0_length + 0		; Check if lo-byte of length
+		bne	.cp_byte		; == 0 without affecting CS.
+
+.cp_page:	dec	<zx0_length + 1		; Decrement # pages to copy.
+
+.cp_byte:	lda	[zx0_srcptr]		; Copy bytes from compressed
+		sta	[zx0_dstptr]		; data to decompressed data.
+
+		inc	<zx0_srcptr + 0
+		beq	.inc_cp_src
+.cp_skip1:	inc	<zx0_dstptr + 0
+		beq	.inc_cp_dst
+
+.cp_skip2:	dey				; Any bytes left to copy?
+		bne	.cp_byte
+
+		lda	<zx0_length + 1		; Any pages left to copy?
+		bne	.cp_page		; Optimized for branch-unlikely.
+
+		iny				; Initialize length back to 1.
+		sty	<zx0_length + 0
+
+		txa				; Restore bit-buffer.
+
+		asl	a			; Copy from last offset or new offset?
+		bcs	.new_offset
+
+		;
+		; Copy bytes from last offset (rare so slower).
+		;
+
+.old_offset:	bsr	.get_gamma_flag		; Get length, returns CS.
+
+		cla				; Negate the lo-byte of length.
+		sbc	<zx0_length + 0
+		sec				; Ensure CS before .get_lz_dst!
+		bne	.get_lz_dst
+
+		dec	<zx0_length + 1		; Decrement because lo-byte=0.
+		bra	.get_lz_dst
+
+		;
+		; Optimized handling of pointers crossing page-boundaries.
+		;
+		; NOTE(review): the callers rely on A and carry surviving
+		; __si_inc_page - confirm against that routine.
+		;
+
+.inc_off_src:	jsr	__si_inc_page
+		bra	.off_skip1
+
+.inc_cp_src:	jsr	__si_inc_page
+		bra	.cp_skip1
+
+.inc_cp_dst:	inc	<zx0_dstptr + 1
+		bra	.cp_skip2
+
+.gamma_page:	jsr	__si_inc_page
+		bra	.gamma_skip1
+
+		;
+		; Get 16-bit interlaced Elias gamma value.
+		;
+		; Entry: A = bit-buffer, zx0_length = initial value.
+		;   .get_gamma_data - next bit in A is a data bit.
+		;   .get_gamma_flag - next bit in A is a flag bit.
+		; Exit:  zx0_length = value, X = A = bit-buffer, carry set.
+		;
+		; The first loop only shifts the lo-byte of zx0_length; the
+		; hi-byte is only shifted after the bit-buffer was reloaded.
+		;
+
+.get_gamma_data:asl	a			; Get next bit.
+		rol	<zx0_length + 0
+.get_gamma_flag:asl	a
+		bcc	.get_gamma_data		; Loop until finished or empty.
+		bne	.gamma_done		; Bit-buffer empty?
+
+.gamma_reload:	lda	[zx0_srcptr]		; Reload the empty bit-buffer
+		inc	<zx0_srcptr + 0		; from the compressed source.
+		beq	.gamma_page
+.gamma_skip1:	rol	a			; Carry in becomes the end marker.
+		bcs	.gamma_done		; Finished?
+
+.get_gamma_loop:asl	a			; Get next bit.
+		rol	<zx0_length + 0
+		rol	<zx0_length + 1
+		asl	a
+		bcc	.get_gamma_loop		; Loop until finished or empty.
+		beq	.gamma_reload		; Bit-buffer empty?
+
+.gamma_done:	tax				; Preserve bit-buffer.
+		rts
+
+		.endp
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; zx0_to_vdc - Decompress data stored in Einar Saukas's ZX0 "classic" format.
+;
+; Args: __si, __si_bank = _farptr to compressed data in MPR3.
+; Args: __di = ptr to output address in VRAM.
+;
+; Uses: __si, __di, __ax, __bx, __cx, __dl, __dh!
+;
+
+		.procgroup			; Group code in the same bank.
+
+	.if	SUPPORT_SGX
+		; Alternate entry point that selects the SGX VDC: X is loaded
+		; with SGX_VDC_OFFSET and a raw $E0 byte is emitted so that the
+		; next byte in memory is consumed as the operand of a "cpx #"
+		; instead of being executed.
+		; NOTE(review): this relies on zx0_to_vdc (which starts with
+		; "clx") being placed directly after this proc - confirm that
+		; .procgroup guarantees the ordering.
+zx0_to_sgx	.proc
+		ldx	#SGX_VDC_OFFSET		; Offset to SGX VDC.
+		db	$E0			; Turn "clx" into a "cpx #".
+		.endp
+	.endif
+
+zx0_to_vdc	.proc
+
+		; Register roles, as used by the code below:
+		;   A = working bit-buffer while bits are being shifted out;
+		;       it is parked in zx0_bitbuf between gamma reads.
+		;   X = index added to VDC_DL for each data write; bit 0 is
+		;       toggled after every byte written.
+		;   Y = count of bytes left to copy in the current page.
+		;
+		; Every output byte is stored twice: into the RAM ring-buffer
+		; through zx0_dstptr (so later matches can read it back) and to
+		; VDC_DL,x.
+
+		clx				; Offset to PCE VDC.
+
+		jsr	__si_to_mpr3		; Map zx0_srcptr to MPR3.
+		jsr	__di_to_vram		; Map zx0_dstptr to VRAM.
+
+		; With $40 the first "asl" yields carry clear (so the stream
+		; starts with literals) and the next "asl" leaves A zero, which
+		; forces a reload on the first gamma read.
+
+		lda	#$40			; Initialize bit-buffer.
+		sta	<zx0_bitbuf
+
+		ldy	#$FF			; Initialize offset to $FFFF.
+		sty	<zx0_offset + 0
+		sty	<zx0_offset + 1
+
+		iny				; Initialize hi-byte of length
+		sty	<zx0_length + 1		; to zero.
+
+		; From here on zx0_dstptr no longer holds the caller's address;
+		; it walks the ring-buffer, starting at its first byte (Y = 0).
+
+		lda	#ZX0_WINBUF		; Initialize window ring-buffer
+		sta	<zx0_dstptr + 1		; location in RAM.
+		sty	<zx0_dstptr + 0
+
+.lz_finished:	iny				; Initialize length back to 1.
+		sty	<zx0_length + 0
+
+		lda	<zx0_bitbuf		; Restore bit-buffer.
+
+		asl	a			; Copy from literals or new offset?
+		bcc	.cp_literals
+
+		;
+		; Copy bytes from new offset.
+		;
+
+.new_offset:	jsr	.get_gamma_flag		; Get offset MSB, returns CS.
+
+		; Carry is set here, so "cla / sbc" computes 0 - length.
+
+		cla				; Negate offset MSB and check
+		sbc	<zx0_length + 0		; for zero (EOF marker).
+		beq	.got_eof
+
+		sec				; Shift a 1 into bit 7; bit 0 of the
+		ror	a			; MSB moves to carry for the LSB.
+		sta	<zx0_offset + 1		; Save offset MSB.
+
+		lda	[zx0_srcptr]		; Get offset LSB.
+		inc	<zx0_srcptr + 0
+		beq	.inc_off_src
+
+.off_skip1:	ror	a			; Last offset bit starts gamma.
+		sta	<zx0_offset + 0		; Save offset LSB.
+
+		; Y is still 1 here, so taking the branch gives a count of 2.
+
+		bcs	.got_lz_two		; Minimum length of 2?
+
+		sty	<zx0_length + 0		; Initialize length back to 1.
+
+		lda	<zx0_bitbuf		; Restore bit-buffer.
+
+		jsr	.get_gamma_data		; Get length, returns CS.
+
+		ldy	<zx0_length + 0		; Get lo-byte of (length+1).
+.got_lz_two:	iny
+
+;		bne	.get_lz_win		; N.B. Optimized to do nothing!
+;
+;		inc	<zx0_length + 1		; Increment from (length+1).
+;		dec	<zx0_length + 1		; Decrement because lo-byte=0.
+
+		; The match source is dstptr + offset, wrapped back into the
+		; ring-buffer by masking and re-basing the hi-byte.
+
+.get_lz_win:	clc				; Calc address of match.
+		lda	<zx0_dstptr + 0		; N.B. Offset is negative!
+		adc	<zx0_offset + 0
+		sta	<zx0_winptr + 0
+		lda	<zx0_dstptr + 1
+		adc	<zx0_offset + 1
+		and	#ZX0_WINMSK
+		ora	#ZX0_WINBUF
+		sta	<zx0_winptr + 1
+
+.lz_byte:	lda	[zx0_winptr]		; Copy bytes from window into
+		sta	[zx0_dstptr]		; decompressed data.
+		sta	VDC_DL, x
+		txa				; Toggle bit 0 of the VDC index.
+		eor	#1			; NOTE(review): presumably swaps
+		tax				; lo/hi data byte - confirm.
+
+		inc	<zx0_winptr + 0
+		beq	.inc_lz_win
+.lz_skip1:	inc	<zx0_dstptr + 0
+		beq	.inc_lz_dst
+
+.lz_skip2:	dey				; Any bytes left to copy?
+		bne	.lz_byte
+
+		lda	<zx0_length + 1		; Any pages left to copy?
+		beq	.lz_finished		; Optimized for branch-likely.
+
+		dec	<zx0_length + 1		; This is rare, so slower.
+		bra	.lz_byte
+
+.got_eof:	leave				; Finished decompression!
+
+		;
+		; Copy bytes from compressed source.
+		;
+
+.cp_literals:	bsr	.get_gamma_flag		; Get length, returns CS.
+
+		ldy	<zx0_length + 0		; Check the lo-byte of length
+		bne	.cp_byte		; without affecting CS.
+
+.cp_page:	dec	<zx0_length + 1		; Decrement # pages to copy.
+
+.cp_byte:	lda	[zx0_srcptr]		; Copy bytes from compressed
+		sta	[zx0_dstptr]		; data to decompressed data.
+		sta	VDC_DL, x
+		txa				; Toggle bit 0 of the VDC index.
+		eor	#1
+		tax
+
+		inc	<zx0_srcptr + 0
+		beq	.inc_cp_src
+.cp_skip1:	inc	<zx0_dstptr + 0
+		beq	.inc_cp_dst
+
+.cp_skip2:	dey				; Any bytes left to copy?
+		bne	.cp_byte
+
+		lda	<zx0_length + 1		; Any pages left to copy?
+		bne	.cp_page		; Optimized for branch-unlikely.
+
+		iny				; Initialize length back to 1.
+		sty	<zx0_length + 0
+
+		lda	<zx0_bitbuf		; Restore bit-buffer.
+
+		asl	a			; Copy from last offset or new offset?
+		bcs	.new_offset
+
+		;
+		; Copy bytes from last offset (rare so slower).
+		;
+
+.old_offset:	bsr	.get_gamma_flag		; Get length, returns CS.
+
+		ldy	<zx0_length + 0		; Check the lo-byte of length.
+		bne	.get_lz_win
+
+		dec	<zx0_length + 1		; Decrement because lo-byte=0.
+		bra	.get_lz_win
+
+		;
+		; Optimized handling of pointers crossing page-boundaries.
+		;
+		; NOTE(review): the callers rely on A and carry surviving
+		; __si_inc_page - confirm against that routine.
+		;
+
+.inc_off_src:	jsr	__si_inc_page
+		bra	.off_skip1
+
+.inc_lz_dst:	bsr	.next_dstpage
+		bra	.lz_skip2
+
+.inc_cp_src:	jsr	__si_inc_page
+		bra	.cp_skip1
+
+.inc_cp_dst:	bsr	.next_dstpage
+		bra	.cp_skip2
+
+		; Step the window read pointer to the next page, wrapping
+		; inside the ring-buffer.
+
+.inc_lz_win:	lda	<zx0_winptr + 1
+		inc	a
+		and	#ZX0_WINMSK
+		ora	#ZX0_WINBUF
+		sta	<zx0_winptr + 1
+		bra	.lz_skip1
+
+		; Step the ring-buffer write pointer to the next page, wrapping
+		; inside the ring-buffer. Changes A only.
+
+.next_dstpage:	lda	<zx0_dstptr + 1
+		inc	a
+		and	#ZX0_WINMSK
+		ora	#ZX0_WINBUF
+		sta	<zx0_dstptr + 1
+		rts
+
+.gamma_page:	jsr	__si_inc_page
+		bra	.gamma_skip1
+
+		;
+		; Get 16-bit interlaced Elias gamma value.
+		;
+		; Entry: A = bit-buffer, zx0_length = initial value.
+		;   .get_gamma_data - next bit in A is a data bit.
+		;   .get_gamma_flag - next bit in A is a flag bit.
+		; Exit:  zx0_length = value, zx0_bitbuf = A = bit-buffer,
+		;        carry set.
+		;
+		; The first loop only shifts the lo-byte of zx0_length; the
+		; hi-byte is only shifted after the bit-buffer was reloaded.
+		;
+
+.get_gamma_data:asl	a			; Get next bit.
+		rol	<zx0_length + 0
+.get_gamma_flag:asl	a
+		bcc	.get_gamma_data		; Loop until finished or empty.
+		bne	.gamma_done		; Bit-buffer empty?
+
+.gamma_reload:	lda	[zx0_srcptr]		; Reload the empty bit-buffer
+		inc	<zx0_srcptr + 0		; from the compressed source.
+		beq	.gamma_page
+.gamma_skip1:	rol	a			; Carry in becomes the end marker.
+		bcs	.gamma_done		; Finished?
+
+.get_gamma_loop:asl	a			; Get next bit.
+		rol	<zx0_length + 0
+		rol	<zx0_length + 1
+		asl	a
+		bcc	.get_gamma_loop		; Loop until finished or empty.
+		beq	.gamma_reload		; Bit-buffer empty?
+
+.gamma_done:	sta	<zx0_bitbuf		; Preserve bit-buffer.
+		rts
+
+		.endp
+		.endprocgroup
--- a/loader/tools/dali/salvador/asm/Z80/unzx0v1_fast.asm
+++ b/loader/tools/dali/salvador/asm/Z80/unzx0v1_fast.asm
@ -0,0 +1,187 @@
+;
+;  Speed-optimized ZX0v1 decompressor by spke & uniabis (190 bytes)
+;
+;  ver.00 by spke (27/01-23/03/2021, 191 bytes)
+;  ver.01 by spke (24/03/2021, 193(+2) bytes, fixed a bug in the initialization)
+;  ver.02 by uniabis (25-29/03/2021, 191(-2) bytes, +0.5% speed, fixed a bug in the gamma code reader)
+;  ver.03 by uniabis (16/08/2021, 190(-1) bytes)
+;  ver.04 by spke (07-08/12/2021, updated info, renamed to reflect the use of the old compression format)
+;
+;  Original ZX0 decompressors were written by Einar Saukas
+;
+;  This decompressor was written on the basis of "Standard" decompressor by
+;  Einar Saukas and optimized for speed by spke and uniabis. This decompressor is
+;  about 5% faster than the "Turbo" decompressor, which is 128 bytes long.
+;  It has about the same speed as the 412 byte version of the "Mega" decompressor.
+;  
+;  The decompressor uses AF, AF', BC, DE, HL and IX and relies upon self-modified code.
+;
+;  There are two compressors available for ZX0 format. The official optimal compressors
+;  by Einar Saukas are available from https://github.com/einar-saukas/ZX0
+;  They can be invoked as follows:
+;
+;  zx0.exe file_to_be_compressed name_of_compressed_file.zx0 (for compressors ver.1.x), or
+;  zx0.exe -c file_to_be_compressed name_of_compressed_file.zx0 (for compressors ver.2.x)
+;
+;  Option -c indicates the use of the "old" ver 1.x compression format assumed by this decompressor.
+;
+;  An alternative heuristic compressor "Salvador" has been developed by Emmanuel Marty,
+;  see https://github.com/emmanuel-marty/salvador
+;
+;  It has fractionally lower compression ratio compared to the official compressor,
+;  but works much faster, so may be a better fit for the majority of development needs.
+;  Salvador is invoked by using:
+;
+;  salvador.exe -classic file_to_be_compressed name_of_compressed_file.zx0
+;
+;  The decompression is done in the standard way:
+;
+;  ld hl,FirstByteOfCompressedData
+;  ld de,FirstByteOfMemoryForDecompressedData
+;  call DecompressZX0v1
+;
+;  Of course, ZX0 compression algorithms are (c) 2021 Einar Saukas,
+;  see https://github.com/einar-saukas/ZX0 for more information
+;
+;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+
+;  RELOAD_BITS fetches the next byte of the compressed stream from (HL),
+;  advances HL and rotates the byte left through carry (RLA), so the top bit
+;  of the new byte ends up in flag C and the incoming carry enters bit 0.
+		MACRO	RELOAD_BITS
+			ld a,(hl) : inc hl : rla
+		ENDM
+
+;  INLINE_READ_GAMMA shifts bit pairs out of A: the first bit of each pair is
+;  rotated into register C, the second one decides whether to loop (carry
+;  clear) or stop (carry set). Every user tests flag Z afterwards and reloads
+;  the bit reservoir when A has run empty.
+		MACRO	INLINE_READ_GAMMA
+.ReadGammaBits		add a : rl c : add a : jr nc,.ReadGammaBits
+		ENDM
+
+;  Entry point: HL = compressed data, DE = destination (see the usage notes
+;  in the file header). IX is loaded with the address of CopyMatch1 so that
+;  LongerRepMatch can push it as a return address.
+@DecompressZX0v1:	ld ix,CopyMatch1 : scf : exa					; AF' must have flag C switched on
+			ld bc,#FFFF : ld (PrevOffset),bc				; default offset is -1
+			inc bc : ld a,#80 : jr RunOfLiterals				; BC is assumed to contain 0 most of the time
+			
+ShorterOffsets			; 7-bit offsets allow additional optimizations,
+				; based on the facts that C==0 and AF' has C ON!
+				exa : sbc a : ld (PrevOffset+1),a			; the top byte of the offset is always #FF
+				ld a,(hl) : inc hl
+				rra : ld (PrevOffset),a					; note that AF' always has flag C ON
+
+			jr nc,LongerMatch
+
+CopyMatch2			; the case of matches with len=2
+				exa : ld c,2
+
+CopyMatch1			; the faster match copying code
+				push hl							; preserve source
+				; the operand of the LD HL below is the PrevOffset variable (self-modified code)
+PrevOffset			EQU $+1 : ld hl,#FFFF					; restore offset (default offset is -1)
+				add hl,de						; HL = dest - offset
+				ldir
+				pop hl							; restore source
+
+			; after a match you can have either
+			; 0 + <elias length> = run of literals, or
+			; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+AfterMatch1		add a : jr nc,RunOfLiterals
+
+UsualMatch:			; this is the case of usual match+offset
+				add a : jr nc,LongerOffets : jr nz,ShorterOffsets	; NZ after NC == "confirmed C"
+					RELOAD_BITS : jr c,ShorterOffsets
+
+LongerOffets			inc c : INLINE_READ_GAMMA				; reading gamma requires C=1
+				call z,ReloadReadGamma
+
+ProcessOffset			exa : xor a : sub c
+				ret z							; end-of-data marker (only checked for longer offsets)
+
+				rra : ld (PrevOffset+1),a
+				ld a,(hl) : inc hl
+				rra : ld (PrevOffset),a
+
+			; lowest bit is the first bit of the gamma code for length
+			jr c,CopyMatch2
+
+				; this wastes 1 t-state for longer matches far away,
+				; but saves 4 t-states for longer nearby (seems to pay off in testing)
+				ld c,b
+LongerMatch			inc c
+				; doing SCF here ensures that AF' has flag C ON and costs
+				; cheaper than doing SCF in the ShorterOffsets branch
+				scf : exa
+
+				INLINE_READ_GAMMA
+				call z,ReloadReadGamma
+
+CopyMatch3			push hl						; preserve source
+				ld hl,(PrevOffset)				; restore offset
+				add hl,de					; HL = dest - offset
+				; because BC>=3-1, we can do 2 x LDI safely
+				ldi : ldir : inc c : ldi
+				pop hl						; restore source
+
+			; after a match you can have either
+			; 0 + <elias length> = run of literals, or
+			; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+AfterMatch3		add a : jr c,UsualMatch
+
+RunOfLiterals:			inc c : add a : jr nc,LongerRun : jr nz,CopyLiteral	; NZ after NC == "confirmed C"
+					RELOAD_BITS : jr c,CopyLiteral
+
+LongerRun			INLINE_READ_GAMMA : jr nz,CopyLiterals
+					RELOAD_BITS
+				call nc,ReadGammaAligned
+
+CopyLiterals			ldi
+CopyLiteral			ldir
+
+			; after a literal run you can have either
+			; 0 + <elias length> = match using a repeated offset, or
+			; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+			add a : jr c,UsualMatch
+
+RepMatch:			inc c : add a : jr nc,LongerRepMatch : jr nz,CopyMatch1	; NZ after NC == "confirmed C"
+					RELOAD_BITS : jr c,CopyMatch1
+
+LongerRepMatch			INLINE_READ_GAMMA
+				jp nz,CopyMatch1
+
+				; this is a crafty equivalent of
+				; CALL ReloadReadGamma : JP CopyMatch1
+				; (IX holds the address of CopyMatch1, and execution
+				; falls through into ReloadReadGamma below)
+				push ix
+
+;
+;  the subroutine for reading the remainder of the partly read Elias gamma code.
+;  it has two entry points: ReloadReadGamma first refills the bit reservoir in A,
+;  while ReadGammaAligned assumes that the bit reservoir has just been refilled.
+
+ReloadReadGamma:	RELOAD_BITS
+			ret c
+
+ReadGammaAligned:	add a : rl c
+			add a : ret c
+
+			add a : rl c
+ReadingLongGammaRLA	rla	; this should really be an ADD A, but since flag C
+				; is always off here, this saves us a byte (see below)
+
+ReadingLongGamma		; this loop does not need unrolling,
+				; as it does not get much use anyway
+				ret c
+				add a : rl c : rl b
+				add a :	jr nz,ReadingLongGamma
+
+			ld a,(hl) : inc hl
+			jr ReadingLongGammaRLA
+
--- a/loader/tools/dali/salvador/src/expand.c
+++ b/loader/tools/dali/salvador/src/expand.c
@ -0,0 +1,336 @@
+/*
+ * expand.c - decompressor implementation
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "format.h"
+#include "expand.h"
+#include "libsalvador.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else /* _MSC_VER */
+#define FORCE_INLINE __attribute__((always_inline))
+#endif /* _MSC_VER */
+
+/*
+ * Pull the next bit, most significant first, out of the compressed stream.
+ *
+ * *bits holds the bits still pending from the last fetched byte and
+ * *nCurBitMask tracks how many of them remain (it reaches 0 once all eight
+ * were handed out). When no bits are pending, a fresh byte is fetched from
+ * *ppInBlock and the cursor is advanced.
+ *
+ * Returns 0 or 1, or -1 if a new byte is needed but the cursor has already
+ * reached pDataEnd; in that case none of the state is modified.
+ */
+static inline FORCE_INLINE int salvador_read_bit(const unsigned char **ppInBlock, const unsigned char *pDataEnd, int *nCurBitMask, unsigned char *bits) {
+   const unsigned char *pCursor = *ppInBlock;
+   int nResult;
+
+   if (*nCurBitMask == 0) {
+      /* Bit reservoir is empty: refill it, or report end of data */
+      if (pCursor >= pDataEnd)
+         return -1;
+
+      *bits = *pCursor;
+      pCursor++;
+      *nCurBitMask = 128;
+   }
+
+   /* Hand out the top bit, then shift the reservoir and the mask along */
+   nResult = (*bits >> 7) & 1;
+   *bits = (unsigned char)(*bits << 1);
+   *nCurBitMask >>= 1;
+
+   *ppInBlock = pCursor;
+   return nResult;
+}
+
+/*
+ * Decode one interlaced Elias-gamma value.
+ *
+ * The stream alternates control and data bits. A control bit equal to the
+ * "continue" marker (1 when nIsBackward is set, 0 otherwise) means one more
+ * data bit follows; that bit is shifted into the low end of the value, which
+ * starts out as nInitialValue. Any other control result, including the -1
+ * that salvador_read_bit returns at end of data, finishes the value. A -1
+ * coming back in a data position is OR-ed into the value as is.
+ */
+static inline FORCE_INLINE int salvador_read_elias(const unsigned char** ppInBlock, const unsigned char* pDataEnd, const int nInitialValue, const int nIsBackward, int* nCurBitMask, unsigned char* bits) {
+   const int nContinueMarker = nIsBackward ? 1 : 0;
+   int nResult = nInitialValue;
+
+   for (;;) {
+      const int nControl = salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+      if (nControl != nContinueMarker)
+         break;
+
+      nResult <<= 1;
+      nResult |= salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+   }
+
+   return nResult;
+}
+
+/*
+ * Decode one interlaced Elias-gamma value whose data bits are stored inverted.
+ *
+ * A control bit of 0 means one more data bit follows; each data bit is
+ * flipped (XOR 1) before being shifted into the low end of the value, which
+ * starts out as nInitialValue. Any non-zero control result, including the -1
+ * that salvador_read_bit returns at end of data, finishes the value.
+ */
+static inline FORCE_INLINE int salvador_read_elias_inverted(const unsigned char** ppInBlock, const unsigned char* pDataEnd, const int nInitialValue, int* nCurBitMask, unsigned char* bits) {
+   int nResult = nInitialValue;
+
+   for (;;) {
+      int nDataBit;
+
+      if (salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits) != 0)
+         break;
+
+      nDataBit = salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits) ^ 1;
+      nResult = (nResult << 1) | nDataBit;
+   }
+
+   return nResult;
+}
+
+/*
+ * Decode an interlaced Elias-gamma value whose first control bit was already
+ * read by the caller and is passed in as nFirstBit.
+ *
+ * The "continue" marker is 1 when nIsBackward is set and 0 otherwise. If
+ * nFirstBit does not signal "continue", nInitialValue is returned untouched
+ * and nothing is read. Otherwise data and control bits are read alternately,
+ * starting with a data bit, until a control result differs from the marker
+ * (the -1 returned by salvador_read_bit at end of data also stops the loop).
+ */
+static inline FORCE_INLINE int salvador_read_elias_prefix(const unsigned char** ppInBlock, const unsigned char* pDataEnd, const int nInitialValue, const int nIsBackward, int* nCurBitMask, unsigned char* bits, unsigned int nFirstBit) {
+   const int nContinueMarker = nIsBackward ? 1 : 0;
+   const int nFirstIsSet = (nFirstBit != 0) ? 1 : 0;
+   int nResult = nInitialValue;
+
+   /* The caller-supplied control bit already terminated the value */
+   if (nFirstIsSet != nContinueMarker)
+      return nResult;
+
+   do {
+      nResult <<= 1;
+      nResult |= salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+   } while (salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits) == nContinueMarker);
+
+   return nResult;
+}
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * Walks the command stream exactly like salvador_decompress() does, but only
+ * adds up the literal and match lengths instead of producing output.
+ *
+ * @param pInputData compressed data
+ * @param nInputSize compressed size in bytes
+ * @param nFlags compression flags (set to FLG_IS_INVERTED)
+ *
+ * @return maximum decompressed size, or (size_t)-1 if the input is empty,
+ *         truncated or otherwise malformed
+ */
+size_t salvador_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags) {
+   const unsigned char* pInputDataEnd = pInputData + nInputSize;
+   int nCurBitMask = 0;
+   unsigned char bits = 0;
+   int nIsFirstCommand = 1;
+   const int nIsInverted = (nFlags & FLG_IS_INVERTED) && !(nFlags & FLG_IS_BACKWARD);
+   const int nIsBackward = (nFlags & FLG_IS_BACKWARD) ? 1 : 0;
+   /* Accumulate in the return type so that large streams cannot overflow an int */
+   size_t nDecompressedSize = 0;
+
+   if (pInputData >= pInputDataEnd)
+      return (size_t)-1;
+
+   while (1) {
+      int nIsMatchWithOffset;
+
+      if (nIsFirstCommand) {
+         /* The first command is always literals */
+         nIsFirstCommand = 0;
+         nIsMatchWithOffset = 0;
+      }
+      else {
+         /* Read match with offset / literals bit */
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset < 0)
+            return (size_t)-1;
+      }
+
+      if (nIsMatchWithOffset == 0) {
+         const unsigned int nLiterals = (unsigned int)salvador_read_elias(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits);
+
+         /* Count literals. Compare against the bytes that are left rather than
+          * forming pInputData + nLiterals, which may point past the buffer (or
+          * wrap around) when the length is corrupt. */
+
+         if (nLiterals > (size_t)(pInputDataEnd - pInputData))
+            return (size_t)-1;
+
+         pInputData += nLiterals;
+         nDecompressedSize += nLiterals;
+
+         /* Read match with offset / rep match bit */
+
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset < 0)
+            return (size_t)-1;
+      }
+
+      unsigned int nMatchLen;
+
+      if (nIsMatchWithOffset) {
+         /* Match with offset */
+
+         unsigned int nMatchOffsetHighByte;
+
+         if (nIsInverted)
+            nMatchOffsetHighByte = salvador_read_elias_inverted(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+         else
+            nMatchOffsetHighByte = salvador_read_elias(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits);
+
+         /* A high byte of 256 is the end-of-data marker */
+         if (nMatchOffsetHighByte == 256)
+            break;
+
+         if (pInputData >= pInputDataEnd)
+            return (size_t)-1;
+
+         /* The offset itself is irrelevant for sizing; only the lowest bit of
+          * the low byte matters, as it is the first bit of the length code. */
+         const unsigned int nMatchOffsetLowByte = (unsigned int)(*pInputData++);
+
+         nMatchLen = salvador_read_elias_prefix(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits, nMatchOffsetLowByte & 1);
+
+         nMatchLen += (2 - 1);
+      }
+      else {
+         /* Rep-match */
+
+         nMatchLen = salvador_read_elias(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits);
+      }
+
+      /* Count matched bytes */
+      nDecompressedSize += nMatchLen;
+   }
+
+   return nDecompressedSize;
+}
+
+/**
+ * Decompress data in memory
+ *
+ * All lengths and offsets read from the stream are validated against the
+ * space that is actually left in the input and output buffers before any
+ * pointer is moved, so corrupt input is rejected instead of being followed
+ * out of bounds.
+ *
+ * @param pInputData compressed data
+ * @param pOutData buffer for decompressed data
+ * @param nInputSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param nFlags compression flags (set to FLG_IS_INVERTED)
+ *
+ * @return actual decompressed size, or -1 for error
+ */
+size_t salvador_decompress(const unsigned char *pInputData, unsigned char *pOutData, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags) {
+   const unsigned char *pInputDataEnd = pInputData + nInputSize;
+   unsigned char *pCurOutData = pOutData + nDictionarySize;
+   const unsigned char *pOutDataEnd = pCurOutData + nMaxOutBufferSize;
+   int nCurBitMask = 0;
+   unsigned char bits = 0;
+   int nMatchOffset = 1;
+   int nIsFirstCommand = 1;
+   const int nIsInverted = (nFlags & FLG_IS_INVERTED) && !(nFlags & FLG_IS_BACKWARD);
+   const int nIsBackward = (nFlags & FLG_IS_BACKWARD) ? 1 : 0;
+
+   if (pInputData >= pInputDataEnd && pCurOutData < pOutDataEnd)
+      return (size_t)-1;
+
+   while (1) {
+      int nIsMatchWithOffset;
+
+      if (nIsFirstCommand) {
+         /* The first command is always literals */
+         nIsFirstCommand = 0;
+         nIsMatchWithOffset = 0;
+      }
+      else {
+         /* Read match with offset / literals bit */
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset < 0)
+            return (size_t)-1;
+      }
+
+      if (nIsMatchWithOffset == 0) {
+         const unsigned int nLiterals = (unsigned int)salvador_read_elias(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits);
+
+         /* Copy literals. The run must fit in what is left of both buffers;
+          * comparing sizes avoids forming pointers past the end of either. */
+
+         if (nLiterals > (size_t)(pInputDataEnd - pInputData) ||
+             nLiterals > (size_t)(pOutDataEnd - pCurOutData))
+            return (size_t)-1;
+
+         memcpy(pCurOutData, pInputData, nLiterals);
+         pInputData += nLiterals;
+         pCurOutData += nLiterals;
+
+         /* Read match with offset / rep match bit */
+
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset < 0)
+            return (size_t)-1;
+      }
+
+      unsigned int nMatchLen;
+
+      if (nIsMatchWithOffset) {
+         /* Match with offset */
+
+         unsigned int nMatchOffsetHighByte;
+
+         if (nIsInverted)
+            nMatchOffsetHighByte = salvador_read_elias_inverted(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+         else
+            nMatchOffsetHighByte = salvador_read_elias(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits);
+
+         /* A high byte of 256 is the end-of-data marker */
+         if (nMatchOffsetHighByte == 256)
+            break;
+         nMatchOffsetHighByte--;
+
+         if (pInputData >= pInputDataEnd)
+            return (size_t)-1;
+
+         /* Low byte: 7 offset bits plus, in bit 0, the first bit of the length code */
+         const unsigned int nMatchOffsetLowByte = (unsigned int)(*pInputData++);
+         if (nIsBackward)
+            nMatchOffset = (nMatchOffsetHighByte << 7) | (nMatchOffsetLowByte >> 1);
+         else
+            nMatchOffset = (nMatchOffsetHighByte << 7) | (127 - (nMatchOffsetLowByte >> 1));
+         nMatchOffset++;
+
+         nMatchLen = salvador_read_elias_prefix(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits, nMatchOffsetLowByte & 1);
+
+         nMatchLen += (2 - 1);
+      }
+      else {
+         /* Rep-match: reuse the previous offset */
+
+         nMatchLen = salvador_read_elias(&pInputData, pInputDataEnd, 1, nIsBackward, &nCurBitMask, &bits);
+      }
+
+      /* Copy matched bytes. The offset must be positive and must not reach
+       * back past the start of the dictionary/output buffer, and the match
+       * must fit in the remaining output space. These are checked before the
+       * source pointer is computed. */
+      if (nMatchOffset < 1 ||
+          (size_t)nMatchOffset > (size_t)(pCurOutData - pOutData) ||
+          nMatchLen > (size_t)(pOutDataEnd - pCurOutData))
+         return (size_t)-1;
+
+      /* Byte-wise copy on purpose: source and destination may overlap */
+      const unsigned char* pSrc = pCurOutData - nMatchOffset;
+      while (nMatchLen) {
+         *pCurOutData++ = *pSrc++;
+         nMatchLen--;
+      }
+   }
+
+   return (size_t)(pCurOutData - pOutData) - nDictionarySize;
+}
--- a/loader/tools/dali/salvador/src/expand.h
+++ b/loader/tools/dali/salvador/src/expand.h
@ -0,0 +1,69 @@
+/*
+ * expand.h - decompressor definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _EXPAND_H
+#define _EXPAND_H
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pInputData compressed data (only read, never modified)
+ * @param nInputSize compressed size in bytes
+ * @param nFlags compression flags (set to FLG_IS_INVERTED)
+ *               NOTE(review): FLG_IS_INVERTED is not declared in this header;
+ *               confirm which header provides it and whether 0 is also accepted.
+ *
+ * @return maximum decompressed size (presumably in bytes, like nInputSize -- TODO confirm);
+ *         no error value is documented for this function
+ */
+size_t salvador_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags);
+
+/**
+ * Decompress data in memory
+ *
+ * @param pInputData compressed data (only read, never modified)
+ * @param pOutData buffer for decompressed data
+ * @param nInputSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param nFlags compression flags (set to FLG_IS_INVERTED)
+ *               NOTE(review): FLG_IS_INVERTED is not declared in this header;
+ *               confirm which header provides it and whether 0 is also accepted.
+ *
+ * @return actual decompressed size, or -1 for error. The return type is the
+ *         unsigned size_t, so the error value arrives as (size_t)-1: compare
+ *         against that value instead of testing for "< 0".
+ */
+size_t salvador_decompress(const unsigned char *pInputData, unsigned char *pOutData, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EXPAND_H */
--- a/loader/tools/dali/salvador/src/format.h
+++ b/loader/tools/dali/salvador/src/format.h
@ -0,0 +1,43 @@
+/*
+ * format.h - byte stream format definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _FORMAT_H
+#define _FORMAT_H
+
+/* Match offset limits.
+ * NOTE(review): 0x7f80 == (254 << 7) + 127 + 1, which looks like the largest
+ * offset a "((high - 1) << 7 | 7-bit low part) + 1" encoding can express when
+ * the high part is at most 255 -- confirm against the encoder/decoder. */
+#define MIN_OFFSET 1
+#define MAX_OFFSET 0x7f80
+
+/* Presumably the largest value a variable-length field may take -- TODO confirm */
+#define MAX_VARLEN 0xffff
+
+/* 0x10000 = 65536; presumably the processing block size in bytes -- TODO confirm */
+#define BLOCK_SIZE 0x10000
+
+/* Presumably the shortest match length that may be encoded -- TODO confirm */
+#define MIN_MATCH_SIZE 1
+
+#endif /* _FORMAT_H */
--- a/loader/tools/dali/salvador/src/libdivsufsort/CHANGELOG.md
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CHANGELOG.md
@ -0,0 +1,21 @@
+# libdivsufsort Change Log
+
+See full changelog at: https://github.com/y-256/libdivsufsort/commits
+
+## [2.0.1] - 2010-11-11
+### Fixed
+* Wrong variable used in `divbwt` function
+* Enclose some string variables with double quotation marks in include/CMakeLists.txt
+* Fix typo in include/CMakeLists.txt
+
+## 2.0.0 - 2008-08-23
+### Changed
+* Switch the build system to [CMake](http://www.cmake.org/)
+* Improve the performance of the suffix-sorting algorithm
+
+### Added
+* OpenMP support
+* 64-bit version of divsufsort
+
+[Unreleased]: https://github.com/y-256/libdivsufsort/compare/2.0.1...HEAD
+[2.0.1]: https://github.com/y-256/libdivsufsort/compare/2.0.0...2.0.1
--- a/loader/tools/dali/salvador/src/libdivsufsort/CMakeLists.txt
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CMakeLists.txt
@ -0,0 +1,99 @@
+### cmake file for building libdivsufsort Package ###
+# NOTE(review): recent CMake releases warn about or reject very old minimum
+# versions such as 2.4.4 -- confirm which CMake versions this tree must support.
+cmake_minimum_required(VERSION 2.4.4)
+# Make the helper modules in CMakeModules/ (AppendCompilerFlags, ProjectCPack, ...)
+# visible to include().
+set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
+include(AppendCompilerFlags)
+
+## Project information ##
+project(libdivsufsort C)
+set(PROJECT_VENDOR "Yuta Mori")
+set(PROJECT_CONTACT "yuta.256@gmail.com")
+set(PROJECT_URL "https://github.com/y-256/libdivsufsort")
+set(PROJECT_DESCRIPTION "A lightweight suffix sorting library")
+# Defines PROJECT_VERSION*, LIBRARY_VERSION and LIBRARY_SOVERSION.
+include(VERSION.cmake)
+
+## CPack configuration ##
+# The generators are set before ProjectCPack is included because that module
+# includes CPack itself once all CPACK_* variables are in place.
+set(CPACK_GENERATOR "TGZ;TBZ2;ZIP")
+set(CPACK_SOURCE_GENERATOR "TGZ;TBZ2;ZIP")
+include(ProjectCPack)
+
+## Project options ##
+option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" ON)
+option(BUILD_EXAMPLES "Build examples" ON)
+option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" OFF)
+option(USE_OPENMP "Use OpenMP for parallelization" OFF)
+option(WITH_LFS "Enable Large File Support" ON)
+
+## Installation directories ##
+# Each directory is a cache entry that is empty by default; when it is left empty
+# a default below CMAKE_INSTALL_PREFIX is filled in.
+set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32 or 64)")
+
+set(CMAKE_INSTALL_RUNTIMEDIR "" CACHE PATH "Specify the output directory for dll runtimes (default is bin)")
+if(NOT CMAKE_INSTALL_RUNTIMEDIR)
+  set(CMAKE_INSTALL_RUNTIMEDIR "${CMAKE_INSTALL_PREFIX}/bin")
+endif(NOT CMAKE_INSTALL_RUNTIMEDIR)
+
+set(CMAKE_INSTALL_LIBDIR "" CACHE PATH "Specify the output directory for libraries (default is lib)")
+if(NOT CMAKE_INSTALL_LIBDIR)
+  set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}")
+endif(NOT CMAKE_INSTALL_LIBDIR)
+
+set(CMAKE_INSTALL_INCLUDEDIR "" CACHE PATH "Specify the output directory for header files (default is include)")
+if(NOT CMAKE_INSTALL_INCLUDEDIR)
+  set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include")
+endif(NOT CMAKE_INSTALL_INCLUDEDIR)
+
+# Derived from CMAKE_INSTALL_LIBDIR, so it must come after the block above.
+set(CMAKE_INSTALL_PKGCONFIGDIR "" CACHE PATH "Specify the output directory for pkgconfig files (default is lib/pkgconfig)")
+if(NOT CMAKE_INSTALL_PKGCONFIGDIR)
+  set(CMAKE_INSTALL_PKGCONFIGDIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+endif(NOT CMAKE_INSTALL_PKGCONFIGDIR)
+
+## Build type ##
+# Default to Release when nothing was requested; Debug builds get verbose makefiles.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE "Release")
+elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  set(CMAKE_VERBOSE_MAKEFILE ON)
+endif(NOT CMAKE_BUILD_TYPE)
+
+## Compiler options ##
+# Every candidate flag is test-compiled by append_c_compiler_flags() and only
+# appended to the named variable when the compiler accepts it.
+if(MSVC)
+  append_c_compiler_flags("/W4" "VC" CMAKE_C_FLAGS)
+  append_c_compiler_flags("/Oi;/Ot;/Ox;/Oy" "VC" CMAKE_C_FLAGS_RELEASE)
+  if(USE_OPENMP)
+    append_c_compiler_flags("/openmp" "VC" CMAKE_C_FLAGS)
+  endif(USE_OPENMP)
+elseif(BORLAND)
+  append_c_compiler_flags("-w" "BCC" CMAKE_C_FLAGS)
+  append_c_compiler_flags("-Oi;-Og;-Os;-Ov;-Ox" "BCC" CMAKE_C_FLAGS_RELEASE)
+else(MSVC)
+  if(CMAKE_COMPILER_IS_GNUCC)
+    append_c_compiler_flags("-Wall" "GCC" CMAKE_C_FLAGS)
+    append_c_compiler_flags("-fomit-frame-pointer" "GCC" CMAKE_C_FLAGS_RELEASE)
+    if(USE_OPENMP)
+      append_c_compiler_flags("-fopenmp" "GCC" CMAKE_C_FLAGS)
+    endif(USE_OPENMP)
+  else(CMAKE_COMPILER_IS_GNUCC)
+    # Unknown compiler: try the GCC-style flags and several OpenMP spellings.
+    append_c_compiler_flags("-Wall" "UNKNOWN" CMAKE_C_FLAGS)
+    append_c_compiler_flags("-fomit-frame-pointer" "UNKNOWN" CMAKE_C_FLAGS_RELEASE)
+    if(USE_OPENMP)
+      append_c_compiler_flags("-fopenmp;-openmp;-omp" "UNKNOWN" CMAKE_C_FLAGS)
+    endif(USE_OPENMP)
+  endif(CMAKE_COMPILER_IS_GNUCC)
+endif(MSVC)
+
+## Add definitions ##
+add_definitions(-DHAVE_CONFIG_H=1 -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
+
+## Add subdirectories ##
+add_subdirectory(pkgconfig)
+add_subdirectory(include)
+add_subdirectory(lib)
+if(BUILD_EXAMPLES)
+  add_subdirectory(examples)
+endif(BUILD_EXAMPLES)
+
+## Add 'uninstall' target ##
+# The template is configured into the build tree and then run in script mode
+# (cmake -P) by the custom target.
+CONFIGURE_FILE(
+  "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake"
+  IMMEDIATE @ONLY)
+ADD_CUSTOM_TARGET(uninstall
+  "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake")
--- a/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
@ -0,0 +1,38 @@
+include(CheckCSourceCompiles)
+include(CheckCXXSourceCompiles)
+
+# append_c_compiler_flags(<flags> <name> <result>)
+#   <flags>   semicolon-separated list of candidate C compiler flags
+#   <name>    label that becomes part of each check variable name
+#   <result>  name of the variable that accepted flags are appended to
+#
+# Every flag is test-compiled on its own with check_c_source_compiles(); the
+# outcome is stored in HAVE_<NAME>_<FLAG>. Both parts are upper-cased; in the
+# flag, leading '-', '+', '/' and space characters are stripped and remaining
+# ones are replaced by '_'. Accepted flags are appended to <result>, each
+# preceded by a space. CMAKE_REQUIRED_FLAGS is saved first and restored at the end.
+#
+# This is a macro, so it runs in the caller's scope: cname, flagname, have_flag
+# and SAFE_CMAKE_REQUIRED_FLAGS stay set after the call.
+macro(append_c_compiler_flags _flags _name _result)
+  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
+  string(TOUPPER "${cname}" cname)
+  foreach(flag ${_flags})
+    # Turn the flag into an identifier-friendly, upper-case name.
+    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
+    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
+    string(TOUPPER "${flagname}" flagname)
+    set(have_flag "HAVE_${cname}_${flagname}")
+    # Only the flag under test is passed to the trial compilation.
+    set(CMAKE_REQUIRED_FLAGS "${flag}")
+    check_c_source_compiles("int main() { return 0; }" ${have_flag})
+    if(${have_flag})
+      set(${_result} "${${_result}} ${flag}")
+    endif(${have_flag})
+  endforeach(flag)
+  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
+endmacro(append_c_compiler_flags)
+
+# append_cxx_compiler_flags(<flags> <name> <result>)
+#   <flags>   semicolon-separated list of candidate C++ compiler flags
+#   <name>    label that becomes part of each check variable name
+#   <result>  name of the variable that accepted flags are appended to
+#
+# Every flag is test-compiled on its own with check_cxx_source_compiles(); the
+# outcome is stored in HAVE_<NAME>_<FLAG>. Both parts are upper-cased; in the
+# flag, leading '-', '+', '/' and space characters are stripped and remaining
+# ones are replaced by '_'. Accepted flags are appended to <result>, each
+# preceded by a space. CMAKE_REQUIRED_FLAGS is saved first and restored at the end.
+#
+# This is a macro, so it runs in the caller's scope: cname, flagname, have_flag
+# and SAFE_CMAKE_REQUIRED_FLAGS stay set after the call.
+macro(append_cxx_compiler_flags _flags _name _result)
+  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
+  string(TOUPPER "${cname}" cname)
+  foreach(flag ${_flags})
+    # Turn the flag into an identifier-friendly, upper-case name.
+    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
+    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
+    string(TOUPPER "${flagname}" flagname)
+    set(have_flag "HAVE_${cname}_${flagname}")
+    # Only the flag under test is passed to the trial compilation.
+    set(CMAKE_REQUIRED_FLAGS "${flag}")
+    check_cxx_source_compiles("int main() { return 0; }" ${have_flag})
+    if(${have_flag})
+      set(${_result} "${${_result}} ${flag}")
+    endif(${have_flag})
+  endforeach(flag)
+  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
+endmacro(append_cxx_compiler_flags)
--- a/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
@ -0,0 +1,15 @@
+include(CheckCSourceCompiles)
+
+# check_function_keywords(<wordlist>)
+#
+# Probes each entry of the semicolon-separated <wordlist> as a function
+# declaration keyword by compiling "<keyword> void func(); ..." with
+# check_c_source_compiles(). The outcome for every keyword is stored in
+# HAVE_<KEYWORD>, where <KEYWORD> is the upper-cased keyword with '-', '+', '/',
+# ' ', '(' and ')' replaced by '_' (for example "__declspec(dllexport)" gives
+# HAVE___DECLSPEC_DLLEXPORT_).
+#
+# The macro has no result parameter; its results are available only through the
+# HAVE_* variables. It deliberately does not reference "_result": that name is
+# not an argument of this macro, so "${_result}" would expand to nothing and
+# produce a malformed set()/if() instead of recording the first supported keyword.
+#
+# This is a macro, so flagname and have_flag stay set in the caller's scope.
+macro(check_function_keywords _wordlist)
+  foreach(flag ${_wordlist})
+    string(REGEX REPLACE "[-+/ ()]" "_" flagname "${flag}")
+    string(TOUPPER "${flagname}" flagname)
+    set(have_flag "HAVE_${flagname}")
+    check_c_source_compiles("${flag} void func(); void func() { } int main() { func(); return 0; }" ${have_flag})
+  endforeach(flag)
+endmacro(check_function_keywords)
--- a/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/CheckLFS.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/CheckLFS.cmake
@ -0,0 +1,109 @@
+## Checks for large file support ##
+include(CheckIncludeFile)
+include(CheckSymbolExists)
+include(CheckTypeSize)
+
+# check_lfs(<isenable>)
+#
+# Selects the file-offset type and stdio functions to use for large files.
+# <isenable> is the NAME of a variable; detection only runs when that variable
+# evaluates to true. Results are left in the caller's scope:
+#   LFS_OFF_T  offset type            LFS_FOPEN  open function
+#   LFS_FSEEK  seek function          LFS_FTELL  tell function
+#   LFS_PRID   printf conversion for LFS_OFF_T (a macro name such as PRIdMAX, or
+#              a quoted literal such as "lld")
+#
+# Three strategies are tried in order, each only if the previous one found nothing:
+#   1. off_t of at least 8 bytes with fseeko/ftello
+#   2. off64_t of at least 8 bytes with fopen64/fseeko64/ftello64
+#   3. __int64 of at least 8 bytes with _fseeki64/_ftelli64
+# If none applies (or detection is disabled) the plain long/fopen/fseek/ftell
+# set with "ld" is used.
+macro(check_lfs _isenable)
+  set(LFS_OFF_T "")
+  set(LFS_FOPEN "")
+  set(LFS_FSEEK "")
+  set(LFS_FTELL "")
+  set(LFS_PRID "")
+
+  if(${_isenable})
+    # Run all checks with the large-file feature macros defined; the previous
+    # definitions are restored at the end of this branch.
+    set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
+    set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
+        -D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64
+        -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
+
+    check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
+    check_include_file("inttypes.h" HAVE_INTTYPES_H)
+    check_include_file("stddef.h" HAVE_STDDEF_H)
+    check_include_file("stdint.h" HAVE_STDINT_H)
+
+    # LFS type1: 8 <= sizeof(off_t), fseeko, ftello
+    check_type_size("off_t" SIZEOF_OFF_T)
+    if(SIZEOF_OFF_T GREATER 7)
+      check_symbol_exists("fseeko" "stdio.h" HAVE_FSEEKO)
+      check_symbol_exists("ftello" "stdio.h" HAVE_FTELLO)
+      if(HAVE_FSEEKO AND HAVE_FTELLO)
+        set(LFS_OFF_T "off_t")
+        set(LFS_FOPEN "fopen")
+        set(LFS_FSEEK "fseeko")
+        set(LFS_FTELL "ftello")
+        # Prefer PRIdMAX; otherwise pick a length modifier from the type sizes.
+        check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
+        if(HAVE_PRIDMAX)
+          set(LFS_PRID "PRIdMAX")
+        else(HAVE_PRIDMAX)
+          check_type_size("long" SIZEOF_LONG)
+          check_type_size("int" SIZEOF_INT)
+          if(SIZEOF_OFF_T GREATER SIZEOF_LONG)
+            set(LFS_PRID "\"lld\"")
+          elseif(SIZEOF_LONG GREATER SIZEOF_INT)
+            set(LFS_PRID "\"ld\"")
+          else(SIZEOF_OFF_T GREATER SIZEOF_LONG)
+            set(LFS_PRID "\"d\"")
+          endif(SIZEOF_OFF_T GREATER SIZEOF_LONG)
+        endif(HAVE_PRIDMAX)
+      endif(HAVE_FSEEKO AND HAVE_FTELLO)
+    endif(SIZEOF_OFF_T GREATER 7)
+
+    # LFS type2: 8 <= sizeof(off64_t), fopen64, fseeko64, ftello64
+    if(NOT LFS_OFF_T)
+      check_type_size("off64_t" SIZEOF_OFF64_T)
+      if(SIZEOF_OFF64_T GREATER 7)
+        check_symbol_exists("fopen64" "stdio.h" HAVE_FOPEN64)
+        check_symbol_exists("fseeko64" "stdio.h" HAVE_FSEEKO64)
+        check_symbol_exists("ftello64" "stdio.h" HAVE_FTELLO64)
+        if(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
+          set(LFS_OFF_T "off64_t")
+          set(LFS_FOPEN "fopen64")
+          set(LFS_FSEEK "fseeko64")
+          set(LFS_FTELL "ftello64")
+          # Same printf-conversion selection as for type1, based on off64_t.
+          check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
+          if(HAVE_PRIDMAX)
+            set(LFS_PRID "PRIdMAX")
+          else(HAVE_PRIDMAX)
+            check_type_size("long" SIZEOF_LONG)
+            check_type_size("int" SIZEOF_INT)
+            if(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
+              set(LFS_PRID "\"lld\"")
+            elseif(SIZEOF_LONG GREATER SIZEOF_INT)
+              set(LFS_PRID "\"ld\"")
+            else(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
+              set(LFS_PRID "\"d\"")
+            endif(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
+          endif(HAVE_PRIDMAX)
+        endif(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
+      endif(SIZEOF_OFF64_T GREATER 7)
+    endif(NOT LFS_OFF_T)
+
+    # LFS type3: 8 <= sizeof(__int64), _fseeki64, _ftelli64
+    if(NOT LFS_OFF_T)
+      check_type_size("__int64" SIZEOF___INT64)
+      if(SIZEOF___INT64 GREATER 7)
+        check_symbol_exists("_fseeki64" "stdio.h" HAVE__FSEEKI64)
+        check_symbol_exists("_ftelli64" "stdio.h" HAVE__FTELLI64)
+        if(HAVE__FSEEKI64 AND HAVE__FTELLI64)
+          set(LFS_OFF_T "__int64")
+          set(LFS_FOPEN "fopen")
+          set(LFS_FSEEK "_fseeki64")
+          set(LFS_FTELL "_ftelli64")
+          set(LFS_PRID  "\"I64d\"")
+        endif(HAVE__FSEEKI64 AND HAVE__FTELLI64)
+      endif(SIZEOF___INT64 GREATER 7)
+    endif(NOT LFS_OFF_T)
+
+    set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
+  endif(${_isenable})
+
+  # Fallback when detection is disabled or no 64-bit capable API was found.
+  if(NOT LFS_OFF_T)
+    ## not found
+    set(LFS_OFF_T "long")
+    set(LFS_FOPEN "fopen")
+    set(LFS_FSEEK "fseek")
+    set(LFS_FTELL "ftell")
+    set(LFS_PRID  "\"ld\"")
+  endif(NOT LFS_OFF_T)
+
+endmacro(check_lfs)
--- a/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/ProjectCPack.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/ProjectCPack.cmake
@ -0,0 +1,38 @@
+# If the cmake version includes cpack, use it
+# Expects PROJECT_DESCRIPTION, PROJECT_VENDOR, PROJECT_CONTACT, PROJECT_NAME and the
+# PROJECT_VERSION_* variables to be set by the including file before this module runs.
+IF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
+  SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${PROJECT_DESCRIPTION}")
+  SET(CPACK_PACKAGE_VENDOR "${PROJECT_VENDOR}")
+  SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
+  SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
+  SET(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
+  SET(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
+  SET(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
+#  SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${PROJECT_VERSION}")
+  SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION_FULL}")
+
+  # Default system name is <system>-<processor> unless the user supplied one.
+  IF(NOT DEFINED CPACK_SYSTEM_NAME)
+    SET(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
+  ENDIF(NOT DEFINED CPACK_SYSTEM_NAME)
+
+  # On Windows the name is replaced by win64-/win32-<processor>, chosen by CMAKE_CL_64.
+  IF(${CPACK_SYSTEM_NAME} MATCHES Windows)
+    IF(CMAKE_CL_64)
+      SET(CPACK_SYSTEM_NAME win64-${CMAKE_SYSTEM_PROCESSOR})
+    ELSE(CMAKE_CL_64)
+      SET(CPACK_SYSTEM_NAME win32-${CMAKE_SYSTEM_PROCESSOR})
+    ENDIF(CMAKE_CL_64)
+  ENDIF(${CPACK_SYSTEM_NAME} MATCHES Windows)
+
+  # Binary package name: <source package name>-<system name>, unless overridden.
+  IF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
+    SET(CPACK_PACKAGE_FILE_NAME "${CPACK_SOURCE_PACKAGE_FILE_NAME}-${CPACK_SYSTEM_NAME}")
+  ENDIF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
+
+  SET(CPACK_PACKAGE_CONTACT "${PROJECT_CONTACT}")
+  IF(UNIX)
+    SET(CPACK_STRIP_FILES "")
+    SET(CPACK_SOURCE_STRIP_FILES "")
+#    SET(CPACK_PACKAGE_EXECUTABLES "ccmake" "CMake")
+  ENDIF(UNIX)
+  # Patterns excluded from source packages: VCS metadata, build directories and
+  # names ending in '~'.
+  SET(CPACK_SOURCE_IGNORE_FILES "/CVS/" "/build/" "/\\\\.build/" "/\\\\.svn/" "~$")
+  # include CPack model once all variables are set
+  INCLUDE(CPack)
+ENDIF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
--- a/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
+++ b/loader/tools/dali/salvador/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
@ -0,0 +1,36 @@
+# Uninstall script template. configure_file() with the ONLY-at-sign option fills in
+# the at-sign placeholders; ${...} and $ENV{...} references are left untouched and
+# are evaluated when the generated script runs. The script removes every file
+# listed in install_manifest.txt (honouring DESTDIR) and then deletes the manifest.
+IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+  MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"")
+ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+
+# The manifest holds one path per line; turn it into a CMake list.
+FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
+STRING(REGEX REPLACE "\n" ";" files "${files}")
+
+# Pass 1: record, per list position, which files exist before anything is removed.
+SET(NUM 0)
+FOREACH(file ${files})
+  IF(EXISTS "$ENV{DESTDIR}${file}")
+    MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - found")
+    SET(UNINSTALL_CHECK_${NUM} 1)
+  ELSE(EXISTS "$ENV{DESTDIR}${file}")
+    MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - not found")
+    SET(UNINSTALL_CHECK_${NUM} 0)
+  ENDIF(EXISTS "$ENV{DESTDIR}${file}")
+  MATH(EXPR NUM "1 + ${NUM}")
+ENDFOREACH(file)
+
+# Pass 2: remove the files that pass 1 found; stop at the first failure.
+SET(NUM 0)
+FOREACH(file ${files})
+  IF(${UNINSTALL_CHECK_${NUM}})
+    MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"")
+    # EXECUTE_PROCESS replaces the deprecated EXEC_PROGRAM: the path is handed to
+    # "cmake -E remove" as one discrete argument instead of being spliced into a
+    # shell command line, so unusual characters in a path cannot break the command.
+    EXECUTE_PROCESS(
+      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
+      OUTPUT_VARIABLE rm_out
+      RESULT_VARIABLE rm_retval
+      )
+    IF(NOT "${rm_retval}" STREQUAL 0)
+      MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"")
+    ENDIF(NOT "${rm_retval}" STREQUAL 0)
+  ENDIF(${UNINSTALL_CHECK_${NUM}})
+  MATH(EXPR NUM "1 + ${NUM}")
+ENDFOREACH(file)
+
+FILE(REMOVE "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
--- a/loader/tools/dali/salvador/src/libdivsufsort/LICENSE
+++ b/loader/tools/dali/salvador/src/libdivsufsort/LICENSE
@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2003 Yuta Mori All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/loader/tools/dali/salvador/src/libdivsufsort/README.md
+++ b/loader/tools/dali/salvador/src/libdivsufsort/README.md
@ -0,0 +1,140 @@
+# libdivsufsort
+
+libdivsufsort is a software library that implements a lightweight suffix array construction algorithm.
+
+## News
+* 2015-03-21: The project has moved from [Google Code](http://code.google.com/p/libdivsufsort/) to [GitHub](https://github.com/y-256/libdivsufsort)
+
+## Introduction
+This library provides a simple and efficient C API to construct a suffix array and a Burrows-Wheeler transformed string from a given string over a constant-size alphabet.
+The algorithm runs in O(n log n) worst-case time using only 5n+O(1) bytes of memory space, where n is the length of
+the string.
+
+## Build requirements
+* An ANSI C Compiler (e.g. GNU GCC)
+* [CMake](http://www.cmake.org/ "CMake") version 2.4.4 or newer
+* CMake-supported build tool
+
+## Building on GNU/Linux
+1. Get the source code from GitHub. You can either
+    * use git to clone the repository
+    ```
+    git clone https://github.com/y-256/libdivsufsort.git
+    ```
+    * or download a [zip file](../../archive/master.zip) directly
+2. Create a `build` directory in the package source directory.
+```shell
+$ cd libdivsufsort
+$ mkdir build
+$ cd build
+```
+3. Configure the package for your system.
+If you want to install to a different location, change the -DCMAKE_INSTALL_PREFIX option.
+```shell
+$ cmake -DCMAKE_BUILD_TYPE="Release" \
+-DCMAKE_INSTALL_PREFIX="/usr/local" ..
+```
+4. Compile the package.
+```shell
+$ make
+```
+5. (Optional) Install the library and header files.
+```shell
+$ sudo make install
+```
+
+## API
+```c
+/* Data types */
+typedef int32_t saint_t;
+typedef int32_t saidx_t;
+typedef uint8_t sauchar_t;
+
+/*
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+saint_t
+divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+/*
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+```
+
+## Example Usage
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <divsufsort.h>
+
+int main() {
+    // input data
+    char *Text = "abracadabra";
+    int n = strlen(Text);
+    int i, j;
+
+    // allocate
+    int *SA = (int *)malloc(n * sizeof(int));
+
+    // sort
+    divsufsort((unsigned char *)Text, SA, n);
+
+    // output
+    for(i = 0; i < n; ++i) {
+        printf("SA[%2d] = %2d: ", i, SA[i]);
+        for(j = SA[i]; j < n; ++j) {
+            printf("%c", Text[j]);
+        }
+        printf("$\n");
+    }
+
+    // deallocate
+    free(SA);
+
+    return 0;
+}
+```
+See the [examples](examples) directory for a few other examples.
+
+## Benchmarks
+See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details.
+
+## License
+libdivsufsort is released under the [MIT license](LICENSE "MIT license").
+> The MIT License (MIT)
+>
+> Copyright (c) 2003 Yuta Mori All rights reserved.
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+## Author
+* Yuta Mori
--- a/loader/tools/dali/salvador/src/libdivsufsort/VERSION.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/VERSION.cmake
@ -0,0 +1,23 @@
+# Version numbers of the project and of the shared library.
+# With the values below PROJECT_VERSION is "2.0" and PROJECT_VERSION_FULL is "2.0.2-1".
+set(PROJECT_VERSION_MAJOR "2")
+set(PROJECT_VERSION_MINOR "0")
+set(PROJECT_VERSION_PATCH "2")
+set(PROJECT_VERSION_EXTRA "-1")
+set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}")
+set(PROJECT_VERSION_FULL "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}${PROJECT_VERSION_EXTRA}")
+
+# Library version and soname version, kept separate from the project version.
+set(LIBRARY_VERSION "3.0.1")
+set(LIBRARY_SOVERSION "3")
+
+## Git revision number ##
+# When building from a git checkout, PROJECT_VERSION_FULL is replaced by the output
+# of "git describe --tags HEAD" with a leading "v" and surrounding whitespace
+# removed. Errors from git are silenced; if it prints nothing the value above is kept.
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
+  execute_process(COMMAND git describe --tags HEAD
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DESCRIBE_TAGS ERROR_QUIET)
+  if(GIT_DESCRIBE_TAGS)
+    string(REGEX REPLACE "^v(.*)" "\\1" GIT_REVISION "${GIT_DESCRIBE_TAGS}")
+    string(STRIP "${GIT_REVISION}" GIT_REVISION)
+    if(GIT_REVISION)
+      set(PROJECT_VERSION_FULL "${GIT_REVISION}")
+    endif(GIT_REVISION)
+  endif(GIT_DESCRIBE_TAGS)
+endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
--- a/loader/tools/dali/salvador/src/libdivsufsort/examples/CMakeLists.txt
+++ b/loader/tools/dali/salvador/src/libdivsufsort/examples/CMakeLists.txt
@ -0,0 +1,11 @@
+## Large-file-support definitions used by the example programs ##
+add_definitions(
+  -D_LARGEFILE_SOURCE
+  -D_LARGE_FILES
+  -D_FILE_OFFSET_BITS=64)
+
+## Search paths: headers from the source and build trees, library from the build tree ##
+include_directories(
+  "${CMAKE_CURRENT_SOURCE_DIR}/../include"
+  "${CMAKE_CURRENT_BINARY_DIR}/../include")
+link_directories(
+  "${CMAKE_CURRENT_BINARY_DIR}/../lib")
+
+## One executable per example source file, each linked against divsufsort ##
+foreach(example suftest mksary sasearch bwt unbwt)
+  add_executable(${example} ${example}.c)
+  target_link_libraries(${example} divsufsort)
+endforeach(example)
--- a/loader/tools/dali/salvador/src/libdivsufsort/examples/bwt.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/examples/bwt.c
@ -0,0 +1,220 @@
+/*
+ * bwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Writes n to fp as four bytes, least significant byte first.
+ * Returns the number of bytes written as reported by fwrite (4 on success).
+ */
+static
+size_t
+write_int(FILE *fp, saidx_t n) {
+  unsigned char bytes[4];
+  int k;
+
+  /* Byte k carries bits 8k .. 8k+7 of n. */
+  for(k = 0; k < 4; ++k) {
+    bytes[k] = (unsigned char)((n >> (8 * k)) & 0xff);
+  }
+  return fwrite(bytes, sizeof(unsigned char), 4, fp);
+}
+
+/*
+ * Prints the program banner and usage text to stderr, then terminates the
+ * process with the given exit status. Does not return.
+ */
+static
+void
+print_help(const char *progname, int status) {
+  const char *version = divsufsort_version();
+
+  fprintf(stderr, "bwt, a burrows-wheeler transform program, version %s.\n", version);
+  fprintf(stderr, "usage: %s [-b num] INFILE OUTFILE\n", progname);
+  fputs("  -b num    set block size to num MiB [1..512] (default: 32)\n\n", stderr);
+  exit(status);
+}
+
+/*
+ * Entry point: reads INFILE (or stdin when the name is "-"), applies the
+ * Burrows-Wheeler transform block by block with divbwt(), and writes the
+ * result to OUTFILE (or stdout when the name is "-").
+ *
+ * Output layout as written below: the block size as a 4-byte little-endian
+ * integer, then for every block its primary index (4 bytes, little-endian)
+ * followed by the transformed bytes of that block.
+ *
+ * Returns 0 on success. Every failure path prints a message to stderr and
+ * terminates the process through exit().
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t m;
+  saidx_t pidx;
+  clock_t start,finish;
+  /* needclose is a bit mask: bit 0 = close the input, bit 1 = close the output.
+     The bits are cleared below when stdin/stdout are used instead of files. */
+  saint_t i, blocksize = 32, needclose = 3;
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
+  i = 1;
+  if(argc == 5) {
+    if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
+    blocksize = atoi(argv[i + 1]);
+    /* Negative sizes become 1 and sizes above 512 are clamped.
+       NOTE(review): 0 is let through although the usage text says [1..512];
+       further down a block size of 0 means "choose the size automatically". */
+    if(blocksize < 0) { blocksize = 1; }
+    else if(512 < blocksize) { blocksize = 512; }
+    i += 2;
+  }
+  /* MiB -> bytes */
+  blocksize <<= 20;
+
+  /* Open a file for reading. */
+  if(strcmp(argv[i], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    /* stdin must not be closed at the end */
+    needclose ^= 1;
+  }
+  i += 1;
+
+  /* Open a file for writing. */
+  if(strcmp(argv[i], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    /* stdout must not be closed at the end */
+    needclose ^= 2;
+  }
+
+  /* Get the file size. */
+  /* When the input is seekable, at most 0x20000000 bytes (512 MiB) are considered
+     and the block is never larger than the input. When seeking fails and no block
+     size was chosen, 32 MiB is used. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    if(0x20000000L < n) { n = 0x20000000L; }
+    if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
+  } else if(blocksize == 0) { blocksize = 32 << 20; }
+
+  /* Allocate 5blocksize bytes of memory. */
+  /* NOTE(review): for an empty seekable input blocksize is 0 here; malloc(0) may
+     return NULL on some platforms, which would be reported as an allocation
+     failure -- confirm whether empty input has to be supported. */
+  T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Write the blocksize. */
+  if(write_int(ofp, blocksize) != 4) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  fprintf(stderr, "  BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
+  start = clock();
+  /* n counts the bytes processed so far; m is the size of the current block
+     (a block may be shorter than blocksize). */
+  for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) {
+    /* Burrows-Wheeler Transform. */
+    /* T serves as both input and output; SA is passed as the temporary array. */
+    pidx = divbwt(T, T, SA, m);
+    if(pidx < 0) {
+      fprintf(stderr, "%s (bw_transform): %s.\n",
+        argv[0],
+        (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write the bwted data. */
+    if((write_int(ofp, pidx) != 4) ||
+       (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
+      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  }
+  /* The loop also ends on a read error, so check for one explicitly. */
+  if(ferror(fp)) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
+    n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Close files */
+  if(needclose & 1) { fclose(fp); }
+  if(needclose & 2) { fclose(ofp); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/examples/mksary.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/examples/mksary.c
@ -0,0 +1,193 @@
+/*
+ * mksary.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Write the mksary banner (including the library version string) and the
+ * usage line to stderr, then terminate the process with `status'.
+ * This function does not return.
+ */
+static void
+print_help(const char *progname, int status)
+{
+  const char *version = divsufsort_version();
+
+  fprintf(stderr, "mksary, a simple suffix array builder, version %s.\n", version);
+  fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * mksary entry point: load INFILE completely into memory, build its suffix
+ * array with divsufsort() and write the array to OUTFILE as raw saidx_t
+ * values. "-" selects stdin / stdout for the respective argument (the size
+ * of the input is still obtained with fseek/ftell).
+ *
+ * Returns 0 on success. Every failure prints a diagnostic to stderr and
+ * terminates the process with EXIT_FAILURE.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t count;
+  clock_t start, finish;
+  saint_t needclose = 3; /* bit 0: fclose(fp) needed, bit 1: fclose(ofp) needed */
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1;
+  }
+
+  /* Open a file for writing. */
+  if(strcmp(argv[2], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2;
+  }
+
+  /* Get the file size. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* n is later narrowed to saidx_t, so it must stay below 2^31 - 1. */
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  count = (size_t)n;
+
+  /* Allocate memory: n bytes for the text plus n * sizeof(saidx_t) for the
+   * suffix array. At least one element is requested because malloc(0) may
+   * legitimately return NULL, which would make an empty input look like an
+   * allocation failure. */
+  T = (sauchar_t *)malloc((count ? count : 1) * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc((count ? count : 1) * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data. */
+  if(fread(T, sizeof(sauchar_t), count, fp) != count) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 1) { fclose(fp); }
+
+  /* Construct the suffix array. */
+  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
+  start = clock();
+  if(divsufsort(T, SA, (saidx_t)n) != 0) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Write the suffix array. The explicit fflush() surfaces write errors
+   * that are deferred by stdio buffering (e.g. a full disk); they would
+   * otherwise be lost because the result of fclose() is not examined. */
+  if((fwrite(SA, sizeof(saidx_t), count, ofp) != count) ||
+     (fflush(ofp) != 0)) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 2) { fclose(ofp); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/examples/sasearch.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/examples/sasearch.c
@ -0,0 +1,165 @@
+/*
+ * sasearch.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Write the sasearch banner (including the library version string) and the
+ * usage line to stderr, then terminate the process with `status'.
+ * This function does not return.
+ */
+static void
+print_help(const char *progname, int status)
+{
+  const char *version = divsufsort_version();
+
+  fprintf(stderr, "sasearch, a simple SA-based full-text search tool, version %s\n", version);
+  fprintf(stderr, "usage: %s PATTERN FILE SAFILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * sasearch entry point: load FILE (the text) and SAFILE (its suffix array,
+ * n raw saidx_t values) into memory, look PATTERN up with sa_search() and
+ * print every matching SA entry, one per line, to stdout.
+ *
+ * Returns 0 on success. Every failure prints a diagnostic to stderr and
+ * terminates the process with EXIT_FAILURE.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp;
+  const char *P;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t Psize, count;
+  saidx_t i, size, left;
+
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 4) { print_help(argv[0], EXIT_FAILURE); }
+
+  P = argv[1];
+  Psize = strlen(P);
+
+  /* Open a file for reading. */
+#if HAVE_FOPEN_S
+  if(fopen_s(&fp, argv[2], "rb") != 0) {
+#else
+  if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) {
+#endif
+    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Get the file size. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* n is narrowed to saidx_t for sa_search(); reject sizes that would be
+     * truncated by that cast (same limit as the other example tools). */
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], argv[2]);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  count = (size_t)n;
+
+  /* Allocate memory: n bytes for the text plus n * sizeof(saidx_t) for the
+   * suffix array. At least one element is requested because malloc(0) may
+   * legitimately return NULL, which would make an empty input look like an
+   * allocation failure. */
+  T = (sauchar_t *)malloc((count ? count : 1) * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc((count ? count : 1) * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data. */
+  if(fread(T, sizeof(sauchar_t), count, fp) != count) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  fclose(fp);
+
+  /* Open the SA file for reading. */
+#if HAVE_FOPEN_S
+  if(fopen_s(&fp, argv[3], "rb") != 0) {
+#else
+  if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) {
+#endif
+    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n * sizeof(saidx_t) bytes of data. */
+  if(fread(SA, sizeof(saidx_t), count, fp) != count) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      argv[3]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  fclose(fp);
+
+  /* Search and print: sa_search() yields the number of matches and stores
+   * the index of the first matching SA slot in `left'. */
+  size = sa_search(T, (saidx_t)n,
+                   (const sauchar_t *)P, (saidx_t)Psize,
+                   SA, (saidx_t)n, &left);
+  for(i = 0; i < size; ++i) {
+    fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]);
+  }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/examples/suftest.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/examples/suftest.c
@ -0,0 +1,164 @@
+/*
+ * suftest.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Write the suftest banner (including the library version string) and the
+ * usage line to stderr, then terminate the process with `status'.
+ * This function does not return.
+ */
+static void
+print_help(const char *progname, int status)
+{
+  const char *version = divsufsort_version();
+
+  fprintf(stderr, "suftest, a suffixsort tester, version %s.\n", version);
+  fprintf(stderr, "usage: %s FILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * suftest entry point: load FILE ("-" selects stdin) into memory, build its
+ * suffix array with divsufsort(), report the elapsed time on stderr and
+ * verify the result with sufcheck().
+ *
+ * Returns 0 on success. Every failure prints a diagnostic to stderr and
+ * terminates the process with EXIT_FAILURE.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp;
+  const char *fname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t count;
+  clock_t start, finish;
+  saint_t needclose = 1; /* non-zero: fp was opened here and must be closed */
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose = 0;
+  }
+
+  /* Get the file size. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* n is later narrowed to saidx_t, so it must stay below 2^31 - 1. */
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  count = (size_t)n;
+
+  /* Allocate memory: n bytes for the text plus n * sizeof(saidx_t) for the
+   * suffix array. At least one element is requested because malloc(0) may
+   * legitimately return NULL, which would make an empty input look like an
+   * allocation failure. */
+  T = (sauchar_t *)malloc((count ? count : 1) * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc((count ? count : 1) * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data. The diagnostic names the stream through `fname'
+   * so that input taken from "-" is reported as "stdin", consistent with
+   * every other message in this function. */
+  if(fread(T, sizeof(sauchar_t), count, fp) != count) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 1) { fclose(fp); }
+
+  /* Construct the suffix array. */
+  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
+  start = clock();
+  if(divsufsort(T, SA, (saidx_t)n) != 0) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Check the suffix array. */
+  if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/examples/unbwt.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/examples/unbwt.c
@ -0,0 +1,207 @@
+/*
+ * unbwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Read a 32-bit little-endian integer from `fp' and store it in `*n'.
+ *
+ * Returns the number of bytes actually read (4 on success). On a short
+ * read `*n' is left untouched, so callers must compare the result with 4.
+ */
+static
+size_t
+read_int(FILE *fp, saidx_t *n) {
+  unsigned char c[4];
+  size_t m = fread(c, sizeof(unsigned char), 4, fp);
+  if(m == 4) {
+    /* Assemble the value in unsigned arithmetic: `c[3] << 24' would be
+     * evaluated as int after integer promotion and shifts into the sign
+     * bit (undefined behaviour) whenever the top byte is >= 0x80. */
+    unsigned long v = ((unsigned long)c[0] <<  0) | ((unsigned long)c[1] <<  8) |
+                      ((unsigned long)c[2] << 16) | ((unsigned long)c[3] << 24);
+    *n = (saidx_t)v;
+  }
+  return m;
+}
+
+/*
+ * Write the unbwt banner (including the library version string) and the
+ * usage line to stderr, then terminate the process with `status'.
+ * This function does not return.
+ */
+static void
+print_help(const char *progname, int status)
+{
+  const char *version = divsufsort_version();
+
+  fprintf(stderr, "unbwt, an inverse burrows-wheeler transform program, version %s.\n", version);
+  fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * unbwt entry point: read a stream consisting of a 4-byte blocksize
+ * followed by (4-byte primary index, transformed data) records, apply
+ * inverse_bw_transform() to each record and write the restored bytes to
+ * OUTFILE. "-" selects stdin / stdout for the respective argument.
+ *
+ * Returns 0 on success. Every failure prints a diagnostic to stderr and
+ * terminates the process with EXIT_FAILURE.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *A;
+  LFS_OFF_T n;
+  size_t m;
+  saidx_t pidx;
+  clock_t start, finish;
+  saint_t err, blocksize, needclose = 3; /* needclose bit 0: fp, bit 1: ofp */
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1;
+  }
+
+  /* Open a file for writing. */
+  if(strcmp(argv[2], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2;
+  }
+
+  /* Read the blocksize. */
+  if(read_int(fp, &blocksize) != 4) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  /* The blocksize comes straight from the (untrusted) input. A zero or
+   * negative value would be converted to a huge size_t in the malloc()
+   * and fread() calls below, so reject it up front. */
+  if(blocksize <= 0) {
+    fprintf(stderr, "%s: Invalid blocksize in `%s'.\n", argv[0], fname);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate 5blocksize bytes of memory. */
+  T = (sauchar_t *)malloc((size_t)blocksize * sizeof(sauchar_t));
+  A = (saidx_t *)malloc((size_t)blocksize * sizeof(saidx_t));
+  if((T == NULL) || (A == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
+  start = clock();
+  /* Each iteration consumes one record. `m' first holds the size of the
+   * index field and is then reused for the number of data bytes read, so
+   * `n' accumulates the count of restored bytes. */
+  for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) {
+    /* Read blocksize bytes of data. */
+    if((m != 4) || ((m = fread(T, sizeof(sauchar_t), (size_t)blocksize, fp)) == 0)) {
+      fprintf(stderr, "%s: %s `%s': ",
+        argv[0],
+        (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+        fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+
+    /* Inverse Burrows-Wheeler Transform. */
+    if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) {
+      fprintf(stderr, "%s (reverseBWT): %s.\n",
+        argv[0],
+        (err == -1) ? "Invalid data" : "Cannot allocate memory");
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write m bytes of data. */
+    if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) {
+      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  }
+  if(ferror(fp)) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  /* Flush explicitly so that a write error deferred by stdio buffering is
+   * reported; the result of fclose() below is not examined. */
+  if(fflush(ofp) != 0) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
+    n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Close files */
+  if(needclose & 1) { fclose(fp); }
+  if(needclose & 2) { fclose(ofp); }
+
+  /* Deallocate memory. */
+  free(A);
+  free(T);
+
+  return 0;
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/CMakeLists.txt
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/CMakeLists.txt
@ -0,0 +1,162 @@
+include(CheckIncludeFiles)
+include(CheckIncludeFile)
+include(CheckSymbolExists)
+include(CheckTypeSize)
+include(CheckFunctionKeywords)
+include(CheckLFS)
+
+## Probe for the headers that the sources include conditionally ##
+check_include_file(inttypes.h HAVE_INTTYPES_H)
+check_include_file(memory.h HAVE_MEMORY_H)
+check_include_file(stddef.h HAVE_STDDEF_H)
+check_include_file(stdint.h HAVE_STDINT_H)
+check_include_file(stdlib.h HAVE_STDLIB_H)
+check_include_file(string.h HAVE_STRING_H)
+check_include_file(strings.h HAVE_STRINGS_H)
+check_include_file(sys/types.h HAVE_SYS_TYPES_H)
+
+# INCFILE holds the #include line for the fixed-width integer types:
+# <inttypes.h> is preferred, <stdint.h> is the fallback, otherwise empty.
+if(HAVE_INTTYPES_H)
+  set(INCFILE "#include <inttypes.h>")
+elseif(HAVE_STDINT_H)
+  set(INCFILE "#include <stdint.h>")
+else()
+  set(INCFILE "")
+endif()
+
+## Extra probes and generated files needed only by the example programs ##
+if(BUILD_EXAMPLES)
+  # Windows binary-mode I/O helpers (_setmode / _fileno / fopen_s / _O_BINARY)
+  if(WIN32)
+    check_include_file(io.h HAVE_IO_H)
+    check_include_file(fcntl.h HAVE_FCNTL_H)
+    check_symbol_exists(_setmode "io.h;fcntl.h" HAVE__SETMODE)
+    # Only look for the un-prefixed spelling when _setmode is missing.
+    if(NOT HAVE__SETMODE)
+      check_symbol_exists(setmode "io.h;fcntl.h" HAVE_SETMODE)
+    endif()
+    check_symbol_exists(_fileno stdio.h HAVE__FILENO)
+    check_symbol_exists(fopen_s stdio.h HAVE_FOPEN_S)
+    check_symbol_exists(_O_BINARY fcntl.h HAVE__O_BINARY)
+  endif()
+
+  # Large file support: generate lfs.h in the build tree (@VAR@ substitution only)
+  check_lfs(WITH_LFS)
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lfs.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/lfs.h" @ONLY)
+endif()
+
+## Generate config.h in the build tree ##
+# Pick the first inline keyword spelling the compiler accepts; INLINE stays
+# empty when none of them is supported.
+check_function_keywords("inline;__inline;__inline__;__declspec(dllexport);__declspec(dllimport)")
+if(HAVE_INLINE)
+  set(INLINE "inline")
+elseif(HAVE___INLINE)
+  set(INLINE "__inline")
+elseif(HAVE___INLINE__)
+  set(INLINE "__inline__")
+else()
+  set(INLINE "")
+endif()
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/config.h")
+
+## Select the concrete C types (and printf formats) behind the sa*_t typedefs ##
+
+# sauchar_t: unsigned 8-bit. uint8_t if available, else a 1-byte unsigned char.
+check_type_size(uint8_t UINT8_T)
+if(HAVE_UINT8_T)
+  set(SAUCHAR_TYPE "uint8_t")
+else()
+  check_type_size("unsigned char" SIZEOF_UNSIGNED_CHAR)
+  if("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
+    set(SAUCHAR_TYPE "unsigned char")
+  else()
+    message(FATAL_ERROR "Cannot find unsigned 8-bit integer type")
+  endif()
+endif()
+
+# saint_t: signed 32-bit. int32_t if available, else the first builtin type
+# whose size is 4 (tried in the order int, long, short, __int32).
+check_type_size(int32_t INT32_T)
+if(HAVE_INT32_T)
+  set(SAINT32_TYPE "int32_t")
+  check_symbol_exists(PRId32 inttypes.h HAVE_PRID32)
+  if(HAVE_PRID32)
+    set(SAINT32_PRId "PRId32")
+  else()
+    set(SAINT32_PRId "\"d\"")
+  endif()
+else()
+  check_type_size(int SIZEOF_INT)
+  check_type_size(long SIZEOF_LONG)
+  check_type_size(short SIZEOF_SHORT)
+  check_type_size(__int32 SIZEOF___INT32)
+  if("${SIZEOF_INT}" STREQUAL "4")
+    set(SAINT32_TYPE "int")
+    set(SAINT32_PRId "\"d\"")
+  elseif("${SIZEOF_LONG}" STREQUAL "4")
+    set(SAINT32_TYPE "long")
+    set(SAINT32_PRId "\"ld\"")
+  elseif("${SIZEOF_SHORT}" STREQUAL "4")
+    set(SAINT32_TYPE "short")
+    set(SAINT32_PRId "\"d\"")
+  elseif("${SIZEOF___INT32}" STREQUAL "4")
+    set(SAINT32_TYPE "__int32")
+    set(SAINT32_PRId "\"d\"")
+  else()
+    message(FATAL_ERROR "Cannot find 32-bit integer type")
+  endif()
+endif()
+
+# saint64_t: signed 64-bit, only needed for the 64-bit library variant.
+# Unlike the 32-bit case a missing type is not fatal for the whole build:
+# the error is recorded and BUILD_DIVSUFSORT64 is switched off.
+if(BUILD_DIVSUFSORT64)
+  check_type_size(int64_t INT64_T)
+  if(HAVE_INT64_T)
+    set(SAINT64_TYPE "int64_t")
+    check_symbol_exists(PRId64 inttypes.h HAVE_PRID64)
+    if(HAVE_PRID64)
+      set(SAINT64_PRId "PRId64")
+    else()
+      set(SAINT64_PRId "\"lld\"")
+    endif()
+  else()
+    check_type_size(int SIZEOF_INT)
+    check_type_size(long SIZEOF_LONG)
+    check_type_size("long long" SIZEOF_LONG_LONG)
+    check_type_size(__int64 SIZEOF___INT64)
+    if("${SIZEOF_INT}" STREQUAL "8")
+      set(SAINT64_TYPE "int")
+      set(SAINT64_PRId "\"d\"")
+    elseif("${SIZEOF_LONG}" STREQUAL "8")
+      set(SAINT64_TYPE "long")
+      set(SAINT64_PRId "\"ld\"")
+    elseif("${SIZEOF_LONG_LONG}" STREQUAL "8")
+      set(SAINT64_TYPE "long long")
+      set(SAINT64_PRId "\"lld\"")
+    elseif("${SIZEOF___INT64}" STREQUAL "8")
+      set(SAINT64_TYPE "__int64")
+      set(SAINT64_PRId "\"I64d\"")
+    else()
+      message(SEND_ERROR "Cannot find 64-bit integer type")
+      set(BUILD_DIVSUFSORT64 OFF)
+    endif()
+  endif()
+endif()
+
+## Generate and install the public header(s) from divsufsort.h.cmake ##
+# Import/export decorations are only filled in for shared builds, and only
+# when the corresponding __declspec keyword was detected.
+set(DIVSUFSORT_IMPORT "")
+set(DIVSUFSORT_EXPORT "")
+if(BUILD_SHARED_LIBS)
+  if(HAVE___DECLSPEC_DLLIMPORT_)
+    set(DIVSUFSORT_IMPORT "__declspec(dllimport)")
+  endif()
+  if(HAVE___DECLSPEC_DLLEXPORT_)
+    set(DIVSUFSORT_EXPORT "__declspec(dllexport)")
+  endif()
+endif()
+
+# First pass: 32-bit index type -> divsufsort.h
+set(W64BIT "")
+set(SAINDEX_TYPE "${SAINT32_TYPE}")
+set(SAINDEX_PRId "${SAINT32_PRId}")
+set(SAINT_PRId "${SAINT32_PRId}")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
+               "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+
+# Second pass (optional): same template with the 64-bit index type
+# -> divsufsort64.h. SAINT_PRId keeps its 32-bit value from the first pass.
+if(BUILD_DIVSUFSORT64)
+  set(W64BIT "64")
+  set(SAINDEX_TYPE "${SAINT64_TYPE}")
+  set(SAINDEX_PRId "${SAINT64_PRId}")
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
+                 "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+endif()
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/config.h.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/config.h.cmake
@ -0,0 +1,81 @@
+/*
+ * config.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Include guard. This file is a template: the build system replaces each
+ * cmakedefine line below with either a #define or a commented-out #undef,
+ * depending on the result of the corresponding configure-time check. */
+#ifndef _CONFIG_H
+#define _CONFIG_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** Define to the version of this package. **/
+#cmakedefine PROJECT_VERSION_FULL "${PROJECT_VERSION_FULL}"
+
+/** Define to 1 if you have the header files. **/
+#cmakedefine HAVE_INTTYPES_H 1
+#cmakedefine HAVE_STDDEF_H 1
+#cmakedefine HAVE_STDINT_H 1
+#cmakedefine HAVE_STDLIB_H 1
+#cmakedefine HAVE_STRING_H 1
+#cmakedefine HAVE_STRINGS_H 1
+#cmakedefine HAVE_MEMORY_H 1
+#cmakedefine HAVE_SYS_TYPES_H 1
+
+/** for WinIO **/
+#cmakedefine HAVE_IO_H 1
+#cmakedefine HAVE_FCNTL_H 1
+#cmakedefine HAVE__SETMODE 1
+#cmakedefine HAVE_SETMODE 1
+#cmakedefine HAVE__FILENO 1
+#cmakedefine HAVE_FOPEN_S 1
+#cmakedefine HAVE__O_BINARY 1
+/* Fallbacks, applied only when _setmode itself was not detected:
+ *  - if the un-prefixed setmode exists, alias _setmode to it and mark
+ *    _setmode as available;
+ *  - if _setmode is (now) available but _O_BINARY is not, define
+ *    _O_BINARY as 0 so that code passing it to _setmode still compiles. */
+#ifndef HAVE__SETMODE
+# if HAVE_SETMODE
+#  define _setmode setmode
+#  define HAVE__SETMODE 1
+# endif
+# if HAVE__SETMODE && !HAVE__O_BINARY
+#  define _O_BINARY 0
+#  define HAVE__O_BINARY 1
+# endif
+#endif
+
+/** for inline **/
+/* INLINE expands to the inline keyword chosen at configure time (possibly
+ * empty); a definition supplied on the compiler command line wins. */
+#ifndef INLINE
+# define INLINE @INLINE@
+#endif
+
+/** for VC++ warning **/
+/* Silences MSVC warning 4127 (presumably "conditional expression is
+ * constant" - confirm against the MSVC documentation). */
+#ifdef _MSC_VER
+#pragma warning(disable: 4127)
+#endif
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _CONFIG_H */
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort.h
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort.h
@ -0,0 +1,189 @@
+/*
+ * divsufsort.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_H
+#define _DIVSUFSORT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#define DIVSUFSORT_API
+
+/*- Datatypes -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef unsigned char sauchar_t;
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef int saint_t;
+#endif /* SAINT_T */
+#ifndef SAIDX_T
+#define SAIDX_T
+typedef int saidx_t;
+#endif /* SAIDX_T */
+#ifndef PRIdSAIDX_T
+#define PRIdSAIDX_T "d"
+#endif
+
+/*- divsufsort context */
+/* Holds the two bucket tables used while building a suffix array.
+ * Both pointers are allocated by divsufsort_init() and released by
+ * divsufsort_destroy(). */
+typedef struct _divsufsort_ctx_t {
+   saidx_t *bucket_A;   /* table passed as bucket_A to the sort routines */
+   saidx_t *bucket_B;   /* table passed as bucket_B to the sort routines */
+} divsufsort_ctx_t;
+
+/*- Prototypes -*/
+
+/**
+ * Initialize suffix array context
+ *
+ * @param ctx suffix array context to initialize
+ *
+ * @return 0 for success, or non-zero in case of an error
+ */
+int divsufsort_init(divsufsort_ctx_t *ctx);
+
+/**
+ * Destroy suffix array context
+ *
+ * @param ctx suffix array context to destroy
+ */
+void divsufsort_destroy(divsufsort_ctx_t *ctx);
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param ctx suffix array context
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 if an argument is invalid,
+ *         -2 if the context's bucket arrays are not allocated.
+ */
+DIVSUFSORT_API
+saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+#if 0
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U,
+             saidx_t *SA /* can NULL */,
+             saidx_t n, saidx_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
+                     saidx_t *A /* can NULL */,
+                     saidx_t n, saidx_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+          const sauchar_t *P, saidx_t Psize,
+          const saidx_t *SA, saidx_t SAsize,
+          saidx_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+                const saidx_t *SA, saidx_t SAsize,
+                saint_t c, saidx_t *left);
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_H */
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort.h.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort.h.cmake
@ -0,0 +1,180 @@
+/*
+ * divsufsort@W64BIT@.h for libdivsufsort@W64BIT@
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT@W64BIT@_H
+#define _DIVSUFSORT@W64BIT@_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+@INCFILE@
+
+#ifndef DIVSUFSORT_API
+# ifdef DIVSUFSORT_BUILD_DLL
+#  define DIVSUFSORT_API @DIVSUFSORT_EXPORT@
+# else
+#  define DIVSUFSORT_API @DIVSUFSORT_IMPORT@
+# endif
+#endif
+
+/*- Datatypes -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef @SAUCHAR_TYPE@ sauchar_t;
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef @SAINT32_TYPE@ saint_t;
+#endif /* SAINT_T */
+#ifndef SAIDX@W64BIT@_T
+#define SAIDX@W64BIT@_T
+typedef @SAINDEX_TYPE@ saidx@W64BIT@_t;
+#endif /* SAIDX@W64BIT@_T */
+#ifndef PRIdSAINT_T
+#define PRIdSAINT_T @SAINT_PRId@
+#endif /* PRIdSAINT_T */
+#ifndef PRIdSAIDX@W64BIT@_T
+#define PRIdSAIDX@W64BIT@_T @SAINDEX_PRId@
+#endif /* PRIdSAIDX@W64BIT@_T */
+
+
+/*- Prototypes -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+divsufsort@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t *SA, saidx@W64BIT@_t n);
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx@W64BIT@_t
+divbwt@W64BIT@(const sauchar_t *T, sauchar_t *U, saidx@W64BIT@_t *A, saidx@W64BIT@_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort@W64BIT@_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
+             saidx@W64BIT@_t *SA /* can NULL */,
+             saidx@W64BIT@_t n, saidx@W64BIT@_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
+                     saidx@W64BIT@_t *A /* can NULL */,
+                     saidx@W64BIT@_t n, saidx@W64BIT@_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck@W64BIT@(const sauchar_t *T, const saidx@W64BIT@_t *SA, saidx@W64BIT@_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx@W64BIT@_t
+sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
+          const sauchar_t *P, saidx@W64BIT@_t Psize,
+          const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
+          saidx@W64BIT@_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx@W64BIT@_t
+sa_simplesearch@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
+                const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
+                saint_t c, saidx@W64BIT@_t *left);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT@W64BIT@_H */
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort_config.h
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort_config.h
@ -0,0 +1,9 @@
+#define HAVE_STRING_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_STDINT_H 1
+#define INLINE inline
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4244 )
+#endif /* _MSC_VER */
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort_private.h
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/divsufsort_private.h
@ -0,0 +1,205 @@
+/*
+ * divsufsort_private.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_PRIVATE_H
+#define _DIVSUFSORT_PRIVATE_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include "divsufsort_config.h"
+#include <assert.h>
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+#if defined(BUILD_DIVSUFSORT64)
+# include "divsufsort64.h"
+# ifndef SAIDX_T
+#  define SAIDX_T
+#  define saidx_t saidx64_t
+# endif /* SAIDX_T */
+# ifndef PRIdSAIDX_T
+#  define PRIdSAIDX_T PRIdSAIDX64_T
+# endif /* PRIdSAIDX_T */
+# define divsufsort divsufsort64
+# define divbwt divbwt64
+# define divsufsort_version divsufsort64_version
+# define bw_transform bw_transform64
+# define inverse_bw_transform inverse_bw_transform64
+# define sufcheck sufcheck64
+# define sa_search sa_search64
+# define sa_simplesearch sa_simplesearch64
+# define sssort sssort64
+# define trsort trsort64
+#else
+# include "divsufsort.h"
+#endif
+
+
+/*- Constants -*/
+#if !defined(UINT8_MAX)
+# define UINT8_MAX (255)
+#endif /* UINT8_MAX */
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (UINT8_MAX + 1)
+#endif
+/* for divsufsort.c */
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+/* for sssort.c */
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1
+#  undef SS_INSERTIONSORT_THRESHOLD
+#  define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0
+# if defined(BUILD_DIVSUFSORT64)
+#  define SS_MISORT_STACKSIZE (96)
+# else
+#  define SS_MISORT_STACKSIZE (64)
+# endif
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#if defined(BUILD_DIVSUFSORT64)
+# define SS_SMERGE_STACKSIZE (64)
+#else
+# define SS_SMERGE_STACKSIZE (32)
+#endif
+/* for trsort.c */
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#if defined(BUILD_DIVSUFSORT64)
+# define TR_STACKSIZE (96)
+#else
+# define TR_STACKSIZE (64)
+#endif
+
+
+/*- Macros -*/
+/* SWAP exchanges _a and _b through a variable named `t`, which must already
+   be declared (with a compatible type) in the scope where the macro is used. */
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP */
+/* MIN/MAX evaluate one of their arguments twice; avoid side effects. */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif /* MAX */
+/* Explicit-stack helpers. They rely on names supplied by the using site:
+   an array `stack` whose elements have fields a, b, c, d (and e for the
+   *5 variants), an integer `ssize` holding the current depth, and a
+   STACK_SIZE constant giving the capacity (only checked via assert). */
+#define STACK_PUSH(_a, _b, _c, _d)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+  } while(0)
+#define STACK_PUSH5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+  } while(0)
+/* NOTE: the POP macros contain a bare `return;` that fires when the stack is
+   empty, so they can only be used inside functions returning void and they
+   terminate the enclosing function, not just the macro. */
+#define STACK_POP(_a, _b, _c, _d)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+  } while(0)
+#define STACK_POP5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+  } while(0)
+/* for divsufsort.c */
+/* These accessors expect local variables named bucket_A / bucket_B.
+   BUCKET_B and BUCKET_BSTAR index the SAME bucket_B array, with the roles of
+   the two characters swapped: BUCKET_B uses _c1 as the major index,
+   BUCKET_BSTAR uses _c0 as the major index. */
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Prototypes -*/
+/* sssort.c */
+void
+sssort(const sauchar_t *Td, const saidx_t *PA,
+       saidx_t *first, saidx_t *last,
+       saidx_t *buf, saidx_t bufsize,
+       saidx_t depth, saidx_t n, saint_t lastsuffix);
+/* trsort.c */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_PRIVATE_H */
--- a/loader/tools/dali/salvador/src/libdivsufsort/include/lfs.h.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/include/lfs.h.cmake
@ -0,0 +1,56 @@
+/*
+ * lfs.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _LFS_H
+#define _LFS_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef __STRICT_ANSI__
+# define LFS_OFF_T @LFS_OFF_T@
+# define LFS_FOPEN @LFS_FOPEN@
+# define LFS_FTELL @LFS_FTELL@
+# define LFS_FSEEK @LFS_FSEEK@
+# define LFS_PRId  @LFS_PRID@
+#else
+# define LFS_OFF_T long
+# define LFS_FOPEN fopen
+# define LFS_FTELL ftell
+# define LFS_FSEEK fseek
+# define LFS_PRId "ld"
+#endif
+#ifndef PRIdOFF_T
+# define PRIdOFF_T LFS_PRId
+#endif
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _LFS_H */
--- a/loader/tools/dali/salvador/src/libdivsufsort/lib/CMakeLists.txt
+++ b/loader/tools/dali/salvador/src/libdivsufsort/lib/CMakeLists.txt
@ -0,0 +1,31 @@
+# Header search path: the checked-in headers plus the matching directory in
+# the build tree (presumably where headers configured from the *.h.cmake
+# templates are written -- confirm against the parent CMakeLists.txt).
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
+                    "${CMAKE_CURRENT_BINARY_DIR}/../include")
+
+# Sources shared by the 32-bit and 64-bit library targets.
+# The utility source in this tree is named divsufsort_utils.c; listing the
+# old name utils.c would make CMake fail on a missing source file.
+set(divsufsort_SRCS divsufsort.c sssort.c trsort.c divsufsort_utils.c)
+
+## libdivsufsort ##
+add_library(divsufsort ${divsufsort_SRCS})
+install(TARGETS divsufsort
+  RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+# DEFINE_SYMBOL makes DIVSUFSORT_BUILD_DLL defined while compiling the
+# library as a shared object, which the public header template uses to pick
+# the export decoration.
+set_target_properties(divsufsort PROPERTIES
+  VERSION   "${LIBRARY_VERSION}"
+  SOVERSION "${LIBRARY_SOVERSION}"
+  DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
+
+## libdivsufsort64 ##
+# Same sources, compiled with BUILD_DIVSUFSORT64 so the private header maps
+# the public names onto their *64 variants.
+if(BUILD_DIVSUFSORT64)
+  add_library(divsufsort64 ${divsufsort_SRCS})
+  install(TARGETS divsufsort64
+    RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+  set_target_properties(divsufsort64 PROPERTIES
+    VERSION   "${LIBRARY_VERSION}"
+    SOVERSION "${LIBRARY_SOVERSION}"
+    DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
+    COMPILE_FLAGS "-DBUILD_DIVSUFSORT64"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
+endif(BUILD_DIVSUFSORT64)
--- a/loader/tools/dali/salvador/src/libdivsufsort/lib/divsufsort.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/lib/divsufsort.c
@ -0,0 +1,431 @@
+/*
+ * divsufsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+#ifdef _OPENMP
+# include <omp.h>
+#endif
+
+
+/*- Private Functions -*/
+
+/* Sorts suffixes of type B*.
+ *
+ * T         input text, read at indices 0..n-1
+ * SA        work array, indexed below n; the caller hands the same array to
+ *           construct_SA() afterwards
+ * bucket_A  BUCKET_A_SIZE counters, zeroed and filled here
+ * bucket_B  BUCKET_B_SIZE counters shared by the BUCKET_B and BUCKET_BSTAR
+ *           accessors, zeroed and filled here
+ * n         length of T
+ *
+ * Returns m, the number of type B* suffixes found in T.
+ */
+static
+saidx_t
+sort_typeBstar(const sauchar_t *T, saidx_t *SA,
+               saidx_t *bucket_A, saidx_t *bucket_B,
+               saidx_t n) {
+  saidx_t *PAb, *ISAb, *buf;
+#ifdef _OPENMP
+  saidx_t *curbuf;
+  saidx_t l;
+#endif
+  saidx_t i, j, k, t, m, bufsize;
+  saint_t c0, c1;
+#ifdef _OPENMP
+  saint_t d0, d1;
+  int tmp;
+#endif
+
+  /* Initialize bucket arrays. */
+  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
+  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
+
+  /* Count the number of occurrences of the first one or two characters of each
+     type A, B and B* suffix. Moreover, store the beginning position of all
+     type B* suffixes into the array SA. */
+  /* T is scanned from its last character towards the first; m counts down
+     from n as B* positions are written to the top end of SA. */
+  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
+    /* type A suffix. */
+    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
+    if(0 <= i) {
+      /* type B* suffix. */
+      ++BUCKET_BSTAR(c0, c1);
+      SA[--m] = i;
+      /* type B suffix. */
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
+        ++BUCKET_B(c0, c1);
+      }
+    }
+  }
+  /* From here on m is the number of type B* suffixes; their text positions
+     occupy SA[n-m .. n-1]. */
+  m = n - m;
+/*
+note:
+  A type B* suffix is lexicographically smaller than a type B suffix that
+  begins with the same first two characters.
+*/
+
+  /* Calculate the index of start/end point of each bucket. */
+  /* i accumulates type A and type B counts, j accumulates type B* counts. */
+  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
+    t = i + BUCKET_A(c0);
+    BUCKET_A(c0) = i + j; /* start point */
+    i = t + BUCKET_B(c0, c0);
+    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
+      j += BUCKET_BSTAR(c0, c1);
+      BUCKET_BSTAR(c0, c1) = j; /* end point */
+      i += BUCKET_B(c0, c1);
+    }
+  }
+
+  if(0 < m) {
+    /* Sort the type B* suffixes by their first two characters. */
+    /* PAb aliases the B* positions stored above; ISAb is the m-entry region
+       starting at SA + m, filled by the rank pass further down. */
+    PAb = SA + n - m; ISAb = SA + m;
+    for(i = m - 2; 0 <= i; --i) {
+      t = PAb[i], c0 = T[t], c1 = T[t + 1];
+      SA[--BUCKET_BSTAR(c0, c1)] = i;
+    }
+    /* The entry m - 1 is placed last, after all other entries. */
+    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
+    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
+
+    /* Sort the type B* substrings using sssort. */
+#ifdef _OPENMP
+    /* The scratch area starting at SA + m is divided evenly between the
+       threads; each thread uses the slice selected by its thread number. */
+    tmp = omp_get_max_threads();
+    buf = SA + m, bufsize = (n - (2 * m)) / tmp;
+    c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
+#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
+    {
+      tmp = omp_get_thread_num();
+      curbuf = buf + tmp * bufsize;
+      k = 0;
+      for(;;) {
+        /* Under the lock, pick the next bucket range [k, l) with more than
+           one entry and publish the updated shared cursor (c0, c1, j). */
+        #pragma omp critical(sssort_lock)
+        {
+          if(0 < (l = j)) {
+            d0 = c0, d1 = c1;
+            do {
+              k = BUCKET_BSTAR(d0, d1);
+              if(--d1 <= d0) {
+                d1 = ALPHABET_SIZE - 1;
+                if(--d0 < 0) { break; }
+              }
+            } while(((l - k) <= 1) && (0 < (l = k)));
+            c0 = d0, c1 = d1, j = k;
+          }
+        }
+        if(l == 0) { break; }
+        sssort(T, PAb, SA + k, SA + l,
+               curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
+      }
+    }
+#else
+    /* buf starts at the same address as ISAb, which is not written until
+       the rank pass below, so it serves as sssort's scratch buffer here. */
+    buf = SA + m, bufsize = n - (2 * m);
+    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+        i = BUCKET_BSTAR(c0, c1);
+        /* Buckets with zero or one entry need no sorting. */
+        if(1 < (j - i)) {
+          sssort(T, PAb, SA + i, SA + j,
+                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
+        }
+      }
+    }
+#endif
+
+    /* Compute ranks of type B* substrings. */
+    /* SA[0..m-1] is walked from the top. Each entry of a run of non-negative
+       values gets its own index as rank, and the negated run length is left
+       in the run's first slot. A run of negative values is bit-complemented
+       back and all of its entries, plus the non-negative entry that ends the
+       run, share the rank j of the run's highest index.
+       NOTE(review): the negative markers are presumably produced by sssort --
+       confirm against sssort.c. */
+    for(i = m - 1; 0 <= i; --i) {
+      if(0 <= SA[i]) {
+        j = i;
+        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+        SA[i + 1] = i - j;
+        if(i <= 0) { break; }
+      }
+      j = i;
+      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
+      ISAb[SA[i]] = j;
+    }
+
+    /* Construct the inverse suffix array of type B* suffixes using trsort. */
+    trsort(ISAb, SA, m, 1);
+
+    /* Set the sorted order of type B* suffixes. */
+    /* T is re-scanned with the same right-to-left classification as in the
+       counting pass; each B* position t is written (possibly complemented)
+       to the slot given by its rank in ISAb. */
+    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
+      if(0 <= i) {
+        t = i;
+        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
+        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
+      }
+    }
+
+    /* Calculate the index of start/end point of each bucket. */
+    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
+    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
+      i = BUCKET_A(c0 + 1) - 1;
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+        t = i - BUCKET_B(c0, c1);
+        BUCKET_B(c0, c1) = i; /* end point */
+
+        /* Move all type B* suffixes to the correct position. */
+        for(i = t, j = BUCKET_BSTAR(c0, c1);
+            j <= k;
+            --i, --k) { SA[i] = SA[k]; }
+      }
+      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
+      BUCKET_B(c0, c0) = i; /* end point */
+    }
+  }
+
+  return m;
+}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes.
+ *
+ * T, SA, bucket_A, bucket_B and n are the arrays and length the caller
+ * previously passed to sort_typeBstar(); m is that function's return value
+ * (the number of type B* suffixes).
+ *
+ * T[n - 2] is read unconditionally, so n must be at least 2; the visible
+ * caller handles n <= 2 itself before calling this function.
+ */
+static
+void
+construct_SA(const sauchar_t *T, saidx_t *SA,
+             saidx_t *bucket_A, saidx_t *bucket_B,
+             saidx_t n, saidx_t m) {
+  saidx_t *i, *j, *k;
+  saidx_t s;
+  saint_t c0, c1, c2;
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      /* k is the write cursor inside the bucket for (c2, c1); c2 caches the
+         character of the bucket currently being filled (-1 = none yet). */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        /* Every visited slot is bit-complemented exactly once in this pass
+           (both branches store ~s back into *j). */
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+          *j = ~s;
+          /* Step to the preceding suffix and file it under its first char. */
+          c0 = T[--s];
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+          if(c0 != c2) {
+            /* Switching buckets: save the old cursor, load the new one. */
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else {
+          assert(((s == 0) && (T[s] == c1)) || (s < 0));
+          *j = ~s;
+        }
+      }
+    }
+  }
+
+  /* Construct the suffix array by using
+     the sorted order of type B suffixes. */
+  /* Seed the pass with the last suffix (position n - 1), placed at the
+     start of the bucket of its character. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
+      if(c0 != c2) {
+        /* Switching buckets: save the old cursor, load the new one. */
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      *k++ = s;
+    } else {
+      /* Complemented entry: restore the plain position in place. */
+      assert(s < 0);
+      *i = ~s;
+    }
+  }
+}
+
+#if 0
+/* NOTE: this whole function is compiled out by the surrounding #if 0. */
+/* Constructs the burrows-wheeler transformed string directly
+   by using the sorted order of type B* suffixes.
+   Takes the same arguments as construct_SA(). Returns the offset within SA
+   (orig - SA) of the slot whose value was 0 during the left-to-right pass,
+   or 0 if no such slot was seen. */
+static
+saidx_t
+construct_BWT(const sauchar_t *T, saidx_t *SA,
+              saidx_t *bucket_A, saidx_t *bucket_B,
+              saidx_t n, saidx_t m) {
+  saidx_t *i, *j, *k, *orig;
+  saidx_t s;
+  saint_t c0, c1, c2;
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+          c0 = T[--s];
+          /* Unlike construct_SA, the slot receives the complemented
+             preceding character rather than the complemented position. */
+          *j = ~((saidx_t)c0);
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else if(s != 0) {
+          *j = ~s;
+#ifndef NDEBUG
+        } else {
+          assert(T[s] == c1);
+#endif
+        }
+      }
+    }
+  }
+
+  /* Construct the BWTed string by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      /* The slot now holds the preceding character instead of a position. */
+      *i = c0;
+      if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      *k++ = s;
+    } else if(s != 0) {
+      *i = ~s;
+    } else {
+      /* Remember where the zero entry was found; it is the return value. */
+      orig = i;
+    }
+  }
+
+  return orig - SA;
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+
+/**
+ * Initialize suffix array context
+ *
+ * Allocates the two bucket tables (BUCKET_A_SIZE and BUCKET_B_SIZE entries)
+ * that divsufsort_build_array() needs. On failure nothing stays allocated
+ * and both pointers in the context are left NULL.
+ *
+ * @param ctx suffix array context to initialize; its previous contents are
+ *            overwritten without being freed
+ *
+ * @return 0 for success, or non-zero in case of an error (NULL context or
+ *         allocation failure)
+ */
+int divsufsort_init(divsufsort_ctx_t *ctx) {
+   /* A NULL context cannot be initialized; report it instead of crashing. */
+   if (!ctx)
+      return -1;
+
+   ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+   ctx->bucket_B = NULL;
+
+   if (ctx->bucket_A) {
+      ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+      if (ctx->bucket_B)
+         return 0;
+   }
+
+   /* Partial allocation: release whatever was obtained. */
+   divsufsort_destroy(ctx);
+   return -1;
+}
+
+/**
+ * Destroy suffix array context
+ *
+ * Frees both bucket tables and resets the pointers to NULL, so calling this
+ * function again on the same context is harmless.
+ *
+ * @param ctx suffix array context to destroy; NULL is accepted and ignored
+ */
+void divsufsort_destroy(divsufsort_ctx_t *ctx) {
+   /* Mirror free(): destroying a NULL context is a no-op. */
+   if (!ctx)
+      return;
+
+   if (ctx->bucket_B) {
+      free(ctx->bucket_B);
+      ctx->bucket_B = NULL;
+   }
+
+   if (ctx->bucket_A) {
+      free(ctx->bucket_A);
+      ctx->bucket_A = NULL;
+   }
+}
+
+/*- Function -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ *
+ * @param ctx suffix array context set up by divsufsort_init(); only needed
+ *            when n is greater than 2
+ * @param T   input string, T[0..n-1]
+ * @param SA  output array of suffixes, SA[0..n-1]
+ * @param n   length of the input string
+ *
+ * @return 0 if no error occurred, -1 if T or SA is NULL or n is negative,
+ *         -2 if the context is NULL or its bucket tables are not allocated
+ */
+saint_t
+divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
+  saidx_t m;
+
+  /* Check arguments. */
+  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
+  else if(n == 0) { return 0; }
+  else if(n == 1) { SA[0] = 0; return 0; }
+  /* Two characters: order the two suffixes directly. */
+  else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
+
+  /* The sort needs both bucket tables; a missing context is reported the
+     same way as missing tables instead of being dereferenced. */
+  if((ctx == NULL) || (ctx->bucket_A == NULL) || (ctx->bucket_B == NULL)) {
+    return -2;
+  }
+
+  /* Suffixsort. */
+  m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
+  construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
+
+  return 0;
+}
+
+#if 0
+/* Burrows-Wheeler transform of T[0..n-1] into U.
+   NOTE: this function sits inside an #if 0 region and is not compiled.
+   A is an optional caller-supplied work array; when it is NULL a temporary
+   array of n + 1 entries is allocated here and freed before returning.
+   Returns -1 on invalid arguments, n when n <= 1, -2 if an allocation
+   failed, and otherwise the value from construct_BWT() plus one. */
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
+  saidx_t *B;
+  saidx_t *bucket_A, *bucket_B;
+  saidx_t m, pidx, i;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
+  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
+
+  /* B is the work array: the caller's A if given, otherwise a temporary. */
+  if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
+  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+  /* Burrows-Wheeler Transform. */
+  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
+    m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
+    pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
+
+    /* Copy to output string. */
+    /* U[0] is the last input character; entries before pidx shift up by
+       one slot and B[pidx] itself is skipped. */
+    U[0] = T[n - 1];
+    for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
+    for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
+    pidx += 1;
+  } else {
+    pidx = -2;
+  }
+
+  /* free(NULL) is harmless, so failed allocations need no special casing. */
+  free(bucket_B);
+  free(bucket_A);
+  if(A == NULL) { free(B); }
+
+  return pidx;
+}
+
+/* Returns the PROJECT_VERSION_FULL string (compiled out by the enclosing
+   #if 0; the macro is not defined by divsufsort_config.h in this tree). */
+const char *
+divsufsort_version(void) {
+  return PROJECT_VERSION_FULL;
+}
+#endif
--- a/loader/tools/dali/salvador/src/libdivsufsort/lib/divsufsort_utils.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/lib/divsufsort_utils.c
@ -0,0 +1,383 @@
+/*
+ * utils.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Function -*/
+
+#if 0
+/*
+ * Lower-bound binary search used by the inverse BWT.
+ * Returns the index of the first element of A[0..size-1] that is not less
+ * than value, or size when every element is smaller. A is expected to be
+ * sorted in ascending order.
+ */
+static
+saidx_t
+binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
+  saidx_t lo = 0;
+  saidx_t half = size >> 1;
+
+  while(0 < size) {
+    if(A[lo + half] < value) {
+      /* Continue to the right of the probe; for an even size that part
+         holds one element fewer than half. */
+      lo += half + 1;
+      half -= (size & 1) ^ 1;
+    }
+    size = half;
+    half >>= 1;
+  }
+  return lo;
+}
+
+
+/*- Functions -*/
+
+/*
+ * Burrows-Wheeler transform.
+ *
+ * Writes the transform of T[0..n-1] to U[0..n-1] and stores the matching
+ * index in *idx (n for inputs of length 0 or 1, otherwise one more than the
+ * position in SA of the entry for suffix 0). T and U may be the same buffer.
+ *
+ * SA is either the suffix array of T, or NULL, in which case the work is
+ * delegated to divbwt(). In the in-place case (T == U) the entries of SA are
+ * overwritten while the output is produced.
+ *
+ * Returns 0 on success, -1 on invalid arguments, or the negative error code
+ * reported by divbwt().
+ */
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
+             saidx_t n, saidx_t *idx) {
+  saidx_t *A, i, j, p, t;
+  saint_t c;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
+  if(n <= 1) {
+    if(n == 1) { U[0] = T[0]; }
+    *idx = n;
+    return 0;
+  }
+
+  /* Without a suffix array, divbwt() does all the work; a non-negative
+     result is the index, a negative one is passed through as the error. */
+  if((A = SA) == NULL) {
+    i = divbwt(T, U, NULL, n);
+    if(0 <= i) { *idx = i; i = 0; }
+    return (saint_t)i;
+  }
+
+  /* BW transform. */
+  if(T == U) {
+    /* In place: a text character that is about to be overwritten is saved
+       in the already consumed slot A[j], and read back from A[p] when its
+       position p lies below the write position j. */
+    t = n;
+    for(i = 0, j = 0; i < n; ++i) {
+      p = t - 1;
+      t = A[i];
+      if(0 <= p) {
+        c = T[j];
+        U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
+        A[j] = c;
+        j++;
+      } else {
+        *idx = i;
+      }
+    }
+    p = t - 1;
+    if(0 <= p) {
+      c = T[j];
+      U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
+      A[j] = c;
+    } else {
+      *idx = i;
+    }
+  } else {
+    U[0] = T[n - 1];
+    for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
+    *idx = i + 1;
+    for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
+  }
+
+  /* A always aliases the caller's SA here (the SA == NULL case returned
+     above), so there is nothing to release. */
+  return 0;
+}
+
+/*
+ * Inverse Burrows-Wheeler transform.
+ *
+ * Reconstructs the original string from the transformed string T[0..n-1]
+ * and the index idx produced by the forward transform, writing the result
+ * to U[0..n-1].
+ *
+ * A is an optional work array of n entries; when it is NULL a temporary
+ * array is allocated here and released before returning.
+ *
+ * Returns 0 on success, -1 on invalid arguments (NULL T or U, negative n or
+ * idx, idx > n, or idx == 0 for a non-empty input) and -2 if the temporary
+ * array could not be allocated.
+ */
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
+                     saidx_t n, saidx_t idx) {
+  saidx_t C[ALPHABET_SIZE];
+  sauchar_t D[ALPHABET_SIZE];
+  saidx_t *B;
+  saidx_t i, p;
+  saint_t c, d;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
+     (n < idx) || ((0 < n) && (idx == 0))) {
+    return -1;
+  }
+  /* A single character is its own transform; copy it so that U is defined
+     even when it is a different buffer from T. */
+  if(n <= 1) {
+    if(n == 1) { U[0] = T[0]; }
+    return 0;
+  }
+
+  if((B = A) == NULL) {
+    /* Allocate n*sizeof(saidx_t) bytes of memory. */
+    if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
+  }
+
+  /* Inverse BW transform. */
+  /* Count the characters, then turn the counts into bucket start offsets;
+     D collects the d distinct characters in ascending order. */
+  for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
+  for(i = 0; i < n; ++i) { ++C[T[i]]; }
+  for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
+    p = C[c];
+    if(0 < p) {
+      C[c] = i;
+      D[d++] = (sauchar_t)c;
+      i += p;
+    }
+  }
+  /* Build the link array B; positions at or after idx are shifted by one. */
+  for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
+  for( ; i < n; ++i)       { B[C[T[i]]++] = i + 1; }
+  /* Compact C to the bucket end offsets of the d characters that occur. */
+  for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
+  /* Follow the links from idx, looking up the character of each position
+     by its bucket. */
+  for(i = 0, p = idx; i < n; ++i) {
+    U[i] = D[binarysearch_lower(C, d, p)];
+    p = B[p - 1];
+  }
+
+  if(A == NULL) {
+    /* Deallocate memory. */
+    free(B);
+  }
+
+  return 0;
+}
+
+/*
+ * Checks the suffix array SA of the string T.
+ *
+ * T is the text of length n and SA the candidate suffix array. When verbose
+ * is nonzero, the reason for a failure is printed to stderr.
+ *
+ * Returns 0 if all checks pass, -1 on invalid arguments, -2 if an entry of
+ * SA lies outside [0, n-1], -3 if the first characters of two adjacent
+ * suffixes are out of order, and -4 if a suffix is found at the wrong
+ * position.
+ */
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA,
+         saidx_t n, saint_t verbose) {
+  saidx_t C[ALPHABET_SIZE];
+  saidx_t i, p, q, t;
+  saint_t c;
+
+  if(verbose) { fprintf(stderr, "sufcheck: "); }
+
+  /* Check arguments. */
+  if((T == NULL) || (SA == NULL) || (n < 0)) {
+    if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
+    return -1;
+  }
+  if(n == 0) {
+    if(verbose) { fprintf(stderr, "Done.\n"); }
+    return 0;
+  }
+
+  /* check range: [0..n-1] */
+  for(i = 0; i < n; ++i) {
+    if((SA[i] < 0) || (n <= SA[i])) {
+      if(verbose) {
+        fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
+                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
+                        n - 1, i, SA[i]);
+      }
+      return -2;
+    }
+  }
+
+  /* check first characters. */
+  for(i = 1; i < n; ++i) {
+    if(T[SA[i - 1]] > T[SA[i]]) {
+      if(verbose) {
+        fprintf(stderr, "Suffixes in wrong order.\n"
+                        "  T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
+                        " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
+                        i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
+      }
+      return -3;
+    }
+  }
+
+  /* check suffixes: count the characters, then turn the counts into the
+     start offset of each character's group within SA. */
+  for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
+  for(i = 0; i < n; ++i) { ++C[T[i]]; }
+  for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
+    t = C[i];
+    C[i] = p;
+    p += t;
+  }
+
+  q = C[T[n - 1]]; /* first slot of the group of the last character */
+  C[T[n - 1]] += 1; /* reserve that slot; it is checked through q below */
+  for(i = 0; i < n; ++i) {
+    p = SA[i];
+    if(0 < p) {
+      c = T[--p]; /* p is now the suffix starting one character earlier */
+      t = C[c]; /* slot where that suffix is expected */
+    } else {
+      c = T[p = n - 1];
+      t = q;
+    }
+    if((t < 0) || (p != SA[t])) {
+      if(verbose) {
+        fprintf(stderr, "Suffix in wrong position.\n"
+                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
+                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
+                        t, (0 <= t) ? SA[t] : -1, i, SA[i]);
+      }
+      return -4;
+    }
+    if(t != q) {
+      ++C[c];
+      if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } /* group exhausted: any further use fails the t < 0 test */
+    }
+  }
+
+  if(1 <= verbose) { fprintf(stderr, "Done.\n"); } /* NOTE(review): tests 1 <= verbose, while the other messages test verbose != 0 */
+  return 0;
+}
+
+
+/*
+ * Compares the suffix of T that starts at suf with the pattern P, skipping
+ * the first *match characters (taken as already known to be equal).
+ * On return *match holds the number of leading pattern characters matched.
+ * Returns 0 if the whole pattern matched, the difference of the first pair
+ * of mismatching characters otherwise, or -1 if the text ended before the
+ * pattern did.
+ */
+static
+int
+_compare(const sauchar_t *T, saidx_t Tsize,
+         const sauchar_t *P, saidx_t Psize,
+         saidx_t suf, saidx_t *match) {
+  saidx_t tpos = suf + *match;
+  saidx_t ppos = *match;
+  saint_t diff = 0;
+
+  while((tpos < Tsize) && (ppos < Psize)) {
+    diff = T[tpos] - P[ppos];
+    if(diff != 0) { break; }
+    ++tpos;
+    ++ppos;
+  }
+
+  *match = ppos;
+  if(diff != 0) { return diff; }
+  return (ppos != Psize) ? -1 : 0;
+}
+
+/*
+ * Search for the pattern P in the string T.
+ *
+ * SA[0..SAsize-1] is searched by binary search, comparing P against the
+ * suffixes of T with _compare(). The return value is the number of entries
+ * of SA for which that comparison reports equality; when it is positive,
+ * *idx (if idx is not NULL) receives the index of the first such entry,
+ * otherwise the position reached by the search.
+ *
+ * Special cases: returns -1 on invalid arguments and 0 when T or SA is
+ * empty, leaving *idx at -1 in both cases; an empty pattern returns SAsize
+ * with *idx set to 0.
+ */
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+          const sauchar_t *P, saidx_t Psize,
+          const saidx_t *SA, saidx_t SAsize,
+          saidx_t *idx) {
+  saidx_t size, lsize, rsize, half;
+  saidx_t match, lmatch, rmatch;
+  saidx_t llmatch, lrmatch, rlmatch, rrmatch;
+  saidx_t i, j, k;
+  saint_t r;
+
+  if(idx != NULL) { *idx = -1; }
+  if((T == NULL) || (P == NULL) || (SA == NULL) ||
+     (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
+  if((Tsize == 0) || (SAsize == 0)) { return 0; }
+  if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
+
+  /* lmatch / rmatch hold the match lengths found at the current left and
+     right boundaries; each comparison starts from the smaller of the two. */
+  for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
+      0 < size;
+      size = half, half >>= 1) {
+    match = MIN(lmatch, rmatch);
+    r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
+    if(r < 0) {
+      i += half + 1;
+      half -= (size & 1) ^ 1;
+      lmatch = match;
+    } else if(r > 0) {
+      rmatch = match;
+    } else {
+      /* A match was hit: search its left part for the first match (j) and
+         its right part for the end of the matches (k). */
+      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
+
+      /* left part */
+      for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
+          0 < lsize;
+          lsize = half, half >>= 1) {
+        lmatch = MIN(llmatch, lrmatch);
+        r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
+        if(r < 0) {
+          j += half + 1;
+          half -= (lsize & 1) ^ 1;
+          llmatch = lmatch;
+        } else {
+          lrmatch = lmatch;
+        }
+      }
+
+      /* right part */
+      for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
+          0 < rsize;
+          rsize = half, half >>= 1) {
+        rmatch = MIN(rlmatch, rrmatch);
+        r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
+        if(r <= 0) {
+          k += half + 1;
+          half -= (rsize & 1) ^ 1;
+          rlmatch = rmatch;
+        } else {
+          rrmatch = rmatch;
+        }
+      }
+
+      break;
+    }
+  }
+
+  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
+  return k - j;
+}
+
+/*
+ * Search for the character c in the string T.
+ *
+ * SA[0..SAsize-1] is searched by binary search on the first character of
+ * each suffix; an entry whose start position is not below Tsize is treated
+ * as smaller than c. The return value is the number of entries whose first
+ * character equals c; when it is positive, *idx (if idx is not NULL)
+ * receives the index of the first such entry, otherwise the position
+ * reached by the search.
+ *
+ * Returns -1 on invalid arguments and 0 when T or SA is empty, leaving *idx
+ * at -1 in both cases.
+ */
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+                const saidx_t *SA, saidx_t SAsize,
+                saint_t c, saidx_t *idx) {
+  saidx_t size, lsize, rsize, half;
+  saidx_t i, j, k, p;
+  saint_t r;
+
+  if(idx != NULL) { *idx = -1; }
+  if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
+  if((Tsize == 0) || (SAsize == 0)) { return 0; }
+
+  for(i = j = k = 0, size = SAsize, half = size >> 1;
+      0 < size;
+      size = half, half >>= 1) {
+    p = SA[i + half];
+    r = (p < Tsize) ? T[p] - c : -1;
+    if(r < 0) {
+      i += half + 1;
+      half -= (size & 1) ^ 1;
+    } else if(r == 0) {
+      /* A match was hit: search its left part for the first match (j) and
+         its right part for the end of the matches (k). */
+      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
+
+      /* left part */
+      for(half = lsize >> 1;
+          0 < lsize;
+          lsize = half, half >>= 1) {
+        p = SA[j + half];
+        r = (p < Tsize) ? T[p] - c : -1;
+        if(r < 0) {
+          j += half + 1;
+          half -= (lsize & 1) ^ 1;
+        }
+      }
+
+      /* right part */
+      for(half = rsize >> 1;
+          0 < rsize;
+          rsize = half, half >>= 1) {
+        p = SA[k + half];
+        r = (p < Tsize) ? T[p] - c : -1;
+        if(r <= 0) {
+          k += half + 1;
+          half -= (rsize & 1) ^ 1;
+        }
+      }
+
+      break;
+    }
+  }
+
+  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
+  return k - j;
+}
+#endif
--- a/loader/tools/dali/salvador/src/libdivsufsort/lib/sssort.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/lib/sssort.c
@ -0,0 +1,815 @@
+/*
+ * sssort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+static const saint_t lg_table[256]= { /* lg_table[i] = floor(log2(i)) for 1 <= i <= 255; lg_table[0] = -1 */
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+/*
+ * Integer base-2 logarithm of n: locates the highest non-zero byte of n and
+ * adds its lg_table entry to that byte's bit offset.
+ * How many bytes are examined depends on the configuration: the full index
+ * width when SS_BLOCKSIZE is 0, a single table lookup when SS_BLOCKSIZE is
+ * below 256, and the low 16 bits otherwise.
+ * Yields -1 for n == 0 (the value stored in lg_table[0]).
+ */
+static INLINE
+saint_t
+ss_ilg(saidx_t n) {
+#if SS_BLOCKSIZE == 0
+# if defined(BUILD_DIVSUFSORT64)
+  return (n >> 32) ?
+          ((n >> 48) ?
+            ((n >> 56) ?
+              56 + lg_table[(n >> 56) & 0xff] :
+              48 + lg_table[(n >> 48) & 0xff]) :
+            ((n >> 40) ?
+              40 + lg_table[(n >> 40) & 0xff] :
+              32 + lg_table[(n >> 32) & 0xff])) :
+          ((n & 0xffff0000) ?
+            ((n & 0xff000000) ?
+              24 + lg_table[(n >> 24) & 0xff] :
+              16 + lg_table[(n >> 16) & 0xff]) :
+            ((n & 0x0000ff00) ?
+               8 + lg_table[(n >>  8) & 0xff] :
+               0 + lg_table[(n >>  0) & 0xff]));
+# else
+  return (n & 0xffff0000) ?
+          ((n & 0xff000000) ?
+            24 + lg_table[(n >> 24) & 0xff] :
+            16 + lg_table[(n >> 16) & 0xff]) :
+          ((n & 0x0000ff00) ?
+             8 + lg_table[(n >>  8) & 0xff] :
+             0 + lg_table[(n >>  0) & 0xff]);
+# endif
+#elif SS_BLOCKSIZE < 256
+  return lg_table[n]; /* NOTE(review): indexes the table directly, so n is assumed to be in [0, 255] here */
+#else
+  return (n & 0xff00) ?
+          8 + lg_table[(n >> 8) & 0xff] :
+          0 + lg_table[(n >> 0) & 0xff];
+#endif
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+#if SS_BLOCKSIZE != 0
+
+static const saint_t sqq_table[256] = { /* square-root table for ss_isqrt; the entries appear to be floor(16 * sqrt(i)) - TODO confirm */
+  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
+ 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
+ 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
+110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
+156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
+169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
+181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
+192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
+202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
+212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
+221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
+230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
+239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
+247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
+};
+/*
+ * Integer square root of x, capped at SS_BLOCKSIZE.
+ * Any x of at least SS_BLOCKSIZE * SS_BLOCKSIZE yields SS_BLOCKSIZE.
+ * Otherwise a first estimate is read from sqq_table using the top bits of x
+ * and, for larger x, refined with one or two steps of the form
+ * y = (y + 1 + x / y) / 2; the estimate is finally lowered by one when its
+ * square exceeds x.
+ */
+static INLINE
+saidx_t
+ss_isqrt(saidx_t x) {
+  saidx_t y, e;
+
+  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
+  e = (x & 0xffff0000) ? /* e = index of the highest set bit of x, as in ss_ilg */
+        ((x & 0xff000000) ?
+          24 + lg_table[(x >> 24) & 0xff] :
+          16 + lg_table[(x >> 16) & 0xff]) :
+        ((x & 0x0000ff00) ?
+           8 + lg_table[(x >>  8) & 0xff] :
+           0 + lg_table[(x >>  0) & 0xff]);
+
+  if(e >= 16) {
+    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
+    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
+    y = (y + 1 + x / y) >> 1;
+  } else if(e >= 8) {
+    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
+  } else {
+    return sqq_table[x] >> 4; /* x < 256: the table is indexed directly */
+  }
+
+  return (x < (y * y)) ? y - 1 : y;
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Compares two suffixes of T given through the index entries p1 and p2,
+ * starting depth characters into each of them.
+ * The compared range of a suffix ends two characters past the position
+ * stored in the entry that follows it (p1[1] resp. p2[1]).
+ * Returns a negative value, zero or a positive value when the first suffix
+ * orders before, equal to or after the second; a range that is exhausted
+ * first orders before the other.
+ */
+static INLINE
+saint_t
+ss_compare(const sauchar_t *T,
+           const saidx_t *p1, const saidx_t *p2,
+           saidx_t depth) {
+  const sauchar_t *s1 = T + depth + p1[0];
+  const sauchar_t *s2 = T + depth + p2[0];
+  const sauchar_t *end1 = T + p1[1] + 2;
+  const sauchar_t *end2 = T + p2[1] + 2;
+
+  /* Skip the common part. */
+  while((s1 < end1) && (s2 < end2) && (*s1 == *s2)) {
+    ++s1;
+    ++s2;
+  }
+
+  if(s1 < end1) {
+    return (s2 < end2) ? (*s1 - *s2) : 1;
+  }
+  return (s2 < end2) ? -1 : 0;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
+
+/*
+ * Insertionsort for small size groups.
+ * Sorts the entries of [first, last) by ss_compare() on the suffixes they
+ * select through PA, working from the back of the range towards the front.
+ * An entry found equal to the element inserted before it is stored
+ * bit-complemented (negative); such entries are skipped together with the
+ * entry that precedes them when elements are shifted.
+ */
+static
+void
+ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
+                 saidx_t *first, saidx_t *last, saidx_t depth) {
+  saidx_t *i, *j;
+  saidx_t t;
+  saint_t r;
+
+  for(i = last - 2; first <= i; --i) {
+    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
+      do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); /* shift *j and the complemented entries after it one slot down */
+      if(last <= j) { break; }
+    }
+    if(r == 0) { *j = ~*j; } /* equal to t: mark *j by complementing it */
+    *(j - 1) = t;
+  }
+}
+
+#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+/*
+ * Sifts the element at index i down the binary heap SA[0..size-1], whose
+ * keys are the characters Td[PA[SA[.]]] with the largest key at the root.
+ * NOTE: once the first child index is below size, the second child is read
+ * without a further bounds check.
+ */
+static INLINE
+void
+ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *SA, saidx_t i, saidx_t size) {
+  saidx_t j, k;
+  saidx_t v;
+  saint_t c, d, e;
+
+  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = Td[PA[SA[k = j++]]];
+    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } /* k = child with the larger key d */
+    if(d <= c) { break; } /* v is not smaller than both children: it stays here */
+  }
+  SA[i] = v;
+}
+
+/*
+ * Simple top-down heapsort.
+ * Sorts SA[0..size-1] in ascending order of the character Td[PA[SA[.]]].
+ * The heap is built over an odd number m of elements; for an even size the
+ * extra last element SA[m] is handled separately.
+ */
+static
+void
+ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
+  saidx_t i, m;
+  saidx_t t;
+
+  m = size;
+  if((size % 2) == 0) {
+    m--;
+    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } /* build the heap */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } /* move the root to the extra slot and restore the heap */
+  for(i = m - 1; 0 < i; --i) { /* repeatedly move the root to the end of the shrinking heap */
+    t = SA[0], SA[0] = SA[i];
+    ss_fixdown(Td, PA, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Returns whichever of v1, v2, v3 points at the element whose key
+ * Td[PA[*v]] is the median of the three keys. The array itself is not
+ * modified.
+ */
+static INLINE
+saidx_t *
+ss_median3(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+  saidx_t *lo = v1;
+  saidx_t *hi = v2;
+
+  /* Order the first two candidates by key. */
+  if(Td[PA[*lo]] > Td[PA[*hi]]) { lo = v2; hi = v1; }
+
+  /* The third key is not below the larger one: the larger one is the median. */
+  if(Td[PA[*hi]] <= Td[PA[*v3]]) { return hi; }
+
+  /* Otherwise the median is the larger of the smaller candidate and v3. */
+  return (Td[PA[*lo]] > Td[PA[*v3]]) ? lo : v3;
+}
+
+/*
+ * Returns the median of five elements.
+ * The candidates are compared by their key Td[PA[*v]]; the result is one of
+ * the five pointers passed in. Only the local pointer variables are
+ * exchanged (SWAP is defined elsewhere and presumably swaps its two
+ * arguments through t), so the array itself is not modified.
+ */
+static INLINE
+saidx_t *
+ss_median5(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+  saidx_t *t;
+  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+  return v3;
+}
+
+/*
+ * Returns a pointer to the pivot element for [first, last).
+ * Ranges of up to 32 elements use the median of three samples, ranges of up
+ * to 512 elements the median of five, and larger ranges the median of three
+ * medians of three.
+ */
+static INLINE
+saidx_t *
+ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
+  saidx_t *center, *lo, *mid, *hi;
+  saidx_t len, step;
+
+  len = last - first;
+  center = first + len / 2;
+
+  if(len <= 32) {
+    return ss_median3(Td, PA, first, center, last - 1);
+  }
+  if(len <= 512) {
+    step = len >> 2;
+    return ss_median5(Td, PA, first, first + step, center, last - 1 - step, last - 1);
+  }
+
+  /* Sample three elements each from the front, the middle and the back. */
+  step = len >> 3;
+  lo  = ss_median3(Td, PA, first, first + step, first + (step << 1));
+  mid = ss_median3(Td, PA, center - step, center, center + step);
+  hi  = ss_median3(Td, PA, last - 1 - (step << 1), last - 1 - step, last - 1);
+  return ss_median3(Td, PA, lo, mid, hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Binary partition for substrings.
+ * Rearranges [first, last) so that the entries satisfying
+ * PA[*a] + depth >= PA[*a + 1] + 1 come first, and returns a pointer to the
+ * first entry of the remaining part. The entries of the first part are
+ * stored bit-complemented, except for the very first one, which is
+ * complemented back at the end.
+ */
+static INLINE
+saidx_t *
+ss_partition(const saidx_t *PA,
+                    saidx_t *first, saidx_t *last, saidx_t depth) {
+  saidx_t *a, *b;
+  saidx_t t;
+  for(a = first - 1, b = last;;) {
+    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } /* advance over entries that already belong to the first part */
+    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { } /* retreat over entries that belong to the second part */
+    if(b <= a) { break; }
+    t = ~*b; /* exchange the two misplaced entries, complementing the one that moves to the front */
+    *b = *a;
+    *a = t;
+  }
+  if(first < a) { *first = ~*first; }
+  return a;
+}
+
+/*
+ * Multikey introsort for medium size groups.
+ *
+ * Sorts the entries of [first, last), which select suffixes of T through
+ * PA, starting the comparison at character offset depth. Recursion is
+ * replaced by an explicit stack of (first, last, depth, limit) records that
+ * is driven through the STACK_PUSH / STACK_POP macros (defined elsewhere;
+ * STACK_POP presumably leaves the function once the stack is empty).
+ *
+ * Each round handles the current range in one of three ways:
+ *  - ranges of at most SS_INSERTIONSORT_THRESHOLD entries are finished with
+ *    ss_insertionsort();
+ *  - when the budget limit (initialised from ss_ilg of the range size) is
+ *    used up, the range is heap-sorted on the character at depth and then
+ *    consumed one run of equal characters at a time;
+ *  - otherwise the range is split three ways around a pivot character into
+ *    less-than, equal and greater-than parts; the equal part continues at
+ *    depth + 1.
+ */
+static
+void
+ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
+              saidx_t *first, saidx_t *last,
+              saidx_t depth) {
+#define STACK_SIZE SS_MISORT_STACKSIZE
+  struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
+  const sauchar_t *Td;
+  saidx_t *a, *b, *c, *d, *e, *f;
+  saidx_t s, t;
+  saint_t ssize;
+  saint_t limit;
+  saint_t v, x = 0;
+
+  for(ssize = 0, limit = ss_ilg(last - first);;) {
+
+    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
+#if 1 < SS_INSERTIONSORT_THRESHOLD
+      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
+#endif
+      STACK_POP(first, last, depth, limit);
+      continue;
+    }
+
+    Td = T + depth;
+    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
+    if(limit < 0) {
+      /* The range is sorted by the character at depth: find the end a of
+         the first run of two or more equal characters. */
+      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
+        if((x = Td[PA[*a]]) != v) {
+          if(1 < (a - first)) { break; }
+          v = x;
+          first = a;
+        }
+      }
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, a, depth);
+      }
+      /* Continue with the smaller of the run [first, a) and the rest
+         [a, last); the other one is pushed when it has more than one entry. */
+      if((a - first) <= (last - a)) {
+        if(1 < (a - first)) {
+          STACK_PUSH(a, last, depth, -1);
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        } else {
+          first = a, limit = -1;
+        }
+      } else {
+        if(1 < (last - a)) {
+          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
+          first = a, limit = -1;
+        } else {
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        }
+      }
+      continue;
+    }
+
+    /* choose pivot */
+    a = ss_pivot(Td, PA, first, last);
+    v = Td[PA[*a]];
+    SWAP(*first, *a);
+
+    /* partition: entries equal to v are collected at both ends of the range
+       ([first, a) and (d, last)) while b and c scan towards each other. */
+    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+    if(((a = b) < last) && (x < v)) {
+      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+    }
+    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+    if((b < (d = c)) && (x > v)) {
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+    for(; b < c;) {
+      SWAP(*b, *c);
+      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+
+    if(a <= d) {
+      c = b - 1;
+
+      /* Move the equal entries from both ends into the middle. */
+      if((s = a - first) > (t = b - a)) { s = t; }
+      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+      if((s = d - c) > (t = last - d - 1)) { s = t; }
+      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+      /* Now [first, a) is less than v, [a, c) equal and [c, last) greater. */
+      a = first + (b - a), c = last - (d - c);
+      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
+
+      /* Push the two larger of the three parts and continue with the
+         smallest one. */
+      if((a - first) <= (last - c)) {
+        if((last - c) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(c, last, depth, limit);
+          last = a;
+        } else if((a - first) <= (c - b)) {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          last = a;
+        } else {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(first, a, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      } else {
+        if((a - first) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(first, a, depth, limit);
+          first = c;
+        } else if((last - c) <= (c - b)) {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          first = c;
+        } else {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(c, last, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      }
+    } else {
+      /* Every entry has the character v at this depth: give the budget
+         step back and move on to the next character. */
+      limit += 1;
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, last, depth);
+        limit = ss_ilg(last - first);
+      }
+      depth += 1;
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if SS_BLOCKSIZE != 0
+
+/* Exchanges the n elements starting at a with the n elements starting at b,
+   element by element; does nothing when n is not positive. */
+static INLINE
+void
+ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
+  saidx_t k, tmp;
+
+  for(k = 0; k < n; ++k) {
+    tmp = a[k];
+    a[k] = b[k];
+    b[k] = tmp;
+  }
+}
+/*
+ * Exchanges the two adjacent blocks [first, middle) and [middle, last) in
+ * place (a rotation of the range). Blocks of equal length are swapped
+ * directly with ss_blockswap(); otherwise elements are cycled through the
+ * single temporary t, shrinking the remaining problem on each pass.
+ */
+static INLINE
+void
+ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
+  saidx_t *a, *b, t;
+  saidx_t l, r;
+  l = middle - first, r = last - middle; /* lengths of the left and right block */
+  for(; (0 < l) && (0 < r);) {
+    if(l == r) { ss_blockswap(first, middle, l); break; }
+    if(l < r) {
+      a = last - 1, b = middle - 1;
+      t = *a;
+      do {
+        *a-- = *b, *b-- = *a;
+        if(b < first) {
+          *a = t;
+          last = a;
+          if((r -= l + 1) <= l) { break; }
+          a -= 1, b = middle - 1;
+          t = *a;
+        }
+      } while(1);
+    } else {
+      a = first, b = middle;
+      t = *a;
+      do {
+        *a++ = *b, *b++ = *a;
+        if(last <= b) {
+          *a = t;
+          first = a + 1;
+          if((l -= r + 1) <= r) { break; }
+          a += 1, b = middle;
+          t = *a;
+        }
+      } while(1);
+    }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/*
+ * Merges the two sorted runs [first, middle) and [middle, last) without an
+ * auxiliary buffer. Working from the back, the last entry of the right run
+ * is located in the left run by binary search with ss_compare(), and the
+ * larger tail of the left run is rotated behind it with ss_rotate().
+ * Entries stored bit-complemented (negative) are compared through their
+ * complement.
+ */
+static
+void
+ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
+                saidx_t *first, saidx_t *middle, saidx_t *last,
+                saidx_t depth) {
+  const saidx_t *p;
+  saidx_t *a, *b;
+  saidx_t len, half;
+  saint_t q, r;
+  saint_t x;
+
+  for(;;) {
+    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } /* x records that the last entry is stored complemented */
+    else                { x = 0; p = PA +  *(last - 1); }
+    for(a = first, len = middle - first, half = len >> 1, r = -1;
+        0 < len;
+        len = half, half >>= 1) {
+      b = a + half;
+      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
+      if(q < 0) {
+        a = b + 1;
+        half -= (len & 1) ^ 1;
+      } else {
+        r = q; /* result of the most recent comparison that was not "less" */
+      }
+    }
+    if(a < middle) {
+      if(r == 0) { *a = ~*a; } /* *a compared equal to the last entry: mark it */
+      ss_rotate(a, middle, last);
+      last -= middle - a;
+      middle = a;
+      if(first == middle) { break; }
+    }
+    --last;
+    if(x != 0) { while(*--last < 0) { } } /* also step back over the complemented entries before it */
+    if(middle == last) { break; }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Merge-forward with internal buffer.
+ * Merges the sorted runs [first, middle) and [middle, last), front to back.
+ * The left run is first exchanged with the contents of buf, which must
+ * provide room for middle - first entries; as elements are merged, the
+ * displaced buffer contents are moved back so that buf ends up holding its
+ * original entries, possibly in a different order. When two compared
+ * entries are equal, the one from the right run is stored
+ * bit-complemented; complemented entries are moved along with the entry
+ * that precedes them.
+ */
+static
+void
+ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
+                saidx_t *first, saidx_t *middle, saidx_t *last,
+                saidx_t *buf, saidx_t depth) {
+  saidx_t *a, *b, *c, *bufend;
+  saidx_t t;
+  saint_t r;
+
+  bufend = buf + (middle - first) - 1;
+  ss_blockswap(buf, first, middle - first);
+
+  /* a = write position, b = next entry of the buffered left run, c = next
+     entry of the right run, t = the displaced entry that closes the cycle. */
+  for(t = *(a = first), b = buf, c = middle;;) {
+    r = ss_compare(T, PA + *b, PA + *c, depth);
+    if(r < 0) {
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }
+        *b++ = *a;
+      } while(*b < 0);
+    } else if(r > 0) {
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    } else {
+      *c = ~*c;
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }
+        *b++ = *a;
+      } while(*b < 0);
+
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    }
+  }
+}
+
+/*
+ * Merge-backward with internal buffer.
+ * Merges the sorted runs [first, middle) and [middle, last), back to front.
+ * The right run is first exchanged with the contents of buf, which must
+ * provide room for last - middle entries. p1 and p2 point at the PA entries
+ * of the current candidates from the buffered right run and from the left
+ * run; bit 0 resp. bit 1 of x records that the corresponding candidate is
+ * stored bit-complemented, in which case the complemented entries are
+ * moved first.
+ */
+static
+void
+ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
+                 saidx_t *first, saidx_t *middle, saidx_t *last,
+                 saidx_t *buf, saidx_t depth) {
+  const saidx_t *p1, *p2;
+  saidx_t *a, *b, *c, *bufend;
+  saidx_t t;
+  saint_t r;
+  saint_t x;
+
+  bufend = buf + (last - middle) - 1;
+  ss_blockswap(buf, middle, last - middle);
+
+  x = 0;
+  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
+  else                  { p1 = PA +  *bufend; }
+  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
+  else                  { p2 = PA +  *(middle - 1); }
+  /* a = write position, b = next entry of the buffered right run, c = next
+     entry of the left run, t = the displaced entry that closes the cycle. */
+  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
+    r = ss_compare(T, p1, p2, depth);
+    if(0 < r) {
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = *b;
+      if(b <= buf) { *buf = t; break; }
+      *b-- = *a;
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+    } else if(r < 0) {
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    } else {
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = ~*b; /* equal candidates: the entry from the right run is stored complemented */
+      if(b <= buf) { *buf = t; break; }
+      *b-- = *a;
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    }
+  }
+}
+
+/*
+ * D&C based merge.
+ * Merges the sorted runs [first, middle) and [middle, last) using a buffer
+ * of bufsize entries. When one of the runs fits into the buffer it is
+ * merged directly with ss_mergebackward() or ss_mergeforward(). Otherwise a
+ * binary search finds the number m of entries on either side of middle
+ * that have to change sides, these are exchanged with ss_blockswap(), and
+ * the two resulting sub-problems are handled through an explicit stack
+ * (STACK_PUSH / STACK_POP, defined elsewhere).
+ * The check / next flags (bits 1, 2 and 4) record which boundary entries
+ * still have to be compared with their predecessor and stored
+ * bit-complemented when equal; MERGE_CHECK applies them.
+ */
+static
+void
+ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
+             saidx_t *first, saidx_t *middle, saidx_t *last,
+             saidx_t *buf, saidx_t bufsize, saidx_t depth) {
+#define STACK_SIZE SS_SMERGE_STACKSIZE
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK(a, b, c)\
+  do {\
+    if(((c) & 1) ||\
+       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
+      *(a) = ~*(a);\
+    }\
+    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
+      *(b) = ~*(b);\
+    }\
+  } while(0)
+  struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
+  saidx_t *l, *r, *lm, *rm;
+  saidx_t m, len, half;
+  saint_t ssize;
+  saint_t check, next;
+
+  for(check = 0, ssize = 0;;) {
+    if((last - middle) <= bufsize) {
+      if((first < middle) && (middle < last)) {
+        ss_mergebackward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    if((middle - first) <= bufsize) {
+      if(first < middle) {
+        ss_mergeforward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    /* Binary search for m, comparing entries at mirrored distances on
+       either side of middle. */
+    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
+        0 < len;
+        len = half, half >>= 1) {
+      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
+                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+        m += half + 1;
+        half -= (len & 1) ^ 1;
+      }
+    }
+
+    if(0 < m) {
+      lm = middle - m, rm = middle + m;
+      ss_blockswap(lm, middle, m);
+      l = r = middle, next = 0;
+      if(rm < last) {
+        if(*rm < 0) {
+          *rm = ~*rm;
+          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
+          next |= 1;
+        } else if(first < lm) {
+          for(; *r < 0; ++r) { }
+          next |= 2;
+        }
+      }
+
+      /* Continue with the smaller sub-problem and push the other one. */
+      if((l - first) <= (last - r)) {
+        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
+        middle = lm, last = l, check = (check & 3) | (next & 4);
+      } else {
+        if((next & 2) && (r == middle)) { next ^= 6; }
+        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
+        first = r, middle = rm, check = (next & 3) | (check & 4);
+      }
+    } else {
+      /* Nothing to exchange: the two runs are already in order. */
+      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
+        *middle = ~*middle;
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/*
+ * Substring sort.
+ *
+ * Sorts the entries of [first, last), which select suffixes of T through
+ * PA, comparing from character offset depth.
+ *
+ * buf / bufsize describe a work buffer for the merge steps; when it is
+ * smaller than SS_BLOCKSIZE, than the range itself and than the value of
+ * ss_isqrt() for the range size, the tail of the range is used as the
+ * buffer instead and merged in at the end with ss_inplacemerge().
+ *
+ * With SS_BLOCKSIZE == 0 the whole range is sorted by ss_mintrosort().
+ * Otherwise it is sorted in blocks of SS_BLOCKSIZE entries that are
+ * combined with ss_swapmerge().
+ *
+ * When lastsuffix is nonzero, the entry at first is left out of the sort
+ * and inserted afterwards, compared through a temporary two-element PA
+ * record whose second element is n - 2.
+ */
+void
+sssort(const sauchar_t *T, const saidx_t *PA,
+       saidx_t *first, saidx_t *last,
+       saidx_t *buf, saidx_t bufsize,
+       saidx_t depth, saidx_t n, saint_t lastsuffix) {
+  saidx_t *a;
+#if SS_BLOCKSIZE != 0
+  saidx_t *b, *middle, *curbuf;
+  saidx_t j, k, curbufsize, limit;
+#endif
+  saidx_t i;
+
+  if(lastsuffix != 0) { ++first; }
+
+#if SS_BLOCKSIZE == 0
+  ss_mintrosort(T, PA, first, last, depth);
+#else
+  if((bufsize < SS_BLOCKSIZE) &&
+      (bufsize < (last - first)) &&
+      (bufsize < (limit = ss_isqrt(last - first)))) {
+    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
+    buf = middle = last - limit, bufsize = limit; /* use the last limit entries of the range as the buffer */
+  } else {
+    middle = last, limit = 0;
+  }
+  /* Sort each full block; i counts the finished blocks, and its low one
+     bits tell which equally sized neighbours are merged right away. */
+  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#endif
+    curbufsize = last - (a + SS_BLOCKSIZE); /* the still unsorted rest of the range serves as buffer when it is larger than buf */
+    curbuf = a + SS_BLOCKSIZE;
+    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
+      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+    }
+  }
+  /* Sort the remaining partial block, then merge it with the runs that are
+     still pending, one per set bit of i. */
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+  ss_mintrosort(T, PA, a, middle, depth);
+#elif 1 < SS_BLOCKSIZE
+  ss_insertionsort(T, PA, a, middle, depth);
+#endif
+  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
+    if(i & 1) {
+      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
+      a -= k;
+    }
+  }
+  if(limit != 0) {
+    /* The tail of the range served as the buffer: sort it and merge it in. */
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, middle, last, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, middle, last, depth);
+#endif
+    ss_inplacemerge(T, PA, first, middle, last, depth);
+  }
+#endif
+
+  if(lastsuffix != 0) {
+    /* Insert last type B* suffix. */
+    saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
+    for(a = first, i = *(first - 1);
+        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
+        ++a) {
+      *(a - 1) = *a;
+    }
+    *(a - 1) = i;
+  }
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/lib/trsort.c
+++ b/loader/tools/dali/salvador/src/libdivsufsort/lib/trsort.c
@ -0,0 +1,586 @@
+/*
+ * trsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+/* lg_table[i] holds floor(log2(i)) for i in 1..255; entry 0 is -1.
+   tr_ilg uses it to take the integer logarithm of a wider value by looking
+   up its most significant non-zero byte. */
+static const saint_t lg_table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+/* Integer base-2 logarithm of n by byte-wise lookup in lg_table: the most
+   significant non-zero byte is located from the top down and its bit offset
+   is added to that byte's table entry.  Yields -1 when n is 0. */
+static INLINE
+saint_t
+tr_ilg(saidx_t n) {
+#if defined(BUILD_DIVSUFSORT64)
+  /* Upper four bytes exist only in the 64-bit index build. */
+  if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
+  if(n >> 48) { return 48 + lg_table[(n >> 48) & 0xff]; }
+  if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
+  if(n >> 32) { return 32 + lg_table[(n >> 32) & 0xff]; }
+#endif
+  if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
+  if(n & 0x00ff0000) { return 16 + lg_table[(n >> 16) & 0xff]; }
+  if(n & 0x0000ff00) { return  8 + lg_table[(n >>  8) & 0xff]; }
+  return 0 + lg_table[(n >>  0) & 0xff];
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Simple insertionsort for small size groups.
+   Orders the suffix indices in [first, last) by their key ISAd[index].
+   When the inserted element ties with the element it stops at (r == 0),
+   that element is bit-complemented to mark the tie; complemented (negative)
+   entries are then skipped as a unit while shifting. */
+static
+void
+tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+  saidx_t *a, *b;
+  saidx_t t, r;
+
+  for(a = first + 1; a < last; ++a) {
+    /* t is the element being inserted; r is its key minus the key of *b. */
+    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+      /* Shift *b one slot right, then keep shifting past negative entries. */
+      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
+      if(b < first) { break; }
+    }
+    if(r == 0) { *b = ~*b; }
+    *(b + 1) = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Sift-down step used by tr_heapsort: sinks the element at heap index i of
+   SA[0..size) until neither child has a larger key.  Keys are ISAd[SA[x]]
+   and the larger key is kept nearer the root. */
+static INLINE
+void
+tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
+  saidx_t j, k;
+  saidx_t v;
+  saidx_t c, d, e;
+
+  /* v / c: the value being sunk and its key.  j: left child, k: chosen child. */
+  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = ISAd[SA[k = j++]];
+    /* NOTE(review): the right child SA[j] is read without a j < size check;
+       callers appear to guarantee that slot is readable - confirm before reuse. */
+    if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
+    if(d <= c) { break; }
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort.
+   Sorts SA[0..size) into ascending order of the key ISAd[SA[x]]. */
+static
+void
+tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
+  saidx_t i, m;
+  saidx_t t;
+
+  m = size;
+  if((size % 2) == 0) {
+    /* Even size: heapify only the first size-1 elements and treat the last
+       one separately, after making sure it is not larger than SA[m / 2]. */
+    m--;
+    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  /* Build the heap over SA[0..m). */
+  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
+  /* Even size: exchange the heap root with the last slot, then re-heapify. */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
+  /* Repeatedly move the root into the shrinking tail. */
+  for(i = m - 1; 0 < i; --i) {
+    t = SA[0], SA[0] = SA[i];
+    tr_fixdown(ISAd, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Picks, among the three candidate pointers, the one whose key ISAd[*p]
+   is the median.  Only local pointer copies are compared and selected. */
+static INLINE
+saidx_t *
+tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+  saidx_t *lo = v1, *hi = v2;
+
+  /* Order the first two candidates so that key(lo) <= key(hi). */
+  if(ISAd[*v1] > ISAd[*v2]) { lo = v2; hi = v1; }
+  /* hi is the median unless the third candidate falls below it. */
+  if(ISAd[*hi] <= ISAd[*v3]) { return hi; }
+  return (ISAd[*lo] > ISAd[*v3]) ? lo : v3;
+}
+
+/* Returns the median of five elements.
+   Candidates are compared by their keys ISAd[*v].  The SWAP calls operate on
+   the pointer parameters themselves (t is a saidx_t *), so presumably only
+   the local copies are permuted, not the array - confirm against the SWAP
+   macro definition. */
+static INLINE
+saidx_t *
+tr_median5(const saidx_t *ISAd,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+  saidx_t *t;
+  /* Fixed comparison network; the order of the steps matters. */
+  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+  return v3;
+}
+
+/* Chooses a pivot for the range [first, last): the median of 3 for up to 32
+   elements, the median of 5 for up to 512, and otherwise the median of three
+   medians-of-3 sampled around the start, the middle and the end. */
+static INLINE
+saidx_t *
+tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+  saidx_t len = last - first;
+  saidx_t *middle = first + len / 2;
+  saidx_t *lo, *mid, *hi;
+  saidx_t step;
+
+  if(len <= 32) {
+    return tr_median3(ISAd, first, middle, last - 1);
+  }
+  if(len <= 512) {
+    step = len >> 2;
+    return tr_median5(ISAd, first, first + step, middle, last - 1 - step, last - 1);
+  }
+
+  /* Large range: take one median-of-3 from each of three regions. */
+  step = len >> 3;
+  lo  = tr_median3(ISAd, first, first + step, first + (step << 1));
+  mid = tr_median3(ISAd, middle - step, middle, middle + step);
+  hi  = tr_median3(ISAd, last - 1 - (step << 1), last - 1 - step, last - 1);
+  return tr_median3(ISAd, lo, mid, hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Work budget consulted through trbudget_check before a group is sorted
+   one level deeper. */
+typedef struct _trbudget_t trbudget_t;
+struct _trbudget_t {
+  saidx_t chance;  /* number of refills still available */
+  saidx_t remain;  /* budget left before another refill is needed */
+  saidx_t incval;  /* amount added to remain on each refill */
+  saidx_t count;   /* accumulated size of the requests that were refused */
+};
+
+/* Initialises a budget: `chance` refills are allowed, each worth `incval`,
+   and the first `incval` units are available immediately.  The refused-size
+   counter starts at zero so that trbudget_check never accumulates into an
+   indeterminate value when a caller forgets to reset it. */
+static INLINE
+void
+trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
+  budget->chance = chance;
+  budget->remain = budget->incval = incval;
+  budget->count = 0;
+}
+
+/* Charges `size` units against the budget.  Returns 1 when the request is
+   granted (directly, or after spending one refill) and 0 when it is refused,
+   in which case `size` is added to budget->count. */
+static INLINE
+saint_t
+trbudget_check(trbudget_t *budget, saidx_t size) {
+  saint_t granted = 1;
+
+  if(size <= budget->remain) {
+    budget->remain -= size;
+  } else if(budget->chance != 0) {
+    /* Out of budget: spend one refill and charge the request against it. */
+    budget->chance -= 1;
+    budget->remain += budget->incval - size;
+  } else {
+    budget->count += size;
+    granted = 0;
+  }
+  return granted;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Three-way partition of [first, last) around the key value v, where the key
+   of an element x is ISAd[x].  Scanning starts at `middle`; elements in
+   [first, middle) are presumably already known to have key v (callers pass
+   either first or first + 1) - confirm against callers.
+   On return *pa and *pb delimit the block of elements whose key equals v:
+   keys below v end up before *pa and keys above v from *pb onwards. */
+static INLINE
+void
+tr_partition(const saidx_t *ISAd,
+             saidx_t *first, saidx_t *middle, saidx_t *last,
+             saidx_t **pa, saidx_t **pb, saidx_t v) {
+  saidx_t *a, *b, *c, *d, *e, *f;
+  saidx_t t, s;
+  saidx_t x = 0;
+
+  /* Left scan: skip the leading run of keys equal to v, then gather further
+     equal keys at the left edge (behind a) while keys stay <= v. */
+  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
+  if(((a = b) < last) && (x < v)) {
+    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }
+    }
+  }
+  /* Right scan: the mirror image, gathering equal keys at the right edge
+     (beyond d) while keys stay >= v. */
+  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+  if((b < (d = c)) && (x > v)) {
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }
+    }
+  }
+  /* Main loop: exchange the misplaced pair and continue both scans. */
+  for(; b < c;) {
+    SWAP(*b, *c);
+    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }
+    }
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }
+    }
+  }
+
+  if(a <= d) {
+    /* Move the equal keys parked at both edges into the middle, swapping
+       only the shorter of the two runs on each side. */
+    c = b - 1;
+    if((s = a - first) > (t = b - a)) { s = t; }
+    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+    if((s = d - c) > (t = last - d - 1)) { s = t; }
+    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+    first += (b - a), last -= (d - c);
+  }
+  *pa = first, *pb = last;
+}
+
+/* Fills the middle partition [a, b) from its two neighbours: every entry *c
+   of the outer partitions whose predecessor s = *c - depth currently has rank
+   v (the rank shared by the middle partition) is written into the middle
+   partition, and ISA[s] is set to the slot it landed in. */
+static
+void
+tr_copy(saidx_t *ISA, const saidx_t *SA,
+        saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+        saidx_t depth) {
+  /* sort suffixes of middle partition
+     by using sorted order of suffixes of left and right partition. */
+  saidx_t *c, *d, *e;
+  saidx_t s, v;
+
+  v = b - SA - 1;
+  /* Forward pass: scan from `first`, appending at d which grows from a - 1;
+     the scan also runs over the entries it has just written. */
+  for(c = first, d = a - 1; c <= d; ++c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      ISA[s] = d - SA;
+    }
+  }
+  /* Backward pass: scan down from last - 1, filling from b - 1 downwards
+     until the slots written by the forward pass are reached. */
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      ISA[s] = d - SA;
+    }
+  }
+}
+
+/* Variant of tr_copy that fills the middle partition [a, b) in the same two
+   passes but gives copied suffixes a shared rank: a new rank value is started
+   only when the rank of the source entry (ISA[s + depth]) changes, so
+   consecutive copies from equally ranked sources receive the same rank. */
+static
+void
+tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
+               saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+               saidx_t depth) {
+  saidx_t *c, *d, *e;
+  saidx_t s, v;
+  saidx_t rank, lastrank, newrank = -1;
+
+  v = b - SA - 1;
+  lastrank = -1;
+  /* Forward pass from the left partition. */
+  for(c = first, d = a - 1; c <= d; ++c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+      ISA[s] = newrank;
+    }
+  }
+
+  /* Walk back over [first, d] and rewrite each run of equal ranks to the
+     position of the run's last element. */
+  lastrank = -1;
+  for(e = d; first <= e; --e) {
+    rank = ISA[*e];
+    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
+    if(newrank != rank) { ISA[*e] = newrank; }
+  }
+
+  /* Backward pass from the right partition. */
+  lastrank = -1;
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+      ISA[s] = newrank;
+    }
+  }
+}
+
+/* Introsort over the group [first, last) of SA, keyed by ISAd[x], driven by
+   an explicit stack instead of recursion.
+   `limit` doubles as the state selector of the main loop:
+     >= 0  remaining introsort depth (falls back to heapsort when it hits 0),
+     -1    tandem repeat partition,
+     -2    tandem repeat copy,
+     other negative values (the code uses -3): sorted partition.
+   Stack entries carry (ISAd, first, last, limit, trlink); trlink is the
+   stack index of the pending tandem-repeat entry, or -1 when there is none.
+   Setting stack[trlink].d to -1 makes the later tandem repeat copy use
+   tr_partialcopy instead of tr_copy.
+   `budget` is charged through trbudget_check before descending one level. */
+static
+void
+tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
+             saidx_t *SA, saidx_t *first, saidx_t *last,
+             trbudget_t *budget) {
+#define STACK_SIZE TR_STACKSIZE
+  struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
+  saidx_t *a, *b, *c;
+  saidx_t t;
+  saidx_t v, x = 0;
+  saidx_t incr = ISAd - ISA;
+  saint_t limit, next;
+  saint_t ssize, trlink = -1;
+
+  for(ssize = 0, limit = tr_ilg(last - first);;) {
+
+    if(limit < 0) {
+      if(limit == -1) {
+        /* tandem repeat partition */
+        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
+
+        /* update ranks */
+        if(a < last) {
+          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+        }
+        if(b < last) {
+          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+        }
+
+        /* push */
+        if(1 < (b - a)) {
+          /* Two entries: the middle range, then the -2 marker that triggers
+             the tandem repeat copy once both outer parts are done. */
+          STACK_PUSH5(NULL, a, b, 0, 0);
+          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
+          trlink = ssize - 2;
+        }
+        /* Continue with the smaller outer part, deferring the larger one. */
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
+            last = a, limit = tr_ilg(a - first);
+          } else if(1 < (last - b)) {
+            first = b, limit = tr_ilg(last - b);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
+            first = b, limit = tr_ilg(last - b);
+          } else if(1 < (a - first)) {
+            last = a, limit = tr_ilg(a - first);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      } else if(limit == -2) {
+        /* tandem repeat copy */
+        a = stack[--ssize].b, b = stack[ssize].c;
+        if(stack[ssize].d == 0) {
+          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+        } else {
+          if(0 <= trlink) { stack[trlink].d = -1; }
+          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
+        }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      } else {
+        /* sorted partition */
+        if(0 <= *first) {
+          /* Leading non-negative entries get their own position as rank. */
+          a = first;
+          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
+          first = a;
+        }
+        if(first < last) {
+          /* Undo the complement marks on the run of negative entries; the
+             run plus the entry that follows it forms the group [first, a). */
+          a = first; do { *a = ~*a; } while(*++a < 0);
+          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
+          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+          /* push */
+          if(trbudget_check(budget, a - first)) {
+            if((a - first) <= (last - a)) {
+              STACK_PUSH5(ISAd, a, last, -3, trlink);
+              ISAd += incr, last = a, limit = next;
+            } else {
+              if(1 < (last - a)) {
+                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
+                first = a, limit = -3;
+              } else {
+                ISAd += incr, last = a, limit = next;
+              }
+            }
+          } else {
+            /* Budget refused: the group stays unsorted at this depth. */
+            if(0 <= trlink) { stack[trlink].d = -1; }
+            if(1 < (last - a)) {
+              first = a, limit = -3;
+            } else {
+              STACK_POP5(ISAd, first, last, limit, trlink);
+            }
+          }
+        } else {
+          STACK_POP5(ISAd, first, last, limit, trlink);
+        }
+      }
+      continue;
+    }
+
+    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+      tr_insertionsort(ISAd, first, last);
+      limit = -3;
+      continue;
+    }
+
+    if(limit-- == 0) {
+      /* Depth limit reached: heapsort, then complement all but the last
+         element of every run of equal keys, as tr_insertionsort does. */
+      tr_heapsort(ISAd, first, last - first);
+      for(a = last - 1; first < a; a = b) {
+        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
+      }
+      limit = -3;
+      continue;
+    }
+
+    /* choose pivot */
+    a = tr_pivot(ISAd, first, last);
+    SWAP(*first, *a);
+    v = ISAd[*first];
+
+    /* partition */
+    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
+    if((last - first) != (b - a)) {
+      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
+
+      /* update ranks */
+      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+      /* push */
+      /* The branches below only choose which of the three parts
+         ([first,a), [a,b) at ISAd + incr, [b,last)) to process next and
+         which to push, based on their relative sizes. */
+      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
+        if((a - first) <= (last - b)) {
+          if((last - b) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              last = a;
+            } else if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((a - first) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        } else {
+          if((a - first) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              first = b;
+            } else if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((last - b) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        }
+      } else {
+        /* Middle part is trivial or the budget refused it: only the outer
+           parts are processed further. */
+        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            last = a;
+          } else if(1 < (last - b)) {
+            first = b;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            first = b;
+          } else if(1 < (a - first)) {
+            last = a;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      }
+    } else {
+      /* Every key equals the pivot: retry the whole range one level deeper
+         if the budget allows it. */
+      if(trbudget_check(budget, last - first)) {
+        limit = tr_ilg(last - first), ISAd += incr;
+      } else {
+        if(0 <= trlink) { stack[trlink].d = -1; }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      }
+    }
+  }
+#undef STACK_SIZE
+}
+
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Tandem repeat sort.
+   Repeatedly refines the groups of SA[0..n) using the ranks in ISA, starting
+   with the rank offset `depth` and doubling the offset after every pass.
+   A negative entry -k in SA marks a run of k entries that needs no further
+   work and is skipped.  The passes stop when SA[0] holds -n (or less) or when
+   a pass leaves nothing unsorted. */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
+  saidx_t *ISAd;
+  saidx_t *first, *last;
+  trbudget_t budget;
+  saidx_t t, skip, unsorted;
+
+  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
+/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
+  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
+    first = SA;
+    skip = 0;      /* negative length of the finished run being accumulated */
+    unsorted = 0;  /* total size refused by the budget during this pass */
+    do {
+      /* Negative entry: jump over the run and extend the current skip. */
+      if((t = *first) < 0) { first -= t; skip += t; }
+      else {
+        /* Store the accumulated run length at the start of that run. */
+        if(skip != 0) { *(first + skip) = skip; skip = 0; }
+        /* The group of suffix t ends at the position given by its rank. */
+        last = SA + ISA[t] + 1;
+        if(1 < (last - first)) {
+          budget.count = 0;
+          tr_introsort(ISA, ISAd, SA, first, last, &budget);
+          if(budget.count != 0) { unsorted += budget.count; }
+          else { skip = first - last; }
+        } else if((last - first) == 1) {
+          skip = -1;
+        }
+        first = last;
+      }
+    } while(first < (SA + n));
+    if(skip != 0) { *(first + skip) = skip; }
+    if(unsorted == 0) { break; }
+  }
+}
--- a/loader/tools/dali/salvador/src/libdivsufsort/pkgconfig/CMakeLists.txt
+++ b/loader/tools/dali/salvador/src/libdivsufsort/pkgconfig/CMakeLists.txt
@ -0,0 +1,9 @@
+## generate libdivsufsort.pc ##
+# W64BIT is substituted into the .pc template (package name and -l suffix);
+# it is empty for the default library.
+set(W64BIT "")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
+# When the 64-bit variant is enabled, generate and install a second .pc file
+# from the same template with the "64" suffix.
+if(BUILD_DIVSUFSORT64)
+  set(W64BIT "64")
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
+endif(BUILD_DIVSUFSORT64)
--- a/loader/tools/dali/salvador/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
+++ b/loader/tools/dali/salvador/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
@ -0,0 +1,11 @@
+# pkg-config template: the @VAR@ placeholders are filled in by CMake's
+# configure_file(... @ONLY); ${prefix}, ${libdir} and ${includedir} are left
+# for pkg-config itself to expand.
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@CMAKE_INSTALL_LIBDIR@
+includedir=@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: @PROJECT_NAME@@W64BIT@
+Description: @PROJECT_DESCRIPTION@
+Version: @PROJECT_VERSION_FULL@
+URL: @PROJECT_URL@
+Libs: -L${libdir} -ldivsufsort@W64BIT@
+Cflags: -I${includedir}
--- a/loader/tools/dali/salvador/src/libsalvador.h
+++ b/loader/tools/dali/salvador/src/libsalvador.h
@ -0,0 +1,41 @@
+/*
+ * libsalvador.h - library definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _LIB_SALVADOR_H
+#define _LIB_SALVADOR_H
+
+#include "format.h"
+#include "shrink.h"
+#include "expand.h"
+
+/* Format selection flags.
+ * NOTE(review): the values are distinct bits (1 and 2), so they look
+ * combinable with bitwise OR - confirm against the callers. */
+#define FLG_IS_INVERTED  1       /**< Use inverted (V2) format */
+#define FLG_IS_BACKWARD  2       /**< Use backward encoding */
+
+#endif /* _LIB_SALVADOR_H */
--- a/loader/tools/dali/salvador/src/matchfinder.c
+++ b/loader/tools/dali/salvador/src/matchfinder.c
@ -0,0 +1,393 @@
+/*
+ * matchfinder.c - LZ match finder implementation
+ *
+ * The following copying information applies to this specific source code file:
+ *
+ * Written in 2019-2021 by Emmanuel Marty <marty.emmanuel@gmail.com>
+ * Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "matchfinder.h"
+#include "format.h"
+#include "libsalvador.h"
+
+/**
+ * Hash an index down to TAG_BITS bits
+ *
+ * @param nIndex index value
+ *
+ * @return hash, taken from the top TAG_BITS bits of the 64-bit product of nIndex and a fixed multiplier
+ */
+static inline int salvador_get_index_tag(unsigned int nIndex) {
+   const unsigned long long nProduct = (unsigned long long)nIndex * 11400714819323198485ULL;
+
+   return (int)(nProduct >> (64ULL - TAG_BITS));
+}
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * The work is done in place in three stages: the suffix array is built into pCompressor->intervals and widened
+ * to 64-bit entries; each entry is then combined with a clamped, tagged LCP value; finally the lcp-interval
+ * tree is built, leaving per-interval links in intervals[] and per-position links in pos_data[].
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int salvador_build_suffix_array(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
+   unsigned long long *intervals = pCompressor->intervals;
+
+   /* Build suffix array from input data */
+   saidx_t *suffixArray = (saidx_t*)intervals;
+   if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, suffixArray, nInWindowSize) != 0) {
+      return 100;
+   }
+
+   int i, r;
+
+   /* Widen the suffix array entries to unsigned long long in place. The two views share the same buffer, so the
+    * copy runs from the last entry down to the first. */
+   for (i = nInWindowSize - 1; i >= 0; i--) {
+      intervals[i] = suffixArray[i];
+   }
+
+   int *PLCP = (int*)pCompressor->pos_data;  /* Use temporarily */
+   int *Phi = PLCP;
+   int nCurLen = 0;
+
+   /* Compute the permuted LCP first (Kärkkäinen method) */
+   /* Phi[p] is the suffix that precedes suffix p in the suffix array, or -1 for the first one. Phi and PLCP
+    * alias the same memory: each Phi[i] is read before PLCP[i] overwrites it. */
+   Phi[intervals[0]] = -1;
+   for (i = 1; i < nInWindowSize; i++)
+      Phi[intervals[i]] = (unsigned int)intervals[i - 1];
+   for (i = 0; i < nInWindowSize; i++) {
+      if (Phi[i] == -1) {
+         PLCP[i] = 0;
+         continue;
+      }
+      int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
+      while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
+      PLCP[i] = nCurLen;
+      if (nCurLen > 0)
+         nCurLen--;
+   }
+
+   /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
+    * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
+    * and the interval builder below doesn't need it either. */
+   intervals[0] &= POS_MASK;
+
+   for (i = 1; i < nInWindowSize; i++) {
+      int nIndex = (int)(intervals[i] & POS_MASK);
+      int nLen = PLCP[nIndex];
+      /* Lengths below MIN_MATCH_SIZE are dropped to 0; longer ones are capped at LCP_MAX */
+      if (nLen < MIN_MATCH_SIZE)
+         nLen = 0;
+      if (nLen > LCP_MAX)
+         nLen = LCP_MAX;
+      int nTaggedLen = 0;
+      /* A non-zero length is stored above a TAG_BITS-wide hash of the position */
+      if (nLen)
+         nTaggedLen = (nLen << TAG_BITS) | (salvador_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
+      intervals[i] = ((unsigned long long)nIndex) | (((unsigned long long)nTaggedLen) << LCP_SHIFT);
+   }
+
+   /**
+    * Build intervals for finding matches
+    *
+    * Methodology and code fragment taken from wimlib (CC0 license):
+    * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+    */
+   unsigned long long * const SA_and_LCP = intervals;
+   unsigned long long *pos_data = pCompressor->pos_data;
+   unsigned long long next_interval_idx;
+   unsigned long long *top = pCompressor->open_intervals;
+   unsigned long long prev_pos = SA_and_LCP[0] & POS_MASK;
+
+   /* open_intervals is used as a stack of currently open intervals; entry 0 is the root (lcp 0, index 0) */
+   *top = 0;
+   intervals[0] = 0;
+   next_interval_idx = 1;
+
+   for (r = 1; r < nInWindowSize; r++) {
+      const unsigned long long next_pos = SA_and_LCP[r] & POS_MASK;
+      const unsigned long long next_lcp = SA_and_LCP[r] & LCP_MASK;
+      const unsigned long long top_lcp = *top & LCP_MASK;
+
+      if (next_lcp == top_lcp) {
+         /* Continuing the deepest open interval  */
+         pos_data[prev_pos] = *top;
+      }
+      else if (next_lcp > top_lcp) {
+         /* Opening a new interval  */
+         *++top = next_lcp | next_interval_idx++;
+         pos_data[prev_pos] = *top;
+      }
+      else {
+         /* Closing the deepest open interval  */
+         pos_data[prev_pos] = *top;
+         for (;;) {
+            const unsigned long long closed_interval_idx = *top-- & POS_MASK;
+            const unsigned long long superinterval_lcp = *top & LCP_MASK;
+
+            if (next_lcp == superinterval_lcp) {
+               /* Continuing the superinterval */
+               intervals[closed_interval_idx] = *top;
+               break;
+            }
+            else if (next_lcp > superinterval_lcp) {
+               /* Creating a new interval that is a
+                * superinterval of the one being
+                * closed, but still a subinterval of
+                * its superinterval  */
+               *++top = next_lcp | next_interval_idx++;
+               intervals[closed_interval_idx] = *top;
+               break;
+            }
+            else {
+               /* Also closing the superinterval  */
+               intervals[closed_interval_idx] = *top;
+            }
+         }
+      }
+      prev_pos = next_pos;
+   }
+
+   /* Close any still-open intervals.  */
+   pos_data[prev_pos] = *top;
+   for (; top > pCompressor->open_intervals; top--)
+      intervals[*top & POS_MASK] = *(top - 1);
+
+   /* Success */
+   return 0;
+}
+
+/**
+ * Find matches at the specified offset in the input window
+ *
+ * Besides reporting matches, this call updates intervals[] and pos_data[] for nOffset, which is why it must also
+ * be run (with nMaxMatches set to 0) for positions whose matches are not needed.
+ *
+ * A candidate is only stored while fewer than nMaxMatches matches have been written and its offset does not
+ * exceed pCompressor->max_offset. A candidate that is exactly one byte shorter and one byte closer than the
+ * previous one is not stored separately; it increments the depth value of the previously stored match instead.
+ *
+ * @param pCompressor compression context
+ * @param nOffset offset to find matches at, in the input window
+ * @param pMatches pointer to returned matches
+ * @param pMatchDepth pointer to returned match depths
+ * @param nMaxMatches maximum number of matches to return (0 for none)
+ *
+ * @return number of matches
+ */
+static int salvador_find_matches_at(salvador_compressor *pCompressor, const int nOffset, salvador_match *pMatches, unsigned short *pMatchDepth, const int nMaxMatches) {
+   unsigned long long *intervals = pCompressor->intervals;
+   unsigned long long *pos_data = pCompressor->pos_data;
+   unsigned long long ref;
+   unsigned long long super_ref;
+   unsigned long long match_pos;
+   salvador_match *matchptr;
+   unsigned short *depthptr;
+   int nPrevOffset, nPrevLen, nCurDepth;
+   unsigned short* cur_depth;
+   int nMatchOffset, nMatchLen;
+   const int nMaxOffset = pCompressor->max_offset;
+
+   /**
+    * Find matches using intervals
+    *
+    * Taken from wimlib (CC0 license):
+    * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+    */
+
+    /* Get the deepest lcp-interval containing the current suffix. */
+   ref = pos_data[nOffset];
+
+   pos_data[nOffset] = 0;
+
+   /* Ascend until we reach a visited interval, the root, or a child of the
+    * root.  Link unvisited intervals to the current suffix as we go.  */
+   while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
+      intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+      ref = super_ref;
+   }
+
+   if (super_ref == 0) {
+      /* In this case, the current interval may be any of:
+       * (1) the root;
+       * (2) an unvisited child of the root */
+
+      if (ref != 0)  /* Not the root?  */
+         intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+      return 0;
+   }
+
+   /* Ascend indirectly via pos_data[] links.  */
+   
+   match_pos = super_ref & EXCL_VISITED_MASK;
+   matchptr = pMatches;
+   depthptr = pMatchDepth;
+
+   /* State of the most recently considered candidate, used to fold runs of candidates into one stored match */
+   nPrevOffset = 0;
+   nPrevLen = 0;
+   nCurDepth = 0;
+   cur_depth = NULL;
+   
+   if ((matchptr - pMatches) < nMaxMatches) {
+      nMatchOffset = (int)(nOffset - match_pos);
+
+      if (nMatchOffset <= nMaxOffset) {
+         /* The match length is stored in ref above the tag bits */
+         nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+         matchptr->length = nMatchLen;
+         matchptr->offset = nMatchOffset;
+         matchptr++;
+         
+         *depthptr = nCurDepth = 0;
+         cur_depth = depthptr++;
+
+         nPrevLen = nMatchLen;
+         nPrevOffset = nMatchOffset;
+      }
+   }
+
+   for (;;) {
+      /* First hop along the pos_data[] links: also considered as a match candidate */
+      if ((super_ref = pos_data[match_pos]) > ref) {
+         match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
+
+         if ((matchptr - pMatches) < nMaxMatches) {
+            nMatchOffset = (int)(nOffset - match_pos);
+
+            if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+               nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+               if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+                  *cur_depth = ++nCurDepth;
+               }
+               else {
+                  matchptr->length = nMatchLen;
+                  matchptr->offset = nMatchOffset;
+                  matchptr++;
+                  
+                  *depthptr = nCurDepth = 0;
+                  cur_depth = depthptr++;
+               }
+
+               nPrevLen = nMatchLen;
+               nPrevOffset = nMatchOffset;
+            }
+         }
+      }
+
+      /* Remaining hops: followed without recording candidates */
+      while ((super_ref = pos_data[match_pos]) > ref)
+         match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
+
+      /* Link the current interval to this offset and the reached position back to the interval */
+      intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+      pos_data[match_pos] = (unsigned long long)ref;
+
+      if ((matchptr - pMatches) < nMaxMatches) {
+         nMatchOffset = (int)(nOffset - match_pos);
+
+         if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+            nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+            if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+               *cur_depth = ++nCurDepth;
+            }
+            else {
+               matchptr->length = nMatchLen;
+               matchptr->offset = nMatchOffset;
+               matchptr++;
+               
+               *depthptr = nCurDepth = 0;
+               cur_depth = depthptr++;
+            }
+
+            nPrevLen = nMatchLen;
+            nPrevOffset = nMatchOffset;
+         }
+      }
+
+      /* Stop when there is no further link; otherwise move up and consider that interval's position */
+      if (super_ref == 0)
+         break;
+      ref = super_ref;
+      match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
+
+      if ((matchptr - pMatches) < nMaxMatches) {
+         nMatchOffset = (int)(nOffset - match_pos);
+
+         if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+            nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+            if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+               *cur_depth = ++nCurDepth;
+            }
+            else {
+               matchptr->length = nMatchLen;
+               matchptr->offset = nMatchOffset;
+               matchptr++;
+               
+               *depthptr = nCurDepth = 0;
+               cur_depth = depthptr++;
+            }
+
+            nPrevLen = nMatchLen;
+            nPrevOffset = nMatchOffset;
+         }
+      }
+   }
+
+   return (int)(matchptr - pMatches);
+}
+
+/**
+ * Advance the match finder over bytes that were already compressed
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void salvador_skip_matches(salvador_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   salvador_match dummyMatch;
+   unsigned short nDummyDepth;
+   int nOffset = nStartOffset;
+
+   /* Every position still has to be scanned, because scanning is what lazily updates the intervals. The match
+    * limit of 0 makes sure nothing is written to the dummy outputs. */
+   while (nOffset < nEndOffset) {
+      salvador_find_matches_at(pCompressor, nOffset, &dummyMatch, &nDummyDepth, 0);
+      nOffset++;
+   }
+}
+
+/**
+ * Collect the matches for every position of the data to be compressed
+ *
+ * Each position owns nMatchesPerOffset consecutive slots in pCompressor->match and pCompressor->match_depth;
+ * slots that receive no match are zeroed.
+ *
+ * @param pCompressor compression context
+ * @param nMatchesPerOffset maximum number of matches to store for each offset
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise (not read by this function)
+ */
+void salvador_find_all_matches(salvador_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags) {
+   salvador_match *pSlotMatches = pCompressor->match;
+   unsigned short *pSlotDepths = pCompressor->match_depth;
+   int nOffset;
+
+   for (nOffset = nStartOffset; nOffset < nEndOffset; nOffset++, pSlotMatches += nMatchesPerOffset, pSlotDepths += nMatchesPerOffset) {
+      const int nFound = salvador_find_matches_at(pCompressor, nOffset, pSlotMatches, pSlotDepths, nMatchesPerOffset);
+      const int nUnused = nMatchesPerOffset - nFound;
+
+      /* Clear the slots this position did not fill */
+      if (nUnused > 0) {
+         memset(pSlotMatches + nFound, 0, nUnused * sizeof(salvador_match));
+         memset(pSlotDepths + nFound, 0, nUnused * sizeof(unsigned short));
+      }
+   }
+}
--- a/loader/tools/dali/salvador/src/matchfinder.h
+++ b/loader/tools/dali/salvador/src/matchfinder.h
@ -0,0 +1,77 @@
+/*
+ * matchfinder.h - LZ match finder definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _MATCHFINDER_H
+#define _MATCHFINDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Forward declarations */
+typedef struct _salvador_match salvador_match;
+typedef struct _salvador_compressor salvador_compressor;
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int salvador_build_suffix_array(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
+
+/**
+ * Skip previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void salvador_skip_matches(salvador_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+
+/**
+ * Find all matches for the data to be compressed
+ *
+ * @param pCompressor compression context
+ * @param nMatchesPerOffset maximum number of matches to store for each offset
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ */
+void salvador_find_all_matches(salvador_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MATCHFINDER_H */
--- a/loader/tools/dali/salvador/src/salvador.c
+++ b/loader/tools/dali/salvador/src/salvador.c
--- a/loader/tools/dali/salvador/src/shrink.c
+++ b/loader/tools/dali/salvador/src/shrink.c
--- a/loader/tools/dali/salvador/src/shrink.h
+++ b/loader/tools/dali/salvador/src/shrink.h
@ -0,0 +1,175 @@
+/*
+ * shrink.h - compressor definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _SHRINK_H
+#define _SHRINK_H
+
+#include "divsufsort.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LCP_BITS 18                                      /* width of the combined LCP + tag field */
+#define TAG_BITS 4                                       /* part of LCP_BITS that is reserved for the tag */
+#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)        /* = 16383 (14 bits) */
+#define LCP_AND_TAG_MAX ((1U<<LCP_BITS) - 1)             /* = 262143 (18 bits) */
+#define LCP_SHIFT (63-LCP_BITS)                          /* = 45 */
+#define LCP_MASK (((1ULL<<LCP_BITS) - 1) << LCP_SHIFT)   /* bits 45..62 */
+#define POS_MASK ((1ULL<<LCP_SHIFT) - 1)                 /* bits 0..44 */
+#define VISITED_FLAG 0x8000000000000000ULL               /* bit 63 */
+#define EXCL_VISITED_MASK  0x7fffffffffffffffULL         /* every bit except bit 63 */
+
+#define NINITIAL_ARRIVALS_PER_POSITION 40
+#define NMAX_ARRIVALS_PER_POSITION 92
+#define NMATCHES_PER_INDEX 64
+#define MATCHES_PER_INDEX_SHIFT 6                        /* 1 << 6 == NMATCHES_PER_INDEX */
+
+#define LEAVE_ALONE_MATCH_SIZE 340
+
+/** One match option (both members are packed bit-fields) */
+typedef struct _salvador_match {
+   unsigned int length:14;   /* 14 bits: values 0..16383 */
+   unsigned int offset:17;   /* 17 bits: values 0..131071 */
+} salvador_match;
+
+/** One finalized match (plain ints, unlike the packed bit-fields of salvador_match) */
+typedef struct _salvador_final_match {
+   int length;
+   int offset;
+} salvador_final_match;
+
+/** Forward arrival slot */
+typedef struct _salvador_arrival {
+   int cost;
+
+   unsigned int from_pos:17;    /* 17 bits, same width as salvador_match.offset */
+   int from_slot:11;            /* NOTE(review): signedness of a plain 'int' bit-field is implementation-defined */
+
+   unsigned int rep_offset;
+
+   unsigned int rep_pos:17;     /* 17 bits */
+   unsigned int match_len:14;   /* 14 bits, same width as salvador_match.length */
+
+   int num_literals;
+   int score;
+} salvador_arrival;
+
+/** Visited match */
+typedef int salvador_visited;
+
+/** Compression statistics (the type of the pStats argument of salvador_compress()) */
+typedef struct _salvador_stats {
+   int num_literals;
+   int num_normal_matches;
+   int num_rep_matches;
+   int num_eod;
+
+   int safe_dist;
+
+   int min_literals;
+   int max_literals;
+   int total_literals;
+
+   int min_offset;
+   int max_offset;
+   long long total_offsets;   /* the only 64-bit member; all other totals are plain int */
+
+   int min_match_len;
+   int max_match_len;
+   int total_match_lens;
+
+   int min_rle1_len;
+   int max_rle1_len;
+   int total_rle1_lens;
+
+   int min_rle2_len;
+   int max_rle2_len;
+   int total_rle2_lens;
+
+   int commands_divisor;
+   int literals_divisor;
+   int match_divisor;
+   int rle1_divisor;
+   int rle2_divisor;
+} salvador_stats;
+
+/** Compression context */
+typedef struct _salvador_compressor {
+   divsufsort_ctx_t divsufsort_context;
+   unsigned long long *intervals;
+   unsigned long long *pos_data;
+   unsigned long long *open_intervals;
+   salvador_match *match;             /* filled by salvador_find_all_matches(): nMatchesPerOffset entries per position */
+   unsigned short *match_depth;       /* parallel to 'match': one depth value per match entry */
+   salvador_final_match *best_match;
+   salvador_arrival *arrival;
+   int *first_offset_for_byte;
+   int *next_offset_for_pos;
+   int *offset_cache;
+   int flags;
+   int block_size;
+   int max_offset;
+   int max_arrivals_per_position;
+   salvador_stats stats;
+} salvador_compressor;
+
+/**
+ * Get maximum compressed size of input(source) data
+ *
+ * @param nInputSize input(source) size in bytes
+ *
+ * @return maximum compressed size
+ */
+size_t salvador_get_max_compressed_size(const size_t nInputSize);
+
+/**
+ * Compress memory
+ *
+ * @param pInputData pointer to input(source) data to compress
+ * @param pOutBuffer buffer for compressed data
+ * @param nInputSize input(source) size in bytes
+ * @param nMaxOutBufferSize maximum capacity of compression buffer
+ * @param nFlags compression flags (set to FLG_IS_INVERTED)
+ * @param nMaxOffset maximum match offset to use (0 for default)
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
+ *
+ * @return actual compressed size, or -1 for error (i.e. (size_t)-1, as the return type is size_t)
+ */
+size_t salvador_compress(const unsigned char *pInputData, unsigned char *pOutBuffer, const size_t nInputSize, const size_t nMaxOutBufferSize,
+   const unsigned int nFlags, const size_t nMaxOffset, const size_t nDictionarySize, void(*progress)(long long nOriginalSize, long long nCompressedSize), salvador_stats *pStats);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SHRINK_H */
--- a/loader/tools/dali/sfx.asm
+++ b/loader/tools/dali/sfx.asm
@ -0,0 +1,385 @@
+;
+; (c) Copyright 2021 by Tobias Bindhammer. All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;     * Redistributions of source code must retain the above copyright
+;       notice, this list of conditions and the following disclaimer.
+;     * Redistributions in binary form must reproduce the above copyright
+;       notice, this list of conditions and the following disclaimer in the
+;       documentation and/or other materials provided with the distribution.
+;     * The name of its author may not be used to endorse or promote products
+;       derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+!cpu 6510
+
+BITS_LEFT		= 0			;0: bit buffer is shifted right (lsr/ror), 1: shifted left (asl/rol)
+
+.depacker_dst		= $01			;zeropage address the depacker is copied to and executed from
+.smc_offsetd 		= .depacker_dst - (.dali_code_end - .dali_code_start)	;label - .smc_offsetd = offset of that label from .dali_code_start
+!ifdef SFX_FAST {				;DALI_*: offsets of the patchable bytes, + 2 (presumably for the 2-byte PRG load address - confirm against the packer)
+DALI_FAST_SRC		= lz_src 		- .smc_offsetd + 2
+DALI_FAST_DST		= lz_dst		- .smc_offsetd + 2
+DALI_FAST_SFX_ADDR	= lz_sfx_addr		- .smc_offsetd + 2
+DALI_FAST_DATA_END 	= lz_data_end		- .smc_offsetd + 2
+DALI_FAST_DATA_SIZE_HI	= lz_data_size_hi	- .smc_offsetd + 2
+DALI_FAST_01		= lz_01			- .smc_offsetd + 2
+DALI_FAST_CLI		= lz_cli		- .smc_offsetd + 2
+} else {
+DALI_SMALL_SRC		= lz_src		- .smc_offsetd + 2
+DALI_SMALL_DST		= lz_dst		- .smc_offsetd + 2
+DALI_SMALL_SFX_ADDR	= lz_sfx_addr		- .smc_offsetd + 2
+DALI_SMALL_DATA_END 	= lz_data_end		- .smc_offsetd + 2
+DALI_SMALL_DATA_SIZE_HI	= lz_data_size_hi	- .smc_offsetd + 2
+}
+
+!macro get_lz_bit {
+	!if BITS_LEFT != 1 {
+		lsr <lz_bits			;default: next bit leaves the buffer through bit 0 into carry
+	} else {
+		asl <lz_bits			;BITS_LEFT = 1: next bit leaves through bit 7 into carry
+	}
+}
+
+!macro set_lz_bit_marker {
+	!if BITS_LEFT != 1 {
+		ror				;default: carry enters at bit 7, bit 0 drops into carry
+	} else {
+		rol				;BITS_LEFT = 1: carry enters at bit 0, bit 7 drops into carry
+	}
+}
+
+!macro get_lz_length ~.entry {			;reads a length into A, subtracts 1 and leaves to lz_eof on a length of 0; .entry is handed back to the caller as entry label
+!ifdef SFX_FAST {
+-						;get_length as inline
+		+get_lz_bit			;fetch payload bit
+		rol				;can also be moved to front and executed once on start
+.entry
+		+get_lz_bit
+		bcc -				;carry clear: another payload bit follows
+
+		bne +				;bit buffer not empty yet: length is complete
+		jsr lz_refill_bits
+		beq lz_eof			;underflow. must have been 0
++
+		sbc #$01
+} else {
+.entry		jsr get_length
+		sbc #$01
+		bcc lz_eof			;underflow. must have been 0
+}
+}
+
+		* = $0801
+.dali_code_start
+                !byte $0b,$08			;BASIC line link: next line at $080b
+		;could place opcodes in linenumber and do sys 2051? 2049? -> anc $08 would not hurt, but $9e hurts
+		!word 1602			;BASIC line number
+		!byte $9e			;BASIC token for SYS
+		!text "2061"			;2061 = $080d, the address of the sei below
+		!byte $00,$00,$00
+
+		;/!\ ATTENTION, the depacker just fits into ZP this way, if it gets larger, the copy routine will overwrite $00, as it is an 8-bit address sta
+		sei
+
+!ifdef SFX_FAST {
+		;full zp code will be copied, but later less bytes will be copied back
+		ldx #<($100 + (.depacker_end - .restore_end))
+		txs				;SP = .depacker_end - .restore_end
+
+}
+		ldy #.depacker_end - .depacker_start	;Y = number of depacker bytes to install
+-
+!ifdef SFX_FAST {
+		pha				;saved zp to stack down to $02
+		lax <.depacker_dst - 1,y	;saves a byte, 2 byte compared to lda $0000,y
+}
+		ldx .depacker_code - 1,y	;depacker byte at its load position ...
+		stx <.depacker_dst - 1,y	;... goes to its zeropage position
+		dey
+		bne -
+                jmp .depack			;continue in the installed zeropage copy
+
+		;------------------
+		;depacker starts here
+		;------------------
+.dali_code_end
+.depacker_code
+!pseudopc .depacker_dst {
+.depacker_start
+		!byte $34			;first depacker byte, installed at .depacker_dst = $01 (presumably the C64 memory configuration - confirm)
+lz_bits						;bit buffer consumed by get_lz_bit
+!if BITS_LEFT = 1 {
+		!byte $40			;preset: first shift yields carry 0, second shift empties the buffer (forces a refill)
+} else {
+		!byte $02			;preset: first shift yields carry 0, second shift empties the buffer (forces a refill)
+}
+
+.depack
+!ifdef SFX_FAST {
+lz_01 = * + 1
+		lda #$37			;replace value for $01 in saved ZP on stack
+		pha
+}
+-						;copy data to end of ram ($ffff)
+                dey
+lz_data_end = * + 1
+.src		lda $beef,y			;placeholder address, operand is exported as DALI_*_DATA_END
+.dst		sta $ff00,y
+                tya				;annoying, but need to copy from $ff ... $00
+                bne -
+
+                dec <.src + 2			;self-modifying: step source one page down
+lz_data_size_hi = * + 1
+                lda #$00			;check for last page to copy
+                dcp <.dst + 2			;dcp = dec + cmp: step destination page down and compare it with A
+                bne -
+
+		;ldy #$00			;is already 0
+
+		;------------------
+		;LITERAL
+		;------------------
+.lz_start_over
+		lda #$01			;we fall through this check on entry and start with literal
+		+get_lz_bit
+!ifdef SFX_FAST {
+		bcc .literal
+		bcs .lz_new_offset		;after each match check for another match or literal?
+-						;lz_length as inline
+		+get_lz_bit			;fetch payload bit
+		rol				;can also be moved to front and executed once on start
+.literal
+		+get_lz_bit
+		bcc -				;carry clear: another payload bit follows
+
+		bne +				;bit buffer not empty yet: length is complete
+		jsr lz_refill_bits
+		beq .lz_l_page			;happens very seldom, so let's do that with lz_l_page that also decrements lz_len_hi, it returns on c = 1, what is always true after jsr .lz_length
++
+		tax				;X = number of literals to copy
+.lz_l_page_
+} else {
+		bcs .lz_new_offset		;after each match check for another match or literal?
+.literal
+		jsr get_length
+		tax				;X = number of literals to copy
+		beq .lz_l_page
+.lz_l_page_
+}
+cp_literal
+lz_src = * + 1
+		lda $beef,y			;looks expensive, but is cheaper than loop
+		sta (lz_dst),y
+                inc <lz_src + 0			;advance 16-bit source pointer (operand of the lda above)
+                bne +
+                inc <lz_src + 1
++
+                inc <lz_dst + 0			;advance 16-bit destination pointer
+                bne +
+                inc <lz_dst + 1
++
+		dex
+		bne cp_literal
+		lda <lz_len_hi
+		bne .lz_l_page			;happens very seldom
+
+		;------------------
+		;NEW OR OLD OFFSET
+		;------------------
+
+		rol				;A = 0, C = 1 -> same as lda #$01
+		+get_lz_bit
+		bcs .lz_new_offset		;either match with new offset or old offset
+
+		;------------------
+		;DO MATCH
+		;------------------
+.lz_match
+		jsr get_length
+!ifdef SFX_FAST {
+		sbc #$01			;saves the sec and iny later on, if it results in a = $ff, no problem, we branch with the beq later on
+		sec
+} else {
+.lz_m_page_
+		sbc #$01			;saves the sec and iny later on, if it results in a = $ff, no problem, we branch with the beq later on
+		bcs .lz_match_
+		dcp <lz_len_hi			;as a = $ff this will decrement <lz_len_hi and set carry again in any case
+}
+.lz_match_
+		eor #$ff
+		tay				;Y = A xor $ff, so the iny/bne copy loop below runs A + 1 times
+!ifdef SFX_FAST {
+.lz_m_page_
+}
+		eor #$ff			;restore A
+.lz_match__					;entry from new_offset handling
+		adc <lz_dst + 0
+		sta <lz_dst + 0
+!ifdef SFX_FAST {
+		bcs .lz_clc			;/!\ branch happens less than fall through, only in case of branch carry needs to be cleared :-(
+		dec <lz_dst + 1
+} else {
+		bcs +				;carry set: keep the destination highbyte
+		dec <lz_dst + 1
++
+		clc
+}
+.lz_clc_
+.lz_offset_lo = * + 1
+		sbc #$00			;operand = offset lowbyte, written by the new-offset code
+		sta <.lz_msrcr + 0
+!ifdef SFX_FAST {
+		lax <lz_dst + 1
+} else {
+		lda <lz_dst + 1
+}
+.lz_offset_hi = * + 1
+		sbc #$00			;operand = offset highbyte, written by the new-offset code
+		sta <.lz_msrcr + 1
+.cp_match
+.lz_msrcr = * + 1
+		lda $beef,y			;operand = match source, set up right above
+lz_dst = * + 1
+		sta $4000,y			;operand doubles as the lz_dst pointer
+		iny
+		bne .cp_match
+		inc <lz_dst + 1
+
+lz_len_hi = * + 1
+		lda #$00			;check for more loop runs
+		beq .lz_start_over
+!ifdef SFX_FAST {
+.lz_l_page
+.lz_m_page
+		dec <lz_len_hi
+		txa
+		beq .lz_l_page_
+		tya
+		beq .lz_m_page_
+
+.lz_clc
+		clc
+		bcc .lz_clc_			;always taken
+} else {
+		tya
+		beq .lz_m_page_
+.lz_l_page
+		dec <lz_len_hi
+		bcs cp_literal
+}
+		;------------------
+		;FETCH A NEW OFFSET
+		;------------------
+
+		+get_lz_length ~.lz_new_offset
+		lsr
+		sta <.lz_offset_hi		;hibyte of offset
+
+		lda (lz_src),y			;fetch another byte directly
+		ror				;bit 0 of the fetched byte drops into carry and is tested below
+		sta <.lz_offset_lo
+
+		inc <lz_src + 0			;advance 16-bit source pointer
+		bne +
+		inc <lz_src + 1
++
+		lda #$01
+!ifdef SFX_FAST {
+		ldy #$fe
+		bcs .lz_match__			;length = 2 ^ $ff, do it the very short way :-)
+-
+		+get_lz_bit			;fetch first payload bit
+
+		rol				;can also be moved to front and executed once on start
+		+get_lz_bit
+		bcc -
+		bne .lz_match_
+		ldy #$00
+		jsr lz_refill_bits		;fetch remaining bits
+		bne .lz_match_
+		inc <lz_len_hi
+} else {
+		jsr .get_length_bt
+}
+		bcs .lz_match_
+
+lz_refill_bits
+		tax				;park the partial number in X while A fetches the next byte
+		lda (lz_src),y
+		+set_lz_bit_marker		;rotate the carry in as end marker, the first bit of the new byte drops into carry
+		sta <lz_bits
+		inc <lz_src + 0			;advance 16-bit source pointer
+		bne +
+		inc <lz_src + 1
++
+		txa				;restore the partial number
+		bcs .end_bit_16			;carry set: number is complete
+
+		;fetch up to 8 bits first, if first byte overflows, stash away byte and fetch more bits as MSB
+.lz_get_loop
+		+get_lz_bit			;fetch payload bit
+.get_length_
+		rol				;can also be moved to front and executed once on start
+		bcs .get_length_16		;first 1 drops out from lowbyte, need to extend to 16 bit, unfortunately this does not work with inverted numbers
+get_length
+		+get_lz_bit
+.get_length_bt
+		bcc .lz_get_loop		;carry clear: another payload bit follows
+		beq lz_refill_bits		;carry set and buffer empty: that was the marker, reload and continue
+		rts
+
+.get_length_16
+		pha				;save LSB
+		tya				;start with MSB = 1
+		jsr .get_length_		;get up to 7 more bits
+		sta <lz_len_hi			;save MSB
+		pla				;restore LSB
+!ifdef SFX_FAST {
+		bne .end_bit_16
+		dec <lz_len_hi
+		tya
+}
+.end_bit_16
+		rts
+lz_eof
+		;------------------
+		;exit code for sfx only
+		;------------------
+
+!ifdef SFX_FAST {
+.restore_end
+-
+		pla				;next saved zeropage byte from the stack
+		tsx				;X = stack pointer after the pull, used as index
+		sta <(.depacker_dst - ($100 - (.restore_end - .depacker_start))),x
+		bne -				;loop until the stack pointer has wrapped to $00 (flags stem from tsx)
+		pha				;end up with SP = $ff, let's be nice :-)
+lz_cli						;position of this opcode is exported as DALI_FAST_CLI (presumably patched by the packer - confirm)
+		sei
+}
+lz_sfx_addr = * + 1
+		jmp $0000			;placeholder target, operand is exported as DALI_*_SFX_ADDR
+.depacker_end
+}
+
+!ifdef .second_pass {				;emit warnings only once in second pass
+!ifdef SFX_FAST {
+!warn "zp saved/restored up to: ",.restore_end - .depacker_dst
+}
+!warn "sfx zp size: ", .depacker_end - .depacker_start
+!warn "sfx size: ", * - .dali_code_start
+}
+.second_pass					;defined only after the check above, so the check fails while the symbol is still unknown
--- a/loader/tools/dali/sfx_fast.asm
+++ b/loader/tools/dali/sfx_fast.asm
@ -0,0 +1,314 @@
+;
+; (c) Copyright 2021 by Tobias Bindhammer. All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;     * Redistributions of source code must retain the above copyright
+;       notice, this list of conditions and the following disclaimer.
+;     * Redistributions in binary form must reproduce the above copyright
+;       notice, this list of conditions and the following disclaimer in the
+;       documentation and/or other materials provided with the distribution.
+;     * The name of its author may not be used to endorse or promote products
+;       derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+!cpu 6510
+
+BITS_LEFT	= 0				;0: bit buffer is shifted right (lsr/ror), 1: shifted left (asl/rol)
+
+.depacker	= $01				;zeropage address the depacker is copied to and executed from
+.smc_offsetd 	= .depacker - (.dali_code_end - .dali_code_start)	;label - .smc_offsetd = offset of that label from .dali_code_start
+DALI_SRC	= lz_src - .smc_offsetd + 2	;DALI_*: offsets of the patchable bytes, + 2 (presumably for the 2-byte PRG load address - confirm against the packer)
+DALI_DST	= lz_dst      - .smc_offsetd + 2
+DALI_SFX_ADDR	= lz_sfx_addr - .smc_offsetd + 2
+DALI_DATA_END 	= lz_data_end      - .smc_offsetd + 2
+DALI_DATA_SIZE_HI = lz_data_size_hi - .smc_offsetd + 2
+DALI_01		= lz_01 - .smc_offsetd + 2
+DALI_CLI	= lz_cli - .smc_offsetd + 2
+
+!macro get_lz_bit {
+	!if BITS_LEFT != 1 {
+		lsr <lz_bits			;default: next bit leaves the buffer through bit 0 into carry
+	} else {
+		asl <lz_bits			;BITS_LEFT = 1: next bit leaves through bit 7 into carry
+	}
+}
+
+!macro set_lz_bit_marker {
+	!if BITS_LEFT != 1 {
+		ror				;default: carry enters at bit 7, bit 0 drops into carry
+	} else {
+		rol				;BITS_LEFT = 1: carry enters at bit 0, bit 7 drops into carry
+	}
+}
+
+		* = $0801
+.dali_code_start
+                !byte $0b,$08			;BASIC line link: next line at $080b
+		!word 1602			;BASIC line number
+		!byte $9e			;BASIC token for SYS
+		!text "2061"			;2061 = $080d, the address of the sei below
+		!byte $00,$00,$00
+
+		;/!\ ATTENTION, the depacker just fits into ZP this way, if it gets larger, the copy routine will overwrite $00, as it is an 8-bit address sta
+		sei
+
+		;full zp code will be copied, but later less bytes will be copied back
+		ldx #<($100 + (.depacker_end - .restore_end))
+		txs				;SP = .depacker_end - .restore_end
+
+		ldy #.depacker_end - .depacker_start	;Y = number of depacker bytes to install
+-
+		pha				;saved zp to stack down to $02
+		lax <.depacker - 1,y		;saves a byte, 2 byte compared to lda $0000,y
+		ldx .depacker_code - 1,y	;depacker byte at its load position ...
+		stx <.depacker - 1,y		;... goes to its zeropage position
+		dey
+		bne -
+                jmp .depack			;continue in the installed zeropage copy
+
+		;------------------
+		;depacker starts here
+		;------------------
+.dali_code_end
+.depacker_code
+!pseudopc .depacker {
+.depacker_start
+		!byte $34			;first depacker byte, installed at .depacker = $01 (presumably the C64 memory configuration - confirm)
+lz_bits						;bit buffer consumed by get_lz_bit
+!if BITS_LEFT = 1 {
+		!byte $40			;preset: first shift yields carry 0, second shift empties the buffer (forces a refill)
+} else {
+		!byte $02			;preset: first shift yields carry 0, second shift empties the buffer (forces a refill)
+}
+
+.depack
+lz_01 = * + 1
+		lda #$37			;replace value for $01 in saved ZP on stack
+		pha
+-						;copy data to end of ram ($ffff)
+                dey
+lz_data_end = * + 1
+.src		lda .data_end - $100,y		;operand is exported as DALI_DATA_END
+.dst		sta $ff00,y
+                tya				;annoying, but need to copy from $ff ... $00
+                bne -
+
+                dec <.src + 2			;self-modifying: step source one page down
+lz_data_size_hi = * + 1
+                lda #>(.data_end - .data) + 1	;check for last page to copy
+                dcp <.dst + 2			;dcp = dec + cmp: step destination page down and compare it with A
+                bne -
+
+		;ldy #$00			;is already 0
+
+		;------------------
+		;LITERAL
+		;------------------
+.lz_start_over
+		lda #$01			;we fall through this check on entry and start with literal
+		+get_lz_bit
+		bcs .lz_new_offset		;after each match check for another match or literal?
+.literal
+		jsr .get_length
+		tax				;X = number of literals to copy
+		beq .lz_l_page_
+.cp_literal
+lz_src = * + 1
+		lda .data,y			;looks expensive, but is cheaper than loop
+		sta (lz_dst),y
+		iny
+		dex
+		bne .cp_literal
+
+		dey				;this way we force increment of lz_dst + 1 if y = 0
+		tya
+		adc <lz_dst + 0
+		sta <lz_dst + 0			;XXX TODO final add of y, could be combined with next add? -> postpone until match that will happen necessarily later on?
+		bcc +
+		inc <lz_dst + 1
++
+		tya
+		sec
+		adc <lz_src + 0			;same advance for the source pointer (operand of the lda above)
+		sta <lz_src + 0
+		bcc +
+		inc <lz_src + 1
++
+		ldy <.lz_len_hi
+		bne .lz_l_page			;happens very seldom
+
+		;------------------
+		;NEW OR OLD OFFSET
+		;------------------
+
+		lda #$01
+		+get_lz_bit
+		bcs .lz_new_offset		;either match with new offset or old offset
+
+		;------------------
+		;DO MATCH
+		;------------------
+.lz_match
+		jsr .get_length
+.lz_m_page
+		sbc #$01			;saves the sec and iny later on, if it results in a = $ff, no problem, we branch with the beq later on
+		bcc .lz_dcp
+.lz_match_
+		eor #$ff
+		tay				;Y = A xor $ff, so the iny/bne copy loop below runs A + 1 times
+		eor #$ff			;restore A
+.lz_match__					;entry from new_offset handling
+		adc <lz_dst + 0
+		sta <lz_dst + 0
+		bcs .lz_clc			;/!\ branch happens less than fall through, only in case of branch carry needs to be cleared :-(
+		dec <lz_dst + 1
+.lz_clc_
+.lz_offset_lo = * + 1
+		sbc #$00			;operand = offset lowbyte, written by the new-offset code
+		sta <.lz_msrcr + 0
+		lda <lz_dst + 1
+.lz_offset_hi = * + 1
+		sbc #$00			;operand = offset highbyte, written by the new-offset code
+		sta <.lz_msrcr + 1
+.cp_match
+.lz_msrcr = * + 1
+		lda $beef,y			;operand = match source, set up right above
+lz_dst = * + 1
+		sta $4000,y			;operand doubles as the lz_dst pointer
+		iny
+		bne .cp_match
+		inc <lz_dst + 1
+
+.lz_len_hi = * + 1
+		lda #$00			;check for more loop runs
+		beq .lz_start_over
+		tya
+		beq .lz_m_page
+.lz_dcp
+		dcp <.lz_len_hi			;as a = $ff this will decrement <.lz_len_hi and set carry again in any case
+		bcs .lz_match_
+.lz_clc
+		clc
+		bcc .lz_clc_			;always taken
+
+.lz_l_page
+		sec				;only needs to be set for consecutive rounds of literals, happens very seldom
+		ldy #$00
+.lz_l_page_
+		dec <.lz_len_hi
+		bcs .cp_literal
+
+		;------------------
+		;FETCH A NEW OFFSET
+		;------------------
+-						;get_length as inline
+		+get_lz_bit			;fetch payload bit
+		rol				;can also be moved to front and executed once on start
+.lz_new_offset
+		+get_lz_bit
+		bcc -				;carry clear: another payload bit follows
+
+		bne +				;bit buffer not empty yet: number is complete
+		jsr .lz_refill_bits
++
+		sbc #$01
+
+		bcc .lz_eof			;underflow. must have been 0
+		lsr
+		sta <.lz_offset_hi		;hibyte of offset
+
+		lda (lz_src),y			;fetch another byte directly
+		ror				;bit 0 of the fetched byte drops into carry and is tested below
+		sta <.lz_offset_lo
+
+		inc <lz_src + 0			;advance 16-bit source pointer
+		bne +
+		inc <lz_src + 1
++
+						;XXX TODO would be nice to have inverted data sent, but would mean MSB also receives inverted bits? sucks. As soon as we refill bits we fall into loop that checks overflow on LSB, should check for bcc however :-( then things would work
+						;would work on offset MSB, but need to clear lz_len_hi after that
+		lda #$01
+		ldy #$fe
+		bcs .lz_match__			;length = 2 ^ $ff, do it the very short way :-)
+-
+		+get_lz_bit			;fetch first payload bit
+
+		rol				;can also be moved to front and executed once on start
+		+get_lz_bit
+		bcc -
+		bne .lz_match_
+		ldy #$00
+		jsr .lz_refill_bits		;fetch remaining bits
+		bcs .lz_match_
+
+.lz_refill_bits
+		tax				;park the partial number in X while A fetches the next byte
+		lda (lz_src),y
+		+set_lz_bit_marker		;rotate the carry in as end marker, the first bit of the new byte drops into carry
+		sta <lz_bits
+		inc <lz_src + 0			;advance 16-bit source pointer
+		bne +
+		inc <lz_src + 1
++
+		txa				;restore the partial number
+		bcs .end_bit_16			;carry set: number is complete
+
+		;fetch up to 8 bits first, if first byte overflows, stash away byte and fetch more bits as MSB
+.lz_get_loop
+		+get_lz_bit			;fetch payload bit
+.get_length_
+		rol				;can also be moved to front and executed once on start
+		bcs .get_length_16		;first 1 drops out from lowbyte, need to extend to 16 bit, unfortunately this does not work with inverted numbers
+.get_length
+		+get_lz_bit
+		bcc .lz_get_loop		;carry clear: another payload bit follows
+		beq .lz_refill_bits		;carry set and buffer empty: that was the marker, reload and continue
+		rts
+
+.get_length_16
+		pha				;save LSB
+		tya				;start with MSB = 1
+		jsr .get_length_		;get up to 7 more bits
+		sta <.lz_len_hi			;save MSB
+		pla				;restore LSB
+.end_bit_16
+		rts
+.lz_eof
+		;------------------
+		;exit code for sfx only
+		;------------------
+
+.restore_end
+		;restore zp up to $dc
+-
+		pla				;next saved zeropage byte from the stack
+		tsx				;X = stack pointer after the pull, used as index
+		sta <(.depacker - ($100 - (.restore_end - .depacker_start))),x
+		bne -				;loop until the stack pointer has wrapped to $00 (flags stem from tsx)
+		pha				;end up with SP = $ff, let's be nice :-)
+lz_cli						;position of this opcode is exported as DALI_CLI (presumably patched by the packer - confirm)
+		sei
+lz_sfx_addr = * + 1
+		jmp $0000			;placeholder target, operand is exported as DALI_SFX_ADDR
+.depacker_end
+}
+
+;!warn "fixup size: ",.depacker_end - .restore_end
+!warn "zp saved up to: ",.restore_end - .depacker
+;!warn "sfx zp size: ", .depacker_end - .depacker_start
+!warn "sfx size: ", * - .dali_code_start
+.data						;start of the packed data; nothing is included here as the !bin below is commented out
+		;!bin "test.lz"
+.data_end					;end of the packed data, referenced by the copy loop in .depack
--- a/loader/tools/dali/sfx_small.asm
+++ b/loader/tools/dali/sfx_small.asm
@ -0,0 +1,267 @@
+;
+; (c) Copyright 2021 by Tobias Bindhammer. All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;     * Redistributions of source code must retain the above copyright
+;       notice, this list of conditions and the following disclaimer.
+;     * Redistributions in binary form must reproduce the above copyright
+;       notice, this list of conditions and the following disclaimer in the
+;       documentation and/or other materials provided with the distribution.
+;     * The name of its author may not be used to endorse or promote products
+;       derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+!cpu 6510
+
+BITS_LEFT		= 0			;0: bit buffer is shifted right (lsr/ror), 1: shifted left (asl/rol)
+
+.depacker		= $01			;zeropage address the depacker is copied to and executed from
+.smc_offsetd 		= .depacker - (.dali_code_end - .dali_code_start)	;label - .smc_offsetd = offset of that label from .dali_code_start
+DALI_SMALL_SRC		= lz_src - .smc_offsetd + 2	;DALI_SMALL_*: offsets of the patchable bytes, + 2 (presumably for the 2-byte PRG load address - confirm against the packer)
+DALI_SMALL_DST		= lz_dst      - .smc_offsetd + 2
+DALI_SMALL_SFX_ADDR	= lz_sfx_addr - .smc_offsetd + 2
+DALI_SMALL_DATA_END 	= lz_data_end      - .smc_offsetd + 2
+DALI_SMALL_DATA_SIZE_HI = lz_data_size_hi - .smc_offsetd + 2
+
+!macro get_lz_bit {
+	!if BITS_LEFT != 1 {
+		lsr <lz_bits			;default: next bit leaves the buffer through bit 0 into carry
+	} else {
+		asl <lz_bits			;BITS_LEFT = 1: next bit leaves through bit 7 into carry
+	}
+}
+
+!macro set_lz_bit_marker {
+	!if BITS_LEFT != 1 {
+		ror				;default: carry enters at bit 7, bit 0 drops into carry
+	} else {
+		rol				;BITS_LEFT = 1: carry enters at bit 0, bit 7 drops into carry
+	}
+}
+
+		* = $0801
+.dali_code_start
+                !byte $0b,$08			;BASIC line link: next line at $080b
+		!word 1602			;BASIC line number
+		!byte $9e			;BASIC token for SYS
+		!text "2061"			;2061 = $080d, the address of the sei below
+		!byte $00,$00,$00
+
+		;/!\ ATTENTION, the depacker just fits into ZP this way, if it gets larger, the copy routine will overwrite $00, as it is an 8-bit address sta
+		sei
+
+		ldy #.depacker_end - .depacker_start	;Y = number of depacker bytes to install
+-
+		ldx .depacker_code - 1,y	;depacker byte at its load position ...
+		stx <.depacker - 1,y		;... goes to its zeropage position
+		dey
+		bne -
+                jmp .depack			;continue in the installed zeropage copy
+
+		;------------------
+		;depacker starts here
+		;------------------
+.dali_code_end
+.depacker_code
+!pseudopc .depacker {
+.depacker_start
+		!byte $34			;first depacker byte, installed at .depacker = $01 (presumably the C64 memory configuration - confirm)
+lz_bits						;bit buffer consumed by get_lz_bit
+!if BITS_LEFT = 1 {
+		!byte $40			;preset: first shift yields carry 0, second shift empties the buffer (forces a refill)
+} else {
+		!byte $02			;preset: first shift yields carry 0, second shift empties the buffer (forces a refill)
+}
+
+.depack
+-						;copy data to end of ram ($ffff)
+                dey
+lz_data_end = * + 1
+.src		lda .data_end - $100,y		;operand is exported as DALI_SMALL_DATA_END
+.dst		sta $ff00,y
+                tya				;annoying, but need to copy from $ff ... $00
+                bne -
+
+                dec <.src + 2			;self-modifying: step source one page down
+lz_data_size_hi = * + 1
+                lda #>(.data_end - .data) + 1	;check for last page to copy
+                dcp <.dst + 2			;dcp = dec + cmp: step destination page down and compare it with A
+                bne -
+
+		;ldy #$00			;is already 0
+
+		;------------------
+		;LITERAL
+		;------------------
+.lz_start_over
+		lda #$01			;we fall through this check on entry and start with literal
+		+get_lz_bit
+		bcs .lz_new_offset		;after each match check for another match or literal?
+.literal
+		jsr .get_length
+		tax				;X = number of literals to copy
+		beq .lz_l_page_
+.cp_literal
+lz_src = * + 1
+		lda .data,y			;looks expensive, but is cheaper than loop
+		sta (lz_dst),y
+
+                inc <lz_src + 0			;advance 16-bit source pointer (operand of the lda above)
+                bne +
+                inc <lz_src + 1
++
+                inc <lz_dst + 0			;advance 16-bit destination pointer
+                bne +
+                inc <lz_dst + 1
++
+		dex
+		bne .cp_literal
+
+		lda <.lz_len_hi
+		bne .lz_l_page			;happens very seldom
+
+		;------------------
+		;NEW OR OLD OFFSET
+		;------------------
+
+		rol				;A = 0, C = 1 -> same as lda #$01
+		+get_lz_bit
+		bcc .lz_match			;either match with new offset or old offset
+
+		;------------------
+		;FETCH A NEW OFFSET
+		;------------------
+
+.lz_new_offset
+		jsr .get_length
+		sbc #$01
+
+		bcc .lz_eof			;underflow. must have been 0
+		lsr
+		sta <.lz_offset_hi		;hibyte of offset
+
+		lda (lz_src),y			;fetch another byte directly
+		ror				;bit 0 of the fetched byte drops into carry for .get_length_bt
+		sta <.lz_offset_lo
+
+		inc <lz_src + 0			;advance 16-bit source pointer
+		bne +
+		inc <lz_src + 1
++
+		lda #$01
+		jsr .get_length_bt
+.lz_match_
+		eor #$ff
+		tay				;Y = A xor $ff, so the iny/bne copy loop below runs A + 1 times
+		eor #$ff			;restore A
+.lz_match__					;entry from new_offset handling
+		adc <lz_dst + 0
+		sta <lz_dst + 0
+		bcc +				;carry clear: go decrement the destination highbyte
+		clc
+		top				;3-byte nop: swallows the 2-byte dec below as its operand
++
+		dec <lz_dst + 1
+.lz_clc_
+.lz_offset_lo = * + 1
+		sbc #$00			;operand = offset lowbyte, written above
+		sta <.lz_msrcr + 0
+		lda <lz_dst + 1
+.lz_offset_hi = * + 1
+		sbc #$00			;operand = offset highbyte, written above
+		sta <.lz_msrcr + 1
+.cp_match
+.lz_msrcr = * + 1
+		lda $beef,y			;operand = match source, set up right above
+lz_dst = * + 1
+		sta $4000,y			;operand doubles as the lz_dst pointer
+		iny
+		bne .cp_match
+		inc <lz_dst + 1
+
+.lz_len_hi = * + 1
+		lda #$00			;check for more loop runs
+		beq .lz_start_over
+		tya
+		beq .lz_m_page
+
+		;------------------
+		;DO MATCH
+		;------------------
+.lz_match
+		jsr .get_length
+.lz_m_page
+		sbc #$01			;saves the sec and iny later on, if it results in a = $ff, no problem, we branch with the beq later on
+		bcs .lz_match_
+.lz_dcp
+		dcp <.lz_len_hi			;as a = $ff this will decrement <.lz_len_hi and set carry again in any case
+		bcs .lz_match_
+.lz_l_page					;both literal page labels share this address in the small variant
+.lz_l_page_
+		dec <.lz_len_hi
+		bcs .cp_literal
+
+.lz_refill_bits
+		tax				;park the partial number in X while A fetches the next byte
+		lda (lz_src),y
+		+set_lz_bit_marker		;rotate the carry in as end marker, the first bit of the new byte drops into carry
+		sta <lz_bits
+		inc <lz_src + 0			;advance 16-bit source pointer
+		bne +
+		inc <lz_src + 1
++
+		txa				;restore the partial number
+		bcs .end_bit_16			;carry set: number is complete
+
+		;fetch up to 8 bits first, if first byte overflows, stash away byte and fetch more bits as MSB
+.lz_get_loop
+		+get_lz_bit			;fetch payload bit
+.get_length_
+		rol				;can also be moved to front and executed once on start
+		bcs .get_length_16		;first 1 drops out from lowbyte, need to extend to 16 bit, unfortunately this does not work with inverted numbers
+.get_length
+		+get_lz_bit
+.get_length_bt
+		bcc .lz_get_loop		;carry clear: another payload bit follows
+		beq .lz_refill_bits		;carry set and buffer empty: that was the marker, reload and continue
+		rts
+
+.get_length_16
+		pha				;save LSB
+		tya				;start with MSB = 1
+		jsr .get_length_		;get up to 7 more bits
+		sta <.lz_len_hi			;save MSB
+		pla				;restore LSB
+.end_bit_16
+		rts
+.lz_eof
+		;------------------
+		;exit code for sfx only
+		;------------------
+
+.restore_end
+		;no zeropage restore in the small variant, the installer saves nothing
+lz_sfx_addr = * + 1
+		jmp $0000			;placeholder target, operand is exported as DALI_SMALL_SFX_ADDR
+.depacker_end
+}
+
+;!warn "fixup size: ",.depacker_end - .restore_end
+!warn "zp saved up to: ",.restore_end - .depacker
+;!warn "sfx zp size: ", .depacker_end - .depacker_start
+!warn "sfx size: ", * - .dali_code_start
+.data						;start of the packed data; nothing is included here as the !bin below is commented out
+		;!bin "test.lz"
+.data_end					;end of the packed data, referenced by the copy loop in .depack
--- a/loader/tools/dali/testfile.lz
+++ b/loader/tools/dali/testfile.lz
--- a/loader/tools/doynamite1.1/decrunch.asm
+++ b/loader/tools/doynamite1.1/decrunch.asm
@ -0,0 +1,333 @@
+;DYNAMIC_TABLES
+;FAST_LITERAL				;/!\ does not work with current implementation of krill loader, as it touches the lowbyte of the sector_pointers
+;FAST_MATCH
+
+;-------------------------------------------------------------------------------
+;Regular version of the Lempel-Ziv decompressor
+;-------------------------------------------------------------------------------
+;lz_match	= $f9			;Match source pointer
+;lz_dst		= $fb			;Decompression destination pointer.
+;					;Initialize this to whatever address
+;					;you want to decompress to
+;
+;lz_bits	= $fd			;Internal shift register
+;
+;lz_scratch	= $fe			;Temporary zeropage storage
+;
+;lz_sector	= $0400			;The one-page buffer from which the
+;					;compressed data is actually read,
+;					;and which gets refilled by
+;					;lz_fetch_sector
+
+
+;-------------------------------------------------------------------------------
+;This is the user's hook to replenish the sector buffer with some new bytes.
+;
+;A and Y are expected to be preserved while carry must remain set on exit.
+;X should point to the first byte of the new data, e.g. zero for a full 256-byte
+;page of data or two to skip past the sector and track links.
+;
+;When fetching from a larger in-memory array rather than a single sector buffer
+;the lz_sector_ptr1..3 pointers will need to be patched up
+;-------------------------------------------------------------------------------
+;lz_fetch_sector
+;		inc lz_sector_ptr1+1
+;		inc lz_sector_ptr2+1
+;		inc lz_sector_ptr3+1
+;		rts
+
+
+;-------------------------------------------------------------------------------
+;This is the main lz_decrunch function which may be called to decompress an
+;entire file.
+;
+;On entry and exit the X register points to the next available byte in the
+;sector buffer, in ascending order from $00 to $ff.
+;This implies that the initial sector must have already been fetched, and that a
+;file ending with X wrapped to $00 will have needlessly fetched an extra sector
+;(which may be taken advantage of when decoding a contiguous set of files.)
+;-------------------------------------------------------------------------------
+
+		;******** Start the next match/literal run ********
+
+lz_decrunch				;In: Y = high byte of the packed data address, X = index of the first packed byte
+		sty lz_sector_ptr1+1	;Patch the high byte of all three
+		sty lz_sector_ptr2+1	;self-modified stream read operands
+		sty lz_sector_ptr3+1
+
+		;Fetch depack address (_lz_refill_bits doubles as a plain byte fetch here, the byte read is left in Y)
+		jsr _lz_refill_bits
+		sty lz_dst
+		jsr _lz_refill_bits
+		sty lz_dst+1
+
+!ifdef DYNAMIC_TABLES {
+		;Load the 24 ($18) byte long offset tables from the stream
+		lda #<_lz_moff_length
+		sta _lz_cp+1		;Reset the low byte of the self-modified store below
+-
+		jsr _lz_refill_bits
+_lz_cp		sty _lz_moff_length	;Only the low byte of this operand is advanced, so the tables must not cross a page
+		inc _lz_cp+1
+		lda _lz_cp+1
+		cmp #<(_lz_moff_length+$18)
+		bne -
+		;sec			;Not needed: the cmp above leaves carry set once the loop exits
+} else {
+		sec			;_lz_refill_bits rotates the carry into bit 0 of lz_bits
+}
+					;This is the main entry point. Forcibly
+_lz_type_refill	jsr _lz_refill_bits	;fill up the bit buffer on entry
+		bne _lz_type_cont	;(BRA)
+
+!ifdef FAST_LITERAL {
+_lz_maximum	jsr lz_fetch_sector	;Grab a new sector for the literal loop. Carry is set, so we fall through next check
+_lz_mfinish	bcc *+4			;Skip the inc (two bytes, so lz_dst has to live in zeropage) if no carry is pending
+		inc lz_dst+1
+} else {
+_lz_mfinish	bcc *+4			;Skip the inc (two bytes, so lz_dst has to live in zeropage) if no carry is pending
+_lz_maximum	inc lz_dst+1		;This is also used by maximum length
+}
+					;literals needing an explicit type bit
+
+		;Literal or match to follow?
+		asl lz_bits
+_lz_type_cont	bcc _lz_do_match	;Type bit 0: match
+		beq _lz_type_refill	;Carry set but lz_bits now empty: refill and take the type bit from the new byte
+
+		;******** Process literal run ********
+
+		lda #%00000000		;Decode run length
+_lz_lrun_loop	rol			;Shift the carry (initially the set type bit) into the length
+		asl lz_bits
+		bcs _lz_lrun_test	;Carry set: length complete, unless the bit buffer just ran empty
+_lz_lrun_back	asl lz_bits		;Next payload bit into carry
+		bne _lz_lrun_loop
+
+		jsr _lz_refill_bits	;Buffer empty, the payload bit comes from the new byte
+		bne _lz_lrun_loop	;(BRA)
+
+_lz_lrun_test	bne _lz_lrun_gotten	;Bits left in the buffer, so the carry was a real stop bit
+		jsr _lz_refill_bits
+		bcc _lz_lrun_back	;Real bit was 0: keep reading payload bits
+
+_lz_lrun_gotten
+		sta _lz_copy_cnt+1	;Store LSB of run-length
+                ldy #$00
+!ifdef FAST_LITERAL {
+		stx lz_sector_ptr2	;Store x as lowbyte, so we can use y for lda + sta, and we don't need to bother about overruns
+_lz_lcopy
+lz_sector_ptr2	= *+1			;Copy the literal data.
+		lda lz_sector,y
+} else {
+_lz_lcopy
+lz_sector_ptr2	= *+1			;Copy the literal data.
+		lda lz_sector,x
+		inx
+		bne *+5			;Skip the jsr while X has not wrapped
+		jsr lz_fetch_sector	;Grab a new sector for the literal loop
+}
+		sta (lz_dst),y
+		iny
+_lz_copy_cnt	cpy #$00		;Operand is patched with the run length; a length of $00 copies 256 bytes
+		bne _lz_lcopy
+
+		;Time to advance the destination pointer.
+		;Maximum run length literals exit here as a type-bit needs
+		;to be fetched afterwards
+		tya
+		beq _lz_maximum		;maximum literal run, bump sector pointers and so on
+		clc
+!ifdef FAST_LITERAL {
+		adc lz_sector_ptr2	;Old stream index + bytes consumed
+		tax			;fix x
+		bcc *+6			;No page crossing: skip the jsr and the clc below
+		jsr lz_fetch_sector	;Grab a new sector for the literal loop
+		clc
+		tya
+}
+		adc lz_dst+0
+		sta lz_dst+0
+		bcc _lz_do_match
+		inc lz_dst+1
+
+		;One literal run following another only makes sense if the
+		;first run is of maximum length and had to be split. As that
+		;case has been taken care of we can safely omit the type bit
+		;here
+
+
+		;******** Process match ********
+
+_lz_do_match	lda #%00100000		;Determine offset length by a two-bit
+_lz_moff_range	asl lz_bits		;prefix combined with the first run
+		bne *+5			;length bit (where a one identifies
+		jsr _lz_refill_bits	;a two-byte match).
+		rol			;The rest of the length bits will
+		bcc _lz_moff_range	;then follow *after* the offset data
+
+		tay			;Y = three-bit selector (0-7) into the offset tables
+		lda _lz_moff_length,y	;Preloaded shift register for this offset class
+		beq _lz_moff_far	;Zero: no partial bits, only a full low-byte from the stream
+
+_lz_moff_loop	asl lz_bits		;Load partial offset byte
+		bne *+9			;Bits left: skip the 7 byte refill sequence (needs lz_scratch in zeropage)
+		sty lz_scratch		;_lz_refill_bits clobbers Y, so keep the selector safe
+		jsr _lz_refill_bits
+		ldy lz_scratch
+
+		rol
+		bcc _lz_moff_loop	;Loop until the leading one of the preload drops out
+
+		bmi _lz_moff_near	;Bit 7 set: offset is shorter than 8 bits
+
+_lz_moff_far	sta lz_scratch		;Save the bits we just read as the
+					;high-byte
+
+lz_sector_ptr3	= *+1
+		lda lz_sector,x		;For large offsets we can load the
+		inx			;low-byte straight from the stream
+		bne *+5			;without going through the shift
+		jsr lz_fetch_sector	;register
+
+;		sec
+		adc _lz_moff_adjust_lo,y ;y .. 2 .. 5? ?! necessary with a full lowbyte?!?!
+		bcs _lz_moff_pageok
+		dec lz_scratch
+		sec
+_lz_moff_pageok	adc lz_dst+0
+		sta lz_match+0
+
+		lda lz_scratch
+		adc _lz_moff_adjust_hi,y
+		sec
+		bcs _lz_moff_join	;(BRA)
+
+_lz_moff_near
+;		sec			;Special case handling of <8 bit offsets.
+	 	adc _lz_moff_adjust_lo,y;We can safely ignore the MSB from
+;		sec			;the base adjustment table as the
+		adc lz_dst+0		;maximum base (for a 4/5/6/7 bit
+		sta lz_match+0		;length sequence) is 113
+		lda #$ff
+_lz_moff_join	adc lz_dst+1
+		sta lz_match+1
+
+		cpy #$04		;Get any remaining run length bits
+		lda #%00000001
+		bcs _lz_mrun_start      ;Sentinel check can be skipped in that case
+
+_lz_mrun_loop	asl lz_bits
+		bne *+5
+		jsr _lz_refill_bits
+		rol
+		asl lz_bits
+		bcc _lz_mrun_loop
+		bne _lz_mrun_gotten
+		jsr _lz_refill_bits
+		bcc _lz_mrun_loop
+
+					;XXX TODO only needed on near matches as offset = 1 in that case
+_lz_mrun_gotten	tay			;A 257-byte (=>$00) run serves as a
+		beq _lz_end_of_file	;sentinel
+_lz_mrun_start
+		sta _lz_mcopy_len	;Patch the cpy operand of the copy loop
+
+		ldy #$ff		;The copy loop. This needs to be run
+					;forwards since RLE-style matches can overlap the destination
+!ifdef FAST_MATCH {
+		lsr			;Check bit 0
+		bcc _lz_odd		;Odd or even number? Enter at right position of our loop that always copies two bytes in one go
+}
+_lz_mcopy
+		iny
+		lda (lz_match),y	;Copy one byte
+!ifdef FAST_MATCH {
+		sta (lz_dst),y
+_lz_odd
+		iny			;Next byte
+		lda (lz_match),y	;And another one
+}
+		sta (lz_dst),y
+_lz_mcopy_len	= *+1
+		cpy #$ff		;Loop ends with Y = patched value, i.e. one byte more than that value has been copied
+		bne _lz_mcopy
+
+		tya			;Advance destination pointer
+;		sec			;Carry is already set by the matching cpy above, so this adds Y + 1
+		adc lz_dst+0
+		sta lz_dst+0
+		jmp _lz_mfinish
+
+
+		;******** Offset coding tables ********
+
+		;This length table is a bit funky. The idea here is to use the
+		;value as the initial value of the shift register instead of
+		;keeping a separate counter.
+		;In other words we iterate until the leading one is shifted out.
+		;Then afterwards the bit just below it (our new sign bit) is set
+		;if the offset is shorter than 8-bits, and conversely it's
+		;cleared if we need to fetch a separate low-byte
+		;as well.
+		;The fact that the sign bit is cleared as a flag is compensated
+		;for in the lz_moff_adjust_hi table
+
+!ifndef DYNAMIC_TABLES {
+_lz_moff_length
+		;Long (>2 byte matches)
+		!byte %00011111		;4 bits
+		!byte %00000011		;7 bits
+		!byte %01011111		;10 bits
+		!byte %00001011		;13 bits
+		;Short (2 byte matches)
+		!byte %01011111		;10 bits
+		!byte %00000000		;8 bits
+		!byte %00000111		;6 bits
+		!byte %00111111		;3 bits
+_lz_moff_adjust_hi = *-2		;Only indices 2-5 are stored; the table base is moved back by two to compensate
+		;Long (>2 byte matches)
+;		!byte %11111111		;1-16 (unreferenced)
+;		!byte %11111111		;17-144 (unreferenced)
+		!byte %01111111		;145-1168
+		!byte %01111011		;1169-9360
+		;Short (2 byte matches)
+		!byte %01111110		;329-1352
+		!byte %11111110		;73-328
+;		!byte %11111111		;9-72 (unreferenced)
+;		!byte %11111111		;1-8 (unreferenced)
+
+_lz_moff_adjust_lo = * - 1		;Index 0 aliases the last _lz_moff_adjust_hi byte above, which holds the same value %11111110
+		;Long (>2 byte matches)
+		;!byte %11111110		;1-16
+		!byte %11101110		;17-144
+		!byte %01101110		;145-1168
+		!byte %01101110		;1169-9360
+		;Short (2 byte matches)
+		!byte %10110110		;329-1352
+		!byte %10110110		;73-328
+		!byte %11110110		;9-72
+		!byte %11111110		;1-8
+		;******** Fetch some more bits to work with ********
+}
+
+lz_sector_ptr1	= *+1
+_lz_refill_bits	ldy lz_sector,x		;Next stream byte (also left in Y for callers that want the raw byte)
+		sty lz_bits
+;		sec			;Callers are expected to arrive with carry set
+		rol lz_bits		;Carry goes into bit 0, the top bit of the new byte comes out in carry
+		inx
+		beq lz_fetch_sector	;X wrapped: move on to the next page
+_lz_end_of_file	rts
+
+lz_fetch_sector
+		inc lz_sector_ptr1+1	;Bump the high byte of all three
+		inc lz_sector_ptr2+1	;self-modified stream read operands
+		inc lz_sector_ptr3+1	;(inc leaves A, X, Y and the carry untouched)
+		rts
+
+!ifdef DYNAMIC_TABLES {			;Nothing is assembled here: lz_decrunch fills these $18 bytes from the stream
+_lz_moff_length				;Bytes 0-7
+_lz_moff_adjust_lo = * + 8		;Bytes 8-15
+_lz_moff_adjust_hi = * + 16		;Bytes 16-23
+}
--- a/loader/tools/doynamite1.1/krill/doynaxdecomp.s
+++ b/loader/tools/doynamite1.1/krill/doynaxdecomp.s
@ -0,0 +1,411 @@
+
+.macro SETDECOMPGETBYTE; patch the jmp operand at toloadbt: A = target low byte, Y = target high byte
+                sta toloadbt + $01
+                sty toloadbt + $02
+.endmacro
+
+;-------------------------------------------------------------------------------
+;Regular version of the Lempel-Ziv decompressor
+;-------------------------------------------------------------------------------
+lz_dst          = decdestlo             ;Decompression destination pointer.
+                                        ;Initialize this to whatever address
+                                        ;you want to decompress to
+
+lz_bits         = DECOMPVARS + $00      ;Shift register. Initialized to $80
+                                        ;for a new file
+
+lz_scratch      = DECOMPVARS + $01      ;Temporary zeropage storage
+lz_match        = DECOMPVARS + $02	;Source pointer for match
+
+lz_sector       = $0400                 ;The one-page buffer from which the
+                                        ;compressed data is actually read,
+                                        ;and which gets refilled by
+                                        ;lz_fetch_sector.
+
+.ifndef DYNLINK_EXPORT; the routine installed in toloadbt is identified by comparing only the low byte of its address, so these low bytes must differ
+    .if GETC_API
+                .assert .lobyte(getcmem) <> .lobyte(getcmemfin), error, "Error: Invalid code optimization"
+                .assert .lobyte(getcmem) <> .lobyte(getcmemeof), error, "Error: Invalid code optimization"
+    .endif; GETC_API
+    .if BYTESTREAM
+        .if LOAD_VIA_KERNAL_FALLBACK
+                .assert .lobyte(getcmem) <> .lobyte(getckernal), error, "Error: Invalid code optimization"
+        .endif; LOAD_VIA_KERNAL_FALLBACK
+                .assert .lobyte(getcmem) <> .lobyte(getcload),   error, "Error: Invalid code optimization"
+    .endif; BYTESTREAM
+.endif; !DYNLINK_EXPORT
+
+
+;-------------------------------------------------------------------------------
+;This is the user's hook to replenish the sector buffer with some new bytes.
+;
+;A and Y are expected to be preserved while carry must remain set on exit.
+;X should point to the first byte of the new data, e.g. zero for a full 256-byte
+;page of data or two to skip past the sector and track links.
+;
+;When fetching from a larger in-memory array rather than a single sector buffer
+;the lz_sector_ptr1..3 pointers will need to be patched up
+;-------------------------------------------------------------------------------
+lz_fetch_sector:
+		pha			; A is restored by the pla before the rts
+                sty save_y+1; Y is restored through the patched ldy operand at save_y
+
+.if BYTESTREAM
+                lda toloadbt + $01; low byte of the currently installed getbyte routine
+                cmp #.lobyte(getcload)
+                beq isloading
+
+    .if LOAD_VIA_KERNAL_FALLBACK
+                cmp #.lobyte(getcmem)
+                beq getblkfrommem
+                jsr getckernal; neither getcload nor getcmem: fetch a single byte
+                sta onebytebuffer
+                lda #.lobyte(onebytebuffer - $ff); with X = $ff the stream operands then address onebytebuffer itself
+                ldy #.hibyte(onebytebuffer - $ff)
+                ldx #$ff; the next inx in the decruncher wraps to 0 and calls in here again
+                bne setblockpntrs; jmp
+
+onebytebuffer:  .byte $00
+    .endif; LOAD_VIA_KERNAL_FALLBACK
+.endif; BYTESTREAM
+
+getblkfrommem:  ldx getcmemadr + $01; X = offset within the page
+                lda #$00
+                sta getcmemadr + $01; A = 0 is also the new low byte of the stream operands
+                ldy getcmemadr + $02; Y = page of the data
+                inc getcmemadr + $02; getcmemadr now addresses the start of the following page
+
+.if BYTESTREAM
+                jmp setblockpntrs; jmp
+
+isloading:      jsr maybegetblock
+		CHUNKCHECK
+                lda toloadbt + $01
+                cmp #.lobyte(getcload)
+                bne getblkfrommem; getbyte routine is no longer getcload after maybegetblock
+
+                ldx YPNTRBUF
+                inx
+                bne :+; branch if first block
+                jsr toloadbt
+                ldx YPNTRBUF
+                SKIPBYTE
+:               dex
+                lda #$ff
+                sta YPNTRBUF
+updateblkpntrs: lda getdbyte + $01; take the block address from the operand at getdbyte
+                ldy getdbyte + $02
+.endif; BYTESTREAM
+
+setblockpntrs:
+                sta lz_sector_ptr1 + $00; A = low byte, Y = high byte for all three stream read operands
+                sta lz_sector_ptr2 + $00
+                sta lz_sector_ptr3 + $00
+                sty lz_sector_ptr1 + $01
+                sty lz_sector_ptr2 + $01
+                sty lz_sector_ptr3 + $01
+
+save_y:         ldy #$00; operand patched on entry
+                pla
+                sec; carry must remain set on exit
+                rts
+
+.if BYTESTREAM
+maybegetblock:  lda toloadbt + $01; only act while getcload is the installed getbyte routine
+                eor #.lobyte(getcload)
+                beq dogetblock
+                rts
+dogetblock:
+    .if LOAD_UNDER_D000_DFFF & (PLATFORM <> diskio::platform::COMMODORE_16)
+                ENABLE_IO_SPACE_Y
+    .endif; LOAD_UNDER_D000_DFFF & (PLATFORM <> diskio::platform::COMMODORE_16)
+                BRANCH_IF_BLOCK_NOT_READY :++
+                jsr getnewblk
+                lda toloadbt + $01
+                cmp #.lobyte(getcmem)
+                bne :+
+                jsr getcmem; getbyte routine is getcmem after getnewblk
+:               rts
+:               clc; block not ready
+                jmp loadbytret
+.endif; BYTESTREAM
+
+toloadbt:       jmp getcmem; getbyte vector, operand is patched by SETDECOMPGETBYTE
+
+
+decompress:     CHUNKENTRY
+                jsr toloadbt; first two stream bytes: destination address, low byte first
+storedadrl:     sta lz_dst + $00
+                jsr toloadbt
+storedadrh:     sta lz_dst + $01
+                CHUNKSETUP
+                jsr lz_fetch_sector; set up the stream read operands and X, returns with carry set
+                ; fall through
+
+;-------------------------------------------------------------------------------
+;This is the main lz_decrunch function which may be called to decompress an
+;entire file.
+;
+;On entry and exit the X register points to the next available byte in the
+;sector buffer, in ascending order from $00 to $ff.
+;This implies that the initial sector must have already been fetched, and that a
+;file ending with X wrapped to $00 will have needlessly fetched an extra sector
+;(which may be taken advantage of when decoding a contiguous set of files.)
+;-------------------------------------------------------------------------------
+
+		;******** Start the next match/literal run ********
+
+lz_decrunch:	sec			;This is the main entry point. Forcibly
+_lz_type_refill:
+		jsr _lz_refill_bits	;fill up the bit buffer on entry
+		bne _lz_type_cont	;(BRA)
+
+		;Carry a low-byte overflow into the high-byte of the destination pointer.
+_lz_mfinish:	bcc *+4
+_lz_maximum:	inc lz_dst+1		;This is also used by maximum length
+					;literals needing an explicit type bit
+
+.if BYTESTREAM
+                jsr maybegetblock
+.endif; BYTESTREAM
+		CHUNKCHECK
+
+		;Literal or match to follow?
+		asl lz_bits
+_lz_type_cont:	bcc _lz_do_match	;Type bit 0: match
+		beq lz_decrunch		;Carry set but lz_bits now empty: refill (via the sec) and retry
+
+
+		;******** Process literal run ********
+
+		lda #%00000000		;Decode run length
+_lz_lrun_loop:	rol			;Shift the carry (initially the set type bit) into the length
+		asl lz_bits
+		bcs _lz_lrun_test	;Carry set: length complete, unless the bit buffer just ran empty
+_lz_lrun_back:	asl lz_bits		;Next payload bit into carry
+		bne _lz_lrun_loop
+
+		jsr _lz_refill_bits
+		bne _lz_lrun_loop	;(BRA)
+
+_lz_lrun_test:	bne _lz_lrun_gotten	;Bits left in the buffer, so the carry was a real stop bit
+		jsr _lz_refill_bits
+		bcc _lz_lrun_back	;Real bit was 0: keep reading payload bits
+
+_lz_lrun_gotten:
+		sta _lz_copy_cnt+1	;Store LSB of run-length
+		ldy #$00
+_lz_lcopy:
+lz_sector_ptr2	= *+1			;Copy the literal data.
+		lda lz_sector,x
+		inx
+		bne *+5			;Skip the jsr while X has not wrapped
+		jsr lz_fetch_sector	;Grab a new sector for the literal loop
+		sta (lz_dst),y
+		iny
+_lz_copy_cnt:	cpy #$00		;Operand is patched with the run length; a length of $00 copies 256 bytes
+		bne _lz_lcopy
+
+		;Time to advance the destination pointer.
+		;Maximum run length literals exit here as a type-bit needs
+		;to be fetched afterwards
+		tya
+		beq _lz_maximum
+		clc
+		adc lz_dst+0
+		sta lz_dst+0
+		bcc *+4
+		inc lz_dst+1
+
+.if BYTESTREAM
+                jsr maybegetblock
+.endif; BYTESTREAM
+		CHUNKCHECK
+
+		;One literal run following another only makes sense if the
+		;first run is of maximum length and had to be split. As that
+		;case has been taken care of we can safely omit the type bit
+		;here
+
+
+		;******** Process match ********
+
+_lz_do_match:	lda #%00100000		;Determine offset length by a two-bit
+_lz_moff_range:	asl lz_bits		;prefix combined with the first run
+		bne *+5			;length bit (where a one identifies
+		jsr _lz_refill_bits	;a two-byte match).
+		rol			;The rest of the length bits will
+		bcc _lz_moff_range	;then follow *after* the offset data
+
+		tay			;Y = three-bit selector (0-7) into the offset tables
+		lda _lz_moff_length,y	;Preloaded shift register for this offset class
+		beq _lz_moff_far	;Zero: no partial bits, only a full low-byte from the stream
+
+_lz_moff_loop:	asl lz_bits		;Load partial offset byte
+		bne *+9			;Bits left: skip the 7 byte refill sequence (needs lz_scratch in zeropage)
+		sty lz_scratch		;_lz_refill_bits clobbers Y, so keep the selector safe
+		jsr _lz_refill_bits
+		ldy lz_scratch
+
+		rol
+		bcc _lz_moff_loop	;Loop until the leading one of the preload drops out
+
+		bmi _lz_moff_near	;Bit 7 set: offset is shorter than 8 bits
+
+_lz_moff_far:	sta lz_scratch		;Save the bits we just read as the
+					;high-byte
+
+lz_sector_ptr3	= *+1
+		lda lz_sector,x		;For large offsets we can load the
+		inx			;low-byte straight from the stream
+		bne *+5			;without going through the shift
+		jsr lz_fetch_sector	;register
+
+;		sec
+		adc _lz_moff_adjust_lo,y
+		bcs _lz_moff_pageok
+		dec lz_scratch
+		sec
+_lz_moff_pageok:
+		adc lz_dst+0
+		sta lz_match+0
+
+		lda lz_scratch
+		adc _lz_moff_adjust_hi,y
+		sec
+		bcs _lz_moff_join	;(BRA)
+
+_lz_moff_near:
+;		sec			;Special case handling of <8 bit offsets.
+	 	adc _lz_moff_adjust_lo,y;We can safely ignore the MSB from
+;		sec			;the base adjustment table as the
+		adc lz_dst+0		;maximum base (for a 4/5/6/7 bit
+		sta lz_match+0		;length sequence) is 113
+		lda #$ff
+_lz_moff_join:	adc lz_dst+1
+		sta lz_match+1
+
+		cpy #$04		;Get any remaining run length bits
+		lda #%00000001
+		bcs _lz_mrun_gotten	;Selector 4-7 (two-byte match): no further length bits in the stream
+
+_lz_mrun_loop:	asl lz_bits
+		bne *+5
+		jsr _lz_refill_bits
+		rol
+		asl lz_bits
+		bcc _lz_mrun_loop
+		bne _lz_mrun_gotten
+		jsr _lz_refill_bits
+		bcc _lz_mrun_loop
+
+_lz_mrun_gotten:
+		tay			;A 257-byte (=>$00) run serves as a
+		beq _lz_end_of_file	;sentinel
+
+		sta _lz_mcopy_len	;Patch the cpy operand of the copy loop
+
+		ldy #$ff		;The copy loop. This needs to be run
+		lsr			;Bit 0 of the patched value into carry
+		bcc _lz_odd		;Bit 0 clear: odd byte count, enter the two-byte loop at its second half
+_lz_mcopy:	iny			;forwards since RLE-style matches can
+		lda (lz_match),y	;overlap the destination
+		sta (lz_dst),y
+_lz_odd:
+		iny
+		lda (lz_match),y
+		sta (lz_dst),y
+_lz_mcopy_len	= *+1
+		cpy #$ff		;Loop ends with Y = patched value, i.e. one byte more than that value has been copied
+		bne _lz_mcopy
+
+		tya			;Advance destination pointer
+;		sec			;Carry is already set by the matching cpy above, so this adds Y + 1
+		adc lz_dst+0
+		sta lz_dst+0
+		jmp _lz_mfinish
+
+
+		;******** Fetch some more bits to work with ********
+
+lz_sector_ptr1	= *+1
+_lz_refill_bits:
+		ldy lz_sector,x		;Next stream byte (clobbers Y)
+		sty lz_bits
+		inx
+		bne *+5			;Skip the jsr while X has not wrapped
+		jsr lz_fetch_sector	;Returns with carry set
+;		sec			;Callers are expected to arrive with carry set
+		rol lz_bits		;Carry goes into bit 0, the top bit of the new byte comes out in carry
+		rts
+
+_lz_end_of_file:
+                ; housekeeping to finish decompression
+.if BYTESTREAM
+    .if LOAD_VIA_KERNAL_FALLBACK
+                lda toloadbt + $01
+                cmp #.lobyte(getckernal)
+                beq decompfinished; getckernal is installed: skip the pointer write-back below
+    .endif; LOAD_VIA_KERNAL_FALLBACK
+                stx YPNTRBUF
+.endif; BYTESTREAM
+                stx getcmemadr + $01; X (current stream index) becomes the low byte again
+                dec getcmemadr + $02; undo the page advance done at getblkfrommem
+.if GETC_API
+                lda getcmemadr + $01; copy the resulting address into the operand at getcmemfin
+                sta getcmemfin + $01
+                lda getcmemadr + $02
+                sta getcmemfin + $02
+.endif; GETC_API
+
+                ; decompression finished
+decompfinished: CHUNKEOF
+                rts
+
+
+		;******** Offset coding tables ********
+
+		;This length table is a bit funky. The idea here is to use the
+		;value as the initial value of the shift register instead of
+		;keeping a separate counter.
+		;In other words we iterate until the leading one is shifted out.
+		;Then afterwards the bit just below it (our new sign bit) is set
+		;if the offset is shorter than 8-bits, and conversely it's
+		;cleared if we need to fetch a separate low-byte
+		;as well.
+		;The fact that the sign bit is cleared as a flag is compensated
+		;for in the lz_moff_adjust_hi table
+
+_lz_moff_length:
+		;Long (>2 byte matches)
+		.byte %00011111		;4 bits
+		.byte %00000011		;7 bits
+		.byte %01011111		;10 bits
+		.byte %00001011		;13 bits
+		;Short (2 byte matches)
+		.byte %01011111		;10 bits
+		.byte %00000000		;8 bits
+		.byte %00000111		;6 bits
+		.byte %00111111		;3 bits
+_lz_moff_adjust_lo:
+		;Long (>2 byte matches)
+		.byte %11111110		;1-16
+		.byte %11101110		;17-144
+		.byte %01101110		;145-1168
+		.byte %01101110		;1169-9360
+		;Short (2 byte matches)
+		.byte %10110110		;329-1352
+		.byte %10110110		;73-328
+		.byte %11110110		;9-72
+		.byte %11111110		;1-8
+_lz_moff_adjust_hi = *-2		;Only indices 2-5 are stored; the table base is moved back by two to compensate
+		;Long (>2 byte matches)
+;		.byte %11111111		;1-16 (unreferenced)
+;		.byte %11111111		;17-144 (unreferenced)
+		.byte %01111111		;145-1168
+		.byte %01111011		;1169-9360
+		;Short (2 byte matches)
+		.byte %01111110		;329-1352
+		.byte %11111110		;73-328
+;		.byte %11111111		;9-72 (unreferenced)
+;		.byte %11111111		;1-8 (unreferenced)
--- a/loader/tools/doynamite1.1/lz.c
+++ b/loader/tools/doynamite1.1/lz.c
--- a/loader/tools/doynamite1.1/readme.txt
+++ b/loader/tools/doynamite1.1/readme.txt
@ -0,0 +1,24 @@
+What changed compared to v1.0?
+· Literals are now copied forwards and thus the safety margin is low (typically 4 bytes or less) - Needless to say that the format is incompatible with v1.0
+· Selfextracting executables can now be generated by the cruncher
+· An optimal loading address can be generated, based on the estimated safety margin
+· No more .prg/.bin autodetection. Default input filetype is now .prg, raw binaries can be packed with --binfile
+· Iterative mechanism to find the best offset lengths and possibility to add the offset table to the output file
+· The simple version in /simple/ generates ~0.5% bigger files, but its depacker is only $e1 bytes in size and depacks around 5-10% faster. This is achieved by rearranging the encoding and forgoing the offset base adjustment.
+
+The file to be compressed is a required argument, in addition the following switches are available:
+· -o: Specify the output file name. Defaults to the input file name with an .lz extension.
+· --window: Specify the window size for use with streaming decompression.
+· --per-page: Force the windowed encoding for regular files. This is handy when combining both types of data.
+· --cut-input: Only compress a specific segment of the file.
+· --offset-lengths: Use an alternate set of offset lengths. For e.g. --offset-lengths 3/6/8/10:4/7/10/13
+· --emit-offset-tables: Generate the appropriate decruncher tables for the chosen offset lengths.
+· --statistics: Display some basic information about the types of matches made.
+· --best-offset-tables: Find the best offset lengths for optimal compression results. This is still slow and done lazily.
+· --binfile: Read from a raw binary without a preceding load address. By default the cruncher expects a .prg-style file.
+· --include-tables: Include the offset tables (24 byte without gaps) to the output file, straight after load- and depack-address.
+· --sfx: Spit out a selfextracting .prg
+· --level: Spit out a level-packed file, including a generated load- and depack-address (the load-address of the .prg).
+· --raw: Spit out the raw packed data without any additional bytes added before.
+
+In case of doubt, the readme.txt of the previous version might be a good read.
--- a/loader/tools/doynamite1.1/sfx.asm
+++ b/loader/tools/doynamite1.1/sfx.asm
@ -0,0 +1,232 @@
+!cpu 6510
+
+;EXAMPLE FOR OFFICIAL VERSION
+
+lz_sector      = ($ffff - (data_end-data) + 1) & $ff00	;Page in which the packed data starts once it has been moved up to end at $ffff
+
+decruncher = $00c2			;Run address of the relocated decruncher. NOTE(review): presumably written with four digits so ACME defaults to 16-bit addressing for it - confirm
+
+		* = $0801
+		;BASIC stub: line number $0539 (1337), SYS2061 ($080d, the sei right after these 12 bytes)
+		!byte $0b,$08,$39,$05,$9e,$32
+		!byte $30,$36,$31,$00,$00,$00
+
+		sei
+		inc $01			;NOTE(review): presumably changes the memory configuration so that RAM up to $ffff is accessible - confirm
+
+		ldx #$ff
+		txs			;Stack pointer = $ff
+
+		inx			;X = 0
+-
+		lda copy_start,x	;Copy 256 bytes starting at copy_start (decruncher code plus whatever follows it)
+		sta decruncher,x	;to decruncher
+		inx
+		bne -
+
+		ldy #(>(data_end-data)) + 1	;Number of pages to move
+-
+		;src should be data + packed_size
+		dex			;Each page is copied from index $ff down to $00
+src		lda data_end-$100,x
+dst		sta $ff00,x
+		txa
+		bne -
+
+		dec src+2		;Step both self-modified operands one page down
+		dec dst+2
+		dey
+		bne -
+
+		ldx #<($ffff - (data_end-copy_end) + 1)	;X = low byte of the moved copy_end address, used as the initial stream index
+		jmp go
+
+copy_start
+!pseudopc decruncher {
+		;fetch depack addr (use --add-depack-addr on lz)
+lz_match       !byte $00,$00		;Match source pointer
+lz_bits        !byte $00		;Bit shift register
+lz_scratch     !byte $00		;Temporary storage
+
+go
+		;******** Start the next match/literal run ********
+lz_decrunch
+		;XXX TODO for sfx, also pre-fill lz_bits directly instead of writing it into the stream?
+		sec			;This is the main entry point. Forcibly
+_lz_type_refill	jsr _lz_refill_bits	;fill up the bit buffer on entry
+		bne _lz_type_cont	;(BRA)
+
+		;Carry a low-byte overflow into the high-byte of the destination pointer.
+_lz_mfinish	bcc *+4			;Skips the two byte inc below (the +1 postfix selects 8-bit addressing)
+_lz_maximum	inc+1 lz_dst+1		;This is also used by maximum length
+					;literals needing an explicit type bit
+
+		;Literal or match to follow?
+		asl lz_bits
+_lz_type_cont	bcc _lz_do_match	;Type bit 0: match
+		beq _lz_type_refill	;Carry set but lz_bits now empty: refill and take the type bit from the new byte
+
+		;******** Process literal run ********
+
+		lda #%00000000		;Decode run length
+_lz_lrun_loop	rol			;Shift the carry (initially the set type bit) into the length
+		asl lz_bits
+		bcs _lz_lrun_test	;Carry set: length complete, unless the bit buffer just ran empty
+_lz_lrun_back	asl lz_bits		;Next payload bit into carry
+		bne _lz_lrun_loop
+
+		jsr _lz_refill_bits
+		bne _lz_lrun_loop	;(BRA)
+
+_lz_lrun_test	bne _lz_lrun_gotten	;Bits left in the buffer, so the carry was a real stop bit
+		jsr _lz_refill_bits
+		bcc _lz_lrun_back	;Real bit was 0: keep reading payload bits
+
+_lz_lrun_gotten
+		sta+1 _lz_copy_cnt+1	;Store LSB of run-length
+                ldy #$00
+_lz_lcopy
+lz_sector_ptr2	= *+1			;Copy the literal data.
+		lda lz_sector,x
+		inx
+		bne *+5			;Skip the jsr while X has not wrapped
+		jsr lz_fetch_sector
+lz_dst = * + 1				;The operand of this sta doubles as the destination pointer
+		sta $4000,y
+		iny
+_lz_copy_cnt	cpy #$00		;Operand is patched with the run length; a length of $00 copies 256 bytes
+		bne _lz_lcopy
+
+		;Time to advance the destination pointer.
+		;Maximum run length literals exit here as a type-bit needs
+		;to be fetched afterwards
+		tya
+		beq _lz_maximum		;maximum literal run, bump sector pointers and so on
+		clc
+		adc+1 lz_dst+0
+		sta+1 lz_dst+0
+		bcc _lz_do_match
+		inc+1 lz_dst+1
+
+		;******** Process match ********
+
+_lz_do_match	lda #%00100000		;Determine offset length by a two-bit
+_lz_moff_range	asl lz_bits		;prefix combined with the first run
+		bne *+5			;length bit (where a one identifies
+		jsr _lz_refill_bits	;a two-byte match).
+		rol			;The rest of the length bits will
+		bcc _lz_moff_range	;then follow *after* the offset data
+
+		tay			;Y = three-bit selector (0-7) into the offset tables
+		lda _lz_moff_length,y	;Preloaded shift register for this offset class
+		beq _lz_moff_far	;Zero: no partial bits, only a full low-byte from the stream
+
+_lz_moff_loop	asl lz_bits		;Load partial offset byte
+		bne *+9			;Bits left: skip the 7 byte refill sequence
+		sty lz_scratch		;_lz_refill_bits clobbers Y, so keep the selector safe
+		jsr _lz_refill_bits
+		ldy lz_scratch
+
+		rol
+		bcc _lz_moff_loop	;Loop until the leading one of the preload drops out
+
+		bmi _lz_moff_near	;Bit 7 set: offset is shorter than 8 bits
+
+_lz_moff_far	sta lz_scratch		;Save the bits we just read as the
+					;high-byte
+
+lz_sector_ptr3	= *+1
+		lda lz_sector,x		;For large offsets we can load the
+		inx			;low-byte straight from the stream
+		bne *+5			;without going through the shift
+		jsr lz_fetch_sector	;register
+
+;		sec
+		adc _lz_moff_adjust_lo,y ;y .. 2 .. 5? ?! necessary with a full lowbyte?!?!
+		bcs _lz_moff_pageok
+		dec lz_scratch
+		sec
+_lz_moff_pageok	adc+1 lz_dst+0
+		sta lz_match+0
+
+		lda lz_scratch
+		adc _lz_moff_adjust_hi,y
+		sec
+		bcs _lz_moff_join	;(BRA)
+
+_lz_moff_near
+;		sec			;Special case handling of <8 bit offsets.
+	 	adc _lz_moff_adjust_lo,y;We can safely ignore the MSB from
+;		sec			;the base adjustment table as the
+		adc+1 lz_dst+0		;maximum base (for a 4/5/6/7 bit
+		sta lz_match+0		;length sequence) is 113
+		lda #$ff
+_lz_moff_join	adc+1 lz_dst+1
+		sta lz_match+1
+
+		cpy #$04		;Get any remaining run length bits
+		lda #%00000001
+		bcs _lz_mrun_start      ;Sentinel check can be skipped in that case
+
+_lz_mrun_loop	asl lz_bits
+		bne *+5
+		jsr _lz_refill_bits
+		rol
+		asl lz_bits
+		bcc _lz_mrun_loop
+		bne _lz_mrun_gotten
+		jsr _lz_refill_bits
+		bcc _lz_mrun_loop
+
+		;XXX TODO only needed on near matches as offset = 1 in that case
+_lz_mrun_gotten	tay			;A 257-byte (=>$00) run serves as a
+		beq _lz_end_of_file	;sentinel
+_lz_mrun_start
+		sta _lz_mcopy_len	;Patch the cpy operand of the copy loop
+
+		ldy #$ff		;The copy loop. This needs to be run
+					;forwards since RLE-style matches can overlap the destination
+_lz_mcopy
+		iny
+		lda (lz_match),y	;Copy one byte
+		sta (lz_dst),y
+_lz_mcopy_len	= *+1
+		cpy #$ff		;Loop ends with Y = patched value, i.e. one byte more than that value has been copied
+		bne _lz_mcopy
+
+		tya			;Advance destination pointer
+;		sec			;Carry is already set by the matching cpy above, so this adds Y + 1
+		adc+1 lz_dst+0
+		sta+1 lz_dst+0
+		jmp _lz_mfinish
+
+lz_sector_ptr1	= *+1
+_lz_refill_bits	ldy lz_sector,x		;Next stream byte (clobbers Y)
+		sty lz_bits
+;		sec			;Callers are expected to arrive with carry set
+		rol lz_bits		;Carry goes into bit 0, the top bit of the new byte comes out in carry
+		inx
+		bne _lz_refill_done	;X has not wrapped: still inside the current page (explicit label, no anonymous '+' target exists here)
+
+lz_fetch_sector
+		inc lz_sector_ptr1+1	;Bump the high byte of all three
+		inc lz_sector_ptr2+1	;self-modified stream read operands
+		inc lz_sector_ptr3+1	;(inc leaves A, X, Y and the carry untouched)
+_lz_refill_done
+		rts
+
+_lz_end_of_file
+
+		dec $01			;Undo the inc $01 done by the startup code
+		cli
+		!byte $4c		;jmp opcode only; its two operand bytes are not assembled here, they are the two bytes that follow in memory
+
+_lz_moff_length = * + 2			;The tables start right behind the jmp operand and are not assembled here either
+_lz_moff_adjust_lo = _lz_moff_length + 8
+_lz_moff_adjust_hi = _lz_moff_length + 16
+
+}
+copy_end = * + 26			;26 = 2 (jmp operand) + 24 (three tables of 8 bytes); these bytes sit at the start of data
+data
+;!bin "d.lz",,2
+data_end
--- a/loader/tools/doynamite1.1/simple/decrunch.asm
+++ b/loader/tools/doynamite1.1/simple/decrunch.asm
@ -0,0 +1,237 @@
+;those will make the depacker ~5% faster but bloat it:
+;FAST_LITERAL_COPY = 1			;will increase size by 11 bytes
+;FAST_MATCH_COPY = 1			;will increase size by 11 bytes
+
+;-------------------------------------------------------------------------------
+;Regular version of the Lempel-Ziv decompressor
+;-------------------------------------------------------------------------------
+;lz_match	= $f9			;Match source pointer
+;lz_dst		= $fb			;Decompression destination pointer.
+;					;Initialize this to whatever address
+;					;you want to decompress to
+;
+;lz_bits	= $fd			;Internal shift register
+;
+;lz_sector	= $0400			;The one-page buffer from which the
+;					;compressed data is actually read,
+;					;and which gets refilled by
+;					;lz_fetch_sector
+
+
+;-------------------------------------------------------------------------------
+;This is the user's hook to replenish the sector buffer with some new bytes.
+;
+;A and Y are expected to be preserved while carry must remain set on exit.
+;X should point to the first byte of the new data, e.g. zero for a full 256-byte
+;page of data or two to skip past the sector and track links.
+;
+;When fetching from a larger in-memory array rather than a single sector buffer
+;the lz_sector_ptr1..3 pointers will need to be patched up
+;-------------------------------------------------------------------------------
+;lz_fetch_sector
+;		inc lz_sector_ptr1+1
+;		inc lz_sector_ptr2+1
+;		inc lz_sector_ptr3+1
+;		rts
+
+
+;-------------------------------------------------------------------------------
+;Typical usage
+;-------------------------------------------------------------------------------
+;		ldx #<source
+;		ldy #>source
+;		jsr lz_decrunch
+
+
+;-------------------------------------------------------------------------------
+;This is the main lz_decrunch function which may be called to decompress an
+;entire file.
+;
+;On entry and exit the X register points to the next available byte in the
+;sector buffer, in ascending order from $00 to $ff.
+;This implies that the initial sector must have already been fetched, and that a
+;file ending with X wrapped to $00 will have needlessly fetched an extra sector
+;(which may be taken advantage of when decoding a contiguous set of files.)
+;-------------------------------------------------------------------------------
+
+		;******** Start the next match/literal run ********
+
+lz_decrunch				;In: Y = high byte of the packed data address, X = index of its first byte
+		sty lz_sector_ptr1+1	;Patch the operand high byte of all three
+		sty lz_sector_ptr2+1	;self-modified "lz_sector,x" stream reads
+		sty lz_sector_ptr3+1
+
+		;fetch depack addr: the first two stream bytes become lz_dst (lo, hi)
+		jsr _lz_refill_bits	;used here only to read the next byte into Y;
+		sty lz_dst		;the lz_bits value it leaves behind is replaced
+		jsr _lz_refill_bits	;by the forced refill below
+		sty lz_dst+1
+
+		sec			;This is the main entry point. Forcibly
+_lz_type_refill	jsr _lz_refill_bits	;fill up the bit buffer on entry
+		bne _lz_type_cont	;(BRA) relies on the refill returning with Z clear
+
+		;Wrap the high-byte of the destination pointer.
+_lz_mfinish	bcc *+4			;skip the inc when the low-byte add did not carry
+_lz_maximum	inc lz_dst+1		;This is also used by maximum length
+					;literals needing an explicit type bit
+_lz_type_check
+		;Literal or match to follow?
+		asl lz_bits		;type bit -> carry; Z set once the buffer has run empty
+_lz_type_cont	bcc _lz_do_match	;carry clear = match; carry set = literal run (or empty buffer, see next line)
+		beq _lz_type_refill	;no more bits left, fetch new bits and reevaluate
+
+		;******** Process literal run ********
+
+		lda #$00		;Run length accumulator; carry is still set from the literal type bit
+-
+		rol			;Rotate the pending bit into the length (first pass: the leading 1 from carry)
+		asl lz_bits		;Stop/continue bit
+		bne *+5			;buffer not empty: skip the refill
+		jsr _lz_refill_bits
+		bcc _lz_lrun_gotten	;0 = run length is complete
+
+		asl lz_bits		;Next length data bit, picked up by the rol at the loop top
+		bne -
+		jsr _lz_refill_bits
+		bne -			;expected to always branch (Z clear after the refill)
+
+_lz_lrun_gotten
+		sta _lz_copy_cnt+1	;Store LSB of run-length
+		ldy #$00
+_lz_lcopy
+lz_sector_ptr2	= *+1			;Copy the literal data.
+		lda lz_sector,x
+		inx
+		bne *+5			;X wrapped to 0: step the stream pointers to the next page
+		jsr lz_fetch_sector
+		sta (lz_dst),y
+		iny
+_lz_copy_cnt	cpy #$00		;operand is patched with the run length above
+		bne _lz_lcopy
+
+		;Time to advance the destination pointer.
+		;Maximum run length literals exit here as a type-bit needs
+		;to be fetched afterwards
+		tya
+		beq _lz_maximum		;maximum literal run (Y wrapped to 0): bump lz_dst+1 and fetch a type bit
+		clc
+		adc lz_dst+0
+		sta lz_dst+0
+		bcc _lz_do_match	;no page crossing: a match follows directly, without a type bit
+		inc lz_dst+1
+
+		;One literal run following another only makes sense if the
+		;first run is of maximum length and had to be split. As that
+		;case has been taken care of we can safely omit the type bit
+		;here
+
+
+		;******** Process match ********
+
+		;check bit -> 0 (carry clear) -> short match (len 2)
+		;else long match
+_lz_do_match
+		lda #$01		;this could be made shorter by using the last bitfetch of the upcoming loop and restoring the carry again by a cmp #$02. Saves bytes, but makes things slower, as eof check is also done with all short matches then
+		asl lz_bits		;first length bit (where a zero identifies
+		bne *+5			;a two-byte match)
+		jsr _lz_refill_bits
+		bcc lz_get_offs		;all done, length is 2, skip further bitfetches
+-
+		asl lz_bits		;length data bit, rotated into A below
+		bne *+5
+		jsr _lz_refill_bits
+		rol
+		asl lz_bits		;continue bit: 0 = more length bits follow
+		bne *+5
+		jsr _lz_refill_bits
+		bcc -
+
+		tay			;A 257-byte (=>$00) run serves as a
+		beq _lz_end_of_file	;sentinel
+lz_get_offs
+		sta _lz_mcopy_len	;store length at final destination
+		lda #%11000000		;fetch 2 more prefix bits
+		rol			;previous bit is still in carry \o/
+-
+		asl lz_bits		;XXX TODO in ultra slim variant this could be a subroutine
+		bne *+5			;XXX TODO this code could also be called twice
+		jsr _lz_refill_bits
+		rol
+		bcs -
+
+		tay			;Y = 0..7: index into lentab
+		lda lentab,y
+		beq lz_far		;XXX TODO currently 8 and 9 bit long offsets carry the same value here, so if one wants to use both, one has to set the value for 8 bit long offsets
+					;to $ff and check things here with a cmp #$ff. Including the eor #$ff in lz_far or not will then solve the problem. This is faster though.
+-
+		asl lz_bits
+		bne *+5
+		jsr _lz_refill_bits
+		rol
+		bcs -			;loop until the first 0 bit of the lentab value has been shifted out
+		bmi lz_short		;bit 7 set: A is used as the offset low byte, with high byte $ff
+lz_far
+		eor #$ff		;negate
+		tay			;-> offset high byte (see tya below)
+lz_sector_ptr3	= *+1
+		lda lz_sector,x		;For large offsets we can load the
+		inx			;low-byte straight from the stream
+		bne lz_join		;without going through the shift
+		jsr lz_fetch_sector	;register
+		!byte $2c		;skip next two bytes
+lz_short
+		ldy #$ff		;XXX TODO GNAAA y is set twice to $ff in the case of short matches
+lz_join
+		adc lz_dst		;subtract offset from lz_dst
+		sta lz_match
+		tya			;hibyte
+		adc lz_dst+1
+		sta lz_match+1
+		ldy #$ff		;The copy loop. This needs to be run
+					;forwards since RLE-style matches can overlap the destination
+_lz_mcopy
+		iny
+		lda (lz_match),y	;Copy one byte
+		sta (lz_dst),y
+_lz_mcopy_len	= *+1
+		cpy #$ff		;operand is patched with the match length at lz_get_offs
+		bne _lz_mcopy
+
+		tya			;Advance destination pointer
+;		sec			;(carry is already set by the cpy above when the loop exits)
+		adc lz_dst+0
+		sta lz_dst+0
+		jmp _lz_mfinish
+
+		;******** Fetch some more bits to work with ********
+
+lz_sector_ptr1	= *+1
+_lz_refill_bits	ldy lz_sector,x		;Next stream byte -> Y (A is not touched by this routine)
+		sty lz_bits
+;		sec			;(carry is expected to be set already, e.g. by the sec at the main entry)
+		rol lz_bits		;Carry in becomes the end marker in bit 0; the first data bit leaves in carry
+		inx
+		bne +			;NOTE(review): the matching anonymous '+' label is expected right before the rts - verify it survived this diff rendering
+lz_fetch_sector
+		inc lz_sector_ptr1+1	;X wrapped to 0: advance all three patched stream pointers by one page
+		inc lz_sector_ptr2+1
+		inc lz_sector_ptr3+1
+
+_lz_end_of_file	rts
+
+lentab					;Offset decoding table, indexed by the 3 prefix bits gathered in lz_get_offs
+		;XXX TODO combine tables so that values used twice are eliminated? -> more classes can be used?
+		;XXX TODO best rearrange tables by using lookup table?
+
+		;short offset init values (index 0-3, reached when the first length bit was 0)
+		!byte %11011111		;3 offset bits from the bit buffer
+		!byte %11111011		;6 offset bits from the bit buffer
+		!byte %00000000		;no extra bits + one stream byte (8 bit offset)
+		!byte %10000000		;2 extra bits + one stream byte (10 bit offset)
+
+		;long offset init values (index 4-7, reached when the first length bit was 1)
+		!byte %11101111		;4 offset bits from the bit buffer
+		!byte %11111101		;7 offset bits from the bit buffer
+		!byte %10000000		;2 extra bits + one stream byte (10 bit offset)
+		!byte %11110000		;5 extra bits + one stream byte (13 bit offset)
--- a/loader/tools/doynamite1.1/simple/lz.c
+++ b/loader/tools/doynamite1.1/simple/lz.c
--- a/loader/tools/doynamite1.1/test.asm
+++ b/loader/tools/doynamite1.1/test.asm
@ -0,0 +1,77 @@
+!cpu 6510
+
+;EXAMPLE FOR OFFICIAL VERSION
+
+count = $cffe                  ;16-bit counter, incremented once per interrupt by irq below
+
+lz_match       = $f9
+lz_dst         = $fb
+lz_bits        = $fd
+
+lz_scratch     = $fe
+
+lz_sector      = $0400
+
+         * = $0900             ;the included depacker is assembled from $0900
+!src "decrunch.asm"
+
+         * = $0801
+         ;BASIC stub "SYS2061" (line-number bytes $39,$05 = 1337; 2061 = $080d, the sei right after the stub)
+         !byte $0b,$08,$39,$05,$9e,$32
+         !byte $30,$36,$31,$00,$00,$00
+
+         sei
+         lda #$35              ;NOTE(review): presumably the C64 RAM + I/O bank setup so that $fffe/$ffff below are RAM - confirm
+         sta $01
+         lda #$00
+         sta count             ;reset the interrupt counter
+         sta count+1
+         lda #$01
+         sta $d019
+         sta $d01a
+         lda #$7f
+         sta $dc0d
+         lda $dc0d
+         lda $d011
+         and #$7f
+         sta $d011
+         lda #$fa
+         sta $d012
+         lda #<irq             ;install irq as the handler in the $fffe/$ffff vector
+         sta $fffe
+         lda #>irq
+         sta $ffff
+         cli
+         jsr go                ;go ends in "jmp *", so the three lines below are never reached
+         sei
+         inc $d020
+         jmp *
+irq
+         dec $d019
+         inc count
+         bne +                 ;no low-byte wrap: skip the high-byte increment
+         inc count+1
+
+         rti
+go
+         ldx #<data            ;X = low byte, Y = high byte of the packed data address
+         ldy #>data
+
+         jsr lz_decrunch
+         inc $d020
+         sei
+         jmp *
+
+*=$4a00
+data
+         !bin "b.lz",,2        ;third !bin argument (2): skip the first two bytes of the file - presumably its load address
+;*=$6a38
+;data
+;         !bin "d.lz",,2
+;*=$3804
+;data
+;         !bin "ras.lz",,2
+;*=$4c74
+;data
+;         !bin "c.lz",,2
+
--- a/loader/tools/exomizer-3.1/changelog.txt
+++ b/loader/tools/exomizer-3.1/changelog.txt
@ -0,0 +1,293 @@
+2020-12-22
+Release of 3.1.0 source code and Win32 binaries.
+ Fixed bug in the sfxdecr.s affecting c128 reported by Fredrik Ramsberg.
+ Implemented split encoding support  (-E) in the 6502 decruncher.
+ Updated raw decrunchers for Zilog Z80 contributed by Antonio Villena.
+ Added raw decrunchers for Intel 8080 contributed by Ivan Gorodetsky.
+ Added raw decrunchers for ARM 32bit thumb2 contributed by ALeX Kazik.
+ Implement optional forward decrunching properly in exodecrunch.s and as a
+  consequence of this also remove krilldecr.s
+ Implement sequence read pointer blacklist to be able to avoid reading from
+  specified memory areas like hardware registers (Experimental feature),
+  Requested by Oziphantom.
+ update dasm and add acme exodecrunch sources
+ Improve compression by previous offset reuse, -P-32 to disable
+ Extended -e and -E to be able to reuse/calculate shared header/table info
+  for multiple crunched files. (Experimental feature, no direct support in
+  exodecrunch.s yet), Requested by Lazycow
+
+2019-01-05
+Release of 3.0.2 source code and Win32 binaries.
+ Add documentation about level, mem and raw outfile structure.
+ Fix raw -d -r combination to reverse the inbuffer before decrunch.
+ Fix -P0 literal sequences bug in exodec.c, Bug reported by Nino Porcino.
+ improved cruncher tuning for slightly improved compression on average.
+ Added sfx support for a new target, BBC Micro B (-t 0xbbcb).
+ Added -P+16 awareness to exodecrunch.s
+ Fix bucket optimization to be -T+4 aware in search.c, Bug reported by
+  Ciccioriccio.
+ Fix absolute offset overflow bug in sfxdecr.s. Bug reported by Comos.
+
+2018-08-10
+Release of 3.0.1 source code and Win32 binaries.
+ Add missing clc to the new 6502 decrunchers. Bug reported by Soci.
+
+2018-05-16
+Release of 3.0.0 source code and Win32 binaries.
+ Up to almost 50% faster 6502 decruncher (sfx and stand-alone). However the
+  bitstream format has changed in an incompatible way. See exo30info.txt for
+  more info.
+
+2018-03-08
+Release of 2.0.11 source code and Win32 binaries.
+ No change from preview 3
+2018-03-06
+Release of 2.0.11 preview 3
+ Improved desfx c64 IO banking handling and automatic decrunched area
+  detection.
+ sfx: Detected in-file type exported making target auto detection possible for
+  some sfx bin and sfx <addr> (not sfx basic or sfx sys).
+ sfx: Applesingle file write now writes the header descriptors in ascending
+  order, suggested by Oliver Schmidt.
+
+2018-02-20
+Release of 2.0.11 preview 2
+ Fix compression improvement loop crashing bug exposed by the thread safety
+  changes for library compilation, reported by Lasse Öörni.
+ Support the AppleSingle format instead of the 4 byte cc65 header, suggested
+  by Oliver Schmidt.
+
+2018-02-11
+Release of 2.0.11 preview 1
+ Add i_raw assembler directive for sfx to generate header less output.
+ Make sfx 0xa2 fall back to 4 byte Apple II cc65 headers for in-files instead
+  of prg-headers.
+ Add sfx bin directive as a shortcut for creating sfx files with as little
+  impact on memory outside of the decrunched area as possible. Perfect for
+  crunching Apple II binary files.
+
+2017-12-25
+Release of 2.0.10 source code and Win32 binaries.
+ Fix broken things in rawdecrs folder since 2.0.9
+ Add PET 4032 as sfx target from the nanoflite github fork.
+
+2017-12-16
+Release of 2.0.10 preview 3
+ Fix core dump when using max_passes=1 caused by static var removal.
+ Change zp-usage of the sfx-decr for plus4/c16 to avoid overwriting current
+  device number address.
+
+2017-12-09
+Release of 2.0.10 preview 2
+ Add used encoding to the crunch_info struct returned by core crunch func.
+ Rework core compression core to not use local static vars to simplify use
+  when compiled into a library.
+ rework -C into a generic favor speed flag and make it disable the crunch
+  result changes too.
+ Improve crunch result slightly by also consider same length sequences at
+  larger offsets too (but by doing this also slowing it down).
+
+2017-07-08
+Release of 2.0.10 preview 1
+ Add a brief output mode enabled with -B, suggested by both Daniel Hotorp and
+  Bacchus independently.
+ Display progress indication only on ttys and not when output is redirected,
+  inspired by input from  Daniel Hotorp.
+ Make it possible to add offset and length to plain and prg file loading.
+ Improve sfx memory layout dump, suggested by Steffen Görzig.
+ sfx -Di_decr_table=2 should disable i_irq_during, reported by Steffen Görzig.
+ Updated z80 decrunchers, now with License information.
+ More portable by not using negative exit codes.
+ Add new keyword systrim to the sfx command. It behaves like the sys keyword
+  but will also remove the sysline from the loaded infile.
+ Exit with an error if the parsing of the sfxdecr.s fails. This might happen
+  with user provided assembly given by the options -x -X -s -f, reported by
+  Stefan A. Haubenthal.
+ Change -mtune flag to make exomizer build on more platforms "out of the box".
+
+2015-09-21
+Release of 2.0.9 source code, Win32 and DOS binaries.
+ Fix gcc-compiler warnings.
+ sfx decr src comments echoed to stdout, reported by iAN CooG, fix by soci.
+ NULL pointer dereference crash, reported by Flavio, fix by soci.
+
+2015-09-20
+Release of 2.0.8 source code, Win32 and DOS binaries.
+ Fix bug reported and analyzed by Adrien Destugues. The ECHO token in asm.y
+  collides with the flex ECHO macro. The cause is that Bison 2.3a and newer
+  stopped to generate defines for the declared tokens. To resolve this the ECHO
+  token has been renamed to ECHO1.
+ Add -E flag to not write the encoding to the outfile.
+ Remove max nr of chunks limit from the chunkpool allocator.
+ Enforce match max_len everywhere, bug reported by Zik / Futurs.
+
+2013-04-14
+Release of 2.0.7 source code, Win32 and DOS binaries.
+ Bugfixed commodore sfx targets to automatically disable irq when decrunching
+  over system areas. This together with moving the table to zero-page,
+  -Di_table_addr=0x2, allows decrunching $0200-<end of mem> without corruption
+  for all commodore targets except for the vic20-configs without a 3kB memory
+  expansion since they have a memory hole at $0400-$1000.
+ Bugfixed z80 decrunchers from Metalbrain.
+ Bugfixed sfx c16/plus4 target where the default irq could corrupt memory
+  while decrunching data that covers $07f6-$0800, reported by Luca/FIRE.
+ Bugfixed sfx c16/plus4 target where the default decrunch effect could corrupt
+  memory while decrunching data that covers $0be7, reported by Luca/FIRE.
+ Added feature to sfx-mode that complains if the data is too big to fit in the
+  available memory of the selected target, suggested by Luca/FIRE.
+ Added c16 target, -t 4, like -t4 but with smaller memory, suggested by
+  Luca/FIRE.
+
+2013-01-27
+Release of 2.0.6 source code, Win32 and DOS binaries.
+ New improvements to the z80 decrunchers, again smaller and faster.
+
+2013-01-12
+Release of 2.0.5 source code, Win32 and DOS binaries.
+ Add -C and -M <length> flags that trades crunch result for speed. It is now
+  possible to really speed up crunching, even for "worst case"-type files.
+ Now skips the DAG traversing of the final pass if the encoding hasn't changed
+  since the previous pass.
+
+2012-08-16
+Release of 2.0.4 source code, Win32 and DOS binaries.
+ Bug in z80 decrunchers fixed by Metalbrain. Thanks goes to Hervé Monchatre
+  and Tim Riemann (Octoate) for reporting.
+ Implement sfx basic for the Apple II target.
+ Improve documentation slightly for the sfx and level commands.
+
+2012-03-25
+Release of 2.0.3 source code, Win32 and DOS binaries.
+ z80 decrunchers improved by Antonio Villena, now smaller and faster.
+
+2011-08-19
+Release of 2.0.2 source code, Win32 and DOS binaries.
+ Added 6809-decruncher contributed by Edouard Forler.
+ Fix language errors in the documentation. Thanks to Csabo/LOD.
+ Remove bogus printout about the default decrunch effect when using a custom
+  decrunch effect. Bug reported by Csabo/LOD.
+ Fix bug that prevented the correct error message from showing when trying to
+  combine a basic start and a non rom config for the sfx command.  Bug reported
+  by iAN CooG.
+
+2011-02-06
+Release of 2.0.1 source code, Win32 and DOS binaries.
+ Fix bug in log.c that caused the desfx command to loop if the log level was
+  lower than debug. Bug report by iAN CooG.
+2011-01-22
+Release of 2.0 source code, Win32 and DOS binaries.
+ Fix bug in sfx decruncher setup where data was copied unnecessarily. Triggered
+  by using -Di_load_addr, reported by nbla000
+ Add i_line_number symbol to be able to change the default line number of the
+  basic line of the sfx decruncher. Feature suggested by Chicken
+ 6502 emulator core now features memory read/write callbacks
+ Add desfx -e flag to override the automatic entry point detection, feature
+  suggestion with patch by iAN CooG/HokutoForce. (Patch reworked somewhat,
+  error handling added)
+ Fix desfx handling of files that decrunch to and including 0xffff, Bug report
+  with patch by iAN CooG/HokutoForce. (Patch reworked somewhat, end addresses
+  are exclusive everywhere else)
+ Fix argument handling of desfx, Bug report + patch by iAN CooG/HokutoForce.
+
+2008-09-08
+Release of 2.0beta7 source code, Win32 and DOS binaries.
+ Refactored some functions from exo_main.c and exo_helper.c into a new file,
+  exo_util.c so the testprogram could use them too.
+ Fixed a bug in exo_main where the sys/call basic token was mixed up with
+  the basic_txt_start. This confused the sfx sys sub-command.
+ Fixed a bug in krilldecr.s. The handling of literal sequences longer than
+  256 bytes was broken.
+ Added a testprogram to test that the decrunchers in the exodecrs folder
+  works. This feature uses the 6502 emulation core to run test programs. It
+  tests literal sequences longer than 256 bytes and run lengths longer than 256
+  bytes for all four decruncher variants, backward, forward, streaming and
+  chunk streaming.
+ Added a z80 decruncher contributed by Metalbrain.
+ Added an experimental desfx command that should be able to decrunch sfx
+  crunched files. Uses the 6502 emulation core to run the sfx-decrunchers.
+ Added a 6502 emulator core.
+ Fix rare situation when sfxdecr.s failed to assemble.
+ Improve the help text when no sub-command is given.
+ Improve the handling of ROM/RAM-banking and NMIs for the atari target in the
+  sfx decruncher.
+ Fix broken op definition ldy abs,x in the sfx assembler, bug reported by
+   Stefano Tognon.
+
+2007-05-13
+Prerelease of 2.0beta7 source code, Win32 and DOS binaries.
+ Add loading of Oric tap files to the sfx command.
+ Add support for microcontroller RAM to Oric sfx target.
+ Add flags to customize enter and exit code for the sfx sub-command.
+ The level and mem sub-commands now output more info about the generated file.
+
+2007-01-28
+Release of 2.0beta6 source code, Win32 and DOS binaries.
+ Fix c128 target bugs in the sfx command
+ Add an experimental Oric 1 target to the sfx command.
+
+2006-10-08
+Release of 2.0beta5 source code, Win32 and DOS binaries.
+ Add a decruncher able to decrunch forwards, exodepack.s, contributed by
+  Krill/Plush.
+ Add support for forward crunching to the mem and level sub commands.
+ Remove broken usage of membuf_append that breaks on big-endian targets. Bug
+  reported by MagerValp.
+ Add a dasm version of the exodecruncher source.
+
+2006-07-08
+Release of 2.0beta4 source code, Win32 and DOS binaries.
+ Change my email address (again).
+ Added option to the Apple target to disconnect the loaded DOS.
+ Fix length field error in generated Apple II file header.
+ Don't use setjmp/longjmp due to obscure bug when building for mingw with -O3
+  -fomit-frame-pointer. Bug reported by iAN CooG/HokutoForce.
+ Add multipass functionality to the assembler in order to be able to do more
+  complex things in the decruncher source.
+ fix the sfx decruncher to work for a wider range of i_load_addr values.
+  Bug reported by iAN CooG/HokutoForce.
+ Add optional offset,len for raw command file reading.
+
+2005-11-14
+Release of 2.0beta3 source code, win32 and dos binaries.
+ i_ram_exit bug for the C64 target found by iAN CooG/HokutoForce fixed.
+ Effect shorthands -n for no effect, -x1 for Accumulator based border flash,
+  -x2 for X-register based border flash and -x3 for Y-register based border
+  flash. -x<fast effect assembler fragment> -X<slow effect assembler fragment>
+ removed the i_fast_effect symbol. Now border effects are fast and blinking
+  char is slow.
+ Added flag -x<effect assembler fragment> to the sfx command.
+ Improved docs about the mem command and assembler symbols.
+ fixed spelling bug in i_effect=2 mode. Bug found by iAN CooG/HokutoForce.
+
+2005-11-02
+Release of 2.0beta2 win32 and dos binaries.
+ added results for canterbury and calgary corpora.
+ beginning a documentation section in exo20info.txt
+ changed the exoraw binary into a sub command of exomizer.
+ renamed symbols i_(ram|irq)_on_(entering|exit) to i_(ram|irq)_(enter|exit)
+ renamed symbol i_config_effect to i_effect
+ added symbols i_(ram|irq)_during.
+ added new sfx target -t 168 ($a8), The Atari 400/800 XL/XE computer family.
+  The sfx file is written as a xex-file.
+ added ability to autodetect and read Atari Xex-files.
+ compression results should be exactly as in 1.1.5 now (if the -c flag is used).
+
+2005-04-10
+replaced 2.0beta1
+ tuned the cruncher for better compression, should be on par with 1.1.5 now.
+ modified the usage of the symbol i_ram_on_exit
+ documented symbol i_table_addr
+ added symbol i_irq_on_exit
+
+2005-04-06
+replaced 2.0beta1
+ fixed +4 bank optimization in sfx decruncher
+ fixed bug in non-literal-sequence sfx decruncher that caused
+  the decrunched data to be offset in memory.
+
+2005-04-03
+replaced 2.0beta1
+ fixed -Di_config_effect
+
+2005-04-02
+initial release of 2.0beta1 no source.
--- a/loader/tools/exomizer-3.1/exo20info.txt
+++ b/loader/tools/exomizer-3.1/exo20info.txt
@ -0,0 +1,674 @@
+Exomizer 2.0
+
+FEATURES
+
+o  sfx mode:
+   1) Several different decrunch effects.
+   2) Complete user control over memory and IRQ configuration.
+   3) Can start BASIC programs properly with built in trampoline by using the
+      command 'sfx basic' for the Commodore, Apple II and Oric targets.
+   4) The decruncher code is assembled on the fly for maximum flexibility by an
+      embedded assembler.
+   5) Will adapt the decruncher to include only features used by the compressed
+      stream to keep the size down.
+   6) Supports the c64, c128 and the vic20 (in several configurations).
+   7) The Atari 400/800 XL/XE family is supported as sfx target 168.
+   8) Can generate relocated sfx targets without BASIC line.
+   9) The Apple ][+ (and //e) is supported as sfx target 162.
+  10) The Oric 8-bit family is supported as sfx target 1.
+  11) The Commodore PET 4032 is supported as sfx target 4032.
+
+o  Reads Atari xex-files and auto detects the RUNAD vector if the sys keyword
+   is given to the sfx command.
+
+o  Can load plain files using the <filename>@<address> syntax.
+
+o  Has a literal sequence escape mechanism that detects and handles sequences
+   of uncrunchable data properly.
+
+o  The level mode of exomizer crunches each in-file separately and appends
+   them to the out-file.
+
+o  raw mode:
+   1) Crunches plain files instead of .prg files.
+   2) Crunches forward or backwards instead of just backwards.
+   3) Also decrunches.
+   4) Handles files larger than 64k. The Canterbury and Calgary corpora
+      are now possible to crunch.
+   5) Comes with two different C-source decrunchers, one thread safe forward
+      pull decruncher and one low-level backwards push decruncher, perfect to
+      base an assembler port on.
+
+o  desfx mode that features decrunching of sfx crunched files.
+
+o  Contributed decruncher source (only 2.0 compatible so far)
+   1) z80 decrunchers by Metalbrain, Antonio Villena. (rawdecrs/z80/*)
+   2) 6809 decruncher by Edouard Forler.  (rawdecrs/6809/*)
+
+-------------------------------------------------------------------------------
+-- A beginning of something like documentation --------------------------------
+-------------------------------------------------------------------------------
+A note about addresses:
+An address in this context is a positive 16-bit value [0 - 65536]. It can be
+given in decimal, hexadecimal (prefixed with $, & or 0x) or octal (prefixed with
+0) notation. Examples: 3, 54, $3456, 0x1111, 06543.
+
+Please be aware that in some command line shells $ and & have special meaning
+if used unescaped. If you use $ or & as hexadecimal prefix and see strange
+behaviour from exomizer you might be bitten by this. Try 0x instead and see if
+things improve.
+
+All addresses that end an interval of any kind, printed by exomizer or given by
+the user as an argument, are exclusive. In other words they point to the
+address just following the last byte of the memory area the interval covers.
+
+Exomizer has four sub commands: mem, level, sfx and raw. They all compress
+data. They share the following option flags:
+----------------
+-c      This flag enables 1.x compatibility mode. It disables the use of the
+        literal sequence escape mechanism. It will make the output files
+        compatible with Exomizer v1.x.
+
+-C      This flag increases crunching speed and as a side effect also reduces
+        the crunch result. This flag is for the impatient developer.
+
+-e <encoding parameters>|@<encoding file name>
+        Uses the given encoding parameters for crunching instead of trying to
+        calculate an optimal set. The encoding must be in the following format:
+        xxxxxxxxxxxxxxxx,xxxx,xxxxxxxxxxxxxxxx,xxxxxxxxxxxxxxxx
+        where each x must be a hexadecimal character, 0-9 or A-F.
+        The Exomizer prints the encoding parameters used when writing the
+        output file in this format in order to allow for easy copy and paste.
+        Please note that this option sets the initial encoding parameters and
+        not the encoding parameters used in the output file. To set the
+        encoding parameters of the output file this option must be used in
+        combination with the -p 1 option.
+        If the encoding starts with a '@' then the rest will be used as a file
+        name to read the encoding from. It can be used to read encoding files
+        written by the -E flag described below. Please note that the -P and -T
+        flags must be the same for this to work since the format is different
+        for different flag combinations.
+
+-E      This flag enables an experimental feature for encoding reuse between
+        multiple files. Please note that there is no direct support for
+        decrunching these files yet apart from the raw -d command and in the
+        6502 decruncher exodecrs/exodecrunch.s (when enabled). It changes the
+        behavior of exomizer in the following ways:
+        1) It won't include the encoding in the compressed files.
+        2) The compressed files will be named as the outfile (-o) with an
+           appended number. ".00" for the first file, ".01" for the second and
+           so on.
+        3) The outfile (-o) will be used to write a file containing only the
+           encoding.
+        4) If more than one in-file is given they will all be compressed using
+           the same encoding which will be optimal for those files in
+           combination.
+        5) When used with -E, the mem subcommand limits the -l setting to only
+           accept the value "none". It will also change the default from "auto"
+           to "none".
+
+-m <offset>
+        Limits the maximum sequence offset used by the cruncher to <offset>.
+        The default limit is an offset of 65535 bytes. A smaller offset
+        increases the crunching speed but reduces the crunch result. If a
+        circular buffer decruncher is used it is important that the maximum
+        offset doesn't exceed the buffer size.
+
+-M <length>
+        Limits the maximum sequence length used by the cruncher to <length>.
+        The default limit is a length of 65535 bytes. A smaller length
+        increases the crunching speed but reduces the crunch result.
+
+-o <out-file>
+        Sets out-file name.
+
+-p <passes>
+        Limits the maximum number of optimization passes the cruncher uses when
+        calculating the encoding parameters. default is 65535. However the
+        cruncher will always stop when no further improvement to the crunch
+        result is made.
+
+-T <value>
+        Bit-field that deactivates bit stream traits. Valid values are 0 - 7.
+        0 bit: Sequences with length 1, 0=use (default), 1=deactivate
+        1 bit: Sequences with lengths where the low byte is 1, 2 or 3 and the
+               high byte is > 0, 0=use (default), 1=deactivate
+        2 bit: Literal sequences, same as -c, 0=use (default), 1=deactivate
+
+-P <value>
+        Bit-field that disables bit stream protocol modifications. A value of
+        0 makes the bit stream compatible with exomizer 2.0. You can prefix the
+        value with a + or - to set/clear the bits the value represents. Valid
+        values are 0 - 63.
+        0 bit: Big endian bit orientation, 1=enable (default), 0=disable
+        1 bit: Copy byte when reading > 7 bits, 1=enable (default), 0=disable
+        2 bit: Implicit first literal byte, 1=enable (default), 0=disable
+        3 bit: Align bit stream to stream start, 0=disable (default), 1=enable
+        4 bit: Four offset decoding tables, 0=disable (default), 1=enable
+
+-q      Enables quiet mode which will disable display output.
+
+-V      Enables brief mode which will disable most display output.
+
+-v      Displays Exomizer version, email address for support and the usage
+        license.
+
+--      Will make all arguments that follow be treated as non-options, even if
+        they begin with a - character.
+
+-?      Displays a help screen that briefly describes the command line syntax
+        and the valid options. Also for the sub command if one is given.
+
+-------------------------------------------------------------------------------
+-- Information relevant for the mem, sfx and level sub commands ---------------
+-------------------------------------------------------------------------------
+
+The mem, level and sfx commands all crunch files backwards. The files are
+loaded into a 16-bit address space and are also limited in size to that 16-bit
+address space. The format of the input files can be prg files, Atari xex
+files, Oric tap files, AppleSingle files, BBC Micro Standard Archive Format
+file tuples or located plain files (plain files that are given an address to
+load to.) Please note that all input file formats are valid for all sfx
+targets, which can be a bit unexpected.
+
+ Plain raw files are loaded by adding an address to it using the @-character.
+It is also possible to specify an optional offset and optional length if only a
+part of the file is to be loaded. Like addresses, the offsets and the lengths
+can be given in either decimal or hexadecimal notation using either 0x or $
+prefixes. The offset can also be negative and will then be applied from the end
+of the file instead of from the start.
+
+<file name>@<load address>[,[<offset>][,[<length>]]]
+
+* To load a raw file to 0x3456, append @0x3456 to the file name.
+* To load a raw file to 0x0400 and skip the 16 first bytes and read the
+  following 37 bytes, append @0x400,16,37 to the file name.
+* To load the first 256 bytes of a raw file to 0x0900, append @$900,,256 or
+  @$900,0,256 to the file name.
+
+ To load a prg file to the address contained in its prg-header you simply add
+the file name to the command line. You can append an optional alternative load
+address to the file name to override the load address in the header. And like
+for the raw files it is also possible to specify an optional offset to start
+from in the file and a length.
+
+<file name>[,[<new load address>][,[<offset>][,[<length>]]]]
+
+Examples:
+* To load a prg file to 0x3456, append ,0x3456 to the file name.
+* To load a prg file to its default address and skip the 16 first bytes, append
+  ,,16 to the file name.
+* To load the last 254 bytes of a file to 0x3450, append ,0x3450,-0xfe to the
+  file name.
+
+ Relocation does not work for xex or tap files. A relocated xex or tap file
+will simply be treated as if it was a relocated prg file.
+
+ When given multiple input files the sfx and mem commands load all files to
+where they are located and then crunch the covered memory area. Any unused
+space between files will be zero-filled. Data segments in xex-files are loaded
+sequentially in the same way. INITAD segments in xex-files are ignored.
+
+ The level command, on the other hand, loads each input file separately,
+crunches it and then appends the crunched data to the output file.
+
+ The auto detection of xex or Oric tap files is not perfect. Prg files that
+load at $ffff or $1616 will be wrongly detected as xex or Oric tap. To disable
+the auto detection relocate the prg files to the same address they normally
+load to.
+
+-------------------------------------------------------------------------------
+-- The mem sub command --------------------------------------------------------
+-------------------------------------------------------------------------------
+ The mem command generates files that are used for decrunching from memory.
+Normally these files are linked into the program images either by machine code
+monitors or by assembler directives like incbin.
+
+ The mem command writes its output in prg format. It has the following option:
+
+-l <address>|none
+        Sets the load address of the output prg file to <address>. If the
+        string none is given the output will be a plain file. Defaults to the
+        address that gives the crunched data as much overlap with the
+        decrunched data as possible but still allow for in-place decrunching.
+        However, the file doesn't need to be decrunched from that address. Any
+        address that doesn't cause data being decrunched to overwrite data yet
+        to be decrunched will work.
+-f      Crunch the file forward instead of backwards (the default). This
+        means that in this mode the read and write pointers move forward in
+        memory while decrunching. The exodecrunch.s file supports optional
+        forward decrunching but defaults to backwards. Read the comments in the
+        file for more details on the available options.
+
+-------------------------------------------------------------------------------
+-- The level sub command ------------------------------------------------------
+-------------------------------------------------------------------------------
+
+ The level command writes its output file so that the crunched bytes are
+returned in correct order for decrunching on the fly while streaming in the
+bytes from secondary storage.
+
+-f      Crunch the file forward instead of backwards (the default). This
+        means that in this mode the write pointer moves forward in memory while
+        decrunching. The exodecrunch.s does not support forward decrunching.
+        For this feature to work you'll have to use Krill's contributed
+        decruncher.
+
+-------------------------------------------------------------------------------
+-- The sfx sub command --------------------------------------------------------
+-------------------------------------------------------------------------------
+
+ The sfx command generates a runnable stand alone in memory decrunching file.
+Its first argument must always be the run address. It may be given as an
+address or as one of the following strings: sys, systrim, bin or basic.
+
+ If the run address is an actual address then it will be used as the target
+address of a jmp instruction executed after the decrunching.
+
+ If the run address is the string sys then the run address will be
+auto detected by scanning the area of the basic start for a SYS (or CALL) BASIC
+command.
+
+ The keyword systrim behaves just like keyword sys but it also excludes the
+memory area that contains the SYS BASIC line from being included in the
+crunched file. This is very handy for the unexpanded VIC-20 target which
+normally can't decrunch sfx-ed programs residing at the BASIC start since it
+has a memory hole that will overlap with the crunched data. By using systrim,
+the start of decrunched data will be moved a few bytes and by that also move
+the crunched data out of the memory hole.
+
+ If the run address is the string bin then the run address will be detected
+from the in-file. If no in-file is given that contains any run address then it
+will be set to the start address of the combined memory area of the loaded in-
+files. An implicit -Di_load_addr=<A> will be used as well so the load and run
+addresses of the generated file will be A too. The generated file will not
+contain any basic stub.
+
+ If the run address is the string basic then the computer will run a BASIC
+program after decrunch. This is not yet implemented for the Atari target.
+The Oric target supports BASIC start for the Oric 1 and Atmos computers but not
+for the Telestrat.
+
+ The run BASIC mode takes up to three optional addresses. They are in order:
+start of BASIC program, end of BASIC program and highest address used by
+BASIC.
+
+$ exomizer sfx basic[,<start>[,<end>[,<high>]]]
+
+ The start of BASIC, <start>, is the address where the BASIC program starts in
+memory. For many but not all targets it defaults to $0801.
+
+ The end of BASIC, <end>, is the address where the BASIC variables start in
+memory. It defaults to the address where the BASIC program ends.
+
+ The highest address used by BASIC, <high>, is the address where the BASIC
+variables end. Its default value depends on the target.
+
+ The BASIC interpreter will write all variables, strings and arrays between the
+<end> address and the <high> address.
+
+ Normally these addresses are auto detected and/or defaulted. However, if non
+standard settings are needed they can be changed. Examples of such situations
+could be when a BASIC program needs to be run at a non standard address and/or
+some graphic needs to be protected from BASIC variables.
+
+ The output of the sfx command is a runnable program file that decrunches
+itself in memory when run. If an explicit run address or the string bin is
+given then some targets like Atari 8-bit, Oric 1, BBC Micro B and Apple II will
+be auto detected based on the format of the in files. For other situations or
+if auto detection fails then the target defaults to C64 unless the -t <target>
+option is given. This option can also be used to override the target auto
+detection if necessary.
+
+The output file format depends on the target used:
+
+ For all Commodore machines the output is a runnable prg-file that starts
+with a BASIC line.
+
+ For the Atari target the output is an auto running xex-file.
+
+ For the Apple target the output is an AppleSingle file that almost always
+is a PRODOS bin file (file-type 6). But if the loaded file was an AppleSingle
+system file then it will be a system file (file-type $ff). However if the run
+address is the string basic, then it will be a PRODOS Applesoft basic file.
+(file-type $fc)
+
+ For the BBC target the output is a BBC Micro Standard Archive Format file
+tuple. (http://archive.retro-kit.co.uk/bbc.nvg.org/std-format.php3.html)
+
+ For the Oric family the output is an Oric tap file.
+
+ Please note that the sfx command doesn't support extended memory since it
+can't decrunch into banked memory.
+
+ The sfx command has the following options:
+
+-t<target>
+        Sets the decruncher target. The possible targets are:
+           1: Oric 1, Oric Atmos and Telestrat
+          20: Commodore Vic20, unexpanded memory
+          23: Commodore Vic20, 3kB memory expansion
+          52: Commodore Vic20, 8 - 32kB memory expansion
+          55: Commodore Vic20, 8 - 32kB + 3kB memory expansion
+          16: Commodore c16
+           4: Commodore plus4
+          64: Commodore c64 (The default target)
+         128: Commodore c128
+        4032: Commodore PET 4032
+        0xa2: Apple ][+ and //e
+        0xa8: Atari 400/800 XL/XE
+      0xbbcb: BBC Micro B
+
+-X<custom slow effect assembler fragment>
+-x[1-3]|<custom fast effect assembler fragment>
+        Sets the decrunch effect. If an assembler fragment is given, it must
+        not change the X-register, Y-register or modify the carry flag.
+        Separate instructions with a space. Please note that including spaces
+        in the fragment requires quoting to work. An example of a slow C64
+        x-scroll-register effect: -X "txa and #07 ora #$c8 sta $d016"
+         Use a value of 1 - 3 to pick a preset effect:
+        1:      Accumulator based fast border flash
+        2:      X-register based fast border flash
+        3:      Y-register based fast border flash
+        Default effect is a slow character flash. The 1-3 presets are only
+        available for targets that support changing border color.
+
+-n      No decrunch effect. This option can't be combined with -X or -x
+
+-D<symbol>=<value>
+        Defines symbols for the sfx assembler. More advanced features of the
+        decruncher are controlled by defining symbols. More on this in the
+        decruncher symbol reference section below.
+
+-s<custom decruncher enter assembler fragment>
+        Specifies an optional assembler fragment to be executed when the
+        decruncher starts decrunching. It must not change the Y-register. This
+        assembler fragment is intended for things like clearing the screen,
+        changing background and/or border color, changing a c128 to 2MHz mode
+        or enable 'Skip internal cycles' and burst mode on a DTV2. The fragment
+        syntax is the same as for the -X and -x options.
+-f<custom decruncher exit assembler fragment>
+        Specifies an optional assembler fragment to be executed when the
+        decruncher has finished decrunching. This assembler fragment is
+        intended for things like restoring a c128 to 1MHz or disabling 'Skip
+        internal cycles' and burst mode on a DTV2. The fragment syntax is the
+        same as for the -X and -x options.
+
+-------------------------------------------------------------------------------
+-- The desfx sub command ------------------------------------------------------
+-------------------------------------------------------------------------------
+
+ The desfx command decrunches a file generated by the sfx command and writes it
+as a prg-file. It auto detects the entry point from the in-file and runs the
+embedded decruncher in order to get to the decrunched data. It auto detects the
+decrunched area to be compatible with more versions of exomized sfx files but
+might fail if the decrunched area is unusual.
+
+ The desfx command has the following options:
+
+-e <address>|load
+        Overrides the entry point of the decruncher with <address> or if the
+        string load is given, the load address of the input file will be used.
+
+-------------------------------------------------------------------------------
+-- The raw sub command --------------------------------------------------------
+-------------------------------------------------------------------------------
+
+ Now let's go back to the command that handles plain files, raw.
+The raw command can both crunch and decrunch both forward and backward. It also
+handles generic files without any of the above mentioned restrictions. There's
+no hard coded limit in file size but it gets increasingly slow with larger
+files so there's a practical limit of about 1MB. The output file format of this
+command is a plain file.
+
+It is possible to give an optional offset and length for the in-file like this:
+<in-file>[,offset[,length]]. Negative offsets and lengths are allowed. They
+will wrap around to the end of the file.
+
+To skip the first two bytes but read all the rest of a file do like this:
+$ exomizer raw <in-file>,2 -o <out-file>
+
+To skip the first two bytes and the last two bytes of a file do like this:
+$ exomizer raw <in-file>,2,-2 -o <out-file>
+
+To read 10 bytes at offset 13 from the end of the file do like this:
+$ exomizer raw <in-file>,-13,10 -o <out-file>
+
+-------------------------------------------------------------------------------
+-- The included decruncher source code with examples --------------------------
+-------------------------------------------------------------------------------
+
+There are two directories with source code in the distribution:
+
+exodecrs/
+         This contains the source code of four 6502 assembler decrunchers. One
+        backwards decruncher that decrunches data crunched by the mem sub
+        command. It also contains a forward decruncher contributed by
+        Krill/Plush for data crunched by the mem sub command and the -f option.
+        Call these two decrunchers once to decrunch the data to its targeted
+        location.
+         Also contained in this directory are two streaming forward decrunchers
+        for data crunched with the raw sub command. Both use a circular buffer
+        and have a get_byte like interface that is called repeatedly to get one
+        byte or a block of decrunched data until the end of data is signaled
+        by the call returning with the carry flag set. However they are not
+        capable of handling the literal sequence escape mechanism so data must
+        be crunched with the -c flag.
+         Also included are a Makefile and some example code and data that use
+        the four decrunchers. The assembler syntax is ca65, the assembler of
+        the cc65 tool chain. This tool chain is needed to build the examples.
+
+rawdecrs/
+         This directory contains two decrunchers written in C. One backwards
+        decruncher with a call once and all data are decrunched interface.
+        And one forward streaming decruncher with a get_decrunched_byte
+        interface. Also included are example code using the decrunchers and
+        a Makefile that can be used for building them. An ANSI-C compiler is
+        necessary for this.
+
+All of the included makefiles have only been tested with Gnu make.
+
+-------------------------------------------------------------------------------
+-- Credits for Exomizer 2.0 ---------------------------------------------------
+-------------------------------------------------------------------------------
+
+Feature suggestions
+-------------------
+Csabo / LOD
+Ninja / The Dreams
+Richard / TND
+Jack Alien / Remember
+Krill / Plush
+
+Contributed code
+----------------
+Krill / Plush
+Metalbrain
+
+Bug reports
+-----------
+iAN CooG / HokutoForce
+
+Regression testing
+------------------
+Ninja / The Dreams
+Csabo / LOD
+iAN CooG / HokutoForce
+Lasse Öörni
+
+Inspiration
+-----------
+PuCrunch
+The Oneway crunchers, packers and linkers, Byte Boiler, AB, cruel etc.
+The Final Super-Compressor by Meidinger & Borris
+Timecruncher 3.1 by Matcham of Network
+
+-------------------------------------------------------------------------------
+-- Credits for Exomizer 1.x ---------------------------------------------------
+-------------------------------------------------------------------------------
+Inspiration
+-----------
+ The Oneway crunchers: Byteboiler, The Cruncher AB and CruelCrunch.
+ Pasi Ojala's PuCrunch
+
+Beta testing
+------------
+ Lasse Öörni
+ Csabo / LOD
+ Overkiller / Hokuto Force / PoL
+ iAN CooG / Hokuto Force
+ Rafal Szyja
+ bubis / Resource
+ Ninja / The Dreams
+
+Bug Reporting
+-------------
+ Csabo / LOD
+ Soci / Singular
+ 6R6 / Nostalgia
+ Ninja / The Dreams
+
+Feedback/Suggestions
+--------------------
+ Count Zer0
+ bubis / Resource
+ Ninja / The Dreams
+
+-------------------------------------------------------------------------------
+-- Contact information --------------------------------------------------------
+-------------------------------------------------------------------------------
+
+ Please don't hesitate to contact me if you have any feature/improvement
+suggestions, bugs to report or perhaps just have something to say about the
+exomizer program.
+
+My name is Magnus Lind and my email address is magli143@gmail.com
+
+-------------------------------------------------------------------------------
+-- Reference for the sfx decruncher symbols -----------------------------------
+-------------------------------------------------------------------------------
+
+The sfx decruncher is tweaked by defining optional symbols to the assembler on
+the command line using the -D<symbol>=<value> flag:
+
+The following symbol controls if headers are written to the sfx file:
+ i_raw
+
+ If it is set to anything but 0 then no headers will be written to the sfx file
+regardless of what the documentation of other optional symbols say.
+
+The following symbol controls the sfx load address:
+ i_load_addr
+
+ If it is unset then the sfx file will load to the default BASIC start address
+of the target machine. For Atari the default load address is $2000. For the
+Oric the default load address is $0500.
+
+ If it is set then the sfx file will load to the given address and it will not
+have any BASIC line. (The Atari target never has a BASIC line). The run address
+is the same as the load address.
+
+The following symbol controls the sfx BASIC line number:
+ i_line_number
+
+If unset it will default to 20. This symbol can't be combined with i_load_addr.
+
+The following symbols control the memory configuration:
+ i_ram_enter      (defaults to standard ROM configuration)
+ i_ram_during
+ i_ram_exit       (defaults to standard ROM configuration)
+
+The i_ram_enter symbol differs from the other two. It isn't used to set the
+memory configuration. It tells exomizer which memory configuration to
+expect when the decruncher is run. This information is used to minimize the
+decruncher size.
+
+For the c64 and the c128 target the value of the above symbols will be stored
+into $ff00 (c128) or $01 (c64) to set the mem configuration. For the c16/+4
+target the possible values are 0=ROM and 1=RAM. For the Atari the value will be
+stored in $d301. The Oric target supports 0=ROM and 1=RAM where 1 will enable
+decrunching into the top 16kB RAM supplied by the microdisc controller.
+
+Default is $37 for the c64, $00 for the c128, 0 for the c16/+4 and Oric and $ff
+for the Atari.
+
+The following symbols control IRQs. (0=disabled,SEI, 1=enabled,CLI)
+ i_irq_enter      (defaults to 1)
+ i_irq_during
+ i_irq_exit
+
+The following symbols control NMIs, this is only implemented for the Atari
+target where the value will be stored in $d40e
+ i_nmi_enter      (defaults to $40)
+ i_nmi_during
+ i_nmi_exit
+
+To locate the decrunch table elsewhere than the default tape buffer or $0600
+for the Atari target, use the symbol i_table_addr:
+ -Di_table_addr=<address>
+
+The Apple ][+ target has an option to disconnect DOS. To do this, set the
+i_a2_disable_dos symbol to any value:
+ -Di_a2_disable_dos=1
+
+The Apple ][+ target writes files in the AppleSingle format. This option
+overrides the PRODOS filetype of the target file to the set value. To force a
+file of system type set the i_a2_file_type symbol to the file type value.
+See http://www.easy68k.com/paulrsm/6502/PDOS8TRM.HTM#B.4.2.4 for a list of
+valid file types:
+ -Di_a2_file_type=<file-type>
+
+Decruncher effects are not selected by the use of symbols. They are chosen by
+the -x or -X flags or deactivated by -n.
+
+-------------------------------------------------------------------------------
+-- Comparison of outfile structure of the level, mem and raw sub commands --
+-------------------------------------------------------------------------------
+ The crunched files generated by the mem and level sub command are structurally
+identical to the files generated by the raw sub command except that they also
+contain the decrunch address appended or prepended. For mem there is also the
+prg header <load le>. The following table shows how the output of the mem and
+level sub commands are related to the output of the raw sub command.
+
+ [exomizer mem -lnone]    == [exomizer raw -b] + <--decr le>
+ [exomizer mem -f -lnone] == <decr be++> + [exomizer raw]
+ [exomizer mem]           == <load le> + [exomizer raw -b] + <--decr le>
+ [exomizer mem -f]        == <load le> + <decr addr be++> + [exomizer raw]
+ [exomizer level]         == <--decr be> + [exomizer raw -b -r]
+ [exomizer level -f]      == <decr be++> + [exomizer raw]
+
+ <decr be|le> is the decrunch pointer, a 16 bit address written in either big
+endian or little endian.
+
+ For lines that contain the raw sub command with the -b flag the decruncher
+writes backwards in memory and will predecrement (--) the decrunch address. The
+value of the address in the file will be the address that immediately follows
+the last byte of the uncrunched data.
+
+ For lines that contain the raw sub command without the -b flag the decruncher
+writes forward in memory and will postincrement (++) the decrunch address. The
+value of the address in the file will be the address of the first byte of the
+uncrunched data.
+
+This also means that it is possible to decrunch level and mem files using the
+raw sub command if you use the offset and len options to skip the extra
+addresses. Please note that the resulting files will be raw and not prg.
+
+ mem -lnone    => raw -d -b a.out,0,-2
+ mem -f -lnone => raw -d a.out,2
+ mem           => raw -d -b a.out,2,-2
+ mem -f        => raw -d a.out,4
+ level         => raw -d -b -r a.out,2
+ level -f      => raw -d a.out,2
+
+-------------------------------------------------------------------------------
+The command line syntax differs from v1.x. Here's a quick translation guide:
+old                                     new
+------------------                      -----------------------------
+exomizer -ssys <in-file>                exomizer sfx sys <in-file>
+exomizer -s$1234 <in-file>              exomizer sfx $1234 <in-file>
+exomizer -r <in-file>                   exomizer level <in-file>
+exomizer <in-file>                      exomizer mem -lnone <in-file>
+exomizer -l$1234 <in-file>              exomizer mem -l$1234 <in-file>
+exomizer -lauto  <in-file>              exomizer mem <in-file>
+-------------------------------------------------------------------------------
--- a/loader/tools/exomizer-3.1/exo31info.txt
+++ b/loader/tools/exomizer-3.1/exo31info.txt
@ -0,0 +1,210 @@
+
+This document only mentions what has changed since exomizer 2.0. For
+more general exomizer info please read the exo20info.txt file which also
+might contain updated information about previously existing features.
+
+Exomizer 3.1
+------------
+
+New features since 3.0
+----------------------
+o  Improved compression on average due to offset reuse. The bit stream format
+   for crunched files has been extended with a new bit to control this:
+
+   bit 5  Decides if we may reuse the latest sequence offset after a single
+          literal byte or a single literal sequence: 1=enable, 0=disable
+
+   This feature is enabled by default so the raw, mem and level modes now
+   default to -P39.
+
+o  The flags -e and -E have been extended to support split encoding. This
+   means that the encoding used by a crunched file is provided by a file of
+   its own and no longer included in the crunched file itself.
+
+   This can save space if several crunched files contain similar data and can
+   share the encoding. To facilitate this, the -E flag now enables the mem,
+   level and raw sub commands to generate a header that is optimized for all
+   the given input files. See the -E documentation in exo20info.txt for more
+   details. Support for this has been added to exodecr/exodecrunch.s but is
+   disabled by default.
+
+o  The exodecr/exodecrunch.s 6502 decruncher now supports forward decrunching.
+   There are also versions for dasm and acme. They implement the same
+   feature set as the ca65 version.
+
+o  Changes to contributed decrunchers:
+   Added ARM 32bit thumb2 source code by ALeX Kazik
+   Added Intel 8080 source code by Ivan Gorodetsky
+   Updated Zilog Z80 source code by Antonio Villena
+
+o  The -P flag has been extended to make it possible to only affect certain
+   bits by prefixing a value with a - or a + character. This will modify the
+   current value using AND or OR respectively instead of replacing it. E.g.
+   -P+16 will set bit 4 but leave the other bits as they were. Likewise, -P-16
+   clears bit 4 but leaves the others. The syntax also allows for chaining
+   multiple operations. E.g. -P-32+16 is allowed. An initial absolute value is
+   also accepted like this -P43-32+16 .
+
+o  Miscellaneous bug fixes, See changelog.txt for details.
+
+New features since 2.0
+---------------------
+
+o  New bit stream format for crunched files that allow for faster decrunching.
+
+o  The bit stream format is controlled by a new -P<bit-field> flag. The bits
+   control different aspects of the bit stream format. -P0 disables all new
+   aspects and will generate exomizer 2.0 compatible files. The raw, mem and
+   level modes default to -P7. (-P39 and new bit 5 since 3.1)
+
+   bit 0  Controls bit stream bit orientation, 1=big endian, 0=little endian
+   bit 1  Controls how more than 7 bits are shifted 1=split into a shift of
+          less than 8 bits + a byte, 0=all bits are shifted
+   bit 2  Implicit first literal byte: 1=enable, 0=disable
+   bit 3  Align bit stream towards start without shift flag bit included:
+          1=enable, 0=disable
+   bit 4  Decides if we are to have two lengths (1 and 2) or three lengths
+          (1, 2 and 3) using offset tables of their own: 0=two, 1=three
+
+   Please note that many decrunchers have not yet been ported to the new bit
+   stream format and require a -P0 on the command-line. However,
+   exodecrs/exodecrunch.s, rawdecrs/exodecr.c and rawdecrs/exodecrunch.c will
+   work out of the box. Only the built in decruncher src/exodec.c handles all
+   possible bit combinations. Please check it out for implementation details.
+
+o  There is a new -T<bit-field> flag that controls traits of the bit stream
+   that don't affect the bit stream format. Typically traits are disallowed to
+   make the decruncher faster and/or shorter. The raw mode defaults to -T0.
+   The mem and level modes default to -T4.
+
+   bit 0  Disallow literal sequences, 0 allow, 1 disallow
+   bit 1  Disallow sequences with length 1, 0 allow, 1 disallow
+   bit 2  Disallow sequences with lengths (256*m + n) where m > 0 and n is in
+          the interval [0 - p] and p is 2 or 3 depending on bit 4 of the
+          -P<bit-field>, 0 allow, 1 disallow
+
+o  The sfx decruncher and exodecrs/exodecrunch.s take advantage of the new bit
+   stream format for decrunching speed improvements. They also become faster
+   if the sequence length is limited to 256 by using -M256 when crunching.
+   (exodecrs/exodecrunch.s has to be told by a define, see the comments in the
+   file for more info. It also needs the bit 2 trait (-T4) which is used by
+   default by the level and mem sub commands but not by raw)
+
+o  The sfx decruncher now recognizes a new symbol, i_perf, that controls the
+   decruncher size/speed performance trade-off. The valid values are -1 to 2
+   where -1 is slowest/shortest and 2 is fastest/largest. Default is 0. They
+   all are faster than exomizer 2.11 by some margin. None of the decrunchers
+   will destroy zero-page.
+
+o  The sfx decruncher supports the -P+16 flag to enable a dedicated offset
+   table for sequences of length 3. Using this flag might increase compression
+   a bit for some files but will also make the decrunch table larger, 204
+   bytes instead of the default 156 bytes.
+
+o  Very slightly improved compression.
+
+
+Contributing
+------------
+
+   You are, as before, very welcome to contribute decruncher source code for
+   unsupported targets and/or improving existing decrunchers.
+
+   However, the unclear licensing of some of the contributions so far has not
+   been ideal. So please decide on what license to use before contributing. If
+   you are unsure about the licensing then I recommend using the same license
+   that I use for my own decruncher sources. It is a plain zlib license with an
+   additional BSD like no-endorsement clause. See exodecrs/exodecrunch.s for
+   how it can be included in the source file as a comment block.
+
+Sources of inspiration
+----------------------
+
+   The new bit stream format features/traits and the 6502 decruncher speed
+   improvements have been inspired from several sources that I would like to
+   mention (in no particular order):
+
+   * The exoopt tool developed by Metalbrain and Antonio Villena
+     (z80 decruncher contributors, the new -P and -T flags should be able to
+      replicate all bit protocol variants that exoopt generates).
+   * The Doynamite 6502 decruncher by Doynax and the subsizer cruncher by TLR,
+     for raising the bar.
+   * Feedback from users
+
+Benchmarks for the improved 6502 decrunchers
+--------------------------------------------
+
+The measured time is the number of cycles the decruncher needs to run.
+Interrupt processing and other kinds of overhead that exist in a typical
+computer environment are not included. (M=*1000000 and k=*1000)
+
+* The benchmark for the sfx decruncher uses the zorrounpacked.prg file
+(https://web.archive.org/web/20050102175732/http://www.swolff.dk/cruncher/zorrounpacked.prg) also used in the README.txt of subsizer for comparisons:
+
+Sorted on size:
+| File name                      | Size  | Reduced | Cycles  | C/B    | B/kC  |
+|--------------------------------|-------|---------|---------|--------|-------|
+| zorro_exo31_-1-P+16.prg        | 30231 |  44.12% | 6073989 | 112.27 |  8.91 |
+| zorro_exo31_0-P+16.prg         | 30246 |  44.10% | 5933882 | 109.68 |  9.12 |
+| zorro_exo31_1-P+16.prg         | 30254 |  44.08% | 5670418 | 104.81 |  9.54 |
+| zorro_exo31_0-M256-P+16.prg    | 30267 |  44.06% | 5659124 | 104.60 |  9.56 |
+| zorro_exo31_1-M256-P+16.prg    | 30279 |  44.03% | 5450866 | 100.75 |  9.93 |
+| zorro_exo31_2-M256-P+16.prg    | 30305 |  43.99% | 5271433 |  97.43 | 10.26 |
+| zorro_exo31_2-M256.prg         | 30365 |  43.88% | 5268500 |  97.38 | 10.27 |
+| zorro_exo31_2-M256-P-32+16.prg | 30697 |  43.26% | 5123329 |  94.70 | 10.56 |
+| zorro_exo31_2-M256-P-32.prg    | 30758 |  43.15% | 5120580 |  94.65 | 10.57 |
+
+Sorted on cycles:
+| File name                      | Size  | Reduced | Cycles  | C/B    | B/kC  |
+|--------------------------------|-------|---------|---------|--------|-------|
+| zorro_exo31_2-M256-P-32.prg    | 30758 |  43.15% | 5120580 |  94.65 | 10.57 |
+| zorro_exo31_2-M256-P-32+16.prg | 30697 |  43.26% | 5123329 |  94.70 | 10.56 |
+| zorro_exo31_2-M256.prg         | 30365 |  43.88% | 5268500 |  97.38 | 10.27 |
+| zorro_exo31_2-M256-P+16.prg    | 30305 |  43.99% | 5271433 |  97.43 | 10.26 |
+| zorro_exo31_1-M256-P+16.prg    | 30279 |  44.03% | 5450866 | 100.75 |  9.93 |
+| zorro_exo31_0-M256-P+16.prg    | 30267 |  44.06% | 5659124 | 104.60 |  9.56 |
+| zorro_exo31_1-P+16.prg         | 30254 |  44.08% | 5670418 | 104.81 |  9.54 |
+| zorro_exo31_0-P+16.prg         | 30246 |  44.10% | 5933882 | 109.68 |  9.12 |
+| zorro_exo31_-1-P+16.prg        | 30231 |  44.12% | 6073989 | 112.27 |  8.91 |
+
+The information in the two tables above has been generated by the exomizer
+desfx -S command.
+
+* The benchmark for the memory decruncher exodecrunch.s uses the "Pearls for
+Pigs" files (http://codebase64.org/doku.php?id=base:compression_benchmarks):
+
+(get_bits not in-lined, unlimited sequence lengths)
+| File name | Size  | Reduced | Cycles   | C/B    | B/kC  |
+|-----------|-------|---------|----------|--------|-------|
+| pfp_1.cru |  2934 |  73.35% |   744298 |  67.61 | 14.79 |
+| pfp_2.cru |  2157 |  56.63% |   468603 |  94.23 | 10.61 |
+| pfp_3.cru |  1727 |  56.27% |   349649 |  88.54 | 11.29 |
+| pfp_4.cru |  3427 |  51.15% |   662205 |  94.38 | 10.59 |
+| pfp_5.cru | 19186 |  44.80% |  3517541 | 101.20 |  9.88 |
+| pfp_6.cru |  8076 |  74.45% |  1974737 |  62.48 | 16.00 |
+| pfp_7.cru |  8644 |  57.61% |  1771969 |  86.90 | 11.51 |
+| pfp_8.cru |  2926 |  48.78% |   519864 |  91.00 | 10.99 |
+| pfp_9.cru |  5166 |  42.34% |   976430 | 108.98 |  9.18 |
+| Total     | 54243 |  57.75% | 10985296 |  85.57 | 11.69 |
+
+(get_bits in-lined, sequence lengths limited to 256 or less, no offset reuse)
+| File name          | Size  | Reduced | Cycles   | C/B   | B/kC  |
+|--------------------|-------|---------|----------|-------|-------|
+| pfp_1.cruiM256P-32 |  2948 |  73.22% |   672500 | 61.09 | 16.37 |
+| pfp_2.cruiM256P-32 |  2199 |  55.78% |   419715 | 84.40 | 11.85 |
+| pfp_3.cruiM256P-32 |  1784 |  54.82% |   318707 | 80.71 | 12.39 |
+| pfp_4.cruiM256P-32 |  3429 |  51.13% |   583501 | 83.17 | 12.02 |
+| pfp_5.cruiM256P-32 | 19586 |  43.65% |  3239878 | 93.21 | 10.73 |
+| pfp_6.cruiM256P-32 |  8392 |  73.45% |  1838881 | 58.18 | 17.19 |
+| pfp_7.cruiM256P-32 |  8763 |  57.03% |  1607365 | 78.82 | 12.69 |
+| pfp_8.cruiM256P-32 |  3073 |  46.21% |   489281 | 85.64 | 11.68 |
+| pfp_9.cruiM256P-32 |  5314 |  40.69% |   889859 | 99.31 | 10.07 |
+| Total              | 55488 |  56.78% | 10059687 | 78.36 | 12.76 |
+
+You can make these tables yourself by naming the pfp files pfp[1-9].raw,
+dropping them in the exodecrs/perf folder and in that folder execute the
+following command line: $ make pfp.stat pfp.statiM256P-32
+
+(You need to build exomizer in the src folder first and you also need to have
+cc65, gcc and gnu make on the path) It works in Linux and in a mingw/msys2
+environment in Windows.
--- a/loader/tools/exomizer-3.1/exobasic10b2.txt
+++ b/loader/tools/exomizer-3.1/exobasic10b2.txt
@ -0,0 +1,52 @@
+-------------------------------------------------------------------------------
+Features of the Exobasic 1.0b2 tool:
+-------------------------------------------------------------------------------
+
+1) Renumber the basic lines (standard), -n<n>,<m> (Beta quality)
+
+   The lines are renumbered starting with <n> and increasing with <m> for each
+   line.
+
+2) Renumber the basic lines (extreme), -N (Beta quality)
+
+  This will increase the file crunchability. This renumbering will renumber
+  many basic lines to 0. Yes, they will run correctly. :)
+
+2) Clobber the line links, -p (Release quality)
+
+  Normally a basic program file consists of a linked list of zero terminated
+  basic line strings. The links between the lines are redundant and can be
+  recreated. In fact the basic interpreter always recreate the links when
+  a basic program is loaded from the READY prompt. This is done to be able to
+  easily move basic programs between compatible platforms where the start
+  address of basic differs. The links contained in the file are only correct if
+  the file is loaded to the address it was saved from.
+
+  This means that we can write any values as the links as long as we recreate
+  them before running the program. The routine that recreates the links is at
+  $A533 (c64) or $8818 (c16/plus4).
+
+  The values this option actually writes to the links are optimized to increase
+  the file crunchability.
+
+3) Add a trampoline -t, (Release quality)
+
+  This feature adds a small machine language routine at the beginning of the
+  program. This routine does all the startup preparation work necessary and
+  then starts the program. If the "clobber line links" option is selected, the
+  added trampoline will recreate them. When a trampoline is added it is very
+  easy to crunch the file and then just jmp the trampoline to start it. For a
+  c16/plus4 trampoline add a -4 option to the commandline as well.
+
+4) Remove rem statements and unnecessary spaces, -r (Release quality)
+
+  This feature removes rem statements unless they are first on a line and the
+  line is a goto/gosub target. If so then only the rem statement is left.
+  It also removes unnecessary spaces between basic statements.
+
+5) Regenerate c16/plus4 stack color table, -c (Release quality)
+  This feature only works if a c16/plus4 trampoline is created with the -t and
+  -4 flags. It will regenerate a system color table located at the hardware
+  stack that the Exomizer's built-in decruncher overwrites.
+
+-------------------------------------------------------------------------------
--- a/loader/tools/exomizer-3.1/exodecrs/Makefile
+++ b/loader/tools/exomizer-3.1/exodecrs/Makefile
@ -0,0 +1,87 @@
+#
+# Makefile for testrun
+#
+# Host C compiler settings used when building the testrun binary.
+WFLAGS = -std=c89 -Wall -Wstrict-prototypes -D_XOPEN_SOURCE=600 -pedantic
+CFLAGS = $(WFLAGS) -O3 -ffast-math -fomit-frame-pointer
+LDFLAGS = -s
+
+# Debug alternative: enable these instead of the optimized settings above.
+#CFLAGS = -g $(WFLAGS)
+#LDFLAGS = -g
+
+# Objects from ../src that get linked into testrun.
+TEST_OBJS = $(addprefix ../src/, 6502emu.o exo_util.o log.o areatrace.o \
+            vec.o buf_io.o buf.o table.o perf.o)
+
+# The two stream decruncher test programs, each with the objects it links.
+NAME1 = test.1stream.prg
+SOURCES1 = main1.os exostreamdecr1.os
+
+NAME2 = test.2stream.prg
+SOURCES2 = main2.os exostreamdecr2.os
+
+# $(call permutate2,X,REST): recursive helper for permutate. For X and for
+# each word of REST it either keeps the word or substitutes the placeholder
+# "_", and returns every resulting combination as one joined string per
+# combination. The choice for the last word of REST comes first in each string.
+permutate2 = $(if $(strip $(2)), $(foreach res, $(call permutate2, $(firstword $(2)), $(wordlist 2, $(words $(2)), $(2))), $(join $(res), _) $(join $(res), $(1))), _ $(1))
+
+# $(call permutate,LIST): all keep/placeholder combinations of the words in
+# LIST (see permutate2); expands to nothing when LIST is empty.
+permutate = $(if $(strip $(1)), $(call permutate2, $(firstword $(1)), $(wordlist 2, $(words $(1)), $(1))))
+
+# Every on/off combination of the seven option letters described by
+# VARIANT_OPT below; each combination names one test variant.
+VARIANTS = $(call permutate, i e M c 4 o f)
+
+# One entry per option letter, fields separated by "^":
+#   <letter>^<option passed to ca65>^<option passed to exomizer>
+# The third field is empty for the letters i and e.
+VARIANT_OPT = c^-DLITERAL_SEQUENCES_NOT_USED=1^-c M^-DMAX_SEQUENCE_LENGTH_256=1^-M256 i^-DINLINE_GET_BITS=1^ e^-DENABLE_SPLIT_ENCODING=1^ o^-DDONT_REUSE_OFFSET=1^-P-32 4^-DEXTRA_TABLE_ENTRY_FOR_LENGTH_THREE=1^-P+16 f^-DDECRUNCH_FORWARDS=1^-f
+
+# $(call echo,LABEL,VALUE): prints "LABEL VALUE" through $(info) and expands
+# to VALUE. Debugging aid; no rule in this Makefile references it.
+echo = $(or $(info $(1) $(2)), $(2))
+
+# $(call variant,FILE): the second-to-last dotted component of FILE including
+# its leading dot (suffix of the basename), e.g. test.i_M.prg -> .i_M
+variant = $(suffix $(basename $(1)))
+
+# $(call asmopt,VARIANT): the ca65 options (field 2 of VARIANT_OPT) of every
+# entry whose letter occurs anywhere in VARIANT (matched with findstring).
+asmopt = $(filter-out , $(foreach opt, $(VARIANT_OPT), $(if $(findstring $(word 1, $(subst ^, , $(opt))), $(1)), $(word 2, $(subst ^, , $(opt))))))
+
+# $(call exoopt,VARIANT): same selection as asmopt, but yields the exomizer
+# options (field 3 of VARIANT_OPT).
+exoopt = $(filter-out , $(foreach opt, $(VARIANT_OPT), $(if $(findstring $(word 1, $(subst ^, , $(opt))), $(1)), $(word 3, $(subst ^, , $(opt))))))
+
+# build and clean never create files of those names; declaring them phony
+# keeps a stray file called "build" or "clean" from making them look up to
+# date. GNU Make treats .PHONY prerequisites as literal target names, so the
+# "assert.data%" entry (kept from the original) is not applied as a pattern.
+.PHONY: build clean assert.data%
+# Do not delete these generated files when they are built as intermediates.
+.PRECIOUS: %.exo %stream.exo %.os test%.prg $(TEST_OBJS)
+# Needed for the $$(...) prerequisite lists used by the rules in this file.
+.SECONDEXPANSION:
+
+# Default goal. Builds testrun plus the test program and crunched data for
+# every variant and for the two stream tests, runs them through testrun, and
+# checks each decrunched output with the assert.data.* targets of a sub-make.
+# The stream data files are passed to testrun with an "@0x3000" suffix.
+build: testrun $$(foreach v,$$(VARIANTS) 1stream 2stream,test.$$(v).prg data.$$(v).exo)
+	@$(RM) *.out
+	@./testrun $(foreach v,$(VARIANTS),test.$(v).prg data.$(v).exo)
+	@$(MAKE) $(VARIANTS:%=assert.data.%.exo)
+	@./testrun $(foreach v,1stream 2stream,test.$(v).prg data.$(v).exo@0x3000)
+	@$(MAKE) $(patsubst %,assert.data.%.exo,1stream 2stream)
+
+# Verify one decrunched result: compare data<variant>.exo.out (as written by
+# the testrun step of the build rule) with data.bin, then remove the .out
+# file. "cmp -i0:2" skips no bytes of the .out file and the first two bytes
+# of data.bin. The variant is taken from the target name via $(call variant).
+assert.data%:
+	cmp -i0:2 data$(call variant, $@).exo.out data.bin
+	@$(RM) data$(call variant, $@).exo.out
+
+# Run a single variant through testrun and verify its output against data.bin
+# ("cmp -i0:2" skips the first two bytes of data.bin). The .out file is only
+# removed when the comparison succeeds.
+#
+# The recipes take the program and data file names from the prerequisite list
+# ($^ = testrun, program, data) instead of re-deriving them from $@, so they
+# always operate on exactly the files this rule declared and built.
+testrun.test%.prg: testrun test%.prg data%.exo
+	@./testrun $(word 2,$^) $(word 3,$^)
+	@cmp -i0:2 $(word 3,$^).out data.bin && $(RM) $(word 3,$^).out
+
+# Same as above for the stream decruncher tests; the data file is passed to
+# testrun with an "@0x3000" suffix.
+testrun.test%stream.prg: testrun test%stream.prg data%stream.exo
+	@./testrun $(word 2,$^) $(word 3,$^)@0x3000
+	@cmp -i0:2 $(word 3,$^).out data.bin && $(RM) $(word 3,$^).out
+
+# Link the host-side test runner from testrun.o and the shared ../src objects.
+testrun: testrun.o $(TEST_OBJS)
+	@$(CC) $(LDFLAGS) -o $@ $^
+
+# Link one variant of the decruncher test program with ld65.
+# The recipe links $^ (the two objects named in the rule header) rather than
+# rebuilding their names from $@, so the files linked are always the ones
+# make actually brought up to date for this target.
+test%.prg: main%.os exodecrunch%.os
+	@echo "building $@"
+	@ld65 $^ -o $@ -Cc64.cfg
+
+# Link the first stream decruncher test program from its object list.
+$(NAME1): $(SOURCES1)
+	@ld65 $^ -o $@ -Cc64.cfg
+
+# Link the second stream decruncher test program from its object list.
+$(NAME2): $(SOURCES2)
+	@ld65 $^ -o $@ -Cc64.cfg
+
+# Assemble an object with ca65. The source is the target name with up to two
+# trailing suffixes removed, plus .s (exodecrunch.<variant>.os ->
+# exodecrunch.s, main1.os -> main1.s); this needs .SECONDEXPANSION. The
+# variant part of the target name selects the -D options through asmopt.
+%.os: $$(basename $$(basename $$@)).s
+	@ca65 $(call asmopt, $(call variant, $@)) $< -o $@
+
+# Remove the test runner, test programs, assembled objects and crunched data
+# generated in this directory.
+clean:
+	@$(RM) testrun testrun.o test*.prg *.os *.exo *.exo.out
+
+# Crunch the test data for one variant with "exomizer mem". The prerequisite
+# is the target name minus its last two suffixes, plus .bin
+# (data.<variant>.exo -> data.bin); exoopt supplies the exomizer flags
+# selected by the variant letters in the target name.
+%.exo: $$(basename $$(basename $$@)).bin
+	@../src/exomizer mem $(call exoopt, $(call variant, $@)) -q $<,0x3000 -o $@
+
+# Crunch the test data for the stream decruncher tests with "exomizer raw".
+# NOTE(review): data.<n>stream.exo also matches the %.exo rule above, so this
+# relies on GNU Make preferring the pattern with the shortest stem (3.82 and
+# later) -- confirm the minimum supported make version.
+%stream.exo: $$(basename $$(basename $$@)).bin
+	@../src/exomizer raw -P0 -b -c -m 1024 -q -C $<,2 -o $@
+
+# Compile a C source file into an object with the host compiler; applies to
+# testrun.o and to the $(TEST_OBJS) objects.
+%.o:	%.c
+	@$(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $<
+
+# Cancel the built-in "%.out: %" implicit rule (a pattern rule without a
+# recipe removes it); presumably so make never tries to create the *.exo.out
+# files written by testrun.
+%.out: %
--- a/Show more
+++ b/Show more