ys2-intro/loader/tools/b2/cruncher.c

748 lines
17 KiB
C
Raw Permalink Normal View History

2025-11-13 11:07:39 -05:00
#include "cruncher.h"
#include <stdio.h>
#include <stdlib.h>
#define log(format, ...)
//#define log(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define NUM_BITS_SHORT_0 3
#define NUM_BITS_SHORT_1 6
#define NUM_BITS_SHORT_2 8
#define NUM_BITS_SHORT_3 10
#define NUM_BITS_LONG_0 4
#define NUM_BITS_LONG_1 7
#define NUM_BITS_LONG_2 10
#define NUM_BITS_LONG_3 13
#define LEN_SHORT_0 (1 << NUM_BITS_SHORT_0)
#define LEN_SHORT_1 (1 << NUM_BITS_SHORT_1)
#define LEN_SHORT_2 (1 << NUM_BITS_SHORT_2)
#define LEN_SHORT_3 (1 << NUM_BITS_SHORT_3)
#define LEN_LONG_0 (1 << NUM_BITS_LONG_0)
#define LEN_LONG_1 (1 << NUM_BITS_LONG_1)
#define LEN_LONG_2 (1 << NUM_BITS_LONG_2)
#define LEN_LONG_3 (1 << NUM_BITS_LONG_3)
#define COND_SHORT_0(o) ((o) >= 0 && (o) < LEN_SHORT_0)
#define COND_SHORT_1(o) ((o) >= LEN_SHORT_0 && (o) < LEN_SHORT_1)
#define COND_SHORT_2(o) ((o) >= LEN_SHORT_1 && (o) < LEN_SHORT_2)
#define COND_SHORT_3(o) ((o) >= LEN_SHORT_2 && (o) < LEN_SHORT_3)
#define COND_LONG_0(o) ((o) >= 0 && (o) < LEN_LONG_0)
#define COND_LONG_1(o) ((o) >= LEN_LONG_0 && (o) < LEN_LONG_1)
#define COND_LONG_2(o) ((o) >= LEN_LONG_1 && (o) < LEN_LONG_2)
#define COND_LONG_3(o) ((o) >= LEN_LONG_2 && (o) < LEN_LONG_3)
#define MAX_OFFSET LEN_LONG_3
#define MAX_OFFSET_SHORT LEN_SHORT_3
#define DECRUNCHER_LENGTH 0xd5
byte decrCode[DECRUNCHER_LENGTH] = {
0x0b, 0x08, 0x00, 0x00, 0x9e, 0x32, 0x30, 0x36, 0x31, 0x00, 0x00, 0x00, 0x78, 0xa9, 0x34, 0x85,
0x01, 0xa2, 0xb7, 0xbd, 0x1e, 0x08, 0x95, 0x0f, 0xca, 0xd0, 0xf8, 0x4c, 0x10, 0x00, 0xbd, 0xd6,
0x07, 0x9d, 0x00, 0xff, 0xe8, 0xd0, 0xf7, 0xc6, 0x12, 0xc6, 0x15, 0xa5, 0x12, 0xc9, 0x07, 0xb0,
0xed, 0x20, 0xa0, 0x00, 0xb0, 0x17, 0x20, 0x8e, 0x00, 0x85, 0x36, 0xa0, 0x00, 0x20, 0xad, 0x00,
0x91, 0x77, 0xc8, 0xc0, 0x00, 0xd0, 0xf6, 0x20, 0x83, 0x00, 0xc8, 0xf0, 0xe4, 0x20, 0x8e, 0x00,
0xaa, 0xe8, 0xf0, 0x71, 0x86, 0x7b, 0xa9, 0x00, 0xe0, 0x03, 0x2a, 0x20, 0x9b, 0x00, 0x20, 0x9b,
0x00, 0xaa, 0xb5, 0xbf, 0xf0, 0x07, 0x20, 0x9b, 0x00, 0xb0, 0xfb, 0x30, 0x07, 0x49, 0xff, 0xa8,
0x20, 0xad, 0x00, 0xae, 0xa0, 0xff, 0x65, 0x77, 0x85, 0x74, 0x98, 0x65, 0x78, 0x85, 0x75, 0xa0,
0x00, 0xb9, 0xad, 0xde, 0x99, 0x00, 0x00, 0xc8, 0xc0, 0x00, 0xd0, 0xf5, 0x20, 0x83, 0x00, 0xd0,
0xa0, 0x18, 0x98, 0x65, 0x77, 0x85, 0x77, 0x90, 0x02, 0xe6, 0x78, 0x60, 0xa9, 0x01, 0x20, 0xa0,
0x00, 0x90, 0x05, 0x20, 0x9b, 0x00, 0x10, 0xf6, 0x60, 0x20, 0xa0, 0x00, 0x2a, 0x60, 0x06, 0xbe,
0xd0, 0x08, 0x48, 0x20, 0xad, 0x00, 0x2a, 0x85, 0xbe, 0x68, 0x60, 0xad, 0xed, 0xfe, 0xe6, 0xae,
0xd0, 0x02, 0xe6, 0xaf, 0x60, 0xa9, 0x37, 0x85, 0x01, 0x4c, 0x00, 0x00, 0x80, 0xdf, 0xfb, 0x00,
0x80, 0xef, 0xfd, 0x80, 0xf0
};
byte *ibuf;
byte *obuf;
uint ibufSize;
int get; //points to ibuf[]
uint put; //points to obuf[]
typedef struct {
uint cost;
uint next;
uint litLen;
uint offset;
} node;
typedef struct {
byte value;
byte valueAfter;
uint length;
} RLEInfo;
node *context;
uint *link;
RLEInfo *rleInfo;
uint first[65536];
uint last[65536];
byte curByte;
byte curCnt;
uint curIndex;
void wBit(uint bit) {
if(curCnt == 0) {
obuf[curIndex] = curByte;
curIndex = put;
curCnt = 8;
curByte = 0;
put++;
}
curByte <<= 1;
curByte |= (bit & 1);
curCnt--;
}
void wFlush() {
while(curCnt != 0) {
curByte <<= 1;
curCnt--;
}
obuf[curIndex] = curByte;
}
void wByte(uint b) {
obuf[put] = b;
put++;
}
void wBytes(uint get, uint len) {
uint i;
for(i = 0; i < len; i++) {
wByte(ibuf[get]);
get++;
}
}
void wLength(uint len) {
// if(len == 0) return; // Should never happen
byte bit = 0x80;
while((len & bit) == 0) {
bit >>= 1;
}
while(bit > 1) {
wBit(1);
bit >>= 1;
wBit(((len & bit) == 0) ? 0 : 1);
}
if(len < 0x80) {
wBit(0);
}
}
void wOffset(uint offset, uint len) {
uint i = 0;
uint n = 0;
uint b;
if(len == 1) {
if(COND_SHORT_0(offset)) {
i = 0;
n = NUM_BITS_SHORT_0;
}
if(COND_SHORT_1(offset)) {
i = 1;
n = NUM_BITS_SHORT_1;
}
if(COND_SHORT_2(offset)) {
i = 2;
n = NUM_BITS_SHORT_2;
}
if(COND_SHORT_3(offset)) {
i = 3;
n = NUM_BITS_SHORT_3;
}
} else {
if(COND_LONG_0(offset)) {
i = 0;
n = NUM_BITS_LONG_0;
}
if(COND_LONG_1(offset)) {
i = 1;
n = NUM_BITS_LONG_1;
}
if(COND_LONG_2(offset)) {
i = 2;
n = NUM_BITS_LONG_2;
}
if(COND_LONG_3(offset)) {
i = 3;
n = NUM_BITS_LONG_3;
}
}
// First write number of bits
wBit(((i & 2) == 0) ? 0 : 1);
wBit(((i & 1) == 0) ? 0 : 1);
if(n >= 8) { // Offset is 2 bytes
// Then write the bits less than 8
b = 1 << n;
while(b > 0x100) {
b >>= 1;
wBit(((b & offset) == 0) ? 0 : 1);
};
// Finally write a whole byte, if necessary
wByte((offset & 255) ^ 255); // Inverted(!)
offset >>= 8;
} else { // Offset is 1 byte
// Then write the bits less than 8
b = 1 << n;
while(b > 1) {
b >>= 1;
wBit(((b & offset) == 0) ? 1 : 0); // Inverted(!)
};
}
}
/*
* Cost functions
*/
uint costOfLength(uint len) {
if(len == 1) return 1;
if(len >= 2 && len <= 3) return 3;
if(len >= 4 && len <= 7) return 5;
if(len >= 8 && len <= 15) return 7;
if(len >= 16 && len <= 31) return 9;
if(len >= 32 && len <= 63) return 11;
if(len >= 64 && len <= 127) return 13;
if(len >= 128 && len <= 255) return 14;
printf("costOfLength got wrong value: %i\n", len);
return 10000;
}
uint costOfOffset(uint offset, uint len) {
if(len == 1) {
if(COND_SHORT_0(offset)) return NUM_BITS_SHORT_0;
if(COND_SHORT_1(offset)) return NUM_BITS_SHORT_1;
if(COND_SHORT_2(offset)) return NUM_BITS_SHORT_2;
if(COND_SHORT_3(offset)) return NUM_BITS_SHORT_3;
} else {
if(COND_LONG_0(offset)) return NUM_BITS_LONG_0;
if(COND_LONG_1(offset)) return NUM_BITS_LONG_1;
if(COND_LONG_2(offset)) return NUM_BITS_LONG_2;
if(COND_LONG_3(offset)) return NUM_BITS_LONG_3;
}
printf("costOfOffset got wrong offset: %i\n", offset);
return 10000;
}
uint calculateCostOfMatch(uint len, uint offset) {
uint cost = 1; // Copy-bit
cost += costOfLength(len - 1);
cost += 2; // NumOffsetBits
cost += costOfOffset(offset - 1, len - 1);
return cost;
}
uint calculateCostOfLiteral(uint oldCost, uint litLen) {
uint newCost = oldCost + 8;
// FIXME, what if litLen > 255?
//
// FIXME, cost model for literals does not work.
// Quick wins on short matches are prioritized before
// a longer literal run, which in the end results in a
// worse result.
// Most obvious on files hard to crunch.
switch(litLen) {
case 1:
case 128:
newCost++;
break;
case 2:
case 4:
case 8:
case 16:
case 32:
case 64:
newCost += 2;
break;
default:
break;
}
return newCost;
}
void setupHelpStructures() {
uint i;
// Setup RLE-info
get = ibufSize - 1;
while (get > 0) {
byte cur = ibuf[get];
if (cur == ibuf[get-1]) {
uint len = 2;
while ((get >= len) &&
(cur == ibuf[get-len])) {
len++;
}
rleInfo[get].length = len;
if (get >= len) {
rleInfo[get].valueAfter = ibuf[get-len];
} else {
rleInfo[get].valueAfter = cur; // Avoid accessing ibuf[-1]
}
get -= len;
} else {
get--;
}
}
// Setup Linked list
for (i = 0; i < 65536; i++) {
first[i] = 0;
last[i] = 0;
}
get = ibufSize - 1;
uint cur = ibuf[get];
while (get > 0) {
cur = ((cur << 8) | ibuf[get-1]) & 65535;
if (first[cur] == 0) {
first[cur] = last[cur] = get;
} else {
link[last[cur]] = get;
last[cur] = get;
}
if (rleInfo[get].length == 0) { // No RLE-match here..
get--;
} else { // if RLE-match..
get -= (rleInfo[get].length - 1);
}
}
}
void findMatches() {
typedef struct match {
uint length;
uint offset;
} match;
match matches[256];
node lastNode;
uint i;
get = ibufSize - 1;
uint cur = ibuf[get];
lastNode.cost = 0;
lastNode.next = 0;
lastNode.litLen = 0;
while (get >= 0) {
// Clear matches for current position
for (i = 0; i < 256; i++) {
matches[i].length = 0;
matches[i].offset = 0;
}
cur = (cur << 8) & 65535; // Table65536 lookup
if (get > 0) cur |= ibuf[get-1];
int scn = first[cur];
scn = link[scn];
uint longestMatch = 0;
if (rleInfo[get].length == 0) { // No RLE-match here..
// Scan until start of file, or max offset
while (((get - scn) <= MAX_OFFSET) &&
(scn > 0) &&
(longestMatch < 255)) {
// Ok, we have a match of length 2
// ..or longer, but max 255 or file start
uint len = 2;
while ((len < 255) &&
(scn >= len) &&
(ibuf[scn - len] == ibuf[get - len])) {
++len;
}
// Calc offset
uint offset = get - scn;
// Store match only if it's the longest so far
if(len > longestMatch) {
longestMatch = len;
// Store the match only if first (= best) of this length
while(len >= 2 && matches[len].length == 0) {
// If len == 2, check against short offset!!
if ((len > 2) ||
((len == 2) && (offset <= MAX_OFFSET_SHORT))) {
matches[len].length = len;
matches[len].offset = get - scn;
}
len--;
};
}
scn = link[scn]; // Table65535 lookup
};
first[cur] = link[first[cur]]; // Waste first entry
} else { // if RLE-match..
uint rleLen = rleInfo[get].length;
byte rleValAfter = rleInfo[get].valueAfter;
// First match with self-RLE, which is always
// one byte shorter than the RLE itself.
uint len = rleLen - 1;
if (len > 1) {
if (len > 255) len = 255;
longestMatch = len;
// Store the match
while(len >= 2) {
matches[len].length = len;
matches[len].offset = 1;
len--;
};
}
// Search for more RLE-matches..
// Scan until start of file, or max offset
while (((get - scn) <= MAX_OFFSET) &&
(scn > 0) &&
(longestMatch < 255)) {
// Check for longer matches with same value and after..
// FIXME, that is not what it does, is it?!
if ((rleInfo[scn].length > longestMatch) &&
(rleLen > longestMatch)) {
uint offset = get - scn;
len = rleInfo[scn].length;
if (len > rleLen)
len = rleLen;
if ((len > 2) ||
((len == 2) && (offset <= MAX_OFFSET_SHORT))) {
matches[len].length = len;
matches[len].offset = offset;
longestMatch = len;
}
}
// Check for matches beyond the RLE..
if ((rleInfo[scn].length >= rleLen) &&
(rleInfo[scn].valueAfter == rleValAfter)) {
// Here is a match that goes beyond the RLE..
// Find out correct offset to use valueAfter..
// Then search further to see if more bytes equal.
len = rleLen;
uint offset = get - scn + (rleInfo[scn].length - rleLen);
if (offset <= MAX_OFFSET) {
while ((len < 255) &&
(get >= (offset + len)) &&
(ibuf[get - (offset + len)] == ibuf[get - len])) {
++len;
}
if (len > longestMatch){
longestMatch = len;
// Store the match only if first (= best) of this length
while(len >= 2 && matches[len].length == 0) {
// If len == 2, check against short offset!!
if ((len > 2) ||
((len == 2) && (offset <= MAX_OFFSET_SHORT))) {
matches[len].length = len;
matches[len].offset = offset;
}
len--;
}; //while
}
}
}
scn = link[scn]; // Table65535 lookup
}
if (rleInfo[get].length > 2) {
// Expand RLE to next position
rleInfo[get-1].length = rleInfo[get].length - 1;
rleInfo[get-1].value = rleInfo[get].value;
rleInfo[get-1].valueAfter = rleInfo[get].valueAfter;
} else {
// End of RLE, advance link.
first[cur] = link[first[cur]]; // Waste first entry
}
}
// Now we have all matches from this position..
// ..visit all nodes reached by the matches.
for (i = 255; i > 0; i--) {
// Find all matches we stored
uint len = matches[i].length;
uint offset = matches[i].offset;
if (len != 0) {
uint targetI = get - len + 1;
node* target = &context[targetI];
// Calculate cost for this jump
uint currentCost = lastNode.cost;
currentCost += calculateCostOfMatch(len, offset);
// If this match is first or cheapest way to get here
// then update node
if (target->cost == 0 ||
target->cost > currentCost) {
target->cost = currentCost;
target->next = get + 1;
target->litLen = 0;
target->offset = offset;
}
}
}
// Calc the cost for this node if using one more literal
uint litLen = lastNode.litLen + 1;
uint litCost = calculateCostOfLiteral(lastNode.cost, litLen);
// If literal run is first or cheapest way to get here
// then update node
node* this = &context[get];
if (this->cost == 0 ||
this->cost >= litCost) {
this->cost = litCost;
this->next = get + 1;
this->litLen = litLen;
}
lastNode.cost = this->cost;
lastNode.next = this->next;
lastNode.litLen = this->litLen;
// Loop to the next position
get--;
};
}
// Returns margin
int writeOutput() {
uint i;
put = 0;
curByte = 0;
curCnt = 8;
curIndex = put;
put++;
int maxDiff = 0;
bool needCopyBit = true;
for (i = 0; i < ibufSize;) {
uint link = context[i].next;
uint cost = context[i].cost;
uint litLen = context[i].litLen;
uint offset = context[i].offset;
if (litLen == 0) {
// Put Match
uint len = link - i;
log("$%04x: Mat(%i, %i)\n", i, len, offset);
if(needCopyBit) {
wBit(1);
}
wLength(len - 1);
wOffset(offset - 1, len - 1);
i = link;
needCopyBit = true;
} else {
// Put LiteralRun
needCopyBit = false;
while(litLen > 0) {
uint len = litLen < 255 ? litLen : 255;
log("$%04x: Lit(%i)\n", i, len);
wBit(0);
wLength(len);
wBytes(i, len);
if (litLen == 255) {
needCopyBit = true;
}
litLen -= len;
i += len;
};
}
if ((int)(i - put) > maxDiff) {
maxDiff = i - put;
}
}
wBit(1);
wLength(0xff);
wFlush();
int margin = (maxDiff - (i - put));
return margin;
}
bool crunch(File *aSource,
File *aTarget,
uint address,
bool isExecutable,
bool isRelocated)
{
uint i;
byte *target;
ibufSize = aSource->size - 2;
ibuf = (byte*)malloc(ibufSize);
context = (node*)malloc(sizeof(node) * ibufSize);
link = (uint*)malloc(sizeof(uint) * ibufSize);
rleInfo = (RLEInfo*)malloc(sizeof(RLEInfo) * ibufSize);
// Load ibuf and clear context
for(i = 0; i < ibufSize; ++i) {
ibuf[i] = aSource->data[i + 2];
context[i].cost = 0;
link[i] = 0;
rleInfo[i].length = 0;
}
setupHelpStructures();
findMatches();
obuf = (byte*)malloc(memSize);
int margin = writeOutput();
uint packLen = put;
uint fileLen = put;
uint decrLen = 0;
if(isExecutable) {
decrLen = DECRUNCHER_LENGTH;
fileLen += decrLen + 2;
} else {
fileLen += 4;
}
aTarget->size = fileLen;
aTarget->data = (byte*)malloc(aTarget->size);
target = aTarget->data;
if(isExecutable) {
uint startAddress = 0x10000 - packLen;
uint transfAddress = fileLen + 0x6ff;
decrCode[0x1f] = transfAddress & 0xff; // Transfer from..
decrCode[0x20] = transfAddress >> 8; //
decrCode[0xbc] = startAddress & 0xff; // Depack from..
decrCode[0xbd] = startAddress >> 8; //
decrCode[0x85] = aSource->data[0]; // Depack to..
decrCode[0x86] = aSource->data[1]; //
decrCode[0xca] = address & 0xff; // Jump to..
decrCode[0xcb] = address >> 8; //
target[0] = 0x01;
target[1] = 0x08;
for(i = 0; i < decrLen; ++i) {
target[i + 2] = decrCode[i];
}
for(i = 0; i < put; ++i) {
target[i + 2 + decrLen] = obuf[i];
}
} else { // Not executable..
// Experimantal decision of start address
// uint startAddress = 0xfffa - packLen - 2;
uint startAddress = (aSource->data[1] << 8) | aSource->data[0];
startAddress += (ibufSize - packLen - 2 + margin);
if (isRelocated) {
startAddress = address - packLen - 2;
}
target[0] = startAddress & 0xff; // Load address
target[1] = startAddress >> 8;
target[2] = aSource->data[0]; // Depack to address
target[3] = aSource->data[1];
for(i = 0; i < put; ++i) {
target[i + 4] = obuf[i];
}
}
free(ibuf);
free(context);
free(link);
free(rleInfo);
return true;
}