ys2-intro/loader/tools/subsizer-0.7pre1/src/pathfinder.c
2025-11-13 19:07:39 +03:00

327 lines
6.5 KiB
C

/**************************************************************************
*
* FILE pathfinder.c
* Copyright (c) 2015, 2017 Daniel Kahlin <daniel@kahlin.net>
* Written by Daniel Kahlin <daniel@kahlin.net>
*
* DESCRIPTION
* find optimized path by considering encoding cost
*
******/
#include <limits.h>
#include <stdio.h>
#include <stdint.h>
#include "bitfunc.h"
#include "pathfinder.h"
#include "global.h"
#include "match.h"
#include "message.h"
#include "utils.h"
#define DEBUG_COMPLEXITY 0
PrimaryPath *create_primarypath(int n, uint8_t *buf)
{
PrimaryPath *pp;
pp = safe_malloc(sizeof(PrimaryPath), "primarypath");
/* allocate n+1 entries to allow for the end marker */
pp->path = safe_malloc( (n + 1) * sizeof(Match), "matches");
pp->n = n;
pp->buf = buf;
/* insert end marker */
make_end(&(pp->path[n]));
return pp;
}
/*
 * Free a PrimaryPath together with its path array.
 * Passing NULL is allowed and does nothing.
 */
void destroy_primarypath(PrimaryPath *pp)
{
    if (!pp) {
        return;
    }

    free(pp->path);
    free(pp);
}
/**************************************************************************
*
* SECTION fast cost functions
*
******/
/*
 * Lookup tables filled by prepare_fast().  Each table has 0x10000 entries
 * and is indexed by the value that was passed as the second argument of
 * the corresponding cost function.
 */
/* litcost[i] = cost_lit(es, i) */
double litcost[0x10000];
/* lencost[i] = cost_mlen(es, i) */
double lencost[0x10000];
/* offscost1[i] = cost_moffs(es, i, 1) */
double offscost1[0x10000];
/* offscost2[i] = cost_moffs(es, i, 2) */
double offscost2[0x10000];
/* offscost3[i] = cost_moffs(es, i, 3) */
double offscost3[0x10000];
/* offscost[i] = cost_moffs(es, i, 4); fast_moffs() uses it for every
   length other than 1, 2 and 3 */
double offscost[0x10000];
/*
 * Fill the fast lookup tables by evaluating every cost function of cfs
 * once per index 0..0xffff for the encoding set es, so that the path
 * search can use plain array lookups instead of function calls.
 */
static void prepare_fast(CostFuncSet *cfs, EncodingSet *es)
{
    int idx = 0;

    while (idx < 0x10000) {
        litcost[idx] = cfs->cost_lit(es, idx);
        lencost[idx] = cfs->cost_mlen(es, idx);
        /* one offset table per length class 1, 2, 3 and "4" */
        offscost1[idx] = cfs->cost_moffs(es, idx, 1);
        offscost2[idx] = cfs->cost_moffs(es, idx, 2);
        offscost3[idx] = cfs->cost_moffs(es, idx, 3);
        offscost[idx] = cfs->cost_moffs(es, idx, 4);
        idx++;
    }
}
static inline double fast_lit(int l)
{
return litcost[l];
}
static inline double fast_mlen(int l)
{
return lencost[l];
}
/*
 * Table lookup of the value prepare_fast() stored for cost_moffs() at
 * offset of.  Lengths 1, 2 and 3 each have their own table; every other
 * length uses the table that was built with length 4.
 * No bounds check: of must index the 0x10000 entry tables.
 */
static inline double fast_moffs(int of, int l)
{
    const double *table = offscost;

    if (l == 1) {
        table = offscost1;
    } else if (l == 2) {
        table = offscost2;
    } else if (l == 3) {
        table = offscost3;
    }

    return table[of];
}
/**************************************************************************
*
* SECTION path search helpers
*
******/

/* tuning constants used by find_cheapest_path() */
enum {
    PF_MATCH_TRIES = 4,         /* lengths tried per entry: l, l-1, l-2, l-3 */
    PF_MAX_LITERAL_RUN = 256,   /* longest literal sequence considered */
    PF_EXCLUSION_COST = 30000   /* prohibitive cost for a single literal
                                   that breaks the exclusion property */
};

/*
 * Try to lower the cheapest known cost of reaching position `to` by
 * arriving from position `from` over an edge of cost `w`.
 *
 * Returns 1 and records the new cost and predecessor when the candidate
 * is strictly cheaper than the current entry, otherwise returns 0 and
 * leaves the tables untouched.  The strict comparison means that among
 * equally cheap candidates the first one tried is kept.
 */
static int relax_edge(double *dist, int *prev, int from, int to, double w)
{
    const double cand = dist[from] + w;

    if (dist[to] > cand) {
        dist[to] = cand;
        prev[to] = from;
        return 1;
    }
    return 0;
}

/**************************************************************************
*
* NAME find_cheapest_path
*
* DESCRIPTION
* Consider encoding cost to find the cheapest path through all matches.
*
* For every position `cur` in 0..len-1 the candidates are tried in this
* order: each match/RLE entry of mt->match[cur] (with up to
* PF_MATCH_TRIES successively shorter lengths), then a single literal,
* then (optionally) literal sequences from longest to shortest.
*
* flags:
*   FCP_ENFORCE_EXCLUSION  a single literal whose byte equals the
*                          preceding byte gets cost PF_EXCLUSION_COST.
*   FCP_LITERAL_SEQUENCES  also consider literal sequences of
*                          2..PF_MAX_LITERAL_RUN bytes.
*   FCP_INITIAL_LITERAL    at position 0 consider only the single
*                          literal, never a literal sequence.
*
* Returns a PrimaryPath obtained from create_primarypath() with `len`
* and `cost` filled in; release it with destroy_primarypath().
*
* NOTE(review): lengths and offsets are used as indices into the
* 0x10000 entry fast cost tables without a range check; this presumably
* relies on the input being smaller than 64 KiB - confirm with callers.
*
******/
PrimaryPath *find_cheapest_path(MatchTree *mt, CostFuncSet *cfs, EncodingSet *es, unsigned int flags)
{
    int i, j;
    int cur;
    int len = mt->len;
    double *dist;   /* dist[p]: cheapest known cost of reaching position p */
    int *prev;      /* prev[p]: position the unit ending at p starts from, -1 if unset */
    Match *path;    /* path[p]: the unit that ends at p on the cheapest route */
    PrimaryPath *pp;

    /* configuration */
    const int min_match = 1;
    const int enforce_exclusion = (flags & FCP_ENFORCE_EXCLUSION) != 0;
    const int literal_sequences = (flags & FCP_LITERAL_SEQUENCES) != 0;
    const int initial_literal = (flags & FCP_INITIAL_LITERAL) != 0;

#if DEBUG_COMPLEXITY
    int n_match = 0, n_match_taken = 0;
    int n_rle = 0, n_rle_taken = 0;
    int n_lt = 0, n_lt_taken = 0;
#endif

    /* construct lookup tables */
    prepare_fast(cfs, es);

    /* create path tables (one entry per position 0..len) */
    dist = safe_malloc((len + 1) * sizeof *dist, "dist table");
    prev = safe_malloc((len + 1) * sizeof *prev, "prev table");
    path = safe_malloc((len + 1) * sizeof *path, "match table");

    /* initialize costs */
    for (i = 0; i < len + 1; i++) {
        /* make sure we have headroom for a few additions. */
        dist[i] = INT_MAX - 0x10000;
        prev[i] = -1;
    }
    dist[0] = 0;

    /*
     * calculate costs
     */
    for (cur = 0; cur < len; cur++) {
        int v;
        double w;
        Match *m;

        for (m = mt->match[cur]; m && !is_end(m); m++) {
            int l, of, c;
#if DEBUG_COMPLEXITY
            int *n_tried = &n_match;
            int *n_taken = &n_match_taken;
#endif

            if (is_match(m) && get_match_len(m) >= min_match) {
                /* match */
                l = get_match_len(m);
                of = get_match_offs(m);
            } else if (is_rle(m) && get_rle_len(m) >= min_match) {
                /* rle: costed and stored as a match at offset 1 */
                l = get_rle_len(m);
                of = 1;
#if DEBUG_COMPLEXITY
                n_tried = &n_rle;
                n_taken = &n_rle_taken;
#endif
            } else {
                continue;
            }

            /*
             * - does not consider when the escape bit isn't needed.
             */
            /*
             * scan through all possible shorter lengths and see if
             * any are cheaper.
             * TODO: which choices are actually interesting here?
             */
            for (c = PF_MATCH_TRIES; c && l >= min_match; c--, l--) {
                v = cur + l;
                if (v > len) {
                    /* defensive: the tables only have entries 0..len */
                    continue;
                }
                w = 1 + fast_mlen(l) + fast_moffs(of, l);
#if DEBUG_COMPLEXITY
                (*n_tried)++;
#endif
                if (relax_edge(dist, prev, cur, v, w)) {
                    make_match(&path[v], of, l);
#if DEBUG_COMPLEXITY
                    (*n_taken)++;
#endif
                }
            }
        }

        /* literal */
        /*
         * - should be expanded to handle all possible literal
         *   sequences
         */
        if (enforce_exclusion &&
            (cur > 0) && (mt->buf[cur - 1] == mt->buf[cur])) {
            /* force unbearable cost for bytes breaking the
               exclusion property */
            w = PF_EXCLUSION_COST;
        } else {
            w = fast_lit(1);
        }
        v = cur + 1;
#if DEBUG_COMPLEXITY
        n_lt++;
#endif
        if (relax_edge(dist, prev, cur, v, w)) {
            make_literal(&path[v], cur, 1);
#if DEBUG_COMPLEXITY
            n_lt_taken++;
#endif
        }

        /* literal sequences, longest first, clipped to the end of input */
        if (!(initial_literal && (cur == 0)) && literal_sequences) {
            int l = (cur + PF_MAX_LITERAL_RUN < len) ? PF_MAX_LITERAL_RUN : len - cur;

            for (; l > 1; l--) {
                w = fast_lit(l);
                v = cur + l;
#if DEBUG_COMPLEXITY
                n_lt++;
#endif
                if (relax_edge(dist, prev, cur, v, w)) {
                    make_literal(&path[v], cur, l);
#if DEBUG_COMPLEXITY
                    n_lt_taken++;
#endif
                }
            }
        }
    }

    msg(MSG_DEBUG, "cost=%f bits (%f bytes)\n", dist[len], (dist[len]+7)/8);
#if DEBUG_COMPLEXITY
    printf("n_match=%d (%d), n_rle=%d (%d), n_lit=%d (%d)\n", n_match, n_match_taken, n_rle, n_rle_taken, n_lt, n_lt_taken);
#endif

    /*
     * Backtrack the cheapest path to find the number of primary units
     * to allocate.
     */
    j = 0;
    for (i = len; i > 0; i = prev[i]) {
        j++;
    }

    pp = create_primarypath(j, mt->buf);
    pp->len = len;
    pp->cost = dist[len];

    /*
     * Backtrack the cheapest path and create primary units accordingly.
     * Walking backwards from len, so fill the path from its last slot.
     */
    j = pp->n - 1;
    for (i = len; i > 0; i = prev[i]) {
        pp->path[j] = path[i];
        j--;
    }

    /* free up path tables */
    free(dist);
    free(prev);
    free(path);

    return pp;
}
/* eof */