fast block search part 2 - still ALPHA

Now tested: it works, but the resulting size is not yet optimal.
This commit is contained in:
tildearrow 2025-04-07 13:17:27 -05:00
parent 2fd6fa1a87
commit fc0a7a5e17

View file

@ -601,7 +601,7 @@ void reloc(unsigned char* buf, size_t len, unsigned int sourceAddr, unsigned int
}
}
SafeWriter* stripNops(SafeWriter* s, unsigned char* speedDial) {
SafeWriter* stripNops(SafeWriter* s) {
std::unordered_map<unsigned int,unsigned int> addrTable;
SafeWriter* oldStream=s;
unsigned char* buf=oldStream->getFinalBuf();
@ -666,11 +666,18 @@ struct BlockMatch {
#define OVERLAPS(a1,a2,b1,b2) ((b1)<(a2) && (b2)>(a1))
SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlocks, unsigned char* speedDial) {
// TODO:
// - check whether a block consists only of calls
// - see if we can optimize better
SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlocks) {
unsigned char* buf=stream->getFinalBuf();
size_t matchSize=48;
std::vector<BlockMatch> matches;
// repeat until we run out of matches
while (true) {
matches.clear();
// fast match algorithm
// search for small matches, and then find bigger ones
for (size_t i=0; i<stream->size(); i+=8) {
@ -720,7 +727,9 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
} while (wantMore);
// first stage done
// set done to false unless this match overlaps with itself
// set done to false unless:
// - this match overlaps with itself
// - this block only consists of calls
size_t nonOverlapCount=0;
for (BlockMatch& i: matches) {
i.done=false;
@ -729,10 +738,24 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
i.done=true;
} else {
nonOverlapCount++;
/*
bool onlyCalls=true;
for (size_t j=i.orig; j<i.orig+i.len; j+=8) {
if (buf[j]!=0xf4) {
onlyCalls=false;
break;
}
}
if (onlyCalls) {
i.done=true;
} else {
nonOverlapCount++;
}
*/
}
}
logD("%d non-overlapping candidates",(int)nonOverlapCount);
logD("%d good candidates",(int)nonOverlapCount);
// quit if there isn't anything
if (!nonOverlapCount) return stream;
@ -742,6 +765,7 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
logD("largest match: %d",(int)matchSize);
std::vector<BlockMatch> workMatches;
bool newBlocks=false;
while (matchSize>=48) {
workMatches.clear();
@ -764,6 +788,8 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
// create new sub-block if necessary
if (i.orig!=lastOrig) {
subBlockID=subBlocks.size();
newBlocks=true;
logV("new sub-block %d",(int)subBlockID);
// isolate this sub-block
SafeWriter* newBlock=new SafeWriter;
@ -846,126 +872,16 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
logV("done!");
// get out if we haven't made any blocks
if (!newBlocks) break;
// remove nop's
stream=stripNops(stream,speedDial);
/*
for (size_t groupSize=stream->size()>>1; groupSize>=48; groupSize-=8) {
bool foundSomething=false;
logV("...try size %d",groupSize);
for (size_t searchPos=0; (searchPos+groupSize)<stream->size();) {
const unsigned char* group=&buf[searchPos];
size_t groupLen=0;
size_t groupInsCount=0;
size_t subBlockID=subBlocks.size();
bool haveSub=false;
bool onlyCalls=true;
// register this block
for (size_t i=0; i<groupSize && i<stream->size(); i+=8) {
if (buf[searchPos+i]!=0xf4) onlyCalls=false;
if (groupLen+8>groupSize) break;
groupLen+=8;
groupInsCount++;
}
// don't do anything if we don't have a block large enough
if (groupLen<24) {
searchPos+=8;
continue;
}
// don't do anything if this is just one or two commands
// TODO: this is a duplicate of the previous statement now that all commands are 8 bytes long
if (groupInsCount<3) {
searchPos+=8;
continue;
}
// don't do anything if this block only consists of calls
if (onlyCalls) {
logW("nothing but calls.");
searchPos+=8;
continue;
}
// find identical blocks
for (size_t i=searchPos+groupLen; i+groupLen<stream->size();) {
// compare next block to group
if (memcmp(&buf[i],group,groupLen)==0) {
// we have a sub-block
if (!haveSub) {
// isolate this sub-block
SafeWriter* newBlock=new SafeWriter;
newBlock->init();
newBlock->write(group,groupLen);
newBlock->writeC(0xf9); // ret
// padding
newBlock->writeC(0);
newBlock->writeC(0);
newBlock->writeC(0);
newBlock->writeC(0);
newBlock->writeC(0);
newBlock->writeC(0);
newBlock->writeC(0);
subBlocks.push_back(newBlock);
haveSub=true;
logD("- SUB %x (size %d):",searchPos,groupLen);
}
logD(" - %x",i);
// insert call
buf[i]=0xf4;
buf[i+1]=subBlockID&0xff;
buf[i+2]=(subBlockID>>8)&0xff;
buf[i+3]=(subBlockID>>16)&0xff;
buf[i+4]=(subBlockID>>24)&0xff;
buf[i+5]=0;
buf[i+6]=0;
buf[i+7]=0;
// replace the rest with nop
for (size_t j=i+8; j<i+groupLen; j++) {
buf[j]=0xf1;
}
// continue search from end of block
i+=groupLen;
} else {
// next
i+=8;
}
}
if (haveSub) {
// insert call on the original block
buf[searchPos]=0xf4;
buf[searchPos+1]=subBlockID&0xff;
buf[searchPos+2]=(subBlockID>>8)&0xff;
buf[searchPos+3]=(subBlockID>>16)&0xff;
buf[searchPos+4]=(subBlockID>>24)&0xff;
buf[searchPos+5]=0;
buf[searchPos+6]=0;
buf[searchPos+7]=0;
// replace the rest with nop
for (size_t j=searchPos+8; j<searchPos+groupLen; j++) {
buf[j]=0xf1;
}
// skip this block (it's isolated now)
searchPos+=groupLen;
foundSomething=true;
} else {
// try again somewhere else
searchPos+=8;
}
}
if (foundSomething) {
stream=stripNops(stream,speedDial);
stream=stripNops(stream);
buf=stream->getFinalBuf();
logV("doing it again...");
}
}
*/
return stream;
}
@ -1343,7 +1259,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
// PASS 3: remove nop's
// this includes modifying call addresses to compensate
for (int h=0; h<chans; h++) {
chanStream[h]=stripNops(chanStream[h],sortedCmd);
chanStream[h]=stripNops(chanStream[h]);
}
// PASS 4: find sub-blocks and isolate them
@ -1354,7 +1270,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
// 6 is the minimum size that can be reliably optimized
logI("finding sub-blocks in chan %d",h);
chanStream[h]=findSubBlocks(chanStream[h],subBlocks,sortedCmd);
chanStream[h]=findSubBlocks(chanStream[h],subBlocks);
// find sub-blocks within sub-blocks
size_t subBlocksLast=0;
size_t subBlocksLen=subBlocks.size();
@ -1362,7 +1278,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
while (subBlocksLast!=subBlocksLen) {
logI("got %d blocks... starting from %d",(int)subBlocksLen,(int)subBlocksLast);
for (size_t i=subBlocksLast; i<subBlocksLen; i++) {
SafeWriter* newBlock=findSubBlocks(subBlocks[i],subBlocks,sortedCmd);
SafeWriter* newBlock=findSubBlocks(subBlocks[i],subBlocks);
subBlocks[i]=newBlock;
}
subBlocksLast=subBlocksLen;
@ -1433,7 +1349,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
// PASS 5: remove nop's (again)
for (int h=0; h<chans; h++) {
chanStream[h]=stripNops(chanStream[h],sortedCmd);
chanStream[h]=stripNops(chanStream[h]);
}
// PASS 6: pack streams