fast block search part 2 - still ALPHA
now tested... it works but size is not optimal
This commit is contained in:
parent
2fd6fa1a87
commit
fc0a7a5e17
|
@ -601,7 +601,7 @@ void reloc(unsigned char* buf, size_t len, unsigned int sourceAddr, unsigned int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SafeWriter* stripNops(SafeWriter* s, unsigned char* speedDial) {
|
SafeWriter* stripNops(SafeWriter* s) {
|
||||||
std::unordered_map<unsigned int,unsigned int> addrTable;
|
std::unordered_map<unsigned int,unsigned int> addrTable;
|
||||||
SafeWriter* oldStream=s;
|
SafeWriter* oldStream=s;
|
||||||
unsigned char* buf=oldStream->getFinalBuf();
|
unsigned char* buf=oldStream->getFinalBuf();
|
||||||
|
@ -666,11 +666,18 @@ struct BlockMatch {
|
||||||
|
|
||||||
#define OVERLAPS(a1,a2,b1,b2) ((b1)<(a2) && (b2)>(a1))
|
#define OVERLAPS(a1,a2,b1,b2) ((b1)<(a2) && (b2)>(a1))
|
||||||
|
|
||||||
SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlocks, unsigned char* speedDial) {
|
// TODO:
|
||||||
|
// - check whether a block consists only of calls
|
||||||
|
// - see if we can optimize better
|
||||||
|
SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlocks) {
|
||||||
unsigned char* buf=stream->getFinalBuf();
|
unsigned char* buf=stream->getFinalBuf();
|
||||||
size_t matchSize=48;
|
size_t matchSize=48;
|
||||||
std::vector<BlockMatch> matches;
|
std::vector<BlockMatch> matches;
|
||||||
|
|
||||||
|
// repeat until we run out of matches
|
||||||
|
while (true) {
|
||||||
|
matches.clear();
|
||||||
|
|
||||||
// fast match algorithm
|
// fast match algorithm
|
||||||
// search for small matches, and then find bigger ones
|
// search for small matches, and then find bigger ones
|
||||||
for (size_t i=0; i<stream->size(); i+=8) {
|
for (size_t i=0; i<stream->size(); i+=8) {
|
||||||
|
@ -720,7 +727,9 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
|
||||||
} while (wantMore);
|
} while (wantMore);
|
||||||
|
|
||||||
// first stage done
|
// first stage done
|
||||||
// set done to false unless this match overlaps with itself
|
// set done to false unless:
|
||||||
|
// - this match overlaps with itself
|
||||||
|
// - this block only consists of calls
|
||||||
size_t nonOverlapCount=0;
|
size_t nonOverlapCount=0;
|
||||||
for (BlockMatch& i: matches) {
|
for (BlockMatch& i: matches) {
|
||||||
i.done=false;
|
i.done=false;
|
||||||
|
@ -729,10 +738,24 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
|
||||||
i.done=true;
|
i.done=true;
|
||||||
} else {
|
} else {
|
||||||
nonOverlapCount++;
|
nonOverlapCount++;
|
||||||
|
/*
|
||||||
|
bool onlyCalls=true;
|
||||||
|
for (size_t j=i.orig; j<i.orig+i.len; j+=8) {
|
||||||
|
if (buf[j]!=0xf4) {
|
||||||
|
onlyCalls=false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (onlyCalls) {
|
||||||
|
i.done=true;
|
||||||
|
} else {
|
||||||
|
nonOverlapCount++;
|
||||||
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logD("%d non-overlapping candidates",(int)nonOverlapCount);
|
logD("%d good candidates",(int)nonOverlapCount);
|
||||||
|
|
||||||
// quit if there isn't anything
|
// quit if there isn't anything
|
||||||
if (!nonOverlapCount) return stream;
|
if (!nonOverlapCount) return stream;
|
||||||
|
@ -742,6 +765,7 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
|
||||||
logD("largest match: %d",(int)matchSize);
|
logD("largest match: %d",(int)matchSize);
|
||||||
|
|
||||||
std::vector<BlockMatch> workMatches;
|
std::vector<BlockMatch> workMatches;
|
||||||
|
bool newBlocks=false;
|
||||||
|
|
||||||
while (matchSize>=48) {
|
while (matchSize>=48) {
|
||||||
workMatches.clear();
|
workMatches.clear();
|
||||||
|
@ -764,6 +788,8 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
|
||||||
// create new sub-block if necessary
|
// create new sub-block if necessary
|
||||||
if (i.orig!=lastOrig) {
|
if (i.orig!=lastOrig) {
|
||||||
subBlockID=subBlocks.size();
|
subBlockID=subBlocks.size();
|
||||||
|
newBlocks=true;
|
||||||
|
logV("new sub-block %d",(int)subBlockID);
|
||||||
|
|
||||||
// isolate this sub-block
|
// isolate this sub-block
|
||||||
SafeWriter* newBlock=new SafeWriter;
|
SafeWriter* newBlock=new SafeWriter;
|
||||||
|
@ -846,126 +872,16 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
|
||||||
|
|
||||||
logV("done!");
|
logV("done!");
|
||||||
|
|
||||||
|
// get out if we haven't made any blocks
|
||||||
|
if (!newBlocks) break;
|
||||||
|
|
||||||
// remove nop's
|
// remove nop's
|
||||||
stream=stripNops(stream,speedDial);
|
stream=stripNops(stream);
|
||||||
|
|
||||||
/*
|
|
||||||
for (size_t groupSize=stream->size()>>1; groupSize>=48; groupSize-=8) {
|
|
||||||
bool foundSomething=false;
|
|
||||||
logV("...try size %d",groupSize);
|
|
||||||
for (size_t searchPos=0; (searchPos+groupSize)<stream->size();) {
|
|
||||||
const unsigned char* group=&buf[searchPos];
|
|
||||||
size_t groupLen=0;
|
|
||||||
size_t groupInsCount=0;
|
|
||||||
size_t subBlockID=subBlocks.size();
|
|
||||||
bool haveSub=false;
|
|
||||||
bool onlyCalls=true;
|
|
||||||
|
|
||||||
// register this block
|
|
||||||
for (size_t i=0; i<groupSize && i<stream->size(); i+=8) {
|
|
||||||
if (buf[searchPos+i]!=0xf4) onlyCalls=false;
|
|
||||||
if (groupLen+8>groupSize) break;
|
|
||||||
groupLen+=8;
|
|
||||||
groupInsCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't do anything if we don't have a block large enough
|
|
||||||
if (groupLen<24) {
|
|
||||||
searchPos+=8;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't do anything if this is just one or two commands
|
|
||||||
// TODO: this is a duplicate of the previous statement now that all commands are 8 bytes long
|
|
||||||
if (groupInsCount<3) {
|
|
||||||
searchPos+=8;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't do anything if this block only consists of calls
|
|
||||||
if (onlyCalls) {
|
|
||||||
logW("nothing but calls.");
|
|
||||||
searchPos+=8;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// find identical blocks
|
|
||||||
for (size_t i=searchPos+groupLen; i+groupLen<stream->size();) {
|
|
||||||
// compare next block to group
|
|
||||||
if (memcmp(&buf[i],group,groupLen)==0) {
|
|
||||||
// we have a sub-block
|
|
||||||
if (!haveSub) {
|
|
||||||
// isolate this sub-block
|
|
||||||
SafeWriter* newBlock=new SafeWriter;
|
|
||||||
newBlock->init();
|
|
||||||
newBlock->write(group,groupLen);
|
|
||||||
newBlock->writeC(0xf9); // ret
|
|
||||||
// padding
|
|
||||||
newBlock->writeC(0);
|
|
||||||
newBlock->writeC(0);
|
|
||||||
newBlock->writeC(0);
|
|
||||||
newBlock->writeC(0);
|
|
||||||
newBlock->writeC(0);
|
|
||||||
newBlock->writeC(0);
|
|
||||||
newBlock->writeC(0);
|
|
||||||
subBlocks.push_back(newBlock);
|
|
||||||
haveSub=true;
|
|
||||||
logD("- SUB %x (size %d):",searchPos,groupLen);
|
|
||||||
}
|
|
||||||
logD(" - %x",i);
|
|
||||||
// insert call
|
|
||||||
buf[i]=0xf4;
|
|
||||||
buf[i+1]=subBlockID&0xff;
|
|
||||||
buf[i+2]=(subBlockID>>8)&0xff;
|
|
||||||
buf[i+3]=(subBlockID>>16)&0xff;
|
|
||||||
buf[i+4]=(subBlockID>>24)&0xff;
|
|
||||||
buf[i+5]=0;
|
|
||||||
buf[i+6]=0;
|
|
||||||
buf[i+7]=0;
|
|
||||||
|
|
||||||
// replace the rest with nop
|
|
||||||
for (size_t j=i+8; j<i+groupLen; j++) {
|
|
||||||
buf[j]=0xf1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// continue search from end of block
|
|
||||||
i+=groupLen;
|
|
||||||
} else {
|
|
||||||
// next
|
|
||||||
i+=8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (haveSub) {
|
|
||||||
// insert call on the original block
|
|
||||||
buf[searchPos]=0xf4;
|
|
||||||
buf[searchPos+1]=subBlockID&0xff;
|
|
||||||
buf[searchPos+2]=(subBlockID>>8)&0xff;
|
|
||||||
buf[searchPos+3]=(subBlockID>>16)&0xff;
|
|
||||||
buf[searchPos+4]=(subBlockID>>24)&0xff;
|
|
||||||
buf[searchPos+5]=0;
|
|
||||||
buf[searchPos+6]=0;
|
|
||||||
buf[searchPos+7]=0;
|
|
||||||
|
|
||||||
// replace the rest with nop
|
|
||||||
for (size_t j=searchPos+8; j<searchPos+groupLen; j++) {
|
|
||||||
buf[j]=0xf1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// skip this block (it's isolated now)
|
|
||||||
searchPos+=groupLen;
|
|
||||||
foundSomething=true;
|
|
||||||
} else {
|
|
||||||
// try again somewhere else
|
|
||||||
searchPos+=8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (foundSomething) {
|
|
||||||
stream=stripNops(stream,speedDial);
|
|
||||||
buf=stream->getFinalBuf();
|
buf=stream->getFinalBuf();
|
||||||
|
|
||||||
|
logV("doing it again...");
|
||||||
}
|
}
|
||||||
}
|
|
||||||
*/
|
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1343,7 +1259,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
|
||||||
// PASS 3: remove nop's
|
// PASS 3: remove nop's
|
||||||
// this includes modifying call addresses to compensate
|
// this includes modifying call addresses to compensate
|
||||||
for (int h=0; h<chans; h++) {
|
for (int h=0; h<chans; h++) {
|
||||||
chanStream[h]=stripNops(chanStream[h],sortedCmd);
|
chanStream[h]=stripNops(chanStream[h]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PASS 4: find sub-blocks and isolate them
|
// PASS 4: find sub-blocks and isolate them
|
||||||
|
@ -1354,7 +1270,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
|
||||||
|
|
||||||
// 6 is the minimum size that can be reliably optimized
|
// 6 is the minimum size that can be reliably optimized
|
||||||
logI("finding sub-blocks in chan %d",h);
|
logI("finding sub-blocks in chan %d",h);
|
||||||
chanStream[h]=findSubBlocks(chanStream[h],subBlocks,sortedCmd);
|
chanStream[h]=findSubBlocks(chanStream[h],subBlocks);
|
||||||
// find sub-blocks within sub-blocks
|
// find sub-blocks within sub-blocks
|
||||||
size_t subBlocksLast=0;
|
size_t subBlocksLast=0;
|
||||||
size_t subBlocksLen=subBlocks.size();
|
size_t subBlocksLen=subBlocks.size();
|
||||||
|
@ -1362,7 +1278,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
|
||||||
while (subBlocksLast!=subBlocksLen) {
|
while (subBlocksLast!=subBlocksLen) {
|
||||||
logI("got %d blocks... starting from %d",(int)subBlocksLen,(int)subBlocksLast);
|
logI("got %d blocks... starting from %d",(int)subBlocksLen,(int)subBlocksLast);
|
||||||
for (size_t i=subBlocksLast; i<subBlocksLen; i++) {
|
for (size_t i=subBlocksLast; i<subBlocksLen; i++) {
|
||||||
SafeWriter* newBlock=findSubBlocks(subBlocks[i],subBlocks,sortedCmd);
|
SafeWriter* newBlock=findSubBlocks(subBlocks[i],subBlocks);
|
||||||
subBlocks[i]=newBlock;
|
subBlocks[i]=newBlock;
|
||||||
}
|
}
|
||||||
subBlocksLast=subBlocksLen;
|
subBlocksLast=subBlocksLen;
|
||||||
|
@ -1433,7 +1349,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
|
||||||
|
|
||||||
// PASS 5: remove nop's (again)
|
// PASS 5: remove nop's (again)
|
||||||
for (int h=0; h<chans; h++) {
|
for (int h=0; h<chans; h++) {
|
||||||
chanStream[h]=stripNops(chanStream[h],sortedCmd);
|
chanStream[h]=stripNops(chanStream[h]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PASS 6: pack streams
|
// PASS 6: pack streams
|
||||||
|
|
Loading…
Reference in a new issue