diff --git a/src/engine/cmdStreamOps.cpp b/src/engine/cmdStreamOps.cpp index 961b24ac0..8a8f00d15 100644 --- a/src/engine/cmdStreamOps.cpp +++ b/src/engine/cmdStreamOps.cpp @@ -663,6 +663,14 @@ SafeWriter* stripNops(SafeWriter* s) { case 0xfa: { // jmp unsigned int addr=buf[i+1]|(buf[i+2]<<8)|(buf[i+3]<<16)|(buf[i+4]<<24); assert(!(addr&7)); + if (buf[addr]==0xf1) { + logE("POINTS TO NOP"); + abort(); + } + if (buf[addr]>=oldStream->size()) { + logE("OUT OF BOUNDS!"); + abort(); + } try { addr=addrTable[addr]; buf[i+1]=addr&0xff; @@ -671,6 +679,7 @@ SafeWriter* stripNops(SafeWriter* s) { buf[i+4]=(addr>>24)&0xff; } catch (std::out_of_range& e) { logW("address %x is not mappable!",addr); + abort(); } break; } @@ -775,6 +784,16 @@ struct BlockMatch { orig(0), block(0), len(0), done(false) {} }; +struct MatchBenefit { + size_t index; + int benefit; + unsigned int len; + MatchBenefit(size_t i, int b, unsigned int l): + index(i), benefit(b), len(l) {} + MatchBenefit(): + index(0), benefit(0), len(0) {} +}; + #define OVERLAPS(a1,a2,b1,b2) ((b1)<(a2) && (b2)>(a1)) #define MIN_MATCH_SIZE 32 @@ -785,6 +804,8 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlock unsigned char* buf=stream->getFinalBuf(); size_t matchSize=MIN_MATCH_SIZE; std::vector matches; + std::vector origs; + std::vector benefits; matches.clear(); @@ -792,8 +813,14 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlock // search for small matches, and then find bigger ones logD("finding possible matches"); for (size_t i=0; isize(); i+=8) { + bool storedOrig=false; for (size_t j=i+matchSize; jsize(); j+=8) { if (memcmp(&buf[i],&buf[j],matchSize)==0) { + if (!storedOrig) { + // store index to the first match somewhere else for the sake of speed + origs.push_back(matches.size()); + storedOrig=true; + } // store this match for later matches.push_back(BlockMatch(i,j,matchSize)); } @@ -801,6 +828,7 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlock } logD("%d candidates",(int)matches.size()); + logD("%d origs",(int)origs.size()); // quit if there isn't anything if (matches.empty()) return stream; @@ -810,9 +838,6 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlock if ((i&8191)==0) logV("match %d of %d",i,(int)matches.size()); BlockMatch& b=matches[i]; - // don't do anything if this match is done - if (b.done) continue; - size_t finalLen=b.len; size_t origPos=b.orig+b.len; size_t blockPos=b.block+b.len; @@ -831,131 +856,153 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlock finalLen&=~7; b.len=finalLen; - b.done=true; } - logD("checking overlapping/bad matches"); + // new code MAN... WHY... + // basically the workflow should be: + // - test every block position + // - test every length from MIN_MATCH_SIZE to largest length + // - check for overlap, bad matches and all of that + // - for bad matches, fortunately we can use length for a speed-up... but first make it right + // - add weighted benefit to a list (DEBUG..... remove once it's stable) + // - pick largest benefit from list + // - make sub-blocks!!! + logD("testing matches for benefit"); + for (size_t i: origs) { + size_t orig=matches[i].orig; + size_t minSize=MIN_MATCH_SIZE; + size_t maxSize=minSize; + std::vector testMatches; + std::vector testLenMatches; - // first stage done - // set done to false unless: - // - this match overlaps with itself - // - this block only consists of calls - // - this block contains a ret or jmp - size_t nonOverlapCount=0; - for (BlockMatch& i: matches) { - i.done=false; - if (OVERLAPS(i.orig,i.orig+i.len,i.block,i.block+i.len)) { - // self-overlapping - i.done=true; - } else { - bool onlyCalls=true; - for (size_t j=i.orig; jmaxSize) maxSize=matches[i].len; + testMatches.push_back(matches[i]); + } + + //logD("%d: testing %d matches... (lengths %d-%d)",(int)orig,(int)testMatches.size(),minSize,maxSize); + + // test all lengths + for (size_t len=maxSize; len<=maxSize; len+=8) { + testLenMatches.clear(); + // filter matches + for (BlockMatch& k: testMatches) { + // match length shall be greater than or equal to current length + if (len>k.len) continue; + + // check for bad matches, which include: + // - match overlapping with itself + // - block only consisting of calls + // - block containing a ret, jmp or stop + + // 1. self-overlapping + if (OVERLAPS(k.orig,k.orig+len,k.block,k.block+len)) continue; + + // 2. only calls and jmp/ret/stop + bool metCriteria=false; + for (size_t l=k.orig; lcount=nonOverlapCount; - } - - // NEW STUFF - // find and sort matches by benefit - size_t bestBenefitIndex=0; - int bestBenefit=-1; - size_t lastOrig=SIZE_MAX; - size_t lastLen=SIZE_MAX; - size_t lastOrigOff=0; - int gains=0; - int blockSize=0; - BlockMatch emptyMatch(SIZE_MAX,SIZE_MAX,0); - for (size_t i=0; i<=matches.size(); i++) { - BlockMatch& b=emptyMatch; - if (ibestBenefit) { - bestBenefitIndex=lastOrigOff; - bestBenefit=finalBenefit; - } - if (gains<=0) { - // don't make a sub-block for these matches since we only have loss - //logV("(LOSSES!)"); - for (size_t j=lastOrigOff; j0) { + logD("- %x (%d): %d = %d",(int)i,(int)len,(int)testLenMatches.size(),finalBenefit); + benefits.push_back(MatchBenefit(i,finalBenefit,len)); } - gains=-4; } - gains+=(blockSize-3); } - logI("BEST BENEFIT: %d in %x",bestBenefit,(int)bestBenefitIndex); - logI("match size %d",matches[bestBenefitIndex].len); - // quit if there isn't anything - if (!nonOverlapCount) return stream; + // quit if we can't go any further + if (benefits.empty()) return stream; - // quit if it's all losses - if (bestBenefit<1) return stream; + // pick best benefit + MatchBenefit& bestBenefit=benefits[0]; + for (MatchBenefit& i: benefits) { + if (i.benefit>bestBenefit.benefit) bestBenefit=i; + } - // work on most beneficial matches + logI("BEST BENEFIT: %d in %x with size %u",bestBenefit.benefit,(int)bestBenefit.index,bestBenefit.len); + + /* + // work on matches with this benefit std::vector workMatches; bool newBlocks=false; workMatches.clear(); - size_t bestBenefitOrig=matches[bestBenefitIndex].orig; - size_t bestBenefitLen=matches[bestBenefitIndex].len; - for (size_t i=bestBenefitIndex; i& subBlock // create new sub-block if necessary if (i.orig!=lastOrig) { + if (newBlocks) { + logE("WHAT?!!?!"); + abort(); + } subBlockID=subBlocks.size(); newBlocks=true; logV("new sub-block %d",(int)subBlockID); @@ -1013,32 +1064,14 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlock for (size_t j=i.block+8; jgetFinalBuf(); + */ return stream; } @@ -1070,20 +1103,19 @@ SafeWriter* packStream(SafeWriter* s, unsigned char* speedDial) { try { addr=addrTable[addr]; // check whether we have sufficient room to turn this into a 16-bit call - /* if (addr<0xff00) { buf[i]=0xf8; buf[i+1]=addr&0xff; buf[i+2]=(addr>>8)&0xff; buf[i+3]=0xf1; buf[i+4]=0xf1; - } else {*/ + } else { buf[i]=0xf5; buf[i+1]=addr&0xff; buf[i+2]=(addr>>8)&0xff; buf[i+3]=(addr>>16)&0xff; buf[i+4]=(addr>>24)&0xff; - //} + } } catch (std::out_of_range& e) { logW("address %x is not mappable!",addr); } @@ -1482,6 +1514,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable // insert sub-blocks and resolve symbols logI("%d sub-blocks total",(int)subBlocks.size()); std::vector blockOff; + blockOff.clear(); globalStream->seek(0,SEEK_END); for (size_t i=0; i=0) { // push address of original block (discard duplicate) blockOff.push_back(blockOff[dupOf]); + logW("did you say DUPLICATE?!"); + abort(); } else { // write sub-block blockOff.push_back(globalStream->tell());