diff --git a/src/engine/cmdStreamOps.cpp b/src/engine/cmdStreamOps.cpp index 50443f15d..c8795f142 100644 --- a/src/engine/cmdStreamOps.cpp +++ b/src/engine/cmdStreamOps.cpp @@ -601,7 +601,7 @@ void reloc(unsigned char* buf, size_t len, unsigned int sourceAddr, unsigned int } } -SafeWriter* stripNops(SafeWriter* s, unsigned char* speedDial) { +SafeWriter* stripNops(SafeWriter* s) { std::unordered_map addrTable; SafeWriter* oldStream=s; unsigned char* buf=oldStream->getFinalBuf(); @@ -666,306 +666,222 @@ struct BlockMatch { #define OVERLAPS(a1,a2,b1,b2) ((b1)<(a2) && (b2)>(a1)) -SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlocks, unsigned char* speedDial) { +// TODO: +// - check whether a block consists only of calls +// - see if we can optimize better +SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlocks) { unsigned char* buf=stream->getFinalBuf(); size_t matchSize=48; std::vector matches; - // fast match algorithm - // search for small matches, and then find bigger ones - for (size_t i=0; isize(); i+=8) { - for (size_t j=i+matchSize; jsize(); j+=8) { - if (memcmp(&buf[i],&buf[j],matchSize)==0) { - // store this match for later - matches.push_back(BlockMatch(i,j,matchSize)); + // repeat until we run out of matches + while (true) { + matches.clear(); + + // fast match algorithm + // search for small matches, and then find bigger ones + for (size_t i=0; isize(); i+=8) { + for (size_t j=i+matchSize; jsize(); j+=8) { + if (memcmp(&buf[i],&buf[j],matchSize)==0) { + // store this match for later + matches.push_back(BlockMatch(i,j,matchSize)); + } } } - } - logD("%d candidates",(int)matches.size()); + logD("%d candidates",(int)matches.size()); - // quit if there isn't anything - if (matches.empty()) return stream; + // quit if there isn't anything + if (matches.empty()) return stream; - // search for bigger matches - bool wantMore=true; - do { - size_t matchCount=0; - wantMore=false; - matchSize+=8; - for (size_t i=0; istream->size() || (b.block+matchSize)>stream->size()) { - b.done=true; - continue; + // stop if this match is near the edge + if ((b.orig+matchSize)>stream->size() || (b.block+matchSize)>stream->size()) { + b.done=true; + continue; + } + + // check + if (memcmp(&buf[b.orig],&buf[b.block],matchSize)==0) { + // this match may be bigger + b.len=matchSize; + wantMore=true; + matchCount++; + } else { + // this is the max size + b.done=true; + } } + //logV("size %d: %d matches",(int)matchSize,(int)matchCount); + } while (wantMore); - // check - if (memcmp(&buf[b.orig],&buf[b.block],matchSize)==0) { - // this match may be bigger - b.len=matchSize; - wantMore=true; - matchCount++; - } else { - // this is the max size - b.done=true; - } - } - //logV("size %d: %d matches",(int)matchSize,(int)matchCount); - } while (wantMore); - - // first stage done - // set done to false unless this match overlaps with itself - size_t nonOverlapCount=0; - for (BlockMatch& i: matches) { - i.done=false; - if (OVERLAPS(i.orig,i.orig+i.len,i.block,i.block+i.len)) { - // self-overlapping - i.done=true; - } else { - nonOverlapCount++; - } - } - - logD("%d non-overlapping candidates",(int)nonOverlapCount); - - // quit if there isn't anything - if (!nonOverlapCount) return stream; - - // work on largest matches - // progress to smaller ones until we run out of them - logD("largest match: %d",(int)matchSize); - - std::vector workMatches; - - while (matchSize>=48) { - workMatches.clear(); - // find matches with matching size + // first stage done + // set done to false unless: + // - this match overlaps with itself + // - this block only consists of calls + size_t nonOverlapCount=0; for (BlockMatch& i: matches) { - if (i.len==matchSize) { - // mark it as done and push it - workMatches.push_back(i); + i.done=false; + if (OVERLAPS(i.orig,i.orig+i.len,i.block,i.block+i.len)) { + // self-overlapping i.done=true; - } - } - - // make sub-blocks - size_t lastOrig=SIZE_MAX; - size_t subBlockID=subBlocks.size(); - for (BlockMatch& i: workMatches) { - // skip invalid matches (yes, this can happen) - if (i.done) continue; - - // create new sub-block if necessary - if (i.orig!=lastOrig) { - subBlockID=subBlocks.size(); - - // isolate this sub-block - SafeWriter* newBlock=new SafeWriter; - newBlock->init(); - newBlock->write(&buf[i.orig],i.len); - newBlock->writeC(0xf9); // ret - // padding - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - subBlocks.push_back(newBlock); - lastOrig=i.orig; - - // insert call on the original block - buf[i.orig]=0xf4; - buf[i.orig+1]=subBlockID&0xff; - buf[i.orig+2]=(subBlockID>>8)&0xff; - buf[i.orig+3]=(subBlockID>>16)&0xff; - buf[i.orig+4]=(subBlockID>>24)&0xff; - buf[i.orig+5]=0; - buf[i.orig+6]=0; - buf[i.orig+7]=0; - - // replace the rest with nop - for (size_t j=i.orig+8; j>8)&0xff; - buf[i.block+3]=(subBlockID>>16)&0xff; - buf[i.block+4]=(subBlockID>>24)&0xff; - buf[i.block+5]=0; - buf[i.block+6]=0; - buf[i.block+7]=0; - - // replace the rest with nop - for (size_t j=i.block+8; jmatchSize) matchSize=i.len; - } - if (matchSize>=48) { - logV("trying next size %d",matchSize); - } - } - - logV("done!"); - - // remove nop's - stream=stripNops(stream,speedDial); - - /* - for (size_t groupSize=stream->size()>>1; groupSize>=48; groupSize-=8) { - bool foundSomething=false; - logV("...try size %d",groupSize); - for (size_t searchPos=0; (searchPos+groupSize)size();) { - const unsigned char* group=&buf[searchPos]; - size_t groupLen=0; - size_t groupInsCount=0; - size_t subBlockID=subBlocks.size(); - bool haveSub=false; - bool onlyCalls=true; - - // register this block - for (size_t i=0; isize(); i+=8) { - if (buf[searchPos+i]!=0xf4) onlyCalls=false; - if (groupLen+8>groupSize) break; - groupLen+=8; - groupInsCount++; - } - - // don't do anything if we don't have a block large enough - if (groupLen<24) { - searchPos+=8; - continue; - } - - // don't do anything if this is just one or two commands - // TODO: this is a duplicate of the previous statement now that all commands are 8 bytes long - if (groupInsCount<3) { - searchPos+=8; - continue; - } - - // don't do anything if this block only consists of calls - if (onlyCalls) { - logW("nothing but calls."); - searchPos+=8; - continue; - } - - // find identical blocks - for (size_t i=searchPos+groupLen; i+groupLensize();) { - // compare next block to group - if (memcmp(&buf[i],group,groupLen)==0) { - // we have a sub-block - if (!haveSub) { - // isolate this sub-block - SafeWriter* newBlock=new SafeWriter; - newBlock->init(); - newBlock->write(group,groupLen); - newBlock->writeC(0xf9); // ret - // padding - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - subBlocks.push_back(newBlock); - haveSub=true; - logD("- SUB %x (size %d):",searchPos,groupLen); + } else { + nonOverlapCount++; + /* + bool onlyCalls=true; + for (size_t j=i.orig; j>8)&0xff; - buf[i+3]=(subBlockID>>16)&0xff; - buf[i+4]=(subBlockID>>24)&0xff; - buf[i+5]=0; - buf[i+6]=0; - buf[i+7]=0; + } + if (onlyCalls) { + i.done=true; + } else { + nonOverlapCount++; + } + */ + } + } + + logD("%d good candidates",(int)nonOverlapCount); + + // quit if there isn't anything + if (!nonOverlapCount) return stream; + + // work on largest matches + // progress to smaller ones until we run out of them + logD("largest match: %d",(int)matchSize); + + std::vector workMatches; + bool newBlocks=false; + + while (matchSize>=48) { + workMatches.clear(); + // find matches with matching size + for (BlockMatch& i: matches) { + if (i.len==matchSize) { + // mark it as done and push it + workMatches.push_back(i); + i.done=true; + } + } + + // make sub-blocks + size_t lastOrig=SIZE_MAX; + size_t subBlockID=subBlocks.size(); + for (BlockMatch& i: workMatches) { + // skip invalid matches (yes, this can happen) + if (i.done) continue; + + // create new sub-block if necessary + if (i.orig!=lastOrig) { + subBlockID=subBlocks.size(); + newBlocks=true; + logV("new sub-block %d",(int)subBlockID); + + // isolate this sub-block + SafeWriter* newBlock=new SafeWriter; + newBlock->init(); + newBlock->write(&buf[i.orig],i.len); + newBlock->writeC(0xf9); // ret + // padding + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + subBlocks.push_back(newBlock); + lastOrig=i.orig; + + // insert call on the original block + buf[i.orig]=0xf4; + buf[i.orig+1]=subBlockID&0xff; + buf[i.orig+2]=(subBlockID>>8)&0xff; + buf[i.orig+3]=(subBlockID>>16)&0xff; + buf[i.orig+4]=(subBlockID>>24)&0xff; + buf[i.orig+5]=0; + buf[i.orig+6]=0; + buf[i.orig+7]=0; // replace the rest with nop - for (size_t j=i+8; j>8)&0xff; - buf[searchPos+3]=(subBlockID>>16)&0xff; - buf[searchPos+4]=(subBlockID>>24)&0xff; - buf[searchPos+5]=0; - buf[searchPos+6]=0; - buf[searchPos+7]=0; + // set match to the last sub-block + buf[i.block]=0xf4; + buf[i.block+1]=subBlockID&0xff; + buf[i.block+2]=(subBlockID>>8)&0xff; + buf[i.block+3]=(subBlockID>>16)&0xff; + buf[i.block+4]=(subBlockID>>24)&0xff; + buf[i.block+5]=0; + buf[i.block+6]=0; + buf[i.block+7]=0; // replace the rest with nop - for (size_t j=searchPos+8; jmatchSize) matchSize=i.len; + } + if (matchSize>=48) { + logV("trying next size %d",matchSize); } } - if (foundSomething) { - stream=stripNops(stream,speedDial); - buf=stream->getFinalBuf(); - } + + logV("done!"); + + // get out if we haven't made any blocks + if (!newBlocks) break; + + // remove nop's + stream=stripNops(stream); + buf=stream->getFinalBuf(); + + logV("doing it again..."); } - */ + return stream; } @@ -1343,7 +1259,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable // PASS 3: remove nop's // this includes modifying call addresses to compensate for (int h=0; h