fast block search part 2 - still ALPHA

now tested: it works, but the resulting size is not optimal yet
2025-04-07 13:17:27 -05:00 · 2025-04-07 13:17:27 -05:00 · fc0a7a5e17
parent 2fd6fa1a87
commit fc0a7a5e17
1 changed file with 191 additions and 275 deletions
--- a/src/engine/cmdStreamOps.cpp
+++ b/src/engine/cmdStreamOps.cpp
@@ -601,7 +601,7 @@ void reloc(unsigned char* buf, size_t len, unsigned int sourceAddr, unsigned int
  }
 }
-SafeWriter* stripNops(SafeWriter* s, unsigned char* speedDial) {
+SafeWriter* stripNops(SafeWriter* s) {
  std::unordered_map<unsigned int,unsigned int> addrTable;
  SafeWriter* oldStream=s;
  unsigned char* buf=oldStream->getFinalBuf();
@@ -666,11 +666,18 @@ struct BlockMatch {
 #define OVERLAPS(a1,a2,b1,b2) ((b1)<(a2) && (b2)>(a1))
-SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlocks, unsigned char* speedDial) {
+// TODO:
 // - check whether a block consists only of calls
 // - see if we can optimize better
 SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlocks) {
  unsigned char* buf=stream->getFinalBuf();
  size_t matchSize=48;
  std::vector<BlockMatch> matches;
  // repeat until we run out of matches
  while (true) {
    matches.clear();
    // fast match algorithm
    // search for small matches, and then find bigger ones
    for (size_t i=0; i<stream->size(); i+=8) {
@@ -720,7 +727,9 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
    } while (wantMore);
    // first stage done
-  // set done to false unless this match overlaps with itself
+    // set done to false unless:
    // - this match overlaps with itself
    // - this block only consists of calls
    size_t nonOverlapCount=0;
    for (BlockMatch& i: matches) {
      i.done=false;
@@ -729,10 +738,24 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
        i.done=true;
      } else {
        nonOverlapCount++;
        /*
        bool onlyCalls=true;
        for (size_t j=i.orig; j<i.orig+i.len; j+=8) {
          if (buf[j]!=0xf4) {
            onlyCalls=false;
            break;
          }
        }
        if (onlyCalls) {
          i.done=true;
        } else {
          nonOverlapCount++;
        }
        */
      }
    }
-  logD("%d non-overlapping candidates",(int)nonOverlapCount);
+    logD("%d good candidates",(int)nonOverlapCount);
    // quit if there isn't anything
    if (!nonOverlapCount) return stream;
@@ -742,6 +765,7 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
    logD("largest match: %d",(int)matchSize);
    std::vector<BlockMatch> workMatches;
    bool newBlocks=false;
    while (matchSize>=48) {
      workMatches.clear();
@@ -764,6 +788,8 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
        // create new sub-block if necessary
        if (i.orig!=lastOrig) {
          subBlockID=subBlocks.size();
          newBlocks=true;
          logV("new sub-block %d",(int)subBlockID);
          // isolate this sub-block
          SafeWriter* newBlock=new SafeWriter;
@@ -846,126 +872,16 @@ SafeWriter* findSubBlocks(SafeWriter* stream, std::vector<SafeWriter*>& subBlock
    logV("done!");
    // get out if we haven't made any blocks
    if (!newBlocks) break;
    // remove nop's
-  stream=stripNops(stream,speedDial);
+    stream=stripNops(stream);
  /*
  for (size_t groupSize=stream->size()>>1; groupSize>=48; groupSize-=8) {
    bool foundSomething=false;
    logV("...try size %d",groupSize);
    for (size_t searchPos=0; (searchPos+groupSize)<stream->size();) {
      const unsigned char* group=&buf[searchPos];
      size_t groupLen=0;
      size_t groupInsCount=0;
      size_t subBlockID=subBlocks.size();
      bool haveSub=false;
      bool onlyCalls=true;
      // register this block
      for (size_t i=0; i<groupSize && i<stream->size(); i+=8) {
        if (buf[searchPos+i]!=0xf4) onlyCalls=false;
        if (groupLen+8>groupSize) break;
        groupLen+=8;
        groupInsCount++;
      }
      // don't do anything if we don't have a block large enough
      if (groupLen<24) {
        searchPos+=8;
        continue;
      }
      // don't do anything if this is just one or two commands
      // TODO: this is a duplicate of the previous statement now that all commands are 8 bytes long
      if (groupInsCount<3) {
        searchPos+=8;
        continue;
      }
      // don't do anything if this block only consists of calls
      if (onlyCalls) {
        logW("nothing but calls.");
        searchPos+=8;
        continue;
      }
      // find identical blocks
      for (size_t i=searchPos+groupLen; i+groupLen<stream->size();) {
        // compare next block to group
        if (memcmp(&buf[i],group,groupLen)==0) {
          // we have a sub-block
          if (!haveSub) {
            // isolate this sub-block
            SafeWriter* newBlock=new SafeWriter;
            newBlock->init();
            newBlock->write(group,groupLen);
            newBlock->writeC(0xf9); // ret
            // padding
            newBlock->writeC(0);
            newBlock->writeC(0);
            newBlock->writeC(0);
            newBlock->writeC(0);
            newBlock->writeC(0);
            newBlock->writeC(0);
            newBlock->writeC(0);
            subBlocks.push_back(newBlock);
            haveSub=true;
          logD("- SUB %x (size %d):",searchPos,groupLen);
          }
          logD("  - %x",i);
          // insert call
          buf[i]=0xf4;
          buf[i+1]=subBlockID&0xff;
          buf[i+2]=(subBlockID>>8)&0xff;
          buf[i+3]=(subBlockID>>16)&0xff;
          buf[i+4]=(subBlockID>>24)&0xff;
          buf[i+5]=0;
          buf[i+6]=0;
          buf[i+7]=0;
          // replace the rest with nop
          for (size_t j=i+8; j<i+groupLen; j++) {
            buf[j]=0xf1;
          }
          // continue search from end of block
          i+=groupLen;
        } else {
          // next
          i+=8;
        }
      }
      if (haveSub) {
        // insert call on the original block
        buf[searchPos]=0xf4;
        buf[searchPos+1]=subBlockID&0xff;
        buf[searchPos+2]=(subBlockID>>8)&0xff;
        buf[searchPos+3]=(subBlockID>>16)&0xff;
        buf[searchPos+4]=(subBlockID>>24)&0xff;
        buf[searchPos+5]=0;
        buf[searchPos+6]=0;
        buf[searchPos+7]=0;
        // replace the rest with nop
        for (size_t j=searchPos+8; j<searchPos+groupLen; j++) {
          buf[j]=0xf1;
        }
        // skip this block (it's isolated now)
        searchPos+=groupLen;
        foundSomething=true;
      } else {
        // try again somewhere else
        searchPos+=8;
      }
    }
    if (foundSomething) {
      stream=stripNops(stream,speedDial);
    buf=stream->getFinalBuf();
    logV("doing it again...");
  }
-  }
+
  */
  return stream;
 }
@@ -1343,7 +1259,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
  // PASS 3: remove nop's
  // this includes modifying call addresses to compensate
  for (int h=0; h<chans; h++) {
-    chanStream[h]=stripNops(chanStream[h],sortedCmd);
+    chanStream[h]=stripNops(chanStream[h]);
  }
  // PASS 4: find sub-blocks and isolate them
@@ -1354,7 +1270,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
      // 6 is the minimum size that can be reliably optimized
      logI("finding sub-blocks in chan %d",h);
-      chanStream[h]=findSubBlocks(chanStream[h],subBlocks,sortedCmd);
+      chanStream[h]=findSubBlocks(chanStream[h],subBlocks);
      // find sub-blocks within sub-blocks
      size_t subBlocksLast=0;
      size_t subBlocksLen=subBlocks.size();
@@ -1362,7 +1278,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
      while (subBlocksLast!=subBlocksLen) {
        logI("got %d blocks... starting from %d",(int)subBlocksLen,(int)subBlocksLast);
        for (size_t i=subBlocksLast; i<subBlocksLen; i++) {
-          SafeWriter* newBlock=findSubBlocks(subBlocks[i],subBlocks,sortedCmd);
+          SafeWriter* newBlock=findSubBlocks(subBlocks[i],subBlocks);
          subBlocks[i]=newBlock;
        }
        subBlocksLast=subBlocksLen;
@@ -1433,7 +1349,7 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable
  // PASS 5: remove nop's (again)
  for (int h=0; h<chans; h++) {
-    chanStream[h]=stripNops(chanStream[h],sortedCmd);
+    chanStream[h]=stripNops(chanStream[h]);
  }
  // PASS 6: pack streams