From 2ea7bf65ea89828347611c2e8e3cfa575ec22c31 Mon Sep 17 00:00:00 2001 From: tildearrow Date: Fri, 11 Apr 2025 20:21:46 -0500 Subject: [PATCH] TO DO: - FIX CIELOS - FIX HUGE STACKS BYE --- src/engine/cmdStream.cpp | 2 +- src/engine/cmdStream.h | 3 +- src/engine/cmdStreamOps.cpp | 555 ++++++++++++++++++------------------ src/gui/gui.cpp | 2 +- 4 files changed, 285 insertions(+), 277 deletions(-) diff --git a/src/engine/cmdStream.cpp b/src/engine/cmdStream.cpp index 9098e8f57..a3f1b3f98 100644 --- a/src/engine/cmdStream.cpp +++ b/src/engine/cmdStream.cpp @@ -24,7 +24,7 @@ #include "../ta-log.h" bool DivCSChannelState::doCall(unsigned int addr) { - if (callStackPos>=16) { + if (callStackPos>=DIV_MAX_CSSTACK) { readPos=0; return false; } diff --git a/src/engine/cmdStream.h b/src/engine/cmdStream.h index f879240f1..5bf8e5977 100644 --- a/src/engine/cmdStream.h +++ b/src/engine/cmdStream.h @@ -24,6 +24,7 @@ #include "safeReader.h" #define DIV_MAX_CSTRACE 64 +#define DIV_MAX_CSSTACK 128 class DivEngine; @@ -39,7 +40,7 @@ struct DivCSChannelState { int portaTarget, portaSpeed; unsigned char arp, arpStage, arpTicks, loopCount; - unsigned int callStack[16]; + unsigned int callStack[DIV_MAX_CSSTACK]; unsigned char callStackPos; unsigned int trace[DIV_MAX_CSTRACE]; diff --git a/src/engine/cmdStreamOps.cpp b/src/engine/cmdStreamOps.cpp index 80ae054a4..961b24ac0 100644 --- a/src/engine/cmdStreamOps.cpp +++ b/src/engine/cmdStreamOps.cpp @@ -662,6 +662,7 @@ SafeWriter* stripNops(SafeWriter* s) { case 0xf5: // calli case 0xfa: { // jmp unsigned int addr=buf[i+1]|(buf[i+2]<<8)|(buf[i+3]<<16)|(buf[i+4]<<24); + assert(!(addr&7)); try { addr=addrTable[addr]; buf[i+1]=addr&0xff; @@ -780,255 +781,265 @@ struct BlockMatch { // TODO: // - see if we can optimize even more -SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlocks, unsigned char* speedDial) { +SafeWriter* findSubBlocks(SafeWriter* stream, std::vector& subBlocks, unsigned char* speedDial, DivCSProgress* progress) { unsigned char* buf=stream->getFinalBuf(); size_t matchSize=MIN_MATCH_SIZE; std::vector matches; - // repeat until we run out of matches - while (true) { - matchSize=MIN_MATCH_SIZE; - matches.clear(); + matches.clear(); - // fast match algorithm - // search for small matches, and then find bigger ones - logD("finding possible matches"); - for (size_t i=0; isize(); i+=8) { - for (size_t j=i+matchSize; jsize(); j+=8) { - if (memcmp(&buf[i],&buf[j],matchSize)==0) { - // store this match for later - matches.push_back(BlockMatch(i,j,matchSize)); - } + // fast match algorithm + // search for small matches, and then find bigger ones + logD("finding possible matches"); + for (size_t i=0; isize(); i+=8) { + for (size_t j=i+matchSize; jsize(); j+=8) { + if (memcmp(&buf[i],&buf[j],matchSize)==0) { + // store this match for later + matches.push_back(BlockMatch(i,j,matchSize)); } } + } - logD("%d candidates",(int)matches.size()); + logD("%d candidates",(int)matches.size()); - // quit if there isn't anything - if (matches.empty()) return stream; + // quit if there isn't anything + if (matches.empty()) return stream; - // search for bigger matches - for (size_t i=0; i=stream->size() || blockPos>=stream->size()) { - break; - } - - if (buf[origPos]!=buf[blockPos]) { - break; - } - origPos++; - blockPos++; - finalLen++; + size_t finalLen=b.len; + size_t origPos=b.orig+b.len; + size_t blockPos=b.block+b.len; + while (true) { + if (origPos>=stream->size() || blockPos>=stream->size()) { + break; } - finalLen&=~7; - b.len=finalLen; - b.done=true; + if (buf[origPos]!=buf[blockPos]) { + break; + } + origPos++; + blockPos++; + finalLen++; } - logD("checking overlapping/bad matches"); + finalLen&=~7; + b.len=finalLen; + b.done=true; + } - // first stage done - // set done to false unless: - // - this match overlaps with itself - // - this block only consists of calls - size_t nonOverlapCount=0; - for (BlockMatch& i: matches) { - i.done=false; - if (OVERLAPS(i.orig,i.orig+i.len,i.block,i.block+i.len)) { - // self-overlapping - i.done=true; - } else { - bool onlyCalls=true; + logD("checking overlapping/bad matches"); + + // first stage done + // set done to false unless: + // - this match overlaps with itself + // - this block only consists of calls + // - this block contains a ret or jmp + size_t nonOverlapCount=0; + for (BlockMatch& i: matches) { + i.done=false; + if (OVERLAPS(i.orig,i.orig+i.len,i.block,i.block+i.len)) { + // self-overlapping + i.done=true; + } else { + bool onlyCalls=true; + for (size_t j=i.orig; jcount=nonOverlapCount; + } - if (b.orig!=lastOrig || b.len!=lastLen) { - if (lastOrig!=SIZE_MAX) { - // commit previous block and start new one - //logV("%x gains: %d",(int)lastOrig,gains); - if (gains>bestBenefit) { - bestBenefitIndex=lastOrigOff; - bestBenefit=gains; - } - if (gains<=0) { - // don't make a sub-block for these matches since we only have loss - //logV("(LOSSES!)"); - for (size_t j=lastOrigOff; jbestBenefit) { + bestBenefitIndex=lastOrigOff; + bestBenefit=finalBenefit; + } + if (gains<=0) { + // don't make a sub-block for these matches since we only have loss + //logV("(LOSSES!)"); + for (size_t j=lastOrigOff; j workMatches; - bool newBlocks=false; - - workMatches.clear(); - - size_t bestBenefitOrig=matches[bestBenefitIndex].orig; - size_t bestBenefitLen=matches[bestBenefitIndex].len; - for (size_t i=bestBenefitIndex; iinit(); - newBlock->write(&buf[i.orig],i.len); - newBlock->writeC(0xf9); // ret - // padding - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - newBlock->writeC(0); - subBlocks.push_back(newBlock); - lastOrig=i.orig; - - // insert call on the original block - buf[i.orig]=0xf4; - buf[i.orig+1]=subBlockID&0xff; - buf[i.orig+2]=(subBlockID>>8)&0xff; - buf[i.orig+3]=(subBlockID>>16)&0xff; - buf[i.orig+4]=(subBlockID>>24)&0xff; - buf[i.orig+5]=0; - buf[i.orig+6]=0; - buf[i.orig+7]=0; - - // replace the rest with nop - for (size_t j=i.orig+8; j>8)&0xff; - buf[i.block+3]=(subBlockID>>16)&0xff; - buf[i.block+4]=(subBlockID>>24)&0xff; - buf[i.block+5]=0; - buf[i.block+6]=0; - buf[i.block+7]=0; + // quit if there isn't anything + if (!nonOverlapCount) return stream; + + // quit if it's all losses + if (bestBenefit<1) return stream; + + // work on most beneficial matches + std::vector workMatches; + bool newBlocks=false; + + workMatches.clear(); + + size_t bestBenefitOrig=matches[bestBenefitIndex].orig; + size_t bestBenefitLen=matches[bestBenefitIndex].len; + for (size_t i=bestBenefitIndex; iinit(); + newBlock->write(&buf[i.orig],i.len); + newBlock->writeC(0xf9); // ret + // padding + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + newBlock->writeC(0); + subBlocks.push_back(newBlock); + lastOrig=i.orig; + + // insert call on the original block + buf[i.orig]=0xf4; + buf[i.orig+1]=subBlockID&0xff; + buf[i.orig+2]=(subBlockID>>8)&0xff; + buf[i.orig+3]=(subBlockID>>16)&0xff; + buf[i.orig+4]=(subBlockID>>24)&0xff; + buf[i.orig+5]=0; + buf[i.orig+6]=0; + buf[i.orig+7]=0; // replace the rest with nop - for (size_t j=i.block+8; j>8)&0xff; + buf[i.block+3]=(subBlockID>>16)&0xff; + buf[i.block+4]=(subBlockID>>24)&0xff; + buf[i.block+5]=0; + buf[i.block+6]=0; + buf[i.block+7]=0; - // get out if we haven't made any blocks - if (!newBlocks) break; + // replace the rest with nop + for (size_t j=i.block+8; jgetFinalBuf(); - - logV("doing it again..."); + // invalidate overlapping work matches + for (BlockMatch& j: workMatches) { + if (j.orig!=i.orig || j.len!=i.len) { + j.done=true; + logE("NO (orig %d %d) (%d!=%d)",j.orig,i.orig,j.len,i.len); + abort(); + } + if (OVERLAPS(i.orig,i.orig+i.len,j.block,j.block+j.len)) { + logE("ERROR: SELF-OVERLAP"); + abort(); + } + if (OVERLAPS(i.block,i.block+i.len,j.block,j.block+j.len)) { + j.done=true; + } + } } + logV("done!"); + + // get out if we haven't made any blocks + if (!newBlocks) return stream; + + // remove nop's + stream=stripNops(stream); + buf=stream->getFinalBuf(); + return stream; } @@ -1059,19 +1070,20 @@ SafeWriter* packStream(SafeWriter* s, unsigned char* speedDial) { try { addr=addrTable[addr]; // check whether we have sufficient room to turn this into a 16-bit call + /* if (addr<0xff00) { buf[i]=0xf8; buf[i+1]=addr&0xff; buf[i+2]=(addr>>8)&0xff; buf[i+3]=0xf1; buf[i+4]=0xf1; - } else { + } else {*/ buf[i]=0xf5; buf[i+1]=addr&0xff; buf[i+2]=(addr>>8)&0xff; buf[i+3]=(addr>>16)&0xff; buf[i+4]=(addr>>24)&0xff; - } + //} } catch (std::out_of_range& e) { logW("address %x is not mappable!",addr); } @@ -1458,76 +1470,71 @@ SafeWriter* DivEngine::saveCommand(DivCSProgress* progress, unsigned int disable // 6 is the minimum size that can be reliably optimized logI("finding sub-blocks"); - globalStream=findSubBlocks(globalStream,subBlocks,sortedCmd); - // find sub-blocks within sub-blocks - size_t subBlocksLast=0; - size_t subBlocksLen=subBlocks.size(); - logI("finding sub-blocks within sub-blocks"); - while (subBlocksLast!=subBlocksLen) { - logI("got %d blocks... starting from %d",(int)subBlocksLen,(int)subBlocksLast); - for (size_t i=subBlocksLast; i blockOff; + globalStream->seek(0,SEEK_END); + for (size_t i=0; isize()==subBlocks[i]->size()) { + if (memcmp(subBlocks[j]->getFinalBuf(),subBlocks[i]->getFinalBuf(),subBlocks[j]->size())==0) { + logW("we have one"); + dupOf=j; + break; + } + } + } + + if (dupOf>=0) { + // push address of original block (discard duplicate) + blockOff.push_back(blockOff[dupOf]); + } else { + // write sub-block + blockOff.push_back(globalStream->tell()); + logV("block size: %d",(int)block->size()); + assert(!(block->size()&7)); + globalStream->write(block->getFinalBuf(),block->size()); + } } - subBlocksLast=subBlocksLen; - subBlocksLen=subBlocks.size(); - } - // insert sub-blocks and resolve symbols - logI("%d sub-blocks total",(int)subBlocks.size()); - std::vector blockOff; - globalStream->seek(0,SEEK_END); - for (size_t i=0; ifinish(); + delete block; + } + subBlocks.clear(); - // check whether this block is duplicate - int dupOf=-1; - for (size_t j=0; jsize()==subBlocks[i]->size()) { - if (memcmp(subBlocks[j]->getFinalBuf(),subBlocks[i]->getFinalBuf(),subBlocks[j]->size())==0) { - logW("we have one"); - dupOf=j; - break; + // resolve symbols + unsigned char* buf=globalStream->getFinalBuf(); + for (size_t j=0; jsize(); j+=8) { + if (buf[j]==0xf4) { // callsym + unsigned int addr=buf[j+1]|(buf[j+2]<<8)|(buf[j+3]<<16)|(buf[j+4]<<24); + if (addr>8)&0xff; + buf[j+3]=(addr>>16)&0xff; + buf[j+4]=(addr>>24)&0xff; + } else { + logE("requested symbol %d is out of bounds!",addr); } } } - - if (dupOf>=0) { - // push address of original block (discard duplicate) - blockOff.push_back(blockOff[dupOf]); - } else { - // write sub-block - blockOff.push_back(globalStream->tell()); - logV("block size: %d",(int)block->size()); - assert(!(block->size()&7)); - globalStream->write(block->getFinalBuf(),block->size()); - } - } - - for (SafeWriter* block: subBlocks) { - block->finish(); - delete block; - } - subBlocks.clear(); - - // resolve symbols - unsigned char* buf=globalStream->getFinalBuf(); - for (size_t j=0; jsize(); j+=8) { - if (buf[j]==0xf4) { // callsym - unsigned int addr=buf[j+1]|(buf[j+2]<<8)|(buf[j+3]<<16)|(buf[j+4]<<24); - if (addr>8)&0xff; - buf[j+3]=(addr>>16)&0xff; - buf[j+4]=(addr>>24)&0xff; - } else { - logE("requested symbol %d is out of bounds!",addr); - } - } - } + } while (haveBlocks); size_t afterSize=globalStream->size(); logI("(before: %d - after: %d)",(int)beforeSize,(int)afterSize); diff --git a/src/gui/gui.cpp b/src/gui/gui.cpp index a88f8c5d9..bc856704d 100644 --- a/src/gui/gui.cpp +++ b/src/gui/gui.cpp @@ -6064,7 +6064,7 @@ bool FurnaceGUI::loop() { } } else { WAKE_UP; - ImGui::Text("Exporting..."); + ImGui::Text("Exporting... %d",csProgress.count); // check whether we're done if (csExportDone) {