sub-blocks, part 1

currently awful
This commit is contained in:
tildearrow 2025-04-04 05:01:49 -05:00
parent c110f87548
commit d5f1d3c25c
5 changed files with 186 additions and 77 deletions

View file

@ -72,6 +72,7 @@ hex | description
f0 | UNUSED - placeholder used during optimization passes (3-byte nonce follows)
f1 | no operation
f2 | UNUSED - unoptimized extended command
f3 | loop (8-bit negative offset and 8-bit count follow)
f4 | call symbol (16-bit index follows; only used internally)
f5 | jump to sub-block (address follows)
f6 | go to sub-block (32-bit offset follows)

View file

@ -122,6 +122,23 @@ bool DivCSPlayer::tick() {
break;
case 0xf1: // nop
break;
case 0xf3: { // loop
unsigned char loopOff=stream.readC();
if (chan[i].loopCount>0) {
stream.readC();
if (--chan[i].loopCount) {
// jump
chan[i].readPos-=loopOff;
mustTell=false;
}
} else {
chan[i].loopCount=stream.readC();
// jump
chan[i].readPos-=loopOff;
mustTell=false;
}
break;
}
case 0xf7:
command=stream.readC();
break;
@ -130,6 +147,7 @@ bool DivCSPlayer::tick() {
if (!chan[i].doCall(callAddr)) {
logE("%d: (callb16) stack error!",i);
}
mustTell=false;
break;
}
case 0xf6: {
@ -137,6 +155,7 @@ bool DivCSPlayer::tick() {
if (!chan[i].doCall(callAddr)) {
logE("%d: (callb32) stack error!",i);
}
mustTell=false;
break;
}
case 0xf5: {
@ -144,6 +163,7 @@ bool DivCSPlayer::tick() {
if (!chan[i].doCall(callAddr)) {
logE("%d: (call) stack error!",i);
}
mustTell=false;
break;
}
case 0xf4: {

View file

@ -37,7 +37,7 @@ struct DivCSChannelState {
int volume, volMax, volSpeed, volSpeedTarget;
int vibratoDepth, vibratoRate, vibratoPos;
int portaTarget, portaSpeed;
unsigned char arp, arpStage, arpTicks;
unsigned char arp, arpStage, arpTicks, loopCount;
unsigned int callStack[8];
unsigned char callStackPos;
@ -65,6 +65,7 @@ struct DivCSChannelState {
arp(0),
arpStage(0),
arpTicks(0),
loopCount(0),
callStackPos(0),
tracePos(0) {
for (int i=0; i<DIV_MAX_CSTRACE; i++) {

View file

@ -269,6 +269,61 @@ void reloc(unsigned char* buf, size_t len, unsigned int sourceAddr, unsigned int
}
}
// Builds a copy of command stream `s` with every nop (0xf1) removed.
//
// The 32-bit little-endian operand of each call (0xf5) and jmp (0xfa) is
// rewritten so it refers to the position its target has after the nops are
// gone. A target that is not the start of an instruction in the old stream is
// left untouched and a warning is logged.
//
// `s` is consumed: it is finished and deleted before returning. The returned
// SafeWriter is newly allocated with `new`.
SafeWriter* stripNops(SafeWriter* s) {
  std::unordered_map<unsigned int,unsigned int> addrTable;
  SafeWriter* oldStream=s;
  unsigned char* buf=oldStream->getFinalBuf();
  const size_t bufLen=oldStream->size();
  s=new SafeWriter;
  s->init();

  // prepare address map (old instruction start -> new instruction start).
  // a nop maps to wherever the next surviving instruction will land, because
  // the new address is not advanced for it.
  size_t newAddr=0;
  for (size_t i=0; i<bufLen;) {
    int insLen=getInsLength(buf[i]);
    if (insLen<1) {
      logE("INS %x NOT IMPLEMENTED...",buf[i]);
      break;
    }
    addrTable[i]=newAddr;
    if (buf[i]!=0xf1) newAddr+=insLen;
    i+=insLen;
  }

  // translate addresses and copy everything that is not a nop
  for (size_t i=0; i<bufLen;) {
    int insLen=getInsLength(buf[i]);
    if (insLen<1) {
      logE("INS %x NOT IMPLEMENTED...",buf[i]);
      break;
    }
    // never read or copy past the end of the source buffer
    if (i+(size_t)insLen>bufLen) {
      logE("INS %x at %x is truncated!",buf[i],(unsigned int)i);
      break;
    }

    switch (buf[i]) {
      case 0xf5: // call
      case 0xfa: { // jmp
        // the operand occupies buf[i+1]..buf[i+4]
        if (i+4>=bufLen) {
          logW("address operand at %x is truncated!",(unsigned int)i);
          break;
        }
        // widen each byte before shifting: the high byte would otherwise be
        // shifted into the sign bit of a promoted int.
        unsigned int target=
          (unsigned int)buf[i+1]|
          ((unsigned int)buf[i+2]<<8)|
          ((unsigned int)buf[i+3]<<16)|
          ((unsigned int)buf[i+4]<<24);
        // use find(): operator[] would silently insert a zero entry for an
        // unknown address instead of reporting it.
        auto mapped=addrTable.find(target);
        if (mapped==addrTable.end()) {
          logW("address %x is not mappable!",target);
        } else {
          unsigned int newTarget=mapped->second;
          buf[i+1]=newTarget&0xff;
          buf[i+2]=(newTarget>>8)&0xff;
          buf[i+3]=(newTarget>>16)&0xff;
          buf[i+4]=(newTarget>>24)&0xff;
        }
        break;
      }
    }

    if (buf[i]!=0xf1) {
      s->write(&buf[i],insLen);
    }
    i+=insLen;
  }

  oldStream->finish();
  delete oldStream;
  return s;
}
SafeWriter* DivEngine::saveCommand() {
stop();
repeatPattern=false;
@ -406,34 +461,7 @@ SafeWriter* DivEngine::saveCommand() {
logV("%d",tick);
cmdStreamEnabled=oldCmdStreamEnabled;
// PASS 1: find sub-blocks and isolate them
// PASS 2: find loops
// PASS 3: optimize command calls
/*
int sortCand=-1;
int sortPos=0;
while (sortPos<16) {
sortCand=-1;
for (int i=DIV_CMD_SAMPLE_MODE; i<256; i++) {
if (cmdPopularity[i]) {
if (sortCand==-1) {
sortCand=i;
} else if (cmdPopularity[sortCand]<cmdPopularity[i]) {
sortCand=i;
}
}
}
if (sortCand==-1) break;
sortedCmdPopularity[sortPos]=cmdPopularity[sortCand];
sortedCmd[sortPos]=sortCand;
cmdPopularity[sortCand]=0;
sortPos++;
}*/
// PASS 4: condense delays
// PASS 1: condense delays
// calculate delay usage
for (int h=0; h<chans; h++) {
unsigned char* buf=chanStream[h]->getFinalBuf();
@ -535,62 +563,119 @@ SafeWriter* DivEngine::saveCommand() {
}
}
// PASS 5: remove all remaining nop's
// PASS 2: remove nop's
// this includes modifying call addresses to compensate
for (int h=0; h<chans; h++) {
std::unordered_map<unsigned int,unsigned int> addrTable;
SafeWriter* oldStream=chanStream[h];
unsigned char* buf=oldStream->getFinalBuf();
chanStream[h]=new SafeWriter;
chanStream[h]->init();
chanStream[h]=stripNops(chanStream[h]);
}
// PASS 2: find sub-blocks and isolate them (TODO: THIS!)
for (int h=0; h<chans; h++) {
unsigned char* buf=chanStream[h]->getFinalBuf();
unsigned char group[256]; // max offset is -255
size_t groupLen=0;
memset(group,0,256);
// 3 is the minimum loop size that can be reliably optimized
logI("finding loop in chan %d",h);
for (int groupSize=3; groupSize<256; groupSize++) {
bool foundSomething=false;
//logD("...try size %d",groupSize);
for (size_t searchPos=0; searchPos<chanStream[h]->size();) {
int insLen=getInsLength(buf[searchPos]);
groupLen=0;
// prepare address map
size_t addr=0;
for (size_t i=0; i<oldStream->size();) {
int insLen=getInsLength(buf[i]);
if (insLen<1) {
logE("INS %x NOT IMPLEMENTED...",buf[i]);
logE("INS %x NOT IMPLEMENTED...",buf[searchPos]);
break;
}
addrTable[i]=addr;
if (buf[i]!=0xf1) addr+=insLen;
i+=insLen;
}
// translate addresses
for (size_t i=0; i<oldStream->size();) {
int insLen=getInsLength(buf[i]);
if (insLen<1) {
logE("INS %x NOT IMPLEMENTED...",buf[i]);
// copy a block
for (int i=0; i<groupSize && searchPos+i<chanStream[h]->size();) {
int insLenI=getInsLength(buf[searchPos+i]);
if (insLenI<1) {
logE("INS %x NOT IMPLEMENTED...",buf[searchPos+i]);
break;
}
switch (buf[i]) {
case 0xf5: // call
case 0xfa: { // jmp
unsigned int addr=buf[i+1]|(buf[i+2]<<8)|(buf[i+3]<<8)|(buf[i+4]<<24);
try {
addr=addrTable[addr];
buf[i+1]=addr&0xff;
buf[i+2]=(addr>>8)&0xff;
buf[i+3]=(addr>>16)&0xff;
buf[i+4]=(addr>>24)&0xff;
} catch (std::out_of_range& e) {
logW("address %x is not mappable!",addr);
}
break;
}
}
if (buf[i]!=0xf1) {
chanStream[h]->write(&buf[i],insLen);
}
i+=insLen;
i+=insLenI;
if ((int)groupLen+insLenI>groupSize) break;
groupLen+=insLenI;
}
oldStream->finish();
delete oldStream;
// don't do anything if we don't have a block
if (!groupLen) {
searchPos+=insLen;
continue;
}
memcpy(group,&buf[searchPos],groupLen);
// find contiguous blocks
size_t searchPos1=searchPos+groupLen;
size_t posOfFirstBlock=searchPos1;
int loopCount=0;
while (true) {
// stop if we're near the end
if (searchPos1>=chanStream[h]->size()) break;
// compare next block to group
if (memcmp(&buf[searchPos1],group,groupLen)!=0) break;
// if we're here, we found a contiguous block
searchPos1+=groupLen;
loopCount++;
// don't loop more than 255 times
if (loopCount>=255) break;
}
if (loopCount>0) {
// write loop command
logD("- LOOP: %x (size %d, %d times)",searchPos,groupLen,loopCount);
buf[posOfFirstBlock++]=0xf3;
buf[posOfFirstBlock++]=groupLen;
buf[posOfFirstBlock++]=loopCount;
// set the rest to nop
while (posOfFirstBlock<searchPos1) {
buf[posOfFirstBlock++]=0xf1;
}
// skip contiguous blocks
searchPos=searchPos1;
foundSomething=true;
} else {
// try again somewhere else
searchPos+=insLen;
}
}
if (foundSomething) {
chanStream[h]=stripNops(chanStream[h]);
buf=chanStream[h]->getFinalBuf();
}
}
}
// PASS 3: optimize command calls
/*
int sortCand=-1;
int sortPos=0;
while (sortPos<16) {
sortCand=-1;
for (int i=DIV_CMD_SAMPLE_MODE; i<256; i++) {
if (cmdPopularity[i]) {
if (sortCand==-1) {
sortCand=i;
} else if (cmdPopularity[sortCand]<cmdPopularity[i]) {
sortCand=i;
}
}
}
if (sortCand==-1) break;
sortedCmdPopularity[sortPos]=cmdPopularity[sortCand];
sortedCmd[sortPos]=sortCand;
cmdPopularity[sortCand]=0;
sortPos++;
}*/
/*
for (int i=0; i<chans; i++) {
// optimize stream

View file

@ -101,6 +101,9 @@ String disasmCmd(unsigned char* buf, size_t bufLen, unsigned int addr) {
case 0xf1:
return "nop";
break;
case 0xf3:
return fmt::sprintf("loop (-%d), %d",(int)buf[addr+1],(int)buf[addr+2]);
break;
case 0xf4:
if (addr+2>=bufLen) return "???";
return fmt::sprintf("callsym %.4x",(int)(buf[addr+1]|(buf[addr+2]<<8)));
@ -367,7 +370,6 @@ void FurnaceGUI::drawCSPlayer() {
}
if (!highlights.empty()) nextHighlight=highlights[0];
for (int i=csClipper.DisplayStart; i<csClipper.DisplayEnd; i++) {
ImGui::TableNextRow();
ImGui::TableNextColumn();