Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor IR Interpreter optimizations, other bugfixes #19233

Merged
merged 4 commits into from
Jun 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 28 additions & 13 deletions Core/MIPS/IR/IRInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,16 +260,24 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
float temp[4];
for (int i = 0; i < 4; i++)
temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
const int dest = inst->dest;
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = temp[i];
mips->f[dest + i] = temp[i];
break;
}

case IROp::Vec4Blend:
{
const int dest = inst->dest;
const int src1 = inst->src1;
const int src2 = inst->src2;
const int constant = inst->constant;
// 90% of calls to this have inst->constant == 7 or inst->constant == 8. Some are 1 and 4, others are very rare.
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = ((inst->constant >> i) & 1) ? mips->f[inst->src2 + i] : mips->f[inst->src1 + i];
mips->f[dest + i] = ((constant >> i) & 1) ? mips->f[src2 + i] : mips->f[src1 + i];
break;
}

case IROp::Vec4Mov:
{
Expand Down Expand Up @@ -377,15 +385,19 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

case IROp::Vec2Unpack16To31:
{
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1;
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1;
const int dest = inst->dest;
const int src1 = inst->src1;
mips->fi[dest] = (mips->fi[src1] << 16) >> 1;
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000) >> 1;
break;
}

case IROp::Vec2Unpack16To32:
{
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16);
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000);
const int dest = inst->dest;
const int src1 = inst->src1;
mips->fi[dest] = (mips->fi[src1] << 16);
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000);
break;
}

Expand Down Expand Up @@ -467,22 +479,26 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
val = _mm_andnot_si128(mask, val);
_mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
#else
const int src1 = inst->src1;
const int dest = inst->dest;
for (int i = 0; i < 4; i++) {
u32 val = mips->fi[inst->src1 + i];
mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
u32 val = mips->fi[src1 + i];
mips->fi[dest + i] = (int)val >= 0 ? val : 0;
}
#endif
break;
}

case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one.
{
const int src1 = inst->src1;
const int dest = inst->dest;
for (int i = 0; i < 4; i++) {
u32 val = mips->fi[inst->src1 + i];
u32 val = mips->fi[src1 + i];
val = val | (val >> 8);
val = val | (val >> 16);
val >>= 1;
mips->fi[inst->dest + i] = val;
mips->fi[dest + i] = val;
}
break;
}
Expand Down Expand Up @@ -1111,11 +1127,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
break;

case IROp::Nop:
_assert_(false);
break;
default:
// Unimplemented IR op. Bad.
Crash();
break;
// Unimplemented IR op. Bad.
}

#ifdef _DEBUG
Expand Down
10 changes: 9 additions & 1 deletion GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,10 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const UVScale
int count = indexUpperBound - indexLowerBound + 1;
int stride = decFmt.stride;

#ifdef _DEBUG
decodedCount += count;
#endif

// Check alignment before running the decoder, as we may crash if it's bad (as should the real PSP but doesn't always)
if (((uintptr_t)verts & (biggest - 1)) != 0) {
// Bad alignment. Not really sure what to do here... zero the verts to be safe?
Expand Down Expand Up @@ -1475,7 +1479,7 @@ static const char * const colnames[8] = { "", "?", "?", "?", "565", "5551", "444

int VertexDecoder::ToString(char *output, bool spaces) const {
char *start = output;

output += sprintf(output, "[%08x] ", fmt_);
output += sprintf(output, "P: %s ", posnames[pos]);
if (nrm)
output += sprintf(output, "N: %s ", nrmnames[nrm]);
Expand All @@ -1502,6 +1506,10 @@ int VertexDecoder::ToString(char *output, bool spaces) const {
}
}

#ifdef _DEBUG
output += sprintf(output, " (%llu)", (long long)decodedCount);
#endif

return output - start;
}

Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,10 @@ class VertexDecoder {

u8 biggest; // in practice, alignment.

#ifdef _DEBUG
mutable u64 decodedCount = 0;
#endif

friend class VertexDecoderJitCache;

private:
Expand Down
20 changes: 19 additions & 1 deletion GPU/Common/VertexDecoderHandwritten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,30 @@
#endif
#endif


// Candidates for hand-writing
// (found using our custom Very Sleepy).
// GPU::P:_f_N:_s8_C:_8888_T:_u16__(24b)_040001BE (5%+ of God of War execution)
// GPU::P:_f_N:_s8_C:_8888_T:_u16_W:_f_(1x)__(28b)_040007BE (1%+ of God of War execution)

// Tekken 6:
// (found using the vertex counter that's active in _DEBUG)
// [04000111] P: s16 C: 565 T: u8 (10b) (736949) // Also in Midnight Club

// Wipeout Pure:
// [0400013f] P: s16 N: s8 C: 8888 T: f (24b) (1495430)

// Flatout:
// [04000122] P: s16 N: s8 T: u16 (14b) (3901754)
// [04000116] P: s16 C: 5551 T: u16 (12b) (2225841)

// Test Drive:
// [05000100] P: s16 (6b) (2827872)
// [050011ff] P: f N: f C: 8888 T: f I: u16 (36b) (3812112)

// Burnout Dominator:
// [04000122] P: s16 N: s8 T: u16 (14b) (1710813)
// [04000116] P: s16 C: 5551 T: u16 (12b) (7688298)

// This is the first GoW one.
void VtxDec_Tu16_C8888_Pfloat(const u8 *srcp, u8 *dstp, int count, const UVScale *uvScaleOffset) {
struct GOWVTX {
Expand Down
32 changes: 22 additions & 10 deletions UI/DevScreens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1245,16 +1245,18 @@ void JitCompareScreen::OnRandomBlock(int flag) {
int tries = 0;
while (!anyWanted && tries < numBlocks) {
currentBlock_ = rand() % numBlocks;
JitBlockDebugInfo b = blockCache->GetBlockDebugInfo(currentBlock_);
u32 mipsBytes = (u32)b.origDisasm.size() * 4;
for (u32 addr = b.originalAddress; addr < b.originalAddress + mipsBytes; addr += 4) {
MIPSOpcode opcode = Memory::Read_Instruction(addr);
if (MIPSGetInfo(opcode) & flag) {
char temp[256];
MIPSDisAsm(opcode, addr, temp, sizeof(temp));
// INFO_LOG(HLE, "Stopping at random instruction: %08x %s", addr, temp);
anyWanted = true;
break;
if (blockCache->IsValidBlock(currentBlock_)) {
JitBlockDebugInfo b = blockCache->GetBlockDebugInfo(currentBlock_);
u32 mipsBytes = (u32)b.origDisasm.size() * 4;
for (u32 addr = b.originalAddress; addr < b.originalAddress + mipsBytes; addr += 4) {
MIPSOpcode opcode = Memory::Read_Instruction(addr);
if (MIPSGetInfo(opcode) & flag) {
char temp[256];
MIPSDisAsm(opcode, addr, temp, sizeof(temp));
// INFO_LOG(HLE, "Stopping at random instruction: %08x %s", addr, temp);
anyWanted = true;
break;
}
}
}
tries++;
Expand Down Expand Up @@ -1360,6 +1362,16 @@ void ShaderViewScreen::CreateViews() {
layout->Add(new Button(di->T("Back")))->OnClick.Handle<UIScreen>(this, &UIScreen::OnBack);
}

// Key handler for the shader view screen.
// When a character event carrying 'C' or 'c' arrives, the short description
// string of the shader identified by id_/type_ is copied to the clipboard.
// The event is always passed on to the base dialog screen afterwards, and
// the base class's result is what gets returned.
bool ShaderViewScreen::key(const KeyInput &ki) {
	// Only look at unicodeChar when the event is flagged as a character event.
	const bool copyRequested =
		(ki.flags & KEY_CHAR) != 0 &&
		(ki.unicodeChar == 'c' || ki.unicodeChar == 'C');

	if (copyRequested) {
		System_CopyStringToClipboard(gpu->DebugGetShaderString(id_, type_, SHADER_STRING_SHORT_DESC));
	}

	return UIDialogScreenWithBackground::key(ki);
}


const std::string framedumpsBaseUrl = "http://framedump.ppsspp.org/repro/";

FrameDumpTestScreen::FrameDumpTestScreen() {
Expand Down
1 change: 1 addition & 0 deletions UI/DevScreens.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ class ShaderViewScreen : public UIDialogScreenWithBackground {
: id_(id), type_(type) {}

void CreateViews() override;
bool key(const KeyInput &ki) override;

const char *tag() const override { return "ShaderView"; }

Expand Down
Loading