Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved provoking vertex fix #19334

Merged
merged 5 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Common/GPU/D3D11/thin3d_d3d11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
caps_.blendMinMaxSupported = true;
caps_.multiSampleLevelsMask = 1; // More could be supported with some work.

caps_.provokingVertexLast = false; // D3D has it first, unfortunately. (and no way to change it).

caps_.presentInstantModeChange = true;
caps_.presentMaxInterval = 4;
caps_.presentModesSupported = PresentMode::FIFO | PresentMode::IMMEDIATE;
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/D3D9/thin3d_d3d9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,8 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.presentMaxInterval = 1;
caps_.presentModesSupported = PresentMode::FIFO;

caps_.provokingVertexLast = false; // D3D has it first, unfortunately (and no way to change it).

if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
caps_.anisoSupported = true;
}
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/OpenGL/thin3d_gl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,9 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT
// GLES has no support for logic framebuffer operations. There doesn't even seem to exist any such extensions.
caps_.logicOpSupported = !gl_extensions.IsGLES;

// Always the case in GL (which is what we want for PSP flat shade).
caps_.provokingVertexLast = true;

// Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader):
// This will induce a performance penalty on many architectures though so a blanket enable of this
// is probably not a good idea.
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,9 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
caps_.sampleRateShadingSupported = vulkan->GetDeviceFeatures().enabled.standard.sampleRateShading != 0;
caps_.textureSwizzleSupported = true;

// Note that it must also be enabled on the pipelines (which we do).
caps_.provokingVertexLast = vulkan->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast;

// Present mode stuff
caps_.presentMaxInterval = 1;
caps_.presentInstantModeChange = false; // TODO: Fix this with some work in VulkanContext
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ struct DeviceCaps {
bool setMaxFrameLatencySupported;
bool textureSwizzleSupported;
bool requiresHalfPixelOffset;

bool provokingVertexLast; // GL behavior, what the PSP does
bool verySlowShaderCompiler;

// From the other backends, we can detect if D3D9 support is known bad (like on Xe) and disable it.
Expand Down
8 changes: 4 additions & 4 deletions GPU/Common/IndexGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {

alignas(16) static const u16 offsets_clockwise[24] = {
0, (u16)(0 + 1), (u16)(0 + 2),
1, (u16)(1 + 2), (u16)(1 + 1),
(u16)(1 + 1), 1, (u16)(1 + 2),
2, (u16)(2 + 1), (u16)(2 + 2),
3, (u16)(3 + 2), (u16)(3 + 1),
(u16)(3 + 1), 3, (u16)(3 + 2),
4, (u16)(4 + 1), (u16)(4 + 2),
5, (u16)(5 + 2), (u16)(5 + 1),
(u16)(5 + 1), 5, (u16)(5 + 2),
6, (u16)(6 + 1), (u16)(6 + 2),
7, (u16)(7 + 2), (u16)(7 + 1),
(u16)(7 + 1), 7, (u16)(7 + 2),
};

alignas(16) static const uint16_t offsets_counter_clockwise[24] = {
Expand Down
108 changes: 36 additions & 72 deletions GPU/Common/SoftwareTransformCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,33 +125,6 @@ static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, f
return true;
}

static int ColorIndexOffset(int prim, GEShadeMode shadeMode, bool clearMode) {
if (shadeMode != GE_SHADE_FLAT || clearMode) {
return 0;
}

switch (prim) {
case GE_PRIM_LINES:
case GE_PRIM_LINE_STRIP:
return 1;

case GE_PRIM_TRIANGLES:
case GE_PRIM_TRIANGLE_STRIP:
return 2;

case GE_PRIM_TRIANGLE_FAN:
return 1;

case GE_PRIM_RECTANGLES:
// We already use BR color when expanding, so no need to offset.
return 0;

default:
break;
}
return 0;
}

void SoftwareTransform::SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale) {
memcpy(&projMatrix_.m, mtx, 16 * sizeof(float));

Expand Down Expand Up @@ -202,11 +175,6 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f;
}

int provokeIndOffset = 0;
if (params_.provokeFlatFirst) {
provokeIndOffset = ColorIndexOffset(prim, gstate.getShadeMode(), gstate.isModeClear());
}

VertexReader reader(decoded, decVtxFormat, vertType);
if (throughmode) {
const u32 materialAmbientRGBA = gstate.getMaterialAmbientRGBA();
Expand All @@ -221,13 +189,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
vert.pos_w = 1.0f;

if (hasColor) {
if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts) {
reader.Goto(index + provokeIndOffset);
vert.color0_32 = reader.ReadColor0_8888();
reader.Goto(index);
} else {
vert.color0_32 = reader.ReadColor0_8888();
}
vert.color0_32 = reader.ReadColor0_8888();
} else {
vert.color0_32 = materialAmbientRGBA;
}
Expand Down Expand Up @@ -268,10 +230,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
if (reader.hasUV())
reader.ReadUV(ruv);

// Read all the provoking vertex values here.
Vec4f unlitColor;
if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts)
reader.Goto(index + provokeIndOffset);
if (reader.hasColor0())
reader.ReadColor0(unlitColor.AsArray());
else
Expand Down Expand Up @@ -342,34 +301,14 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
break;

case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source
// Flat uses the vertex normal, not provoking.
if (provokeIndOffset == 0) {
source = normal.Normalized(cpu_info.bSSE4_1);
} else {
reader.Goto(index);
if (reader.hasNormal())
reader.ReadNrm(source.AsArray());
if (gstate.areNormalsReversed())
source = -source;
source.Normalize();
}
source = normal.Normalized(cpu_info.bSSE4_1);
if (!reader.hasNormal()) {
ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?");
}
break;

case GE_PROJMAP_NORMAL: // Use non-normalized normal as source!
// Flat uses the vertex normal, not provoking.
if (provokeIndOffset == 0) {
source = normal;
} else {
// Need to read the normal for this vertex and weight it again..
reader.Goto(index);
if (reader.hasNormal())
reader.ReadNrm(source.AsArray());
if (gstate.areNormalsReversed())
source = -source;
}
source = normal;
if (!reader.hasNormal()) {
ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?");
}
Expand Down Expand Up @@ -497,13 +436,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy

if (prim == GE_PRIM_RECTANGLES) {
if (!ExpandRectangles(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode, &result->pixelMapped)) {
result->drawIndexed = false;
result->drawNumTrans = 0;
result->pixelMapped = false;
return;
}
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;

// We don't know the color until here, so we have to do it now, instead of in StateMapping.
// Might want to reconsider the order of things later...
Expand All @@ -521,25 +458,20 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
} else if (prim == GE_PRIM_POINTS) {
result->pixelMapped = false;
if (!ExpandPoints(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode)) {
result->drawIndexed = false;
result->drawNumTrans = 0;
return;
}
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
} else if (prim == GE_PRIM_LINES) {
result->pixelMapped = false;
if (!ExpandLines(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode)) {
result->drawIndexed = false;
result->drawNumTrans = 0;
return;
}
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
} else {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount;
result->drawIndexed = true;
result->pixelMapped = false;

// If we don't support custom cull in the shader, process it here.
Expand Down Expand Up @@ -635,7 +567,7 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
gpuStats.numClears++;
}

result->action = SW_DRAW_PRIMITIVES;
result->action = SW_DRAW_INDEXED;
result->drawNumTrans = numTrans;
}

Expand Down Expand Up @@ -758,6 +690,38 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &numDecodedVerts,
return true;
}

// In-place. So, better not be doing this on GPU memory!
void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize) {
switch (prim) {
case GE_PRIM_LINES:
// Swap every two indices.
for (int i = 0; i < indsSize - 1; i += 2) {
u16 temp = inds[i];
inds[i] = inds[i + 1];
inds[i + 1] = temp;
}
break;
case GE_PRIM_TRIANGLES:
// Rotate the triangle so the last becomes the first, without changing the winding order.
// This could be done with a series of pshufb.
for (int i = 0; i < indsSize - 2; i += 3) {
u16 temp = inds[i + 2];
inds[i + 2] = inds[i + 1];
inds[i + 1] = inds[i];
inds[i] = temp;
}
break;
case GE_PRIM_POINTS:
// Nothing to do,
break;
case GE_PRIM_RECTANGLES:
// Nothing to do, already using the 2nd vertex.
break;
default:
_dbg_assert_msg_(false, "IndexBufferProvokingFirstToLast: Only works with plain indexed primitives, no strips or fans")
}
}

bool SoftwareTransform::ExpandLines(int vertexCount, int &numDecodedVerts, int vertsSize, u16 *&inds, int indsSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
// Before we start, do a sanity check - does the output fit?
if ((vertexCount / 2) * 6 > indsSize) {
Expand Down
10 changes: 6 additions & 4 deletions GPU/Common/SoftwareTransformCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class TextureCacheCommon;

enum SoftwareTransformAction {
SW_NOT_READY,
SW_DRAW_PRIMITIVES,
SW_DRAW_INDEXED,
SW_CLEAR,
};

Expand All @@ -44,8 +44,6 @@ struct SoftwareTransformResult {

TransformedVertex *drawBuffer;
int drawNumTrans;
bool drawIndexed;

bool pixelMapped;
};

Expand All @@ -57,11 +55,15 @@ struct SoftwareTransformParams {
TextureCacheCommon *texCache;
bool allowClear;
bool allowSeparateAlphaClear;
bool provokeFlatFirst;
bool flippedY;
bool usesHalfZ;
};

// Converts an index buffer to make the provoking vertex the last.
// In-place. So, better not be doing this on GPU memory!
// TODO: We could do this already during index decode.
void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize);

class SoftwareTransform {
public:
SoftwareTransform(SoftwareTransformParams &params) : params_(params) {}
Expand Down
4 changes: 3 additions & 1 deletion GPU/D3D11/D3D11Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ std::vector<uint8_t> CompileShaderToBytecodeD3D11(const char *code, size_t codeS
if (trimmed.find("pow(f, e) will not work for negative f") != std::string::npos) {
continue;
}
WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data());
if (trimmed.size() > 1) { // ignore single nulls, not sure how they appear.
WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data());
}
}
} else {
ERROR_LOG(Log::G3D, "%s: %s\n\n%s", "errors", errors.c_str(), numberedCode.c_str());
Expand Down
27 changes: 14 additions & 13 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,15 @@ void DrawEngineD3D11::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true;
params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears
params.provokeFlatFirst = true;
params.flippedY = false;
params.usesHalfZ = true;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
// We need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}

// We need correct viewport values in gstate_c already.
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
Expand Down Expand Up @@ -424,7 +429,7 @@ void DrawEngineD3D11::DoFlush() {

ApplyDrawStateLate(result.setStencil, result.stencilValue);

if (result.action == SW_DRAW_PRIMITIVES) {
if (result.action == SW_DRAW_INDEXED) {
D3D11VertexShader *vshader;
D3D11FragmentShader *fshader;
shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
Expand Down Expand Up @@ -452,17 +457,13 @@ void DrawEngineD3D11::DoFlush() {
pushVerts_->EndPush(context_);
ID3D11Buffer *buf = pushVerts_->Buf();
context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
if (result.drawIndexed) {
UINT iOffset;
int iSize = sizeof(uint16_t) * result.drawNumTrans;
uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
memcpy(iptr, inds, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
context_->DrawIndexed(result.drawNumTrans, 0, 0);
} else {
context_->Draw(result.drawNumTrans, 0);
}
UINT iOffset;
int iSize = sizeof(uint16_t) * result.drawNumTrans;
uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
memcpy(iptr, inds, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
context_->DrawIndexed(result.drawNumTrans, 0, 0);
} else if (result.action == SW_CLEAR) {
u32 clearColor = result.color;
float clearDepth = result.depth;
Expand Down
16 changes: 9 additions & 7 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,16 @@ void DrawEngineDX9::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true;
params.allowSeparateAlphaClear = false;
params.provokeFlatFirst = true;
params.flippedY = false;
params.usesHalfZ = true;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
// We need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
int indexCount = RemainingIndices(inds);
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}

// We need correct viewport values in gstate_c already.
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
Expand Down Expand Up @@ -382,7 +388,7 @@ void DrawEngineDX9::DoFlush() {

VSShader *vshader = shaderManager_->ApplyShader(false, false, dec_, decOptions_.expandAllWeightsToFloat, true, pipelineState_);

if (result.action == SW_DRAW_PRIMITIVES) {
if (result.action == SW_DRAW_INDEXED) {
if (result.setStencil) {
dxstate.stencilFunc.set(D3DCMP_ALWAYS);
dxstate.stencilRef.set(result.stencilValue);
Expand All @@ -393,11 +399,7 @@ void DrawEngineDX9::DoFlush() {
// Might help for text drawing.

device_->SetVertexDeclaration(transformedVertexDecl_);
if (result.drawIndexed) {
device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, numDecodedVerts_, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
} else {
device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex));
}
device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, numDecodedVerts_, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
} else if (result.action == SW_CLEAR) {
u32 clearColor = result.color;
float clearDepth = result.depth;
Expand Down
Loading
Loading