From 6e3c33a2eb4f33a7c748ac5279597975e960075e Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 30 Mar 2024 11:44:10 +0000 Subject: [PATCH 01/28] GS/HW: Initial work implementing RT in RT support --- bin/resources/shaders/vulkan/tfx.glsl | 4 +- pcsx2/GS/GSState.cpp | 3 +- pcsx2/GS/GSState.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 216 +++++++++++++++++++---- pcsx2/GS/Renderers/HW/GSRendererHW.h | 2 + pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 127 ++++++++++--- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 7 files changed, 289 insertions(+), 66 deletions(-) diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 69c4345db9ca7..0b379046db135 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -945,7 +945,7 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8u); @@ -1320,7 +1320,7 @@ void main() ps_blend(C, alpha_blend); #if PS_SHUFFLE - #if !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u)); diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index c664204864483..e0aa487706f3f 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1674,7 +1674,8 @@ void GSState::FlushPrim() Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM); } #endif - + // Update scissor, it may have been modified by a previous draw + m_env.CTXT[PRIM->CTXT].UpdateScissor(); 
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); // Texel coordinate rounding diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 75416d86d5228..a9d5e6c99271f 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -224,6 +224,7 @@ class GSState : public GSAlignedClass<32> bool m_texflush_flag = false; bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; + bool m_in_target_draw = false; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index eb3acb08e4951..357c9171bfde0 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -489,7 +489,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, // Dogs will reuse the Z in a different size format for a completely unrelated draw with an FBW of 2, then go back to using it in full width const bool size_is_wrong = tex->m_target ? (static_cast(tex->m_from_target_TEX0.TBW * 64) < tex->m_from_target->m_valid.z / 2) : false; const u32 draw_page_width = std::max(static_cast(m_vt.m_max.p.x + (!(process_ba & SHUFFLE_WRITE) ? 
8.9f : 0.9f)) / 64, 1); - const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z); + const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z) || (IsSinglePageDraw() && m_r.height() > 32); if (size_is_wrong || (rt && ((rt->m_TEX0.TBW % draw_page_width) == 0 || single_direction_doubled))) { @@ -554,7 +554,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } else { - if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) + if (((m_r.width() + 8) & ~(GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x - 1)) != GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x && (floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) { half_bottom_vert = false; half_bottom_uv = false; @@ -587,6 +587,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, else v[i + 1 - reversed_U].U += 128u; } + else + { + if (((pos + 8) >> 4) & 0x8) + { + v[i + reversed_pos].XYZ.X -= 128u; + v[i + 1 - reversed_pos].XYZ.X -= 128u; + } + } if (half_bottom_vert) { @@ -704,6 +712,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.t.x += 8.0f; } } + else + { + if (fmod(std::floor(m_vt.m_min.p.x), 64.0f) == 8.0f) + { + m_vt.m_min.p.x -= 8.0f; + m_vt.m_max.p.x -= 8.0f; + } + } if (half_right_vert) { @@ -897,7 +913,7 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) } // If it's a channel shuffle, it'll likely be just a single page, so assume full screen. 
- if (m_channel_shuffle) + if (m_channel_shuffle || (tex && IsPageCopy())) { const int page_x = frame_psm.pgs.x - 1; const int page_y = frame_psm.pgs.y - 1; @@ -1019,6 +1035,25 @@ bool GSRendererHW::IsPossibleChannelShuffle() const return false; } +bool GSRendererHW::IsPageCopy() const +{ + if (!PRIM->TME) + return false; + + const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx]; + + if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) + return false; + + if (next_ctx.FRAME.FBP != (m_cached_ctx.FRAME.FBP + 0x1)) + return false; + + if (!NextDrawMatchesShuffle()) + return false; + + return true; +} + bool GSRendererHW::NextDrawMatchesShuffle() const { // Make sure nothing unexpected has changed. @@ -1176,6 +1211,16 @@ GSVector4i GSRendererHW::GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages) return GSVector4i::loadh(size); } +bool GSRendererHW::IsSinglePageDraw() const +{ + const GSVector2i& frame_pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs; + + if (m_r.width() <= frame_pgs.x && m_r.height() <= frame_pgs.y) + return true; + + return false; +} + bool GSRendererHW::TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw) { const u32 start_bp = FRAME.Block(); @@ -1586,7 +1631,11 @@ void GSRendererHW::Move() const int w = m_env.TRXREG.RRW; const int h = m_env.TRXREG.RRH; - + GL_CACHE("Starting Move! 
0x%x W:%d F:%s => 0x%x W:%d F:%s (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d) draw %d", + m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, psm_str(m_env.BITBLTBUF.SPSM), + m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, psm_str(m_env.BITBLTBUF.DPSM), + m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, + sx, sy, dx, dy, w, h, s_n); if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h)) { @@ -2538,7 +2587,7 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, - fm); + fm, false, false, false, false, GSVector4i::zero(), true); if (tgt) shuffle_target = tgt->m_32_bits_fmt; @@ -2630,14 +2679,11 @@ void GSRendererHW::Draw() const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. - const GSVector2i t_size = GetTargetSize(src, can_expand); + GSVector2i t_size = GetTargetSize(src, can_expand); const GSVector4i t_size_rect = GSVector4i::loadh(t_size); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. const GSVector4i unclamped_draw_rect = m_r; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. 
- if (!m_texture_shuffle && m_split_texture_shuffle_pages == 0) - m_r = m_r.rintersect(t_size_rect); float target_scale = GetTextureScaleFactor(); int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); @@ -2694,6 +2740,10 @@ void GSRendererHW::Draw() GSTextureCache::Target* rt = nullptr; GIFRegTEX0 FRAME_TEX0; + const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; + + m_in_target_draw = false; + if (!no_rt) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. @@ -2702,21 +2752,28 @@ void GSRendererHW::Draw() FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; + const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && + GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + // Don't clamp on shuffle, the height cache may troll us with the REAL height. + if (!possible_shuffle && m_split_texture_shuffle_pages == 0) + m_r = m_r.rintersect(t_size_rect); + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. 
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; const bool is_clear = is_possible_mem_clear && is_square; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); // Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); + m_in_target_draw = false; + rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear); + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); // Draw skipped because it was a clear and there was no target. 
if (!rt) @@ -2737,6 +2794,10 @@ void GSRendererHW::Draw() CleanupDraw(true); return; } + else if (IsPageCopy() && src->m_from_target && m_cached_ctx.TEX0.TBP0 >= src->m_from_target->m_TEX0.TBP0) + { + FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW; + } rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, m_r, src); @@ -2747,7 +2808,36 @@ void GSRendererHW::Draw() return; } } + else if (rt->m_TEX0.TBP0 != FRAME_TEX0.TBP0) // Must have done rt in rt + { + GSVertex* v = &m_vertex.buff[0]; + u32 vertical_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + const u32 horizontal_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + + for (u32 i = 0; i < m_vertex.tail; i++) + { + v[i].XYZ.Y += vertical_offset << 4; + v[i].XYZ.X += horizontal_offset << 4; + } + + m_context->scissor.in.x += horizontal_offset; + m_context->scissor.in.z += horizontal_offset; + m_context->scissor.in.y += vertical_offset; + m_context->scissor.in.w += vertical_offset; + m_r.y += vertical_offset; + m_r.w += vertical_offset; + m_r.x += horizontal_offset; + m_r.z += horizontal_offset; + m_in_target_draw = true; + m_vt.m_min.p.x += horizontal_offset; + m_vt.m_max.p.x += horizontal_offset; + m_vt.m_min.p.y += vertical_offset; + m_vt.m_max.p.y += vertical_offset; + t_size.x = rt->m_unscaled_size.x - horizontal_offset; + t_size.y = rt->m_unscaled_size.y - vertical_offset; + } + if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) { src->m_texture = rt->m_texture; @@ -2784,7 +2874,6 @@ void GSRendererHW::Draw() if (!ds) { - ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), 
target_scale, GSTextureCache::DepthStencil, true, 0, false, force_preload, preserve_depth, m_r, src); if (!ds) [[unlikely]] @@ -3061,7 +3150,7 @@ void GSRendererHW::Draw() } } const bool blending_cd = PRIM->ABE && !m_context->ALPHA.IsOpaque(); - if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM)) + if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM) && !m_in_target_draw) { if (rt->m_TEX0.TBW != FRAME_TEX0.TBW && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000)) { @@ -3072,11 +3161,15 @@ void GSRendererHW::Draw() if (m_cached_ctx.FRAME.FBMSK & 0xF0000000) rt->m_valid_alpha_high = false; } - rt->m_TEX0 = FRAME_TEX0; + if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + rt->m_TEX0 = FRAME_TEX0; } - if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW))) - ds->m_TEX0 = ZBUF_TEX0; + if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) + { + if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + ds->m_TEX0 = ZBUF_TEX0; + } } else if (!m_texture_shuffle) { @@ -3084,7 +3177,7 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. if (rt) { - const bool update_fbw = (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); rt->m_TEX0.TBW = update_fbw ? 
FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } @@ -3106,7 +3199,7 @@ void GSRendererHW::Draw() GSTextureCache::Target* old_ds = nullptr; // If the draw is dated, we're going to expand in to black, so it's just a pointless rescale which will mess up our valid rects and end blocks. - if(!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) + if (!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) { GSVector2i new_size = t_size; @@ -3154,7 +3247,7 @@ void GSRendererHW::Draw() rt->ResizeDrawn(rt->GetUnscaledRect()); } - const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(new_size)); + const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); // Limit to 2x the vertical height of the resolution (for double buffering) rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); @@ -3223,7 +3316,7 @@ void GSRendererHW::Draw() } } } - else + else if (!m_in_target_draw) { // RT and DS sizes need to match, even if we're not doing any resizing. const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? 
ds->m_unscaled_size.x : 0); @@ -4008,8 +4101,8 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool min_uv.x -= block_offset.x * t_psm.bs.x; min_uv.y -= block_offset.y * t_psm.bs.y; - if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) && - block_offset.eq(m_r_block_offset)) + //if (/*GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) &&*/ + // block_offset.eq(m_r_block_offset)) { if (min_uv.eq(GSVector4i::cxpr(0, 0, 0, 0))) channel = ChannelFetch_RED; @@ -4057,13 +4150,36 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - GSVertex* s = &m_vertex.buff[0]; - s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); - s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); - s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); - s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); + if (!m_in_target_draw && (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || NextDrawMatchesShuffle())) + { + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); - m_r = GSVector4i(0, 0, 1024, 1024); + s[0].U = 0; + s[1].U = 16384; + s[0].V = 0; + s[1].V = 16384; + + m_r = GSVector4i(0, 0, 1024, 1024); + } + else + { + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.w << 4)); + + s[0].U = (m_r.x << 4); + s[1].U = (m_r.z << 4); + s[0].V = (m_r.y << 4); + s[1].V = (m_r.w << 4); + m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; + } + m_vertex.head 
= m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; @@ -5211,9 +5327,12 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) { + + const int tex_diff = tex->m_from_target ? static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : 0; + const int frame_diff = rt ? static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) { // Can we read the framebuffer directly? (i.e. sample location matches up). if (CanUseTexIsFB(rt, tex, tmm)) @@ -5253,6 +5372,10 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is depth buffer, unsafe to read, taking copy."); src_target = ds; } + else if (m_channel_shuffle && tex->m_from_target && tex_diff != frame_diff) + { + src_target = tex->m_from_target; + } else if (!m_downscale_source) { // No match. 
@@ -5275,7 +5398,34 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c { copy_range = src_bounds; copy_size = src_unscaled_size; + GSVector4i::storel(©_dst_offset, copy_range); + if (m_channel_shuffle && (tex_diff || frame_diff)) + { + + u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; + u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; + u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + copy_range.y += vertical_offset; + copy_range.x += horizontal_offset; + copy_size.y -= vertical_offset; + copy_size.x -= horizontal_offset; + + if (m_in_target_draw) + { + copy_size.x = m_r.width(); + copy_size.y = m_r.height(); + copy_range.w = copy_range.y + copy_size.y; + copy_range.z = copy_range.x + copy_size.x; + + if (tex_diff != frame_diff) + { + GSVector4i::storel(©_dst_offset, m_r); + copy_size.x += copy_dst_offset.x; + copy_size.y += copy_dst_offset.y; + } + } + } } else { @@ -5285,7 +5435,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c copy_size.y = std::min(tex_size.y, src_unscaled_size.y); // Use the texture min/max to get the copy range if not reinterpreted. 
- if (m_texture_shuffle) + if (m_texture_shuffle || m_channel_shuffle) copy_range = GSVector4i::loadh(copy_size); else copy_range = tmm.coverage; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 02dce7ece7759..07228fb6ab3c1 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -113,12 +113,14 @@ class GSRendererHW : public GSRenderer void SetTCOffset(); bool IsPossibleChannelShuffle() const; + bool IsPageCopy() const; bool NextDrawMatchesShuffle() const; bool IsSplitTextureShuffle(GSTextureCache::Target* rt); GSVector4i GetSplitTextureShuffleDrawRect() const; u32 GetEffectiveTextureShuffleFbmsk() const; static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages); + bool IsSinglePageDraw() const; bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw); bool IsSplitClearActive() const; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index ffcc7b14169b6..f19c7f9de325d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -945,7 +945,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c t->ResizeTexture(t->m_unscaled_size.x, t->m_unscaled_size.y); t->m_valid = dst->m_valid; } - + CopyRGBFromDepthToColor(t, dst); } @@ -1091,6 +1091,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const req_rect.y = region.HasY() ? region.GetMinY() : 0; GSVector4i block_boundary_rect = req_rect; + block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); + block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. 
block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); @@ -1646,10 +1648,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const { GSVector4i new_rect = req_rect; - // Just in case the TextureMinMax trolls us as it does, when checking if inside the target. - new_rect.z -= 2; - new_rect.w -= 2; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1658,7 +1656,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const { for (auto t : m_dst[DepthStencil]) { - if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, new_rect)) + if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, block_boundary_rect)) { GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); // Let's fetch a depth format texture. 
Rational, it will avoid the texture allocation and the @@ -1668,7 +1666,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const GIFRegTEX0 depth_TEX0; depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[1] = TEX0.U32[1]; - src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); + src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); if (src != nullptr) { @@ -1690,7 +1688,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - src = LookupDepthSource(false, TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); + src = LookupDepthSource(false, TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); if (src != nullptr) { @@ -1803,7 +1801,7 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale) + bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1812,8 +1810,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe const GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect{}; bool clear = true; - const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) - { + const auto& calcRescale = [&size, &scale, 
&new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) { // TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one. clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y); new_size = size.max(tgt->m_unscaled_size); @@ -1826,7 +1823,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe Target* dst = nullptr; auto& list = m_dst[type]; - + const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y)); // TODO: Move all frame stuff to its own routine too. if (!is_frame) { @@ -1837,6 +1834,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (bp == t->m_TEX0.TBP0) { bool can_use = true; + + if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + continue; + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. 
So while we can't use this heuristic for tossing @@ -1880,7 +1881,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst = t; dst->m_32_bits_fmt |= (psm_s.bpp != 16); - break; + + if (FindOverlappingTarget(dst)) + continue; + else + break; } else { @@ -1890,6 +1895,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe delete t; } } + // Probably pointing to half way through the target + else if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; + + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + continue; + } } } else @@ -2042,6 +2057,64 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } + else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + { + const bool scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp; + new_size = dst->m_unscaled_size; + new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); + + dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); + + if (scale_down) + { + if ((new_size.y * 2) < 1024) + { + new_scaled_size.y *= 2; + new_size.y *= 2; + dst->m_valid.y *= 2; + dst->m_valid.w *= 2; + } + dRect.y *= 2; + dRect.w *= 2; + } + else + { + new_scaled_size.y /= 2; + new_size.y /= 2; + dRect.y /= 2; + dRect.w /= 2; + dst->m_valid.y 
/= 2; + dst->m_valid.w /= 2; + } + GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); + + m_target_memory_usage += tex->GetMemUsage(); + + g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); + + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(dst->m_texture); + } + + dst->m_TEX0.PSM = TEX0.PSM; + dst->m_texture = tex; + dst->m_unscaled_size = new_size; + + } // If our RGB was invalidated, we need to pull it from depth. // Terminator 3 will reuse our dst_matched target with the RGB masked, then later use the full ARGB area, so we need to update the depth. @@ -3220,8 +3293,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r // But this causes rects to be too big, especially in WRC games, I don't think there's any need to align them here. 
GSVector4i r = rect; - off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) - { + off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) { auto& list = m_src.m_map[page]; for (auto i = list.begin(); i != list.end();) { @@ -3844,7 +3916,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - GL_CACHE("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -4401,7 +4473,7 @@ void GSTextureCache::IncAge() AgeHashCache(); // As of 04/15/2024 this is s et to 60 (just 1 second of targets), which should be fine now as it doesn't destroy targets which haven't been covered. - // + // // For reference, here are some games sensitive to killing old targets: // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. 
// ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it @@ -5634,8 +5706,7 @@ std::shared_ptr GSTextureCache::LookupPaletteObject(con void GSTextureCache::Read(Target* t, const GSVector4i& r) { - if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) - || r.width() == 0 || r.height() == 0) + if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) || r.width() == 0 || r.height() == 0) return; const GIFRegTEX0& TEX0 = t->m_TEX0; @@ -5856,7 +5927,10 @@ GSTextureCache::Source::~Source() // to recycle. if (!m_shared_texture && !m_from_hash_cache && m_texture) { - g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); + if(m_from_target) + g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); + else + g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } } @@ -6587,8 +6661,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0) m_surfaces.insert(s); // The source pointer will be stored/duplicated in all m_map[array of pages] - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { s->m_erase_it[page] = m_map[page].InsertFront(s); }); } @@ -6631,8 +6704,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s", s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM)); - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { m_map[page].EraseIndex(s->m_erase_it[page]); }); @@ -7045,7 +7117,7 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet { // Palette is unused it = map.erase(it); // Erase element from map - // The palette object should now be gone as the shared pointer to the object in the map is deleted + // The palette object should now be gone as the shared pointer to the object in the map 
is deleted } else { @@ -7109,10 +7181,7 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur { const SurfaceOffsetKeyElem& lhs_elem = lhs.elems[i]; const SurfaceOffsetKeyElem& rhs_elem = rhs.elems[i]; - if (lhs_elem.bp != rhs_elem.bp - || lhs_elem.bw != rhs_elem.bw - || lhs_elem.psm != rhs_elem.psm - || !lhs_elem.rect.eq(rhs_elem.rect)) + if (lhs_elem.bp != rhs_elem.bp || lhs_elem.bw != rhs_elem.bw || lhs_elem.psm != rhs_elem.psm || !lhs_elem.rect.eq(rhs_elem.rect)) return false; } return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 3ee9f925b0aaa..0c2f264b9ebd2 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -491,7 +491,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr); Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); From 6254c971122891bd39d6e72d76c0d010b00cda3e Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 26 Jun 2024 12:23:35 +0100 Subject: [PATCH 02/28] GS/HW: Further fixes to RT in RT - Still a ways to go... 
--- pcsx2/GS/GSState.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 89 ++++++++++++++++-------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 84 ++++++++++++++++------ 3 files changed, 123 insertions(+), 51 deletions(-) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index a9d5e6c99271f..5b07b03424217 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -225,6 +225,7 @@ class GSState : public GSAlignedClass<32> bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; bool m_in_target_draw = false; + u32 m_target_offset = 0; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 357c9171bfde0..0e0578a618113 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2086,6 +2086,8 @@ void GSRendererHW::Draw() if (num_skipped_channel_shuffle_draws > 0) GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); num_skipped_channel_shuffle_draws = 0; + m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_end_block = 0xffff; #else if (m_channel_shuffle) return; @@ -2743,18 +2745,21 @@ void GSRendererHW::Draw() const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; m_in_target_draw = false; + m_target_offset = 0; if (!no_rt) { + const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && + GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? 
src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); + // Don't clamp on shuffle, the height cache may troll us with the REAL height. if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); @@ -2769,12 +2774,10 @@ void GSRendererHW::Draw() // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); - m_in_target_draw = false; - rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); - + // Draw skipped because it was a clear and there was no target. 
if (!rt) { @@ -2808,12 +2811,14 @@ void GSRendererHW::Draw() return; } } - else if (rt->m_TEX0.TBP0 != FRAME_TEX0.TBP0) // Must have done rt in rt + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt { GSVertex* v = &m_vertex.buff[0]; - u32 vertical_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. - const u32 horizontal_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + const int horizontal_offset = (std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + // Used to reduce the offset made later in channel shuffles + m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); for (u32 i = 0; i < m_vertex.tail; i++) { @@ -2836,6 +2841,16 @@ void GSRendererHW::Draw() m_vt.m_max.p.y += vertical_offset; t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset; + + if (t_size.y <= 0) + { + u32 new_height = m_r.w; + + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->UpdateValidity(m_r, true); + rt->UpdateDrawn(m_r, true); + } } if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) @@ -2858,6 +2873,8 @@ void GSRendererHW::Draw() // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? 
(rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); } + else + m_last_channel_shuffle_end_block = 0xFFFF; } GSTextureCache::Target* ds = nullptr; @@ -2866,12 +2883,14 @@ void GSRendererHW::Draw() { ZBUF_TEX0.U64 = 0; ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + if (!ds) { ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, @@ -3162,7 +3181,10 @@ void GSRendererHW::Draw() rt->m_valid_alpha_high = false; } if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { rt->m_TEX0 = FRAME_TEX0; + + } } if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) @@ -3316,7 +3338,7 @@ void GSRendererHW::Draw() } } } - else if (!m_in_target_draw) + else { // RT and DS sizes need to match, even if we're not doing any resizing. const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0); @@ -3993,7 +4015,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail <= 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. 
So @@ -4167,16 +4189,24 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool } else { + const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); + m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); + m_cached_ctx.FRAME.FBP += frame_page_offset; + m_in_target_draw |= frame_page_offset > 0; GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.w << 4)); - s[0].U = (m_r.x << 4); - s[1].U = (m_r.z << 4); - s[0].V = (m_r.y << 4); - s[1].V = (m_r.w << 4); + const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); + m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; + s[0].U = m_r.x << 4; + s[1].U = m_r.z << 4; + s[0].V = m_r.y << 4; + s[1].V = m_r.w << 4; m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; } @@ -5332,7 +5362,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const int frame_diff = rt ? static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) { // Can we read the framebuffer directly? (i.e. sample location matches up). 
if (CanUseTexIsFB(rt, tex, tmm)) @@ -5390,7 +5420,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GSVector4i copy_range; GSVector2i copy_size; GSVector2i copy_dst_offset; - + bool copied_rt = false; // Shuffles take the whole target. This should've already been halved. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // Restricting it also breaks Tom and Jerry... @@ -5406,11 +5436,14 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + copy_range.y += vertical_offset; copy_range.x += horizontal_offset; copy_size.y -= vertical_offset; copy_size.x -= horizontal_offset; - + target_region = false; + source_region.bits = 0; + //copied_rt = tex->m_from_target != nullptr; if (m_in_target_draw) { copy_size.x = m_r.width(); @@ -5506,12 +5539,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c static_cast(std::ceil(static_cast(copy_dst_offset.y) * scale))); src_copy.reset(src_target->m_texture->IsDepthStencil() ? - g_gs_device->CreateDepthStencil( - scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : - (m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, - true) : - g_gs_device->CreateTexture( - scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true))); + g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : + (m_downscale_source || copied_rt) ? 
g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) : + g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)); if (!src_copy) [[unlikely]] { Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); @@ -5519,6 +5549,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c m_conf.ps.tfx = 4; return; } + if (m_downscale_source) { g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -7357,7 +7388,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) { GL_PUSH("OI_BlitFMV"); @@ -7411,7 +7442,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc g_texture_cache->InvalidateVideoMemSubTarget(_rt); return false; // skip current draw - } + }*/ // Nothing to see keep going return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index f19c7f9de325d..7616b198ee226 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -18,6 +18,7 @@ #include "fmt/format.h" #include +#include #ifdef __APPLE__ #include @@ -1094,8 +1095,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the 
nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. - block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); - block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); + block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. @@ -1117,8 +1118,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) continue; + //const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect); const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); - // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. 
// Also is we have already found a target which we had to offset in to by using a region or exact address, // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) @@ -1525,7 +1526,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const rect.y -= new_rect.y & ~(page_size.y - 1); } - rect = rect.rintersect(t->m_valid); + //rect = rect.rintersect(t->m_valid); if (rect.rempty()) continue; @@ -1646,8 +1647,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) { - GSVector4i new_rect = req_rect; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1836,8 +1835,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe bool can_use = true; if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + { + DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); continue; - + } + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. 
So while we can't use this heuristic for tossing @@ -1882,12 +1884,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_32_bits_fmt |= (psm_s.bpp != 16); - if (FindOverlappingTarget(dst)) + /*if (FindOverlappingTarget(dst)) continue; - else + else*/ break; } - else + else if(!(src && src->m_from_target == t)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); @@ -1896,14 +1898,34 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + else if(GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); - dst = t; + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); + const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + // If it's too old, it's probably not a real target to jump in to anymore. 
+ if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && + !(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || + (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + } + else + { + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; - dst->m_32_bits_fmt |= (psm_s.bpp != 16); - //Continue just in case there's a newer target - continue; + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + if (used) + list.MoveFront(i.Index()); + break; + } + } } } } @@ -2049,6 +2071,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { calcRescale(dst); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); + if (!tex) + return nullptr; g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_gs_device->Recycle(dst->m_texture); @@ -2089,10 +2113,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, scale); - DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + //DevCon.Warning("Scale %s draw %d", scale_down ? 
"down" : "up", GSState::s_n); GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - + if (!tex) + return nullptr; m_target_memory_usage += tex->GetMemUsage(); g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); @@ -2983,7 +3008,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, } // Inject the new size back into the cache. - GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast(needed_height)); + GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast(needed_height)); } float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) @@ -4452,7 +4477,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo if (s->m_from_hash_cache) s->m_from_hash_cache->refcount++; else if (!s->m_shared_texture) + { + DevCon.Warning("replace %d", m_source_memory_usage); m_source_memory_usage += s->m_texture->GetMemUsage(); + } } void GSTextureCache::IncAge() @@ -4588,7 +4616,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + m_target_memory_usage += dTex->GetMemUsage(); // copy the rt in const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); @@ -4905,7 +4933,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + src->m_shared_texture = false; + src->m_target_direct = false; + m_target_memory_usage += dTex->GetMemUsage(); src->m_texture = dTex; if (use_texture) @@ -5360,7 +5390,7 @@ GSTextureCache::Source* 
GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); return nullptr; } - + DevCon.Warning("Merged %d", m_source_memory_usage); m_source_memory_usage += dtex->GetMemUsage(); // Sort rect list by the texture, we want to batch as many as possible together. @@ -6251,6 +6281,7 @@ GSTextureCache::Target::~Target() { // Targets should never be shared. pxAssert(!m_shared_texture); + if (m_texture) { g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); @@ -6552,7 +6583,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_valid = m_valid.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } + // Else No valid size, so need to resize down. 
// GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6565,12 +6600,16 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } else if (can_resize) { m_valid = m_valid.runion(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } @@ -7034,6 +7073,7 @@ void GSTextureCache::Palette::InitializeTexture() } m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); + g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); } } From c2256e77a7c94d7b82323694c1676f1b7be208d3 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 2 Jul 2024 15:36:45 +0100 Subject: [PATCH 03/28] GS/HW: Further RT in RT changes to improve compatibility --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 164 ++++++++++++----------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 29 +++- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 3 files changed, 110 insertions(+), 85 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 0e0578a618113..f58147dacd500 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2747,6 +2747,75 @@ void GSRendererHW::Draw() m_in_target_draw = false; m_target_offset = 0; + GSTextureCache::Target* ds = nullptr; + GIFRegTEX0 ZBUF_TEX0; + if (!no_ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = 
m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + if (!ds) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. + if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (!no_rt) { const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && @@ -2776,7 +2845,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 
1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), - GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2811,7 +2880,7 @@ void GSRendererHW::Draw() return; } } - else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { GSVertex* v = &m_vertex.buff[0]; int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. @@ -2839,17 +2908,24 @@ void GSRendererHW::Draw() m_vt.m_max.p.x += horizontal_offset; m_vt.m_min.p.y += vertical_offset; m_vt.m_max.p.y += vertical_offset; + t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset; - if (t_size.y <= 0) + // Don't resize if the BPP don't match. 
+ if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - u32 new_height = m_r.w; - - //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); - rt->ResizeTexture(rt->m_unscaled_size.x, new_height); - rt->UpdateValidity(m_r, true); - rt->UpdateDrawn(m_r, true); + if (t_size.y <= 0) + { + u32 new_height = m_r.w; + + if (possible_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + new_height /= 2; + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->UpdateValidity(m_r, true); + rt->UpdateDrawn(m_r, true); + } } } @@ -2877,74 +2953,6 @@ void GSRendererHW::Draw() m_last_channel_shuffle_end_block = 0xFFFF; } - GSTextureCache::Target* ds = nullptr; - GIFRegTEX0 ZBUF_TEX0; - if (!no_ds) - { - ZBUF_TEX0.U64 = 0; - ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; - ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; - - ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, - m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); - - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; - - if (!ds) - { - ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, - true, 0, false, force_preload, preserve_depth, m_r, src); - if (!ds) [[unlikely]] - { - GL_INS("ERROR: Failed to create ZBUF target, skipping."); - CleanupDraw(true); - return; - } - } - else - { - // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. 
- if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) - { - if (ds->m_alpha_max != 0) - { - const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; - - switch (m_cached_ctx.TEST.ZTST) - { - case ZTST_GEQUAL: - // Every Z value will pass - if (max_z <= m_vt.m_min.p.z) - { - m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; - if (zm) - { - ds = nullptr; - no_ds = true; - } - } - break; - case ZTST_GREATER: - // Every Z value will pass - if (max_z < m_vt.m_min.p.z) - { - m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; - if (zm) - { - ds = nullptr; - no_ds = true; - } - } - break; - default: - break; - } - } - } - } - } - if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; @@ -4172,7 +4180,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - if (!m_in_target_draw && (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || NextDrawMatchesShuffle())) + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && NextDrawMatchesShuffle())) { GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 7616b198ee226..24f642f6bd92a 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1800,7 +1800,8 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale, 
GSTextureCache::Source* src) + bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, + bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, int offset) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1898,8 +1899,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if(GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { + if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + continue; + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) @@ -2083,6 +2087,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) { + dst->Update(false); + const bool scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp; new_size = dst->m_unscaled_size; new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); @@ -2340,6 +2346,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_alpha_high = dst_match->m_valid_alpha_high; //&& psm_s.trbpp != 24; dst->m_valid_rgb = 
dst_match->m_valid_rgb; dst->m_was_dst_matched = true; + dst_match->m_was_dst_matched = true; + dst_match->m_valid_rgb = false; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). @@ -3174,6 +3182,17 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr continue; } + // Not covering the whole target, and a different format, so just dirty it. + if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM) + { + const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; + u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5; + GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y); + InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true); + ++i; + continue; + } + InvalidateSourcesFromTarget(t); t->m_valid_alpha_low &= preserve_alpha; @@ -4588,9 +4607,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } bool hack = false; - bool channel_shuffle = false; + bool channel_shuffle = dst && (TEX0.PSM == PSMT8) && (GSRendererHW::GetInstance()->TestChannelShuffle(dst)); - if (dst && (x_offset != 0 || y_offset != 0)) + if (dst && (x_offset != 0 || y_offset != 0) && (TEX0.PSM != PSMT8 || channel_shuffle)) { const float scale = dst->m_scale; const int x = static_cast(scale * x_offset); @@ -4653,8 +4672,6 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture = dst->m_texture; src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; - - channel_shuffle = GSRendererHW::GetInstance()->TestChannelShuffle(dst); } // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. 
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 0c2f264b9ebd2..024dfbbed66db 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -491,7 +491,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, int offset = -1); Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); From f1f11f8285de5d8d64569a2784ac7b75a8b241b4 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 1 Jan 2025 01:01:47 +0000 Subject: [PATCH 04/28] GS/HW: Further fixes for RT in RT changes in behaviour --- bin/resources/shaders/dx11/tfx.fx | 7 +- bin/resources/shaders/opengl/tfx_fs.glsl | 7 +- bin/resources/shaders/vulkan/tfx.glsl | 7 +- pcsx2/GS/GSState.cpp | 10 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 301 +++++++++++++++++++---- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 204 ++++++++++----- pcsx2/GS/Renderers/HW/GSTextureCache.h | 8 +- pcsx2/GS/Renderers/Metal/tfx.metal | 7 +- 8 files changed, 429 insertions(+), 122 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx 
index 7a722cdca1e61..bcf812c83db1c 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,11 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index c641a6743fd5b..257fbc42d211e 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,11 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 0b379046db135..a6335daf3133a 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1350,11 +1350,8 @@ void main() // Write RB part. 
Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index e0aa487706f3f..86f1b3a2690fa 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3095,6 +3095,16 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)))) return false; + // Try to detect shuffles, because these will not autoflush, they by design clash. + if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16) + { + // Pretty confident here... + GSVertex* buffer = &m_vertex.buff[0]; + const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X); + + if (const_spacing) + return false; + } const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. 
diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index f58147dacd500..70235932ee9af 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -471,7 +471,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, GSVector4::storeh(&v[1].ST.S, st); } } - m_r = fpr; + m_r = r; m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; return; @@ -1040,7 +1040,8 @@ bool GSRendererHW::IsPageCopy() const if (!PRIM->TME) return false; - const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx]; + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) return false; @@ -2461,7 +2462,7 @@ void GSRendererHW::Draw() GIFRegTEX0 TEX0 = {}; GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; - + bool possible_shuffle = false; // Disable texture mapping if the blend is black and using alpha from vertex. 
if (m_process_texture) { @@ -2578,7 +2579,7 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) { if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) { @@ -2601,16 +2602,22 @@ void GSRendererHW::Draw() const GSVertex* v = &m_vertex.buff[0]; const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const int second_u = PRIM->FST ? ((v[1].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const bool shuffle_coords = (first_x ^ first_u) & 8; - const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; + const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); + const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); + // offset coordinates swap around RG/BA. (Ace Combat) + const u32 minv = m_cached_ctx.CLAMP.MINV; + const u32 minu = m_cached_ctx.CLAMP.MINU; + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); + const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; + // Round up half of second coord, it can sometimes be slightly under. 
+ const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; const int read_width = std::abs(second_u - first_u); - shuffle_target = shuffle_coords && draw_width == 8 && draw_width == read_width; + shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } } - const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle()); + + possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM; const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); @@ -2636,6 +2643,7 @@ void GSRendererHW::Draw() return; } + possible_shuffle &= src && (src->m_from_target != nullptr); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. @@ -2762,7 +2770,7 @@ void GSRendererHW::Draw() ZBUF_TEX0.TBW = m_channel_shuffle ? 
src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; - if (!ds) + if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP) { ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, true, 0, false, force_preload, preserve_depth, m_r, src); @@ -2818,21 +2826,34 @@ void GSRendererHW::Draw() if (!no_rt) { - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || IsPossibleChannelShuffle()); // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBW = (possible_shuffle && IsPossibleChannelShuffle() && src && src->m_from_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); + // Do the lookup with the real format on a shuffle, if possible. 
+ if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + { + // Creating a new target on a shuffle, possible temp buffer, but let's try to get the real format. + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.FRAME.Block() == FRAME_TEX0.TBP0 && next_ctx.FRAME.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.FRAME.PSM; + else if (next_ctx.TEX0.TBP0 == FRAME_TEX0.TBP0 && next_ctx.TEX0.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.TEX0.PSM; + else + FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + } + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. @@ -2845,7 +2866,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), - GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? 
-1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2882,13 +2903,68 @@ void GSRendererHW::Draw() } else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { - GSVertex* v = &m_vertex.buff[0]; - int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. - const int horizontal_offset = (std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); + if (vertical_offset < 0) + { + rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; + // Make sure to use the original format for the offset. 
+ int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + + new_scaled_size.y += new_offset * rt->m_scale; + GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); + //if (!tex) + // return nullptr; + //m_target_memory_usage += tex->GetMemUsage(); + GSVector4i dRect = GSVector4i(0, new_offset * rt->m_scale, new_scaled_size.x, new_scaled_size.y); + g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); + + + if (src && src->m_from_target && src->m_from_target == rt) + { + src->m_texture = rt->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + //m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(rt->m_texture); + } + + rt->m_valid.y += new_offset; + rt->m_valid.w += new_offset; + rt->m_drawn_since_read.y += new_offset; + rt->m_drawn_since_read.w += new_offset; + rt->m_texture = tex; + rt->m_unscaled_size = new_scaled_size / rt->m_scale; + + t_size.y += std::abs(vertical_offset); + vertical_offset = 0; + } + + // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? 
+ if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) + { + + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_offset + m_r.w + 1, z_offset + ds->m_unscaled_size.y) * ds->m_scale); + int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); + GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + + g_texture_cache->SetTemporaryZ(tex); + } + + GSVertex* v = &m_vertex.buff[0]; + for (u32 i = 0; i < m_vertex.tail; i++) { v[i].XYZ.Y += vertical_offset << 4; @@ -2915,7 +2991,7 @@ void GSRendererHW::Draw() // Don't resize if the BPP don't match. 
if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - if (t_size.y <= 0) + if (m_r.w > rt->m_unscaled_size.y) { u32 new_height = m_r.w; @@ -2923,8 +2999,11 @@ void GSRendererHW::Draw() new_height /= 2; //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); rt->ResizeTexture(rt->m_unscaled_size.x, new_height); - rt->UpdateValidity(m_r, true); - rt->UpdateDrawn(m_r, true); + + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); + + rt->UpdateValidity(m_r, !frame_masked); + rt->UpdateDrawn(m_r, !frame_masked); } } } @@ -2953,6 +3032,75 @@ void GSRendererHW::Draw() m_last_channel_shuffle_end_block = 0xFFFF; } + // Only run if DS was new and matched the framebuffer. + if (!no_ds && !ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + // This should never happen, but just to be safe.. + if (!ds) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. 
+ if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; @@ -2966,7 +3114,8 @@ void GSRendererHW::Draw() const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const bool shuffle_coords = (first_x ^ first_u) & 8; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; - const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() && + const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); + const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= draw_start && src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) || (m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0)); @@ -3207,8 +3356,8 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. 
if (rt) { - const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); - rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); + const bool update_fbw = !m_in_target_draw && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + rt->m_TEX0.TBW = update_fbw ? ((src && src->m_from_target && src->m_32_bits_fmt) ? src->m_from_target->m_TEX0.TBW : FRAME_TEX0.TBW) : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } if (ds) @@ -3217,6 +3366,11 @@ void GSRendererHW::Draw() ds->m_TEX0.PSM = ZBUF_TEX0.PSM; } } + // Probably grabbed an old 16bit target (Band Hero) + /*else if (m_texture_shuffle && GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp == 16) + { + rt->m_TEX0.PSM = PSMCT32; + }*/ // Figure out which channels we're writing. if (rt) @@ -3234,7 +3388,7 @@ void GSRendererHW::Draw() GSVector2i new_size = t_size; // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. - if (src && m_texture_shuffle && m_split_texture_shuffle_pages == 0) + if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle && m_split_texture_shuffle_pages == 0) { if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) { @@ -3245,9 +3399,18 @@ void GSRendererHW::Draw() } } + if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0) + { + new_size.y = std::max(new_size.y, static_cast((((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) >> 5) / rt->m_TEX0.TBW) * frame_psm.pgs.y) * 2); + GSVector4i new_valid = rt->m_valid; + new_valid.w = new_size.y; + rt->UpdateValidity(new_valid, true); + } + // We still need to make sure the dimensions of the targets match. 
- const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); - const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); + // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes. + const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0))); + const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0))); if (rt) { const u32 old_end_block = rt->m_end_block; @@ -3259,6 +3422,25 @@ void GSRendererHW::Draw() if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); + // May not be needed/could cause problems with garbage loaded from GS memory + if (preserve_rt_color) + { + RGBAMask mask; + mask._u32 = 0xF; + + if (new_w > rt->m_unscaled_size.x) + { + GSVector4i width_dirty_rect = GSVector4i(rt->m_unscaled_size.x, 0, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, width_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + + if (new_h > rt->m_unscaled_size.y) + { + GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + } + rt->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3278,9 +3460,11 @@ void GSRendererHW::Draw() } const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); + // if frame is masked or afailing always to never write frame, wanna make sure we don't touch it. This might happen if DATE or Alpha Test is being used to write to Z. 
+ const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); - rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateDrawn(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // Grandia Xtreme, Onimusha Warlord. @@ -3310,7 +3494,7 @@ void GSRendererHW::Draw() const bool new_rect = ds->m_valid.rempty(); const bool new_height = new_h > ds->GetUnscaledHeight(); const int old_height = ds->m_texture->GetHeight(); - + const GSVector4i old_rect = ds->GetUnscaledRect(); pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); @@ -3323,8 +3507,12 @@ void GSRendererHW::Draw() } // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); - ds->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); + // Dark cloud writes to 424 when the buffer is only 416 high, but masks the Z. + // Updating the valid causes the Z to overlap the framebuffer, which is obviously incorrect. 
+ const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + + ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); + ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); if (!new_rect && new_height && old_end_block != ds->m_end_block) { @@ -3423,7 +3611,9 @@ void GSRendererHW::Draw() { s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); - if (ds->m_texture) + if (g_texture_cache->GetTemporaryZ()) + g_texture_cache->GetTemporaryZ()->Save(s); + else if (ds->m_texture) ds->m_texture->Save(s); } } @@ -3512,9 +3702,10 @@ void GSRendererHW::Draw() if ((fm & fm_mask) != fm_mask && rt) { + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); //rt->m_valid = rt->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false); @@ -3525,15 +3716,31 @@ void GSRendererHW::Draw() if (zm != 0xffffffff && ds) { + const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + //ds->m_valid = ds->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + ds->UpdateValidity(real_rect, !z_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false); // Remove overwritten RTs at the ZBP. 
g_texture_cache->InvalidateVideoMemType( GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); + + + if (g_texture_cache->GetTemporaryZ()) + { + if (m_cached_ctx.DepthWrite()) + { + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, z_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + } + } } // @@ -4023,7 +4230,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. 
So @@ -4180,7 +4387,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && NextDrawMatchesShuffle())) + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy())) { GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); @@ -5604,6 +5811,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return false; } + // the texture is offset, and the frame isn't also offset, we can't do this. + if (tex->GetRegion().HasX() || tex->GetRegion().HasY()) + { + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + return false; + } + // If we're a shuffle, tex-is-fb is always fine. if (m_texture_shuffle || m_channel_shuffle) { @@ -5753,6 +5967,7 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src) if (invalidate_temp_src) g_texture_cache->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporaryZ(); // Restore Scissor. m_context->UpdateScissor(); @@ -5792,7 +6007,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.cb_vs.texture_offset = {}; m_conf.ps.scanmsk = env.SCANMSK.MSK; m_conf.rt = rt ? rt->m_texture : nullptr; - m_conf.ds = ds ? ds->m_texture : nullptr; + m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? 
g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr; // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -6163,7 +6378,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing()); // Restrict this to only when we're overwriting the whole target. - new_scale_rt_alpha = full_cover; + new_scale_rt_alpha = full_cover || rt->m_last_draw >= s_n; } } @@ -7248,7 +7463,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r), - rt_end_bp, m_cached_ctx.FRAME.PSM); + rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW); GSUploadQueue clear_queue; clear_queue.draw = s_n; @@ -7271,7 +7486,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r), - ds_end_bp, m_cached_ctx.ZBUF.PSM); + ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW); } } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 24f642f6bd92a..f5cb8661705b8 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -149,7 +149,8 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm if (rect.rempty()) return; - + if (rect.w > 2048) + DevCon.Warning("BAd"); std::vector::iterator it = target->m_dirty.end(); while (it != target->m_dirty.begin()) { @@ -274,6 +275,15 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw const int 
inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x; in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. + if (in_rect.x >= (dst_pgw * dst_page_size.x)) + { + in_rect.z -= dst_pgw * dst_page_size.x; + in_rect.x -= dst_pgw * dst_page_size.x; + in_rect.y += dst_page_size.y; + in_rect.w += dst_page_size.y; + } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; } @@ -1448,8 +1458,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. (GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (possible_shuffle && GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. - t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) && CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)) + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups. 
+ t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) @@ -1481,7 +1491,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (bp > t->m_TEX0.TBP0) { - GSVector4i new_rect = possible_shuffle ? block_boundary_rect : rect; + GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) ? block_boundary_rect : rect; if (linear) { new_rect.z -= 1; @@ -1586,15 +1596,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Omitting that check here seemed less risky than blowing CS targets out... const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs; const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm); + const u32 offset_bp = offset.bn(region.GetMinX(), region.GetMinY()); if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && - offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0) + (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || + (offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw)) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); - x_offset = -region.GetMinX(); - y_offset = -region.GetMinY(); + + x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX(); + y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY(); dst = t; 
tex_merge_rt = false; found_t = true; @@ -1827,7 +1840,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // TODO: Move all frame stuff to its own routine too. if (!is_frame) { - for (auto i = list.begin(); i != list.end(); ++i) + for (auto i = list.begin(); i != list.end();) { Target* t = *i; @@ -1838,6 +1851,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) { DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); + i++; continue; } @@ -1896,21 +1910,26 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe InvalidateSourcesFromTarget(t); i = list.erase(i); delete t; + + continue; } } // Probably pointing to half way through the target - else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z + /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + { continue; + }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) - { + const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((min_rect.z >> 6) + widthpage_offset) <= TEX0.TBW) || ((widthpage_offset + 
TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { /*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ // If it's too old, it's probably not a real target to jump in to anymore. - if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && - !(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || + /*if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && + !(widthpage_offset == 0 || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); @@ -1918,6 +1937,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe i = list.erase(i); delete t; } + else*/ + if (!is_shuffle && !GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM)) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + + continue; + } else { //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); @@ -1931,6 +1960,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } } + + i++; } } else @@ -2085,7 +2116,7 @@ 
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } - else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + else if (std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) { dst->Update(false); @@ -2094,7 +2125,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); - + if (scale_down) { if ((new_size.y * 2) < 1024) @@ -2116,34 +2147,38 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid.y /= 2; dst->m_valid.w /= 2; } - GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, - dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, - scale); - //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : - g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - if (!tex) - return nullptr; - m_target_memory_usage += tex->GetMemUsage(); + if (!is_shuffle) + { + GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + GSTexture* tex = type == RenderTarget ? 
g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); + if (!tex) + return nullptr; + m_target_memory_usage += tex->GetMemUsage(); - g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); + g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); - - if (src && src->m_from_target && src->m_from_target == dst) - { - src->m_texture = dst->m_texture; - src->m_target_direct = false; - src->m_shared_texture = false; - } - else - { - m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(dst->m_texture); + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(dst->m_texture); + } + + dst->m_texture = tex; + dst->m_unscaled_size = new_size; } - + // New format or doing a shuffle to a 32bit target that used to be 16bit dst->m_TEX0.PSM = TEX0.PSM; - dst->m_texture = tex; - dst->m_unscaled_size = new_size; } @@ -2347,7 +2382,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_was_dst_matched = true; dst_match->m_was_dst_matched = true; - dst_match->m_valid_rgb = false; + dst_match->m_valid_rgb = preserve_rgb; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). 
@@ -2572,7 +2607,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if (valid_draw_size && supported_fmt) { - const GSVector4i newrect = GSVector4i::loadh(size); + const GSVector4i newrect = GSVector4i::loadh(valid_size); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); RGBAMask rgba; @@ -3167,7 +3202,7 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm) +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3176,22 +3211,24 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr for (auto i = list.begin(); i != list.end();) { Target* const t = *i; - if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) + if ((start_bp > t->UnwrappedEndBlock() || end_bp < t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp) && t->m_dirty.empty())) { ++i; continue; } + const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. 
- if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM) + /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) { const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; - u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5; - GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y); - InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true); + const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); ++i; continue; - } + }*/ InvalidateSourcesFromTarget(t); @@ -3874,6 +3911,19 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32)) return false; + // This is probably copying to a new buffer but using the original one as an offset, so better to use a new texture, if we don't find one. + if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) + { + u32 new_DBP = DBP + (((dy / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * DBW) << 5); + + dst = nullptr; + + DBP = new_DBP; + dy = 0; + + dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); + } + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. 
if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) { @@ -3960,7 +4010,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + GL_CACHE("HW Move after draw %d 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", GSState::s_n, SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -4086,6 +4136,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Invalidate any sources that overlap with the target (since they're now stale). 
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false); + return true; } @@ -4272,7 +4323,7 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, { Target* t = *it; - if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -4988,6 +5039,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); + + src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw); + src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th); } else { @@ -5139,8 +5193,10 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR { // We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets // TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end. - const int tex_width = std::max(64 * TEX0.TBW, region.GetMaxX()); - const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); + // Round the size up to the next block + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + const int tex_width = (std::max(64 * TEX0.TBW, region.GetMaxX()) + (psm_s.bs.x - 1)) & ~(psm_s.bs.x - 1); + const int tex_height = ((region.HasY() ? 
region.GetHeight() : (1 << TEX0.TH)) + (psm_s.bs.y - 1)) & ~(psm_s.bs.y - 1); const int scaled_width = static_cast(static_cast(tex_width) * scale); const int scaled_height = static_cast(static_cast(tex_height) * scale); @@ -6602,9 +6658,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } - + // Else No valid size, so need to resize down. // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6612,13 +6670,18 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) { + if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize) + DevCon.Warning("Here"); + if (m_valid.eq(GSVector4i::zero())) { m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } else if (can_resize) { @@ -6626,7 +6689,9 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, 
m_valid.w); } @@ -6997,6 +7062,29 @@ void GSTextureCache::InvalidateTemporarySource() m_temporary_source = nullptr; } +void GSTextureCache::SetTemporaryZ(GSTexture* temp_z) +{ + m_temporary_z = temp_z; +} + +GSTexture* GSTextureCache::GetTemporaryZ() +{ + if (!m_temporary_z) + return nullptr; + + return m_temporary_z; +} + + +void GSTextureCache::InvalidateTemporaryZ() +{ + if (!m_temporary_z) + return; + + g_gs_device->Recycle(m_temporary_z); + m_temporary_z = nullptr; +} + void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax) { // When we insert we update memory usage. Old texture gets removed below. diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 024dfbbed66db..8dc7315f55d74 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -427,6 +427,7 @@ class GSTextureCache std::unordered_map m_surface_offset_cache; Source* m_temporary_source = nullptr; // invalidated after the draw + GSTexture* m_temporary_z = nullptr; // invalidated after the draw std::unique_ptr m_color_download_texture; std::unique_ptr m_uint16_download_texture; @@ -508,7 +509,7 @@ class GSTextureCache bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits::max(), bool move_front = true); bool Has32BitTarget(u32 bp); - void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32); + void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1); void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); @@ -551,6 +552,11 @@ class GSTextureCache /// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current 
draw. void InvalidateTemporarySource(); + void SetTemporaryZ(GSTexture* temp_z); + GSTexture* GetTemporaryZ(); + + /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is + void InvalidateTemporaryZ(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax); diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index b7c2f99c1cd5d..856c198b7d95d 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,11 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; } else if(PS_PROCESS_BA & SHUFFLE_READ) { From 8857736b62ae5b6528e874a3c434ac9caaaaa16d Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 6 Jan 2025 17:34:30 +0000 Subject: [PATCH 05/28] GS/HW: Fixes to texture is target offsets --- bin/resources/shaders/dx11/tfx.fx | 4 +- bin/resources/shaders/opengl/tfx_fs.glsl | 4 +- bin/resources/shaders/vulkan/tfx.glsl | 4 +- pcsx2/GS/GSState.cpp | 5 +- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 6 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 123 ++++++++++++++++------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 58 +++++++---- pcsx2/GS/Renderers/Metal/tfx.metal | 4 +- 8 files changed, 140 insertions(+), 68 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index bcf812c83db1c..d7cf69fa7253b 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,8 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag 
= C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 257fbc42d211e..47443659f2af0 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,8 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index a6335daf3133a..f7a7133bab925 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1350,8 +1350,8 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 86f1b3a2690fa..9f27a481823cf 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -467,7 +467,8 @@ void GSState::DumpVertices(const std::string& filename) file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.R) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.G) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.B) << DEL; - file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A); + file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A) << DEL; + file << "FOG: " << std::setfill('0') << std::setw(3) << unsigned(v.FOG); file << std::endl; } @@ -3100,7 +3101,7 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) { // Pretty confident here... 
GSVertex* buffer = &m_vertex.buff[0]; - const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X); + const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64; if (const_spacing) return false; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 2d7239692d331..58c50748d788e 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1047,7 +1047,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, // compute shadow in RG, // save result in alpha with a TS, // Restore RG channel that we previously copied to render shadows. - + // Important note: The game downsizes the target to half height, then later expands it back up to full size, that's why PCSX2 doesn't like it, we don't support that behaviour. 
const GIFRegTEX0& Texture = RTEX0; GIFRegTEX0 Frame = {}; @@ -1058,9 +1058,9 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, if ((!rt) || (!RPRIM->TME) || (GSLocalMemory::m_psm[Texture.PSM].bpp != 16) || (GSLocalMemory::m_psm[Frame.PSM].bpp != 16) || (Texture.TBP0 == Frame.TBP0) || (Frame.TBW != 16 && Texture.TBW != 16)) return true; - GL_INS("OI_SonicUnleashed replace draw by a copy"); + GL_INS("OI_SonicUnleashed replace draw by a copy draw %d", r.s_n); - GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget); + GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, 0, false, false, true, true, GSVector4i::zero(), true); if (!src) return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 70235932ee9af..a94b576f58c47 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8 && tex && tex->m_from_target && rt == tex->m_from_target; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? 
SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -733,10 +733,25 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.p.y = floor(m_vt.m_max.p.y + 1.9f) / 2.0f; } - m_context->scissor.in.x = m_vt.m_min.p.x; - m_context->scissor.in.z = m_vt.m_max.p.x + 0.9f; - m_context->scissor.in.y = m_vt.m_min.p.y; - m_context->scissor.in.w = m_vt.m_max.p.y + 0.9f; + if (m_context->scissor.in.x & 8) + { + m_context->scissor.in.x &= ~0xf;//m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.x /= 2; + } + if (m_context->scissor.in.z & 8) + { + m_context->scissor.in.z += 8; //m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.z /= 2; + } + if (half_bottom_vert) + { + m_context->scissor.in.y /= 2; + m_context->scissor.in.w /= 2; + } // Only do this is the source is being interpreted as 16bit if (half_bottom_uv) @@ -2581,27 +2596,11 @@ void GSRendererHW::Draw() bool shuffle_target = false; if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) { - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) - { - // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. 
- FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = m_cached_ctx.FRAME.FBW; - FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - - GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, - fm, false, false, false, false, GSVector4i::zero(), true); - - if (tgt) - shuffle_target = tgt->m_32_bits_fmt; - - tgt = nullptr; - } if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) { const GSVertex* v = &m_vertex.buff[0]; - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; + const int first_x = std::abs(static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4; const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); // offset coordinates swap around RG/BA. (Ace Combat) @@ -2615,6 +2614,23 @@ void GSRendererHW::Draw() shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } + + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 || !shuffle_target) + { + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. 
+ FRAME_TEX0.U64 = 0; + FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = m_cached_ctx.FRAME.FBW; + FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; + + GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, + fm, false, false, false, false, GSVector4i::zero(), true); + + if (tgt) + shuffle_target = tgt->m_32_bits_fmt; + + tgt = nullptr; + } } possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); @@ -2839,6 +2855,7 @@ void GSRendererHW::Draw() if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); + GSVector4i lookup_rect = unclamped_draw_rect; // Do the lookup with the real format on a shuffle, if possible. if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) { @@ -2852,6 +2869,22 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = next_ctx.TEX0.PSM; else FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + + // This is just for overlap detection, it doesn't matter which direction we do this in + if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32) + { + // Shuffling with a double width (Sonic Unleashed for example which does a wierd shuffle/not shuffle green backup/restore). 
+ if (src && std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) + { + lookup_rect.x /= 2; + lookup_rect.z /= 2; + } + else + { + lookup_rect.y /= 2; + lookup_rect.w /= 2; + } + } } // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead @@ -2865,7 +2898,7 @@ void GSRendererHW::Draw() const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. @@ -2904,7 +2937,7 @@ void GSRendererHW::Draw() else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. 
- + int texture_offset = 0; const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); @@ -2915,6 +2948,7 @@ void GSRendererHW::Draw() GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; // Make sure to use the original format for the offset. int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + texture_offset = new_offset; new_scaled_size.y += new_offset * rt->m_scale; GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); @@ -2925,18 +2959,13 @@ void GSRendererHW::Draw() g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); - if (src && src->m_from_target && src->m_from_target == rt) - { - src->m_texture = rt->m_texture; - src->m_target_direct = false; - src->m_shared_texture = false; - } - else + if (src && src->m_from_target && src->m_from_target == rt && src->m_target_direct) { - //m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(rt->m_texture); + src->m_texture = tex; } + g_gs_device->Recycle(rt->m_texture); + rt->m_valid.y += new_offset; rt->m_valid.w += new_offset; rt->m_drawn_since_read.y += new_offset; @@ -2967,8 +2996,26 @@ void GSRendererHW::Draw() for (u32 i = 0; i < m_vertex.tail; i++) { - v[i].XYZ.Y += vertical_offset << 4; v[i].XYZ.X += horizontal_offset << 4; + v[i].XYZ.Y += vertical_offset << 4; + } + + if (texture_offset && src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) + { + GSVector4i src_region = src->GetRegionRect(); + + if (src_region.rempty()) + { + src_region = GSVector4i::loadh(rt->m_unscaled_size); + src_region.y += texture_offset; + } 
+ else + { + src_region.y += texture_offset; + src_region.w += texture_offset; + } + src->m_region.SetX(src_region.x, src_region.z); + src->m_region.SetY(src_region.y, src_region.w); } m_context->scissor.in.x += horizontal_offset; @@ -3013,6 +3060,7 @@ void GSRendererHW::Draw() src->m_texture = rt->m_texture; src->m_scale = rt->GetScale(); src->m_unscaled_size = rt->m_unscaled_size; + } target_scale = rt->GetScale(); @@ -3423,7 +3471,7 @@ void GSRendererHW::Draw() GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); // May not be needed/could cause problems with garbage loaded from GS memory - if (preserve_rt_color) + /*if (preserve_rt_color) { RGBAMask mask; mask._u32 = 0xF; @@ -3439,7 +3487,7 @@ void GSRendererHW::Draw() GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); } - } + }*/ rt->ResizeTexture(new_w, new_h); @@ -3494,10 +3542,11 @@ void GSRendererHW::Draw() const bool new_rect = ds->m_valid.rempty(); const bool new_height = new_h > ds->GetUnscaledHeight(); const int old_height = ds->m_texture->GetHeight(); - const GSVector4i old_rect = ds->GetUnscaledRect(); + pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); + ds->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index f5cb8661705b8..734c867ff281d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -236,7 +236,7 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db // The page width matches. 
// The rect width is less than the width of the destination texture and the height is less than or equal to 1 page high. // The rect width and height is equal to the page size and it covers the width of the incoming bw, so lines are sequential. - const bool page_aligned_rect = masked_rect.eq(r); + const bool page_aligned_rect = masked_rect.xyxy().eq(r.xyxy()); const bool width_match = ((bw * 64) / src_page_size.x) == ((dbw * 64) / dst_page_size.x); const bool sequential_pages = page_aligned_rect && r.x == 0 && r.z == src_pixel_width; const bool single_row = (((bw * 64) / src_page_size.x) <= ((dbw * 64) / dst_page_size.x)) && r.z <= src_pixel_width && r.w <= src_page_size.y; @@ -277,12 +277,12 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. - if (in_rect.x >= (dst_pgw * dst_page_size.x)) + if (in_rect.x >= (src_pgw * src_page_size.x)) { - in_rect.z -= dst_pgw * dst_page_size.x; - in_rect.x -= dst_pgw * dst_page_size.x; - in_rect.y += dst_page_size.y; - in_rect.w += dst_page_size.y; + in_rect.z -= src_pgw * src_page_size.x; + in_rect.x -= src_pgw * src_page_size.x; + in_rect.y += src_page_size.y; + in_rect.w += src_page_size.y; } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; @@ -1458,13 +1458,24 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. 
(GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups. + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (t->m_TEX0.TBW != bw && (t->m_TEX0.TBW * 2) != bw)) + { + DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && + !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (req_rect.w < GSLocalMemory::m_psm[psm].pgs.y))))) + { + DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } // PSM equality needed because CreateSource does not handle PSM conversion. // Only inclusive hit to limit false hits. 
GSVector4i rect = req_rect; @@ -1600,7 +1611,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || - (offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw)) + ((offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw))) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), @@ -1915,7 +1926,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) @@ -2607,7 +2618,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if (valid_draw_size && supported_fmt) { - const GSVector4i newrect = GSVector4i::loadh(valid_size); + const GSVector4i newrect = GSVector4i::loadh(size); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); RGBAMask rgba; @@ -3217,7 +3228,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr continue; } - const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; + //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. 
/*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) { @@ -4322,8 +4333,8 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU. { Target* t = *it; - - if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + const u32 tgt_bw = std::max(t->m_TEX0.TBW, 1U); + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % tgt_bw) == 0)) && tgt_bw == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -5040,8 +5051,22 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); - src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw); - src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th); + // Adjust the region for the newly translated rect. 
+ u32 const dst_y_height = GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y; + u32 const src_y_height = GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + u32 const dst_page_offset = (y_offset / dst_y_height) * std::max(dst->m_TEX0.TBW, 1U); + y_offset = (dst_page_offset / (std::max(TEX0.TBW / 2U, 1U))) * src_y_height; + + u32 const src_page_width = GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + x_offset = (x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + if (x_offset >= static_cast(std::max(TEX0.TBW, 1U) * src_page_width)) + { + const u32 adjust = x_offset / src_page_width; + y_offset += adjust * GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + x_offset -= src_page_width * adjust; + } + src->m_region.SetX(x_offset, x_offset + tw); + src->m_region.SetY(y_offset, y_offset + th); } else { @@ -6670,9 +6695,6 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) { - if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize) - DevCon.Warning("Here"); - if (m_valid.eq(GSVector4i::zero())) { m_valid = rect; diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 856c198b7d95d..e020e82577923 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,8 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { From 12cdc8698469643254bf0324dd61926cc2c58131 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 11 Jan 2025 01:29:08 +0000 Subject: [PATCH 06/28] GS/HW: More alterations for new RT in RT system --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 26 ++--- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 121 ++++++++++++++--------- 2 files changed, 90 insertions(+), 57 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp 
b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index a94b576f58c47..6c7cbe0bd2c65 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2594,20 +2594,23 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && + (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true)))) { if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) { const GSVertex* v = &m_vertex.buff[0]; - const int first_x = std::abs(static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); - const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); + const int first_x = std::clamp((static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4, 0, 2048); + const bool offset_last = PRIM->FST ? (v[1].U > v[0].U) : ((v[1].ST.S / v[1].RGBAQ.Q) > (v[0].ST.S / v[1].RGBAQ.Q)); + const int first_u = PRIM->FST ? ((v[0].U + (offset_last ? 0 : 9)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.0f : 0.6f)), 0, 2048); + const int second_u = PRIM->FST ? ((v[1].U + (offset_last ? 9 : 0)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.6f : 0.0f)), 0, 2048); // offset coordinates swap around RG/BA. 
(Ace Combat) const u32 minv = m_cached_ctx.CLAMP.MINV; const u32 minu = m_cached_ctx.CLAMP.MINU; const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); - const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; + const bool shuffle_coords = ((first_x ^ first_u) & 0xF) == 8 || rgba_shuffle; + // Round up half of second coord, it can sometimes be slightly under. const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; const int read_width = std::abs(second_u - first_u); @@ -3038,14 +3041,13 @@ void GSRendererHW::Draw() // Don't resize if the BPP don't match. if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - if (m_r.w > rt->m_unscaled_size.y) + if (m_r.w > rt->m_unscaled_size.y || m_r.z > rt->m_unscaled_size.x) { - u32 new_height = m_r.w; + u32 new_height = std::max(m_r.w, rt->m_unscaled_size.y); + u32 new_width = std::max(m_r.z, rt->m_unscaled_size.x); - if (possible_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) - new_height /= 2; //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); - rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->ResizeTexture(new_height, new_height); const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); @@ -3650,7 +3652,7 @@ void GSRendererHW::Draw() if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), psm_str(m_cached_ctx.FRAME.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), rt->m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); if (rt->m_texture) rt->m_texture->Save(s); @@ -3658,7 +3660,7 @@ void 
GSRendererHW::Draw() if (ds && GSConfig.SaveDepth && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), ds->m_TEX0.TBP0, psm_str(m_cached_ctx.ZBUF.PSM)); if (g_texture_cache->GetTemporaryZ()) g_texture_cache->GetTemporaryZ()->Save(s); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 734c867ff281d..1529daced9b4e 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -149,8 +149,7 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm if (rect.rempty()) return; - if (rect.w > 2048) - DevCon.Warning("BAd"); + std::vector::iterator it = target->m_dirty.end(); while (it != target->m_dirty.begin()) { @@ -337,8 +336,22 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw // Results won't be square, if it's not invalidation, it's a texture, which is problematic to translate, so let's not (FIFA 2005). if (!is_invalidation) { - DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); - return GSVector4i::zero(); + if (sbp != tbp) + { + // Just take the start page, as this is likely tex in rt, and that's all we care about. + const u32 start_page = (in_rect.y / src_page_size.y) + (in_rect.x / src_page_size.x); + in_rect.x = (start_page % dst_pgw) * dst_page_size.x; + in_rect.y = (start_page / dst_pgw) * dst_page_size.y; + in_rect.z = in_rect.x + dst_page_size.x; + in_rect.w = in_rect.y + dst_page_size.y; + + return in_rect; + } + else + { + DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); + return GSVector4i::zero(); + } } //TODO: Maybe control dirty blocks directly and add them page at a time for better granularity. 
@@ -1465,13 +1478,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; - if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (t->m_TEX0.TBW != bw && (t->m_TEX0.TBW * 2) != bw)) + u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % t->m_TEX0.TBW; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + ((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) || + (t->m_TEX0.TBW != bw && block_boundary_rect.w > GSLocalMemory::m_psm[psm].pgs.y)))) { DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; } - else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && - !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (req_rect.w < GSLocalMemory::m_psm[psm].pgs.y))))) + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (block_boundary_rect.w <= GSLocalMemory::m_psm[psm].pgs.y))))) { DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; @@ -1935,21 +1951,17 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((min_rect.z >> 6) + widthpage_offset) <= TEX0.TBW) 
|| ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) - { /*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ - // If it's too old, it's probably not a real target to jump in to anymore. - /*if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && - !(widthpage_offset == 0 || min_rect.width() <= 64 || - (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) - { - GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); - InvalidateSourcesFromTarget(t); - i = list.erase(i); - delete t; - } - else*/ - if (!is_shuffle && !GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM)) + const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && + ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || + ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || + min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && + (static_cast(min_rect.width()) <= (widthpage_offset * 64)))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; + + if (!is_shuffle && (!GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM) || + (widthpage_offset % std::max(t->m_TEX0.TBW, 1U)) != 0 && ((widthpage_offset + (min_rect.width() + (s_psm.pgs.x - 1)) / s_psm.pgs.x)) > t->m_TEX0.TBW)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM 
%s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); @@ -1958,7 +1970,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe continue; } - else + else if (t->m_dirty.empty()) { //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); dst = t; @@ -2136,27 +2148,29 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); - - if (scale_down) + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16) { - if ((new_size.y * 2) < 1024) + if (scale_down) + { + if ((new_size.y * 2) < 1024) + { + new_scaled_size.y *= 2; + new_size.y *= 2; + dst->m_valid.y *= 2; + dst->m_valid.w *= 2; + } + dRect.y *= 2; + dRect.w *= 2; + } + else { - new_scaled_size.y *= 2; - new_size.y *= 2; - dst->m_valid.y *= 2; - dst->m_valid.w *= 2; + new_scaled_size.y /= 2; + new_size.y /= 2; + dRect.y /= 2; + dRect.w /= 2; + dst->m_valid.y /= 2; + dst->m_valid.w /= 2; } - dRect.y *= 2; - dRect.w *= 2; - } - else - { - new_scaled_size.y /= 2; - new_size.y /= 2; - dRect.y /= 2; - dRect.w /= 2; - dst->m_valid.y /= 2; - dst->m_valid.w /= 2; } if (!is_shuffle) { @@ -2188,9 +2202,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_texture = tex; dst->m_unscaled_size = new_size; } - // New format or doing a shuffle to a 32bit target that used to be 16bit - dst->m_TEX0.PSM = TEX0.PSM; + // New format or doing a shuffle to a 32bit target that used to be 16bit + if (!is_shuffle) + dst->m_TEX0.PSM = TEX0.PSM; + // LEGO Dome Racers does a copy to a target as 8bit in alpha only, this doesn't really work great for 
us, so let's make it 32bit with invalid RGB. + else if (dst->m_TEX0.PSM == PSMT8H) + { + //dst->m_TEX0.PSM = PSMCT32; + dst->m_valid_rgb = false; + } } // If our RGB was invalidated, we need to pull it from depth. @@ -2315,7 +2336,15 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { continue; } - + // If the format is completely different, but it's the same location, it's likely just overwriting it, so get rid. + if (!is_shuffle && t->m_TEX0.TBW != TEX0.TBW && TEX0.TBW != 1 && !preserve_rgb && min_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) + { + DevCon.Warning("Deleting Z draw %d", GSState::s_n); + InvalidateSourcesFromTarget(t); + i = rev_list.erase(i); + delete t; + continue; + } const GSLocalMemory::psm_t& t_psm_s = GSLocalMemory::m_psm[t->m_TEX0.PSM]; if (t_psm_s.bpp != psm_s.bpp) { @@ -2486,7 +2515,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe pxAssert(dst && dst->m_texture && dst->m_scale == scale); } - + if (dst && dst->m_TEX0.TBP0 == 0x3f80 && dst->m_TEX0.PSM == 0) + DevCon.Warning("It's 32bit on draw %d", GSState::s_n); return dst; } @@ -2819,6 +2849,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); + // Clear the dirty first t->Update(); dst->Update(); From 2d62f2f3cd24e4c8ff3d37ea073c4617dd12c2f8 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sun, 12 Jan 2025 06:33:38 +0000 Subject: [PATCH 07/28] GS/HW: More changes some regressions --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 15 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 254 ++++++++++++++--------- 2 files changed, 165 insertions(+), 104 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 6c7cbe0bd2c65..7f4824b399197 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ 
b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2618,7 +2618,7 @@ void GSRendererHW::Draw() shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 || !shuffle_target) + if (!shuffle_target) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; @@ -2704,7 +2704,7 @@ void GSRendererHW::Draw() // Urban Reign trolls by scissoring a draw to a target at 0x0-0x117F to 378x449 which ends up the size being rounded up to 640x480 // causing the buffer to expand to around 0x1400, which makes a later framebuffer at 0x1180 to fail to be created correctly. // We can cheese this by checking if the Z is masked and the resultant colour is going to be black anyway. - const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 && m_context->ALPHA.B == 0 && GetAlphaMinMax().min >= 128) || m_context->ALPHA.IsBlack()) && m_draw_env->COLCLAMP.CLAMP == 1; + const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 || m_context->ALPHA.IsBlack()) && m_context->ALPHA.D != 1) && m_draw_env->COLCLAMP.CLAMP == 1; const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. @@ -2941,7 +2941,7 @@ void GSRendererHW::Draw() { int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. 
int texture_offset = 0; - const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; + int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); @@ -2980,6 +2980,13 @@ void GSRendererHW::Draw() vertical_offset = 0; } + if (horizontal_offset < 0) + { + // Thankfully this doesn't really happen, but catwoman moves the framebuffer backwards 1 page with a channel shuffle, which is really messy and not easy to deal with. + // Hopefully the quick channel shuffle will just guess this and run with it. + rt->m_TEX0.TBP0 += horizontal_offset; + horizontal_offset = 0; + } // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? 
if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) { @@ -3029,7 +3036,7 @@ void GSRendererHW::Draw() m_r.w += vertical_offset; m_r.x += horizontal_offset; m_r.z += horizontal_offset; - m_in_target_draw = true; + m_in_target_draw = rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block(); m_vt.m_min.p.x += horizontal_offset; m_vt.m_max.p.x += horizontal_offset; m_vt.m_min.p.y += vertical_offset; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 1529daced9b4e..1fefad333c327 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1863,6 +1863,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe Target* dst = nullptr; auto& list = m_dst[type]; + const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y)); // TODO: Move all frame stuff to its own routine too. if (!is_frame) @@ -1951,12 +1952,18 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && + /*const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && - (static_cast(min_rect.width()) <= (widthpage_offset * 64)))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + (static_cast(min_rect.width()) <= (widthpage_offset * 64))));*/ + const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= 
static_cast((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0); + const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))); + const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1); + // if it's a shuffle, some games tend to offset back by a page, such as Tomb Raider, for no disernable reason, but it then causes problems. + // This can also happen horizontally (Catwoman moves everything one page left with shuffles), but this is too messy to deal with right now. + const bool overlaps = t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect) || (is_shuffle && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect + GSVector4i(0, 0, 0, 32))); + if (no_target_or_newer && is_aligned_ok && width_match && overlaps) { const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; @@ -2204,12 +2211,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } // New format or doing a shuffle to a 32bit target that used to be 16bit - if (!is_shuffle) + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp < GSLocalMemory::m_psm[TEX0.PSM].bpp) dst->m_TEX0.PSM = TEX0.PSM; // LEGO Dome Racers does a copy to a target as 8bit in alpha only, this doesn't really work great for us, so let's make it 32bit with invalid RGB. else if (dst->m_TEX0.PSM == PSMT8H) { - //dst->m_TEX0.PSM = PSMCT32; + dst->m_TEX0.PSM = PSMCT32; dst->m_valid_rgb = false; } } @@ -2515,8 +2522,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe pxAssert(dst && dst->m_texture && dst->m_scale == scale); } - if (dst && dst->m_TEX0.TBP0 == 0x3f80 && dst->m_TEX0.PSM == 0) - DevCon.Warning("It's 32bit on draw %d", GSState::s_n); + return dst; } @@ -2536,7 +2542,8 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe // Avoid making garbage targets (usually PCRTC). 
if (GSVector4i::loadh(size).rempty()) return nullptr; - + if (TEX0.TBP0 == 0x3320 || TEX0.TBP0 == 0x32a0) + DevCon.Warning("Making target %x on draw %d", TEX0.TBP0, GSState::s_n); Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true); if (!dst) [[unlikely]] return nullptr; @@ -2787,113 +2794,113 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons auto j = i; Target* t = *j; - if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM/* && t->m_TEX0.TBW == dst->m_TEX0.TBW*/) - if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) + if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && + static_cast(((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) / 32) % std::max(dst->m_TEX0.TBW, 1U)) <= std::max(0, static_cast(dst->m_TEX0.TBW - t->m_TEX0.TBW))) + { + const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + + // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. + // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. + if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) { - const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; - // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. - // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. - if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) - { - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; + continue; + } + // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. 
+ if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + { + GSVector4i new_valid = t->m_valid; + new_valid.w /= 2; + GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); + t->ResizeValidity(new_valid); + return hw_clear.value_or(false); + } + // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. + else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + { + const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; + const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); + const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + + if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) + { + // No overlap top copy or the widths don't match. + i++; continue; } - // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. - if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? 
(dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) { - GSVector4i new_valid = t->m_valid; - new_valid.w /= 2; - GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); - t->ResizeValidity(new_valid); - return hw_clear.value_or(false); + // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. + DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); + i++; + continue; } - // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. - else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + + const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; + const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; + const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; + const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); + + if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) { - const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; - const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); - const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? 
(dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; + int copy_height = (texture_height - dst_offset_height) * t->m_scale; - if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) - { - // No overlap top copy or the widths don't match. - i++; - continue; - } + GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); - const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? (dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + // Clear the dirty first + t->Update(); + dst->Update(); - if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) + // Clamp it if it gets too small, shouldn't happen but stranger things have happened. + if (copy_width < 0) { - // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. - DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); - i++; - continue; + copy_width = 0; } - const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; - const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; - const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; - const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); - - if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) + // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. 
+ if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) { - int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; - int copy_height = (texture_height - dst_offset_height) * t->m_scale; - - GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - - - // Clear the dirty first - t->Update(); - dst->Update(); - - // Clamp it if it gets too small, shouldn't happen but stranger things have happened. - if (copy_width < 0) - { - copy_width = 0; - } - - // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. - if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) - { - const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); - const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); - g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); - } - else + const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); + const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); + g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); + } + else + { + if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * 
dst->m_scale)) { - if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * dst->m_scale)) - { - copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); - copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); - } - - g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); + copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); + copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); } - } - // src is using this target, so point it at the new copy. - if (src && src->m_target && src->m_from_target == t) - { - src->m_from_target = dst; - src->m_texture = dst->m_texture; - src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); - src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); + g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); } + } - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; - continue; + // src is using this target, so point it at the new copy. 
+ if (src && src->m_target && src->m_from_target == t) + { + src->m_from_target = dst; + src->m_texture = dst->m_texture; + src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); + src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); } + + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; + continue; } + } i++; } } @@ -3244,7 +3251,7 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) +/*void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3261,16 +3268,63 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. 
- /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) + //if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) + //{ + // const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; + // const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + // const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + // GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + // InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); + // ++i; + // continue; + //} + + InvalidateSourcesFromTarget(t); + + t->m_valid_alpha_low &= preserve_alpha; + t->m_valid_alpha_high &= preserve_alpha; + t->m_valid_rgb &= !(t->m_TEX0.TBP0 == start_bp); + + // Don't keep partial depth buffers around. 
+ if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil) + { + auto& rev_list = m_dst[1 - type]; + for (auto j = rev_list.begin(); j != rev_list.end();) + { + Target* const rev_t = *j; + if (rev_t->m_TEX0.TBP0 == t->m_TEX0.TBP0 && GSLocalMemory::m_psm[rev_t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) + { + rev_t->m_was_dst_matched = false; + break; + } + ++j; + } + + GL_CACHE("TC: InvalidateContainedTargets: Remove Target %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + i = list.erase(i); + delete t; + continue; + } + + GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + ++i; + } + } +}*/ +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) +{ + const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); + for (int type = 0; type < 2; type++) + { + auto& list = m_dst[type]; + for (auto i = list.begin(); i != list.end();) + { + Target* const t = *i; + if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) { - const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; - const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); - const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; - GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); - InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); ++i; continue; - }*/ + } InvalidateSourcesFromTarget(t); From 547c4aba1dd474078b9610235aefce07d8777f62 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 14 Jan 2025 13:47:18 +0000 Subject: [PATCH 08/28] GS/HW: Fix offset Z channel shuffle hazard. 
Adjust Tekken 5 CRC --- bin/resources/GameIndex.yaml | 11 +++++++++++ pcsx2/GS/Renderers/HW/GSHwHack.cpp | 8 +------- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 3 ++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index b91c1e33d702a..c2970377263fd 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -1967,6 +1967,7 @@ SCAJ-20125: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -1977,6 +1978,7 @@ SCAJ-20126: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -2454,6 +2456,7 @@ SCAJ-20199: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -4149,6 +4152,7 @@ SCED-53538: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -5769,6 +5773,7 @@ SCES-53202: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -7212,6 +7217,7 @@ SCKA-20049: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. 
gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -7435,6 +7441,7 @@ SCKA-20081: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -57371,6 +57378,7 @@ SLPS-25510: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -60540,6 +60548,7 @@ SLPS-73223: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -66541,6 +66550,7 @@ SLUS-21059: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -67085,6 +67095,7 @@ SLUS-21160: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. 
diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 58c50748d788e..02516692d5032 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -194,7 +194,7 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) return true; } - if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) + if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && !r.PRIM->ABE && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) { // Don't enable hack on native res. // Fixes ghosting/blur effect and white lines appearing in stages: Moonfit Wilderness, Acid Rain - caused by upscaling. @@ -204,12 +204,6 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) const GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y, r.m_vt.m_max.t.x + 0.5f, r.m_vt.m_max.t.y + 0.5f); r.ReplaceVerticesWithSprite(draw_size, read_size, GSVector2i(read_size.width(), read_size.height()), draw_size); } - else if (RZTST == 1 && RTME && (RFBP == 0x02bc0 || RFBP == 0x02be0 || RFBP == 0x02d00 || RFBP == 0x03480 || RFBP == 0x034a0) && RFPSM == RTPSM && RTBP0 == 0x00000 && RTPSM == PSMCT32) - { - // The moving display effect(flames) is not emulated properly in the entire screen so let's remove the effect in the stage: Burning Temple. Related to half screen bottom issue. - // Fixes black lines in the stage: Burning Temple - caused by upscaling. Note the black lines can also be fixed with Merge Sprite hack. 
- skip = 2; - } } return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 7f4824b399197..0547ce0ad10cd 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5655,7 +5655,8 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is render target, taking copy."); src_target = rt; } - else if (m_conf.tex == m_conf.ds) + // Be careful of single page channel shuffles where depth is the source but it's not going to the same place, we can't read this directly. + else if (m_conf.tex == m_conf.ds && (!m_channel_shuffle || static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) == static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0))) { // GL, Vulkan (in General layout), not DirectX! const bool can_read_current_depth_buffer = g_gs_device->Features().test_and_sample_depth; From b8d5b36923202995136b89ce8e3c77412bec8520 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 14 Jan 2025 22:51:30 +0000 Subject: [PATCH 09/28] GS/HW: Fix some back to back shuffles and inside source invalidation --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 27 +++++++++++++----------- pcsx2/GS/Renderers/HW/GSRendererHW.h | 1 + pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 21 ++++++++++-------- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 0547ce0ad10cd..3f7e66f8b1dfe 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -936,8 +936,8 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // Round up the page as channel shuffles are generally done in pages at a time // Keep in mind the source might be an 8bit texture - int src_width = tex->GetUnscaledWidth(); - int src_height = tex->GetUnscaledHeight(); + int src_width = 
tex->m_from_target ? tex->m_from_target->m_valid.width() : tex->GetUnscaledWidth(); + int src_height = tex->m_from_target ? tex->m_from_target->m_valid.height() : tex->GetUnscaledHeight(); if (!tex->m_from_target && GSLocalMemory::m_psm[tex->m_TEX0.PSM].bpp == 8) { @@ -2064,9 +2064,7 @@ void GSRendererHW::Draw() DumpVertices(s); } -#ifdef ENABLE_OGL_DEBUG static u32 num_skipped_channel_shuffle_draws = 0; -#endif // We mess with this state as an optimization, so take a copy and use that instead. const GSDrawingContext* context = m_context; @@ -2090,24 +2088,26 @@ void GSRendererHW::Draw() // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && - m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block(); + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block() && + m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; -#ifdef ENABLE_OGL_DEBUG if (m_channel_shuffle) { + m_last_channel_shuffle_fbp = m_context->FRAME.Block(); + m_last_channel_shuffle_tbp = m_context->TEX0.TBP0; + num_skipped_channel_shuffle_draws++; return; } +#ifdef ENABLE_OGL_DEBUG if (num_skipped_channel_shuffle_draws > 0) - GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); + GL_CACHE("Skipped %d channel shuffle draws ending at %d", num_skipped_channel_shuffle_draws, s_n); +#endif num_skipped_channel_shuffle_draws = 0; m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_tbp = 0xffff; m_last_channel_shuffle_end_block = 0xffff; -#else - if (m_channel_shuffle) - return; -#endif } GL_PUSH("HW Draw %d (Context %u)", s_n, PRIM->CTXT); @@ -3081,6 +3081,7 @@ void GSRendererHW::Draw() if (m_channel_shuffle) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + 
m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); @@ -3222,6 +3223,7 @@ void GSRendererHW::Draw() if (rt) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // Urban Chaos goes from Z16 to C32, so let's just use the rt's original end block. if (!src->m_from_target || GSLocalMemory::m_psm[src->m_from_target_TEX0.PSM].bpp != GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) m_last_channel_shuffle_end_block = rt->m_end_block; @@ -5631,8 +5633,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) { - const int tex_diff = tex->m_from_target ? static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : 0; + const int tex_diff = tex->m_from_target ? static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_TEX0.TBP0); const int frame_diff = rt ? 
static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; + // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 07228fb6ab3c1..df2d45d25ce7c 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -174,6 +174,7 @@ class GSRendererHW : public GSRenderer u32 m_last_channel_shuffle_fbmsk = 0; u32 m_last_channel_shuffle_fbp = 0; + u32 m_last_channel_shuffle_tbp = 0; u32 m_last_channel_shuffle_end_block = 0; GIFRegFRAME m_split_clear_start = {}; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 1fefad333c327..093a72130b1c5 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2542,8 +2542,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe // Avoid making garbage targets (usually PCRTC). if (GSVector4i::loadh(size).rempty()) return nullptr; - if (TEX0.TBP0 == 0x3320 || TEX0.TBP0 == 0x32a0) - DevCon.Warning("Making target %x on draw %d", TEX0.TBP0, GSState::s_n); + Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true); if (!dst) [[unlikely]] return nullptr; @@ -3428,6 +3427,12 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r const u32 bw = off.bw(); const u32 psm = off.psm(); + // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. + // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, + // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. 
+ const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); + const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); + if (!target) { // Remove Source that have same BP as the render target (color&dss) @@ -3438,7 +3443,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r Source* s = *i; ++i; - if (GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM) || + if ((GSUtil::HasSharedBits(psm, s->m_TEX0.PSM) && (bp >= start_bp && bp < end_bp)) || (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) { m_src.RemoveAt(s); @@ -3535,11 +3540,6 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (!target) return; - // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. - // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, - // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. - const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); - const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); RGBAMask rgba; rgba._u32 = GSUtil::GetChannelMask(psm); @@ -4819,6 +4819,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture = dst->m_texture; src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; + + if(channel_shuffle) + m_temporary_source = src; } // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. 
@@ -5074,7 +5077,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } // kill source immediately if it's the RT/DS, because that'll get invalidated immediately - if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0) || channel_shuffle) { GL_CACHE("TC: Source is RT or ZBUF, invalidating after draw."); m_temporary_source = src; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 8dc7315f55d74..7f7d02c3aa4d3 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -518,7 +518,7 @@ class GSTextureCache /// Removes any sources which point to the specified target. void InvalidateSourcesFromTarget(const Target* t); - /// Replaces a source's texture externally. Required for some CRC hacks. + /// Removes any sources which point to the same address as a new target. void ReplaceSourceTexture(Source* s, GSTexture* new_texture, float new_scale, const GSVector2i& new_unscaled_size, HashCacheEntry* hc_entry, bool new_texture_is_shared); From da261c154755ae7ed9c8805e671cc442ca6b3d49 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 15 Jan 2025 00:52:59 +0000 Subject: [PATCH 10/28] GS/HW: Sync depth texture information when updating dst_match --- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 093a72130b1c5..db111e26bc7d5 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1386,6 +1386,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const DevCon.Warning("Failed to update dst matched texture"); } t->m_valid_rgb = true; + t->m_TEX0 = dst_match->m_TEX0; break; } } @@ -4007,19 +4008,6 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, 
u32 SPSM, int sx, int sy, u32 DBP, u if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32)) return false; - // This is probably copying to a new buffer but using the original one as an offset, so better to use a new texture, if we don't find one. - if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) - { - u32 new_DBP = DBP + (((dy / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * DBW) << 5); - - dst = nullptr; - - DBP = new_DBP; - dy = 0; - - dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); - } - // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) { @@ -4032,7 +4020,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); } - + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. // We use dx/dy == 0 and the TBW check as a safeguard to make sure these go through to local memory. // We can also recreate the target if it's previously been created in the height cache with a valid size. 
From 6c9a115ecc5e88fe252cd0cde6886f119e530f69 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 16 Jan 2025 01:37:46 +0000 Subject: [PATCH 11/28] GS/HW: Fixes for Tex in RT and shuffle detection --- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 3 ++- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 16 +++++++++----- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 28 +++++++----------------- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 02516692d5032..4f3c791092516 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1080,7 +1080,8 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, const GSVector2i copy_size(std::min(rt_size.x, src_size.x), std::min(rt_size.y, src_size.y)); const GSVector4 sRect(0.0f, 0.0f, static_cast(copy_size.x) / static_cast(src_size.x), static_cast(copy_size.y) / static_cast(src_size.y)); - const GSVector4 dRect(0, 0, copy_size.x, copy_size.y); + // This is kind of a bodge because the game confuses everything since the source is really 16bit and it assumes it's really drawing 16bit on the copy back, resizing the target. + const GSVector4 dRect(0, 0, copy_size.x, copy_size.y * (src->m_32_bits_fmt ? 1 : 2)); g_gs_device->StretchRect(src->m_texture, sRect, rt, dRect, true, true, true, false); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 3f7e66f8b1dfe..57fc5c42985d1 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2608,14 +2608,16 @@ void GSRendererHW::Draw() // offset coordinates swap around RG/BA. 
(Ace Combat) const u32 minv = m_cached_ctx.CLAMP.MINV; const u32 minu = m_cached_ctx.CLAMP.MINU; - const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); + // Make sure minu or minv are actually a mask on some bits, false positives of games setting 512 (0x1ff) are not masks used for shuffles. + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv && ((minu + 1 & minu) || (minv + 1 & minv)))); const bool shuffle_coords = ((first_x ^ first_u) & 0xF) == 8 || rgba_shuffle; // Round up half of second coord, it can sometimes be slightly under. const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; const int read_width = std::abs(second_u - first_u); - shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; + // m_skip check is just mainly for NFS Undercover, but should hopefully pick up any other games which rewrite shuffles. + shuffle_target = shuffle_coords && (((draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1) || m_skip > 50); } if (!shuffle_target) @@ -2662,7 +2664,7 @@ void GSRendererHW::Draw() return; } - possible_shuffle &= src && (src->m_from_target != nullptr); + possible_shuffle &= src && (src->m_from_target != nullptr && (src->m_from_target->m_32_bits_fmt) || (m_skip && possible_shuffle)); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. 
@@ -2704,7 +2706,7 @@ void GSRendererHW::Draw() // Urban Reign trolls by scissoring a draw to a target at 0x0-0x117F to 378x449 which ends up the size being rounded up to 640x480 // causing the buffer to expand to around 0x1400, which makes a later framebuffer at 0x1180 to fail to be created correctly. // We can cheese this by checking if the Z is masked and the resultant colour is going to be black anyway. - const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 || m_context->ALPHA.IsBlack()) && m_context->ALPHA.D != 1) && m_draw_env->COLCLAMP.CLAMP == 1; + const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 && m_context->ALPHA.D > 1) || (m_context->ALPHA.IsBlack() && m_context->ALPHA.D != 1)) && m_draw_env->COLCLAMP.CLAMP == 1; const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. @@ -7673,7 +7675,9 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + // Not required when using Tex in RT + if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && + tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0 && GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled) { GL_PUSH("OI_BlitFMV"); @@ -7727,7 +7731,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc g_texture_cache->InvalidateVideoMemSubTarget(_rt); return false; // skip current draw - }*/ + } // Nothing to see keep going return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index db111e26bc7d5..86ee301908539 100644 --- 
a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -336,22 +336,8 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw // Results won't be square, if it's not invalidation, it's a texture, which is problematic to translate, so let's not (FIFA 2005). if (!is_invalidation) { - if (sbp != tbp) - { - // Just take the start page, as this is likely tex in rt, and that's all we care about. - const u32 start_page = (in_rect.y / src_page_size.y) + (in_rect.x / src_page_size.x); - in_rect.x = (start_page % dst_pgw) * dst_page_size.x; - in_rect.y = (start_page / dst_pgw) * dst_page_size.y; - in_rect.z = in_rect.x + dst_page_size.x; - in_rect.w = in_rect.y + dst_page_size.y; - - return in_rect; - } - else - { - DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); - return GSVector4i::zero(); - } + DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); + return GSVector4i::zero(); } //TODO: Maybe control dirty blocks directly and add them page at a time for better granularity. @@ -1146,7 +1132,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. // Also is we have already found a target which we had to offset in to by using a region or exact address, // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) - if (!overlaps || (found_t && dst->m_TEX0.TBP0 >= bp && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) + // Vakyrie Profile 2 also has some in draws which get done on a different target due to a slight offset, so we need to make sure we have the newer one. 
+ if (!overlaps || (found_t && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) continue; const bool width_match = (std::max(64U, bw * 64U) >> GSLocalMemory::m_psm[psm].info.pageShiftX()) == @@ -1487,8 +1474,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; } - else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && - !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (block_boundary_rect.w <= GSLocalMemory::m_psm[psm].pgs.y))))) + // Keep note that 2 bw is basically 1 normal page, as bw is in 64 pixels, and 8bit pages are 128 pixels wide, aka 2 bw. + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && + !((t->m_TEX0.TBW == (bw / 2)) || (((bw + 1) / 2) <= t->m_TEX0.TBW && (block_boundary_rect.w <= GSLocalMemory::m_psm[psm].pgs.y))))) { DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; @@ -1566,7 +1554,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const //rect = rect.rintersect(t->m_valid); - if (rect.rempty()) + if (rect.rintersect(t->m_valid).rempty()) continue; if (!t->m_dirty.empty()) From 4f11ea05b5100fb6635cbd96a147bb8efc2ca6e6 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 16 Jan 2025 10:42:02 +0000 Subject: [PATCH 12/28] GS/HW: Centralize new target resizing calls to fix statistics/tidy up Also add an override for GSVector4i loadl to take a GSVector2i --- pcsx2/GS/GSVector4i.h | 5 +++ pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 17 +++------ pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 46 ++++++++++++------------ pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 4 files 
changed, 34 insertions(+), 36 deletions(-) diff --git a/pcsx2/GS/GSVector4i.h b/pcsx2/GS/GSVector4i.h index 53bfc2c6d5e37..63c9cc1f98520 100644 --- a/pcsx2/GS/GSVector4i.h +++ b/pcsx2/GS/GSVector4i.h @@ -1599,6 +1599,11 @@ class alignas(16) GSVector4i return loadh(&v); } + __forceinline static GSVector4i loadl(const GSVector2i& v) + { + return loadl(&v); + } + __forceinline static GSVector4i load(const void* pl, const void* ph) { return loadh(ph, loadl(pl)); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 57fc5c42985d1..6a920281f296a 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2950,33 +2950,24 @@ void GSRendererHW::Draw() if (vertical_offset < 0) { rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; + GSVector2i new_size = rt->m_unscaled_size; // Make sure to use the original format for the offset. int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); texture_offset = new_offset; - new_scaled_size.y += new_offset * rt->m_scale; - GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); - //if (!tex) - // return nullptr; - //m_target_memory_usage += tex->GetMemUsage(); - GSVector4i dRect = GSVector4i(0, new_offset * rt->m_scale, new_scaled_size.x, new_scaled_size.y); - g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); + new_size.y += new_offset; + rt->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i::loadh(new_size * rt->m_scale).loadl(GSVector2i(0, new_offset * rt->m_scale))); if (src && src->m_from_target && src->m_from_target == rt && src->m_target_direct) { - src->m_texture = tex; + src->m_texture = rt->m_texture; } - g_gs_device->Recycle(rt->m_texture); - rt->m_valid.y += new_offset; rt->m_valid.w += 
new_offset; rt->m_drawn_since_read.y += new_offset; rt->m_drawn_since_read.w += new_offset; - rt->m_texture = tex; - rt->m_unscaled_size = new_scaled_size / rt->m_scale; t_size.y += std::abs(vertical_offset); vertical_offset = 0; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 86ee301908539..47bc4d67747df 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2173,30 +2173,19 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, scale); - //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : - g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - if (!tex) - return nullptr; - m_target_memory_usage += tex->GetMemUsage(); - - g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? 
ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); - if (src && src->m_from_target && src->m_from_target == dst) { src->m_texture = dst->m_texture; src->m_target_direct = false; src->m_shared_texture = false; + + dst->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i(dRect), true); } else { - m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(dst->m_texture); + dst->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i(dRect)); } - - dst->m_texture = tex; - dst->m_unscaled_size = new_size; } // New format or doing a shuffle to a 32bit target that used to be 16bit @@ -6782,7 +6771,7 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } -bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old) +bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old, bool require_new_rect, GSVector4i new_rect, bool keep_old) { if (m_unscaled_size.x == new_unscaled_width && m_unscaled_size.y == new_unscaled_height) return true; @@ -6806,7 +6795,7 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca // Only need to copy if it's been written to. if (m_texture->GetState() == GSTexture::State::Dirty) { - const GSVector4i rc = GSVector4i::loadh(size.min(new_size)); + const GSVector4i rc = require_new_rect ? new_rect : GSVector4i::loadh(size.min(new_size)); if (tex->IsDepthStencil()) { // Can't do partial copies in DirectX for depth textures, and it's probably not ideal in other @@ -6815,8 +6804,15 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca } else { - // Fast memcpy()-like path for color targets. 
- g_gs_device->CopyRect(m_texture, tex, rc, 0, 0); + if (require_new_rect) + { + g_gs_device->StretchRect(m_texture, tex, GSVector4(rc), ShaderConvert::COPY, false); + } + else + { + // Fast memcpy()-like path for color targets. + g_gs_device->CopyRect(m_texture, tex, rc, 0, 0); + } } g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -6834,12 +6830,18 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca g_gs_device->InvalidateRenderTarget(tex); } - g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); - if (recycle_old) - g_gs_device->Recycle(m_texture); + if (!keep_old) + { + g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); + + if (recycle_old) + g_gs_device->Recycle(m_texture); + else + delete m_texture; + } else - delete m_texture; + g_texture_cache->m_target_memory_usage += tex->GetMemUsage(); m_texture = tex; m_unscaled_size = new_unscaled_size; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 7f7d02c3aa4d3..7bb1d5b15dacc 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -257,7 +257,7 @@ class GSTextureCache void UpdateValidChannels(u32 psm, u32 fbmsk); /// Resizes target texture, DOES NOT RESCALE. - bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true); + bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true, bool require_offset = false, GSVector4i offset = GSVector4i::zero(), bool keep_old = false); private: void UpdateTextureDebugName(); From 29976f56e3b7114f65c5965e8eaaadc1b1dba637 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 16 Jan 2025 18:16:57 +0000 Subject: [PATCH 13/28] GS/HW: Allow 1:1 quads to be optimized for textures. 
Fixes for shuffles --- pcsx2/GS/GSState.cpp | 37 +++++++++++++++++++++----- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 2 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 8 +++--- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 9f27a481823cf..e82fd0ecb73e7 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3871,7 +3871,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL const GSVector2 grad(uv_range / pos_range); // Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this // optimization doesn't work when perspective correction is enabled. - if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && m_primitive_covers_without_gaps != NoGapsType::GapsFound) + // Allowing for quads when the gradiant is 1. It's not guaranteed (would need to check the grandient on each vector), but should be close enough. + if ((m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && TrianglesAreQuads(false) && grad.x == 1.0f && grad.y == 1.0f)) && m_primitive_covers_without_gaps != NoGapsType::GapsFound) { // When coordinates are fractional, GS appears to draw to the right/bottom (effectively // taking the ceiling), not to the top/left (taking the floor). @@ -3882,11 +3883,24 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]]; const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]]; + const GSVertex* vert_third = &m_vertex.buff[m_index.buff[2]]; GSVector4 new_st = st; + bool u_forward_check = false; + bool x_forward_check = false; + if (m_vt.m_primclass == GS_TRIANGLE_CLASS) + { + u_forward_check = PRIM->FST ? 
((vert_first->U < vert_second->U) || (vert_first->U < vert_third->U)) : (((vert_first->ST.S / vert_first->RGBAQ.Q) < (vert_second->ST.S / vert_second->RGBAQ.Q)) || ((vert_first->ST.S / vert_first->RGBAQ.Q) < (vert_third->ST.S / vert_third->RGBAQ.Q))); + x_forward_check = (vert_first->XYZ.X < vert_second->XYZ.X) || (vert_first->XYZ.X < vert_third->XYZ.X); + } + else + { + u_forward_check = PRIM->FST ? (vert_first->U < vert_second->U) : ((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_second->ST.T / vert_first->RGBAQ.Q)); + x_forward_check = vert_first->XYZ.Y < vert_second->XYZ.Y; + } // Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap - const bool u_forward = vert_first->U < vert_second->U; - const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X; + const bool u_forward = u_forward_check; + const bool x_forward = x_forward_check; const bool swap_x = u_forward != x_forward; if (int_rc.left < scissored_rc.left) @@ -3909,9 +3923,20 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL st.x = new_st.x; st.z = new_st.z; } - - const bool v_forward = vert_first->V < vert_second->V; - const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y; + bool v_forward_check = false; + bool y_forward_check = false; + if (m_vt.m_primclass == GS_TRIANGLE_CLASS) + { + v_forward_check = PRIM->FST ? ((vert_first->V < vert_second->V) || (vert_first->V < vert_third->V)) : (((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_second->ST.T / vert_second->RGBAQ.Q)) || ((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_third->ST.T / vert_third->RGBAQ.Q))); + y_forward_check = (vert_first->XYZ.Y < vert_second->XYZ.Y) || (vert_first->XYZ.Y < vert_third->XYZ.Y); + } + else + { + v_forward_check = PRIM->FST ? 
(vert_first->V < vert_second->V) : ((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_second->ST.T / vert_first->RGBAQ.Q)); + y_forward_check = vert_first->XYZ.Y < vert_second->XYZ.Y; + } + const bool v_forward = v_forward_check; + const bool y_forward = y_forward_check; const bool swap_y = v_forward != y_forward; if (int_rc.top < scissored_rc.top) diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 4f3c791092516..15703ce3d8d3d 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1081,7 +1081,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, const GSVector4 sRect(0.0f, 0.0f, static_cast(copy_size.x) / static_cast(src_size.x), static_cast(copy_size.y) / static_cast(src_size.y)); // This is kind of a bodge because the game confuses everything since the source is really 16bit and it assumes it's really drawing 16bit on the copy back, resizing the target. - const GSVector4 dRect(0, 0, copy_size.x, copy_size.y * (src->m_32_bits_fmt ? 
1 : 2)); + const GSVector4 dRect(0, 0, copy_size.x, copy_size.y); g_gs_device->StretchRect(src->m_texture, sRect, rt, dRect, true, true, true, false); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 6a920281f296a..4f24a11a3f512 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2595,7 +2595,7 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && - (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true)))) + (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true) && m_index.tail > 6))) { if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) { @@ -2664,7 +2664,7 @@ void GSRendererHW::Draw() return; } - possible_shuffle &= src && (src->m_from_target != nullptr && (src->m_from_target->m_32_bits_fmt) || (m_skip && possible_shuffle)); + possible_shuffle &= src && (src->m_from_target != nullptr || (m_skip && possible_shuffle)); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. 
@@ -4440,7 +4440,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy())) + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy()) || m_conf.ps.urban_chaos_hle || m_conf.ps.tales_of_abyss_hle) { GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); @@ -5714,7 +5714,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c target_region = false; source_region.bits = 0; //copied_rt = tex->m_from_target != nullptr; - if (m_in_target_draw) + if (page_offset && m_in_target_draw) { copy_size.x = m_r.width(); copy_size.y = m_r.height(); From a127516485239f2c0ad38ae3228c84f39dc7080a Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 16 Jan 2025 18:39:54 +0000 Subject: [PATCH 14/28] GS/HW: Don't interfere with Tales/Urban Chaos HLE shuffles --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 4f24a11a3f512..1ddf274c8e72a 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2093,8 +2093,12 @@ void GSRendererHW::Draw() if (m_channel_shuffle) { - m_last_channel_shuffle_fbp = m_context->FRAME.Block(); - m_last_channel_shuffle_tbp = m_context->TEX0.TBP0; + // These HLE's skip several channel shuffles in a row which change blends etc. Let's not break the flow, it gets upset. 
+ if (!m_conf.ps.urban_chaos_hle && !m_conf.ps.tales_of_abyss_hle) + { + m_last_channel_shuffle_fbp = m_context->FRAME.Block(); + m_last_channel_shuffle_tbp = m_context->TEX0.TBP0; + } num_skipped_channel_shuffle_draws++; return; From 47d4dc8c7e2c9b6f8b06bd9c2a0b8a53c9ad838b Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Fri, 17 Jan 2025 01:32:58 +0000 Subject: [PATCH 15/28] GS/HW: Further fixes to HW renderer behaviour --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 45 ++++++++++++++++++++---- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 4 +-- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 1ddf274c8e72a..078a7accaf14e 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -735,7 +735,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, if (m_context->scissor.in.x & 8) { - m_context->scissor.in.x &= ~0xf;//m_vt.m_min.p.x; + m_context->scissor.in.x &= ~0xf; //m_vt.m_min.p.x; if (half_right_vert) m_context->scissor.in.x /= 2; @@ -765,6 +765,38 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_min.t.x /= 2.0f; m_vt.m_max.t.x = (m_vt.m_max.t.x + 1.9f) / 2.0f; } + + // Special case used in Call of Duty - World at War where it doubles the height and halves the width, but the height is double doubled. + // Check the height of the original texture, if it's half of the draw height, then make it wide instead. 
+ if (half_bottom_uv && tex->m_from_target && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW && + tex->m_from_target->m_TEX0.TBW == (m_cached_ctx.TEX0.TBW * 2) && (m_cached_ctx.TEX0.TBW * 64) == floor(m_vt.m_max.t.x)) + { + m_r.z *= 2; + m_r.w /= 2; + + m_vt.m_max.t.y /= 2; + m_vt.m_max.t.x *= 2; + m_vt.m_max.p.y /= 2; + m_vt.m_max.p.x *= 2; + m_context->scissor.in.w /= 2; + m_context->scissor.in.z *= 2; + + v[1].XYZ.X = ((v[m_index.buff[m_index.tail - 1]].XYZ.X - m_context->XYOFFSET.OFX) * 2) + m_context->XYOFFSET.OFX; + v[1].XYZ.Y = ((v[m_index.buff[m_index.tail - 1]].XYZ.Y - m_context->XYOFFSET.OFY) / 2) + m_context->XYOFFSET.OFY; + + v[1].U = v[m_index.buff[m_index.tail - 1]].U * 2; + v[1].V = v[m_index.buff[m_index.tail - 1]].V / 2; + + v[1].ST.S = v[m_index.buff[m_index.tail - 1]].ST.S * 2; + v[1].ST.T = v[m_index.buff[m_index.tail - 1]].ST.T / 2; + + m_vertex.head = m_vertex.tail = m_vertex.next = 2; + m_index.tail = 2; + + m_cached_ctx.TEX0.TBW *= 2; + m_cached_ctx.FRAME.FBW *= 2; + GL_CACHE("Half width/double height shuffle detected, width changed to %d", m_cached_ctx.FRAME.FBW); + } } GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex) @@ -2934,8 +2966,9 @@ void GSRendererHW::Draw() FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW; } - rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_color || possible_shuffle, m_r, src); + rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? 
src->m_from_target->GetScale() : target_scale, + GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src); + if (!rt) [[unlikely]] { GL_INS("ERROR: Failed to create FRAME target, skipping."); @@ -4464,7 +4497,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); - m_cached_ctx.FRAME.FBP += frame_page_offset; + //m_cached_ctx.FRAME.FBP += frame_page_offset; m_in_target_draw |= frame_page_offset > 0; GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); @@ -4474,7 +4507,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); - m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; + //m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; s[0].U = m_r.x << 4; s[1].U = m_r.z << 4; s[0].V = m_r.y << 4; @@ -5718,7 +5751,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c target_region = false; source_region.bits = 0; //copied_rt = tex->m_from_target != nullptr; - if (page_offset && m_in_target_draw) + if (m_in_target_draw && (page_offset || frame_diff)) { copy_size.x = m_r.width(); copy_size.y = m_r.height(); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 47bc4d67747df..f08a10f45506b 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp 
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1948,10 +1948,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe (static_cast(min_rect.width()) <= (widthpage_offset * 64))));*/ const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= static_cast((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0); const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))); - const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1); + const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && draw_rect.w <= GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y)); // if it's a shuffle, some games tend to offset back by a page, such as Tomb Raider, for no disernable reason, but it then causes problems. // This can also happen horizontally (Catwoman moves everything one page left with shuffles), but this is too messy to deal with right now. - const bool overlaps = t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect) || (is_shuffle && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect + GSVector4i(0, 0, 0, 32))); + const bool overlaps = t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect) || (is_shuffle && src && GSLocalMemory::m_psm[src->m_TEX0.PSM].bpp == 8 && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect + GSVector4i(0, 0, 0, 32))); if (no_target_or_newer && is_aligned_ok && width_match && overlaps) { const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; From 818527e8e12fb6b515d7d31c8f5cacf4ea2c6fcc Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 23 Jan 2025 15:48:35 +0000 Subject: [PATCH 16/28] GS/HW: Fix up shuffle behaviour and affected areas - Channel shuffles now check how many pages require drawing before doing the shuffle. - Split texture shuffles don't create new targets with bad valid areas. 
--- pcsx2/GS/GSState.cpp | 26 +++- pcsx2/GS/GSState.h | 17 +++ pcsx2/GS/Renderers/HW/GSHwHack.cpp | 4 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 184 +++++++++++++++++------ pcsx2/GS/Renderers/HW/GSRendererHW.h | 8 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 64 +++++++- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 7 files changed, 248 insertions(+), 57 deletions(-) diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index e82fd0ecb73e7..d886bd3ba9923 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -119,6 +119,10 @@ GSState::~GSState() _aligned_free(m_vertex.buff); if (m_index.buff) _aligned_free(m_index.buff); + if (m_draw_vertex.buff) + _aligned_free(m_draw_vertex.buff); + if (m_draw_index.buff) + _aligned_free(m_draw_index.buff); } std::string GSState::GetDrawDumpPath(const char* format, ...) @@ -850,7 +854,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) // Urban Chaos writes to the memory backing the CLUT in the middle of a shuffle, and // it's unclear whether the CLUT would actually get reloaded in that case. 
if (TEX0.CBP != m_mem.m_clut.GetCLUTCBP()) - m_channel_shuffle = false; + m_channel_shuffle_abort = true; } TEX0.CPSM &= 0xa; // 1010b @@ -2796,8 +2800,10 @@ void GSState::GrowVertexBuffer() const u32 maxcount = std::max(m_vertex.maxcount * 3 / 2, 10000); GSVertex* vertex = static_cast(_aligned_malloc(sizeof(GSVertex) * maxcount, 32)); + GSVertex* draw_vertex = static_cast(_aligned_malloc(sizeof(GSVertex) * maxcount, 32)); // Worst case index list is a list of points with vs expansion, 6 indices per point u16* index = static_cast(_aligned_malloc(sizeof(u16) * maxcount * 6, 32)); + u16* draw_index = static_cast(_aligned_malloc(sizeof(u16) * maxcount * 6, 32)); if (!vertex || !index) { @@ -2823,6 +2829,22 @@ void GSState::GrowVertexBuffer() _aligned_free(m_index.buff); } + if (m_draw_vertex.buff) + { + std::memcpy(draw_vertex, m_draw_vertex.buff, sizeof(GSVertex) * m_vertex.tail); + + _aligned_free(m_draw_vertex.buff); + } + + if (m_draw_index.buff) + { + std::memcpy(draw_index, m_draw_index.buff, sizeof(u16) * m_index.tail); + + _aligned_free(m_draw_index.buff); + } + + m_draw_vertex.buff = draw_vertex; + m_draw_index.buff = draw_index; m_vertex.buff = vertex; m_vertex.maxcount = maxcount - 3; // -3 to have some space at the end of the buffer before DrawingKick can grow it m_index.buff = index; @@ -3872,7 +3894,7 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL // Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this // optimization doesn't work when perspective correction is enabled. // Allowing for quads when the gradiant is 1. It's not guaranteed (would need to check the grandient on each vector), but should be close enough. 
- if ((m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && TrianglesAreQuads(false) && grad.x == 1.0f && grad.y == 1.0f)) && m_primitive_covers_without_gaps != NoGapsType::GapsFound) + if (m_primitive_covers_without_gaps != NoGapsType::GapsFound && (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && grad.x == 1.0f && grad.y == 1.0f && TrianglesAreQuads(false)))) { // When coordinates are fractional, GS appears to draw to the right/bottom (effectively // taking the ceiling), not to the top/left (taking the floor). diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 5b07b03424217..cf09202b40c13 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -145,6 +145,21 @@ class GSState : public GSAlignedClass<32> u32 tail; } m_index = {}; + struct + { + GSVertex* buff; + u32 head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 + u32 xy_tail; + GSVector4i xy[4]; + GSVector4i xyhead; + } m_draw_vertex = {}; + + struct + { + u16* buff; + u32 tail; + } m_draw_index = {}; + void UpdateContext(); void UpdateScissor(); @@ -225,6 +240,8 @@ class GSState : public GSAlignedClass<32> bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; bool m_in_target_draw = false; + bool m_channel_shuffle_abort = false; + u32 m_target_offset = 0; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 15703ce3d8d3d..19d57af186ca0 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -940,6 +940,10 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds && r.m_cached_ctx.FRAME.FBMSK == 0 // No frame buffer masking. 
) { + int mask = (r.m_vt.m_max.p.xyxy() == r.m_vt.m_min.p.xyxy()).mask(); + if (mask == 0xf) + return true; + const u32 FBP = r.m_cached_ctx.FRAME.Block(); const u32 FBW = r.m_cached_ctx.FRAME.FBW; GL_INS("PointListPalette - m_r = <%d, %d => %d, %d>, n_vertices = %u, FBP = 0x%x, FBW = %u", r.m_r.x, r.m_r.y, r.m_r.z, r.m_r.w, n_vertices, FBP, FBW); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 078a7accaf14e..22c244834a5ad 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1036,7 +1036,8 @@ GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex, const bool GSRendererHW::IsPossibleChannelShuffle() const { if (!PRIM->TME || m_cached_ctx.TEX0.PSM != PSMT8 || // 8-bit texture draw - m_vt.m_primclass != GS_SPRITE_CLASS) // draw_sprite_tex + m_vt.m_primclass != GS_SPRITE_CLASS || // draw_sprite_tex + (m_vertex.tail <= 2 && (((m_vt.m_max.p - m_vt.m_min.p) <= GSVector4(8.0f)).mask() & 0x3) == 0x3)) // Powerdrome does a tiny shuffle on a couple of pixels, can't reliably translate this. { return false; } @@ -1050,6 +1051,7 @@ bool GSRendererHW::IsPossibleChannelShuffle() const const int draw_height = std::abs(v[1].XYZ.Y - v[0].XYZ.Y) >> 4; const bool mask_clamp = (m_cached_ctx.CLAMP.WMS | m_cached_ctx.CLAMP.WMT) & 0x2; + const bool draw_match = (draw_height == 2) || (draw_width == 8); if (draw_match || mask_clamp) @@ -1121,7 +1123,7 @@ bool GSRendererHW::NextDrawMatchesShuffle() const return true; } -bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) +bool GSRendererHW::IsSplitTextureShuffle(GIFRegTEX0& rt_TEX0, GSVector4i& valid_area) { // For this to work, we're peeking into the next draw, therefore we need dirty registers. 
if (m_dirty_gs_regs == 0) @@ -1164,7 +1166,7 @@ bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) const u32 pages_high = static_cast(aligned_rc.height()) / frame_psm.pgs.y; const u32 num_pages = m_context->FRAME.FBW * pages_high; // Jurassic - The Hunted will do a split shuffle with a height of 512 (256) when it's supposed to be 448, so it redoes one row of the shuffle. - const u32 rt_half = (((rt->m_valid.height() / GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y) / 2) * rt->m_TEX0.TBW) + (rt->m_TEX0.TBP0 >> 5); + const u32 rt_half = (((valid_area.height() / GSLocalMemory::m_psm[rt_TEX0.PSM].pgs.y) / 2) * rt_TEX0.TBW) + (rt_TEX0.TBP0 >> 5); // If this is a split texture shuffle, the next draw's FRAME/TEX0 should line up. // Re-add the offset we subtracted in Draw() to get the original FBP/TBP0.. this won't handle wrapping. Oh well. // "Potential" ones are for Jak3 which does a split shuffle on a 128x128 texture with a width of 256, writing to the lower half then offsetting 2 pages. @@ -1200,7 +1202,7 @@ bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) // If the game has changed the texture width to 1 we need to retanslate it to whatever the rt has so the final rect is correct. if (m_cached_ctx.FRAME.FBW == 1) - m_split_texture_shuffle_fbw = rt->m_TEX0.TBW; + m_split_texture_shuffle_fbw = rt_TEX0.TBW; else m_split_texture_shuffle_fbw = m_cached_ctx.FRAME.FBW; } @@ -1209,10 +1211,10 @@ bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) u32 total_pages = num_pages; // If the current draw is further than the half way point and the next draw is the half way point, then we can assume it's just overdrawing. 
- if (next_ctx.FRAME.FBP == rt_half && num_pages > (rt_half - (rt->m_TEX0.TBP0 >> 5))) + if (next_ctx.FRAME.FBP == rt_half && num_pages > (rt_half - (rt_TEX0.TBP0 >> 5))) { - vertical_pages = (rt->m_valid.height() / GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y) / 2; - total_pages = vertical_pages * rt->m_TEX0.TBW; + vertical_pages = (valid_area.height() / GSLocalMemory::m_psm[rt_TEX0.PSM].pgs.y) / 2; + total_pages = vertical_pages * rt_TEX0.TBW; } if ((m_split_texture_shuffle_pages % m_split_texture_shuffle_fbw) == 0) @@ -2119,12 +2121,14 @@ void GSRendererHW::Draw() // Fortunately, it seems to change the FBMSK along the way, so this check alone is sufficient. // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. - m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && + m_channel_shuffle = !m_channel_shuffle_abort && IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block() && m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; if (m_channel_shuffle) { + // Tombraider does vertical strips 2 pages at a time, then puts them horizontally, it's a mess, so let it do the full screen shuffle. + m_full_screen_shuffle |= !IsPageCopy() && NextDrawMatchesShuffle(); // These HLE's skip several channel shuffles in a row which change blends etc. Let's not break the flow, it gets upset. 
if (!m_conf.ps.urban_chaos_hle && !m_conf.ps.tales_of_abyss_hle) { @@ -2136,16 +2140,61 @@ void GSRendererHW::Draw() return; } + if (m_channel_shuffle_width) + { + if (m_last_rt) + { + //DevCon.Warning("Skipped %d draw %d was abort %d", num_skipped_channel_shuffle_draws, s_n, (int)m_channel_shuffle_abort); + // Some games like Tomb raider abort early, we're never going to know the real height, and the system doesn't work right for partials. + // But it's good enough for games like Hitman Blood Money which only shuffle part of the screen + + if (!m_full_screen_shuffle) + { + const u32 width_pages = ((num_skipped_channel_shuffle_draws + 1) % std::max(1U, m_channel_shuffle_width) % std::max(1U, m_channel_shuffle_width)) * 64;; + m_conf.scissor.w = m_conf.scissor.y + (((num_skipped_channel_shuffle_draws + 1 + (m_channel_shuffle_width - 1)) / std::max(1U, m_channel_shuffle_width)) * 32) * m_conf.cb_ps.ScaleFactor.z; + if (width_pages) + m_conf.scissor.z = m_conf.scissor.x + (((num_skipped_channel_shuffle_draws + 1) % std::max(1U, m_channel_shuffle_width) % std::max(1U, m_channel_shuffle_width)) * 64) * m_conf.cb_ps.ScaleFactor.z; + } + g_gs_device->RenderHW(m_conf); + + if (GSConfig.DumpGSData) + { + const u64 frame = g_perfmon.GetFrame(); + + std::string s; + + if (GSConfig.SaveRT && (s_n - 1) >= GSConfig.SaveN) + { + s = GetDrawDumpPath("%05d_f%lld_rt1_%05x_(%05x)_%s.bmp", s_n - 1, frame, m_cached_ctx.FRAME.Block(), m_last_rt-> m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); + + m_last_rt->m_texture->Save(s); + } + + if (GSConfig.SaveL > 0 && ((s_n - 1) - GSConfig.SaveN) > GSConfig.SaveL) + { + GSConfig.DumpGSData = 0; + } + } + g_texture_cache->InvalidateTemporarySource(); + CleanupDraw(false); + } + } #ifdef ENABLE_OGL_DEBUG if (num_skipped_channel_shuffle_draws > 0) GL_CACHE("Skipped %d channel shuffle draws ending at %d", num_skipped_channel_shuffle_draws, s_n); #endif num_skipped_channel_shuffle_draws = 0; + m_last_channel_shuffle_fbp = 0xffff; 
m_last_channel_shuffle_tbp = 0xffff; m_last_channel_shuffle_end_block = 0xffff; } + m_last_rt = nullptr; + m_channel_shuffle_width = 0; + m_full_screen_shuffle = false; + m_channel_shuffle_abort = false; + GL_PUSH("HW Draw %d (Context %u)", s_n, PRIM->CTXT); GL_INS("FLUSH REASON: %s%s", GetFlushReasonString(m_state_flush_reason), (m_state_flush_reason != GSFlushReason::CONTEXTCHANGE && m_dirty_gs_regs) ? " AND POSSIBLE CONTEXT CHANGE" : @@ -2255,14 +2304,6 @@ void GSRendererHW::Draw() const bool draw_sprite_tex = PRIM->TME && (m_vt.m_primclass == GS_SPRITE_CLASS); - // We trigger the sw prim render here super early, to avoid creating superfluous render targets. - if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true)) - { - GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)", - m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW); - return; - } - // GS doesn't fill the right or bottom edges of sprites/triangles, and for a pixel to be shaded, the vertex // must cross the center. In other words, the range is equal to the floor of coordinates +0.5. Except for // the case where the minimum equals the maximum, because at least one pixel is filled per line. @@ -2290,6 +2331,14 @@ void GSRendererHW::Draw() return; } + // We trigger the sw prim render here super early, to avoid creating superfluous render targets. + if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true)) + { + GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)", + m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW); + return; + } + // We want to fix up the context if we're doing a double half clear, regardless of whether we do the CPU fill. 
const ClearType is_possible_mem_clear = IsConstantDirectWriteMemClear(); if (!GSConfig.UserHacks_DisableSafeFeatures && is_possible_mem_clear) @@ -2823,7 +2872,7 @@ void GSRendererHW::Draw() ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, - src, -1); + src, nullptr, -1); ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; @@ -2886,16 +2935,18 @@ void GSRendererHW::Draw() possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || IsPossibleChannelShuffle()); + const bool possible_horizontal_texture_shuffle = possible_shuffle && src && src->m_from_target && m_r.w <= src->m_from_target->m_valid.w && m_r.z > src->m_from_target->m_valid.z && m_cached_ctx.FRAME.FBW > src->m_from_target_TEX0.TBW; + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (possible_shuffle && IsPossibleChannelShuffle() && src && src->m_from_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBW = (possible_horizontal_texture_shuffle || (possible_shuffle && src && src->m_from_target && IsPossibleChannelShuffle())) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; // Don't clamp on shuffle, the height cache may troll us with the REAL height. 
if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); - + GSVector4i lookup_rect = unclamped_draw_rect; // Do the lookup with the real format on a shuffle, if possible. if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) @@ -2912,10 +2963,10 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. // This is just for overlap detection, it doesn't matter which direction we do this in - if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32) + if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32 && src && src->m_from_target) { // Shuffling with a double width (Sonic Unleashed for example which does a wierd shuffle/not shuffle green backup/restore). - if (src && std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) + if (std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) { lookup_rect.x /= 2; lookup_rect.z /= 2; @@ -2940,7 +2991,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), - GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, ds, (no_ds || !ds) ? 
-1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2966,6 +3017,18 @@ void GSRendererHW::Draw() FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW; } + if (possible_shuffle && IsSplitTextureShuffle(FRAME_TEX0, lookup_rect)) + { + DevCon.Warning("Split texture shuffle early exit"); + // If TEX0 == FBP, we're going to have a source left in the TC. + // That source will get used in the actual draw unsafely, so kick it out. + if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); + + CleanupDraw(true); + return; + } + rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src); @@ -3130,7 +3193,7 @@ void GSRendererHW::Draw() ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, - src, -1); + src, nullptr, -1); ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; @@ -3220,17 +3283,18 @@ void GSRendererHW::Draw() (shuffle_coords || rt->m_32_bits_fmt)) && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle) && (draw_sprite_tex || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true))); - }; - if (m_texture_shuffle && IsSplitTextureShuffle(rt)) - { - // If TEX0 == FBP, we're going to have a source left in the TC. - // That source will get used in the actual draw unsafely, so kick it out. 
- if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) - g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); + if (m_texture_shuffle && IsSplitTextureShuffle(rt->m_TEX0, rt->m_valid)) + { + DevCon.Warning("Split texture shuffle"); + // If TEX0 == FBP, we're going to have a source left in the TC. + // That source will get used in the actual draw unsafely, so kick it out. + if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); - CleanupDraw(true); - return; + CleanupDraw(true); + return; + } } if ((src->m_target || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0)) && IsPossibleChannelShuffle()) @@ -3779,7 +3843,6 @@ void GSRendererHW::Draw() if (!skip_draw) DrawPrims(rt, ds, src, tmm); - // // Temporary source *must* be invalidated before normal, because otherwise it'll be double freed. g_texture_cache->InvalidateTemporarySource(); @@ -3841,7 +3904,7 @@ void GSRendererHW::Draw() std::string s; - if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN) + if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN && !m_last_rt) { s = GetDrawDumpPath("%05d_f%lld_rt1_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), rt->m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); @@ -3855,7 +3918,7 @@ void GSRendererHW::Draw() ds->m_texture->Save(s); } - if (GSConfig.SaveL > 0 && (s_n - GSConfig.SaveN) > GSConfig.SaveL) + if (GSConfig.SaveL > 0 && (s_n - GSConfig.SaveN) > GSConfig.SaveL && !m_last_rt) { GSConfig.DumpGSData = 0; } @@ -3925,7 +3988,7 @@ bool GSRendererHW::VerifyIndices() return true; } -void GSRendererHW::SetupIA(float target_scale, float sx, float sy) +void GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert_backup) { GL_PUSH("IA"); @@ -4006,7 +4069,20 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy) { m_conf.topology = GSHWDrawConfig::Topology::Triangle; m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite; - m_conf.verts = 
m_vertex.buff; + + if (req_vert_backup) + { + memcpy(m_draw_vertex.buff, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); + memcpy(m_draw_index.buff, m_index.buff, sizeof(u16) * m_index.tail); + + m_conf.verts = m_draw_vertex.buff; + m_conf.indices = m_draw_index.buff; + } + else + { + m_conf.verts = m_vertex.buff; + m_conf.indices = m_index.buff; + } m_conf.nverts = m_vertex.next; m_conf.nindices = m_index.tail * 3; m_conf.indices_per_prim = 6; @@ -4047,9 +4123,20 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy) ASSUME(0); } - m_conf.verts = m_vertex.buff; + if (req_vert_backup) + { + memcpy(m_draw_vertex.buff, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); + memcpy(m_draw_index.buff, m_index.buff, sizeof(u16) * m_index.tail); + + m_conf.verts = m_draw_vertex.buff; + m_conf.indices = m_draw_index.buff; + } + else + { + m_conf.verts = m_vertex.buff; + m_conf.indices = m_index.buff; + } m_conf.nverts = m_vertex.next; - m_conf.indices = m_index.buff; m_conf.nindices = m_index.tail; } @@ -4477,6 +4564,8 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer + const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + m_full_screen_shuffle = (m_r.height() > frame_psm.pgs.y) || (m_r.width() > frame_psm.pgs.x) || GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled; if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy()) || m_conf.ps.urban_chaos_hle || m_conf.ps.tales_of_abyss_hle) { GSVertex* s = &m_vertex.buff[0]; @@ -4491,10 +4580,13 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool s[1].V = 16384; m_r = GSVector4i(0, 0, 1024, 1024); + + // We need to count the pages that get shuffled to, some games (like Hitman Blood Money dialogue blur effects) only do half the screen. 
+ if (!m_full_screen_shuffle && !m_conf.ps.urban_chaos_hle && !m_conf.ps.tales_of_abyss_hle && src) + m_channel_shuffle_width = src->m_TEX0.TBW; } else { - const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); //m_cached_ctx.FRAME.FBP += frame_page_offset; @@ -4519,6 +4611,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_index.tail = 2; m_primitive_covers_without_gaps = NoGapsType::FullCover; + m_channel_shuffle_abort = false; return true; } @@ -6613,7 +6706,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta float sx, sy, ox2, oy2; const float ox = static_cast(static_cast(m_context->XYOFFSET.OFX)); const float oy = static_cast(static_cast(m_context->XYOFFSET.OFY)); - if (GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native && rtscale > 1.0f) + if ((GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native || m_channel_shuffle) && rtscale > 1.0f) { sx = 2.0f * rtscale / (rtsize.x << 4); sy = 2.0f * rtscale / (rtsize.y << 4); @@ -6756,7 +6849,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtsize, rtscale)); m_conf.scissor = (DATE && !DATE_BARRIER) ? m_conf.drawarea : scissor; - SetupIA(rtscale, sx, sy); + SetupIA(rtscale, sx, sy, m_channel_shuffle_width != 0); if (ate_second_pass) { @@ -6844,7 +6937,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.drawlist = (m_conf.require_full_barrier && m_vt.m_primclass == GS_SPRITE_CLASS) ? 
&m_drawlist : nullptr; - g_gs_device->RenderHW(m_conf); + if (!m_channel_shuffle_width) + g_gs_device->RenderHW(m_conf); + else + m_last_rt = rt; } // If the EE uploaded a new CLUT since the last draw, use that. @@ -7104,7 +7200,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t { GSTextureCache::Target* rt = g_texture_cache->GetTargetWithSharedBits(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.PSM); - if (!rt) + if (!rt || (!rt->m_dirty.empty() && rt->m_dirty.GetTotalRect(rt->m_TEX0, rt->m_unscaled_size).rintersect(m_r).eq(m_r))) return true; rt = nullptr; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index df2d45d25ce7c..ff80c9482be43 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -92,7 +92,7 @@ class GSRendererHW : public GSRenderer void DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex, const TextureMinMaxResult& tmm); void ResetStates(); - void SetupIA(float target_scale, float sx, float sy); + void SetupIA(float target_scale, float sx, float sy, bool req_vert_backup); void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex); bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only); void EmulateBlending(int rt_alpha_min, int rt_alpha_max, const bool DATE, bool& DATE_PRIMID, bool& DATE_BARRIER, GSTextureCache::Target* rt, @@ -115,7 +115,7 @@ class GSRendererHW : public GSRenderer bool IsPossibleChannelShuffle() const; bool IsPageCopy() const; bool NextDrawMatchesShuffle() const; - bool IsSplitTextureShuffle(GSTextureCache::Target* rt); + bool IsSplitTextureShuffle(GIFRegTEX0& rt_TEX0, GSVector4i& valid_area); GSVector4i GetSplitTextureShuffleDrawRect() const; u32 GetEffectiveTextureShuffleFbmsk() const; @@ -176,6 +176,10 @@ class GSRendererHW : public GSRenderer u32 m_last_channel_shuffle_fbp = 0; u32 m_last_channel_shuffle_tbp = 0; u32 
m_last_channel_shuffle_end_block = 0; + u32 m_channel_shuffle_width = 0; + bool m_full_screen_shuffle = false; + + GSTextureCache::Target* m_last_rt; GIFRegFRAME m_split_clear_start = {}; GIFRegZBUF m_split_clear_start_Z = {}; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index f08a10f45506b..04ef5dd63ce94 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1296,7 +1296,21 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!possible_shuffle && frame_fbp != t->m_TEX0.TBP0 && rect_clean && bp == t->m_TEX0.TBP0 && t && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM) && width_match && real_fmt_match) { if (!tex_merge_rt && t->Overlaps(bp, bw, psm, req_rect)) + { + // Resize but be careful of +bilinear in req_rect, as it can screw valid areas. + if (psm_s.bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && !block_boundary_rect.rintersect(t->m_valid).eq(block_boundary_rect)) + { + RGBAMask rgba_mask; + rgba_mask.c.a = req_alpha; + rgba_mask.c.r = rgba_mask.c.g = rgba_mask.c.b = req_color; + if (block_boundary_rect.z > t->m_valid.z) + AddDirtyRectTarget(t, GSVector4i(t->m_valid.z, t->m_valid.y, block_boundary_rect.z, std::max(block_boundary_rect.w, t->m_valid.w)), t->m_TEX0.PSM, t->m_TEX0.TBW, rgba_mask); + if (block_boundary_rect.w > t->m_valid.w) + AddDirtyRectTarget(t, GSVector4i(t->m_valid.x, t->m_valid.w, std::max(block_boundary_rect.z, t->m_valid.z), block_boundary_rect.w), t->m_TEX0.PSM, t->m_TEX0.TBW, rgba_mask); + } + // Resize including the extra pixel for bilinear. 
ResizeTarget(t, req_rect, bp, psm, bw); + } } } @@ -1800,6 +1814,21 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } } + if (src->m_from_target && src->m_target_direct && src->m_region.HasEither()) + { + if (src->m_from_target->m_TEX0.TBP0 == src->m_TEX0.TBP0) + { + src->m_region.SetX(std::min(region.GetMinX(), src->m_region.GetMinX()), std::max(region.GetMaxX(), src->m_region.GetMaxX())); + src->m_region.SetY(std::min(region.GetMinY(), src->m_region.GetMinY()), std::max(region.GetMaxY(), src->m_region.GetMaxY())); + } + else if (src->m_TEX0.TBP0 > src->m_from_target->m_TEX0.TBP0) + { + GSVector4i dst_offset = TranslateAlignedRectByPage(src->m_from_target, src->m_TEX0.TBP0, src->m_TEX0.PSM, src->m_TEX0.TBW, GSVector4i(0, 0, 1, 1), false); + src->m_region.SetX(dst_offset.x + region.GetMinX(), dst_offset.x + region.GetMaxX()); + src->m_region.SetY(dst_offset.y + region.GetMinY(), dst_offset.y + region.GetMaxY()); + } + } + if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else if (src->m_palette && (!src->m_palette_obj || !src->ClutMatch({clut, psm_s.pal}))) @@ -1830,7 +1859,7 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, - bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, int offset) + bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, GSTextureCache::Target* ds, int offset) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1968,6 +1997,23 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } else if (t->m_dirty.empty()) { + + if (TEX0.TBW == t->m_TEX0.TBW && !is_shuffle && widthpage_offset == 0 && 
((min_rect.w + 63)/ 64) > 1) + { + // Beyond Good and Evil does this awful thing where it puts one framebuffer at 0xf00, with the first row of pages blanked out, and the whole thing goes down to 0x2080 + // which is a problem, because it then puts the Z buffer at 0x1fc0, then offsets THAT by 1 row of pages, so it starts at, you guessed it, 2080. + // So let's check the *real* start. + u32 real_start_address = GSLocalMemory::GetStartBlockAddress(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, t->m_drawn_since_read); + u32 new_end_address = GSLocalMemory::GetEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, min_rect); + + // Not really overlapping. + if (real_start_address > new_end_address) + { + i++; + continue; + } + } + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); dst = t; @@ -2322,7 +2368,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe continue; } // If the format is completely different, but it's the same location, it's likely just overwriting it, so get rid. - if (!is_shuffle && t->m_TEX0.TBW != TEX0.TBW && TEX0.TBW != 1 && !preserve_rgb && min_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) + // Make sure it's not currently in use, that could be bad. 
+ if (!is_shuffle && (!ds || (ds != t)) && + t->m_TEX0.TBW != TEX0.TBW && TEX0.TBW != 1 && !preserve_rgb && min_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) { DevCon.Warning("Deleting Z draw %d", GSState::s_n); InvalidateSourcesFromTarget(t); @@ -2637,11 +2685,12 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons RGBAMask rgba; rgba._u32 = GSUtil::GetChannelMask(TEX0.PSM); + dst->UpdateValidity(GSVector4i::loadh(valid_size)); if (!is_frame && !preload && !(src && src->m_TEX0.TBP0 == dst->m_TEX0.TBP0)) { - if ((preserve_target || !draw_rect.eq(dst->m_valid)) && GSRendererHW::GetInstance()->m_draw_transfers.size() > 0) + if ((preserve_target || !draw_rect.eq(GSVector4i::loadh(valid_size))) && GSRendererHW::GetInstance()->m_draw_transfers.size() > 0) { auto& transfers = GSRendererHW::GetInstance()->m_draw_transfers; const int last_draw = transfers.back().draw; @@ -2735,8 +2784,6 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons { const GSVector4i save_rect = preserve_target ? newrect : eerect; - if (!hw_clear) - dst->UpdateValidity(save_rect); GL_INS("Preloading the RT DATA from updated GS Memory"); AddDirtyRectTarget(dst, save_rect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16); } @@ -4878,8 +4925,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_valid_rect = dst->m_valid; src->m_end_block = dst->m_end_block; - // Do this first as we could be adding in alpha from an upgraded 24bit target. - dst->Update(); + // Do this first as we could be adding in alpha from an upgraded 24bit target. if the rect intersects a dirty area. 
+ if (!dst->m_dirty.empty() && !src_range->rintersect(dst->m_dirty.GetTotalRect(dst->m_TEX0, dst->m_unscaled_size)).rempty()) + dst->Update(); src->m_valid_alpha_minmax = true; if ((src->m_TEX0.PSM & 0xf) == PSMCT24) @@ -6596,7 +6644,7 @@ void GSTextureCache::Target::Update(bool cannot_scale) { if (alpha_minmax.second > 128 || (m_TEX0.PSM & 0xf) == PSMCT24) UnscaleRTAlpha(); - else if (!cannot_scale && total_rect.eq(m_valid)) + else if (!cannot_scale && total_rect.rintersect(m_valid).eq(m_valid)) m_rt_alpha_scale = true; } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 7bb1d5b15dacc..9de5db136a1a8 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -492,7 +492,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, int offset = -1); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, GSTextureCache::Target* ds = nullptr, int offset = -1); Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); From 704696b0a820735d28eb5e474de08beaed9404fa Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Fri, 24 Jan 2025 14:55:18 +0000 Subject: [PATCH 
17/28] GS/HW: Further fixes and rewrite of AlignedRectTranslate --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 93 +++++++-- pcsx2/GS/Renderers/HW/GSRendererHW.h | 2 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 245 ++++++++++++++++++----- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 4 files changed, 270 insertions(+), 72 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 22c244834a5ad..137266f5f47e3 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -3019,7 +3019,6 @@ void GSRendererHW::Draw() if (possible_shuffle && IsSplitTextureShuffle(FRAME_TEX0, lookup_rect)) { - DevCon.Warning("Split texture shuffle early exit"); // If TEX0 == FBP, we're going to have a source left in the TC. // That source will get used in the actual draw unsafely, so kick it out. if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) @@ -3261,7 +3260,8 @@ void GSRendererHW::Draw() { // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. // Initially code also tested the RT but it gives too much false-positive - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; + const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; + const int first_x = (((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4) - horizontal_offset; const int first_u = PRIM->FST ? 
((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const bool shuffle_coords = (first_x ^ first_u) & 8; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; @@ -3286,7 +3286,6 @@ void GSRendererHW::Draw() if (m_texture_shuffle && IsSplitTextureShuffle(rt->m_TEX0, rt->m_valid)) { - DevCon.Warning("Split texture shuffle"); // If TEX0 == FBP, we're going to have a source left in the TC. // That source will get used in the actual draw unsafely, so kick it out. if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) @@ -3440,7 +3439,9 @@ void GSRendererHW::Draw() // Deferred update of TEX0. We don't want to change it when we're doing a shuffle/clear, because it // may increase the buffer width, or change PSM, which breaks P8 conversion amongst other things. + // Some texture shuffles can be to new targets (or reused ones) so they may need their valid rects adjusting. const bool can_update_size = !is_possible_mem_clear && !m_texture_shuffle && !m_channel_shuffle; + if (!m_texture_shuffle && !m_channel_shuffle) { // Try to turn blits in to single sprites, saves upscaling problems when striped clears/blits. @@ -3539,18 +3540,52 @@ void GSRendererHW::Draw() if (!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) { GSVector2i new_size = t_size; - + GSVector4i update_rect = m_r; + const GIFRegTEX0& draw_TEX0 = rt ? rt->m_TEX0 : ds->m_TEX0; + const int buffer_width = std::max(draw_TEX0.TBW, 1U) * 64; // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. 
- if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle && m_split_texture_shuffle_pages == 0) + if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle) { if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) { if (new_size.y <= src->m_valid_rect.w && (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)) + { new_size.x /= 2; + } else + { new_size.y /= 2; + } + } + + if (update_rect.z > src->m_valid_rect.z && (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)) + { + // This is a case for Superman Shadow of Apokalypse where it is *nearly* double height and slightly wider, but the page count adds up. + if (update_rect.w > src->m_valid_rect.w) + { + update_rect = src->m_valid_rect; + } + else + { + update_rect.x /= 2; + update_rect.z /= 2; + } + } + else + { + update_rect.y /= 2; + update_rect.w /= 2; } } + // NFS Undercover does a draw with double width of the actual width 1280x240, which functions the same as doubling the height. + // Ignore single page/0 page stuff, that's just gonna get silly + else if (buffer_width > 64 && update_rect.z > buffer_width) + { + float multifactor = static_cast(update_rect.z) / static_cast(buffer_width); + + update_rect.w *= multifactor; + update_rect.z = buffer_width; + } if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0) { @@ -3612,12 +3647,13 @@ void GSRendererHW::Draw() rt->ResizeDrawn(rt->GetUnscaledRect()); } - const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); + const bool rt_update = can_update_size || (m_texture_shuffle && (src && rt && src->m_from_target != rt)); + // if frame is masked or afailing always to never write frame, wanna make sure we don't touch it. This might happen if DATE or Alpha Test is being used to write to Z. 
const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); - rt->UpdateDrawn(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateValidity(update_rect, !frame_masked && (rt_update || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateDrawn(update_rect, !frame_masked && (rt_update || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // Grandia Xtreme, Onimusha Warlord. @@ -3882,13 +3918,13 @@ void GSRendererHW::Draw() GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); - if (g_texture_cache->GetTemporaryZ()) + if (ds && g_texture_cache->GetTemporaryZ()) { if (m_cached_ctx.DepthWrite()) { - int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; - int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; - int z_offset = vertical_offset; + const int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; + const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + const int z_offset = vertical_offset; 
GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, z_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); @@ -4382,7 +4418,7 @@ bool GSRendererHW::TestChannelShuffle(GSTextureCache::Target* src) return m_channel_shuffle; } -__ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only) +__ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only, GSTextureCache::Target* rt) { if ((src->m_texture->GetType() == GSTexture::Type::DepthStencil) && !src->m_32_bits_fmt) { @@ -4587,9 +4623,30 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool } else { - const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); + const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * rt->m_TEX0.TBW)), 0); m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); - //m_cached_ctx.FRAME.FBP += frame_page_offset; + + // Hitman suffers from this, not sure on the exact scenario at the moment, but we need the barrier. 
+ if (PRIM->ABE && m_context->ALPHA.IsCdInBlend()) + { + if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().texture_barrier) + m_conf.require_one_barrier = true; + else + m_conf.require_full_barrier = true; + } + + // This is for offsetting the texture, however if the texture has a region clamp, we don't want to move it. + // A good two test games for this is Ghost in the Shell (no region clamp) and Tekken 5 (offset clamp on shadows) + if (rt && rt->m_TEX0.TBP0 == m_cached_ctx.FRAME.Block()) + { + const bool req_offset = (m_cached_ctx.CLAMP.WMS != 3 || (m_cached_ctx.CLAMP.MAXU & ~0xF) == 0) && + (m_cached_ctx.CLAMP.WMT != 3 || (m_cached_ctx.CLAMP.MAXV & ~0x3) == 0); + //DevCon.Warning("Draw %d offset %d", s_n, frame_page_offset); + // Offset the frame but clear the draw offset + if (req_offset) + m_cached_ctx.FRAME.FBP += frame_page_offset; + } + m_in_target_draw |= frame_page_offset > 0; GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); @@ -4599,7 +4656,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); - //m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; + s[0].U = m_r.x << 4; s[1].U = m_r.z << 4; s[0].V = m_r.y << 4; @@ -5429,7 +5486,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, // Hazard handling (i.e. reading from the current RT/DS). GSTextureCache::SourceRegion source_region = tex->GetRegion(); - bool target_region = (tex->IsFromTarget() && source_region.HasEither()); + bool target_region = tex->IsFromTarget() && source_region.HasEither(); GSVector2i unscaled_size = target_region ? 
tex->GetRegionSize() : tex->GetUnscaledSize(); float scale = tex->GetScale(); HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy); @@ -6203,7 +6260,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // vertex list (it will interact with PrimitiveOverlap and accurate // blending) if (m_channel_shuffle && tex && tex->m_from_target) - EmulateChannelShuffle(tex->m_from_target, false); + EmulateChannelShuffle(tex->m_from_target, false, rt); // Upscaling hack to avoid various line/grid issues MergeSprite(tex); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index ff80c9482be43..70c7bd3c39fbe 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -94,7 +94,7 @@ class GSRendererHW : public GSRenderer void ResetStates(); void SetupIA(float target_scale, float sx, float sy, bool req_vert_backup); void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex); - bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only); + bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only, GSTextureCache::Target* rt = nullptr); void EmulateBlending(int rt_alpha_min, int rt_alpha_max, const bool DATE, bool& DATE_PRIMID, bool& DATE_BARRIER, GSTextureCache::Target* rt, bool can_scale_rt_alpha, bool& new_rt_alpha_scale); void CleanupDraw(bool invalidate_temp_src); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 04ef5dd63ce94..bb2b60b3fb0e9 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -253,6 +253,163 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db } +GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw, u32 tpsm, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation) +{ + const 
GSVector2i src_page_size = GSLocalMemory::m_psm[spsm].pgs; + const GSVector2i dst_page_size = GSLocalMemory::m_psm[tpsm].pgs; + const int clamped_sbw = static_cast(std::max(1U, sbw)); + const int clamped_tbw = static_cast(std::max(1U, tbw)); + const int src_bw = clamped_sbw * 64; + const int dst_bw = clamped_tbw * 64; + const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[spsm]; + const GSLocalMemory::psm_t& t_psm = GSLocalMemory::m_psm[tpsm]; + const int src_pgw = std::max(1, src_bw / src_page_size.x); + const int dst_pgw = std::max(1, dst_bw / dst_page_size.x); + GSVector4i in_rect = src_r; + + if (sbp < tebp && tebp < tbp) + sbp += 0x4000; + // DST = the target we're trying to fit in to. + // SRC = the format being requested, so we want to translate from SRC to DST. + int page_offset = (static_cast(sbp) - static_cast(tbp)) >> 5; + int block_offset = (static_cast(sbp) - static_cast(tbp)) & 0x1F; + + if (!(s_psm.bpp == t_psm.bpp)) + { + const int src_bpp = s_psm.bpp; + + if (block_offset) + in_rect = in_rect.ralign(s_psm.bs); + else + in_rect = in_rect.ralign(s_psm.pgs); + + // Convert rect down in to pages and blocks. + const GSVector4i in_pages = GSVector4i(in_rect.x / s_psm.pgs.x, in_rect.y / s_psm.pgs.y, in_rect.z / s_psm.pgs.x, in_rect.w / s_psm.pgs.y); + in_rect -= GSVector4i(in_pages.x * s_psm.pgs.x, in_pages.y * s_psm.pgs.y, in_pages.z * s_psm.pgs.x, in_pages.w * s_psm.pgs.y); + // Handle a minimum of 1 block, they are a different shape between 16 and 32bit. 8x8 vs 16x8. + // FIXME: Block layouts are different between 32bit/8bit and other formats (8x4 instead of 4x8), so this could be a problem if the game invalidates too much. + const GSVector4i in_blocks = GSVector4i(in_rect.x / s_psm.bs.x, in_rect.y / s_psm.bs.y, (in_rect.z + (s_psm.bs.x - 1)) / s_psm.bs.x, (in_rect.w + (s_psm.bs.y - 1)) / s_psm.bs.y); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here.
+ in_rect = GSVector4i(in_pages.x * t_psm.pgs.x, in_pages.y * t_psm.pgs.y, in_pages.z * t_psm.pgs.x, in_pages.w * t_psm.pgs.y); + in_rect += GSVector4i(in_blocks.x * t_psm.bs.x, in_blocks.y * t_psm.bs.y, in_blocks.z * t_psm.bs.x, in_blocks.w * t_psm.bs.y); + + if (in_rect.rempty()) + { + DevCon.Warning("Error translating rect"); + return GSVector4i::zero(); + } + } + + GSVector4i new_rect = GSVector4i::zero(); + + if (src_pgw != dst_pgw) + { + const int horizontal_dst_page_offset = page_offset % clamped_tbw; + const bool single_row = ((src_pgw + horizontal_dst_page_offset) <= clamped_tbw) && (in_rect.height() <= dst_page_size.y); + const bool single_page = (in_rect.width() <= t_psm.pgs.x) && (in_rect.height() <= t_psm.pgs.y); + const int vertical_offset = in_rect.y / t_psm.pgs.y; + const int horizontal_offset = in_rect.x / t_psm.pgs.x; + const int rect_offset = horizontal_offset + (vertical_offset * src_pgw); + const int rect_pages = ((in_rect.width() / t_psm.pgs.x) % src_pgw) + ((in_rect.height() / t_psm.pgs.y) * src_pgw); + page_offset += rect_offset; + in_rect -= GSVector4i(horizontal_offset * t_psm.pgs.x, vertical_offset * t_psm.pgs.y).xyxy(); + + if (sbw == 0) // Intentionally check this separately + { + // BW == 0 loops vertically on the first page. So just copy the whole page vertically. 
+ if (in_rect.z > dst_page_size.x) + { + new_rect.x = 0; + new_rect.z = (dst_page_size.x); + } + else + { + new_rect.x = in_rect.x; + new_rect.z = in_rect.z; + } + if (in_rect.w > dst_page_size.y) + { + new_rect.y = 0; + new_rect.w = dst_page_size.y; + } + else + { + new_rect.y = in_rect.y; + new_rect.w = in_rect.w; + } + } + else if (src_pgw == 1 && (horizontal_dst_page_offset + rect_pages) <= clamped_tbw) // Intentionally check this separately + { + new_rect.x = (horizontal_dst_page_offset * t_psm.pgs.x) + in_rect.x; + new_rect.z = new_rect.x + (rect_pages * t_psm.pgs.x); + new_rect.y = (page_offset / dst_pgw) * t_psm.pgs.y; + new_rect.w = new_rect.y + t_psm.pgs.y; + } + else if (single_row || single_page) // Single page and single row should be handled the same here + { + //The offsets will move this to the right place + const GSVector2i start_page = GSVector2i(page_offset % dst_pgw, page_offset / dst_pgw); + new_rect.x = (start_page.x * t_psm.pgs.x) + in_rect.x; + new_rect.z = (start_page.x * t_psm.pgs.x) + in_rect.z; + new_rect.y = (start_page.y * t_psm.pgs.y) + in_rect.y; + new_rect.w = (start_page.y * t_psm.pgs.y) + in_rect.w; + } + else + { + + + // Fills full length, so count pages based on the width, adjust rect to fill original rect. + // Battle Assault 3 does a move with BW 7 instead of 8 and does 448x512, instead of 512x448. Same result, but confusing for us. + if ((in_rect.width() / dst_page_size.x) == src_pgw) + { + // The width is mismatched to the page. 
+ if (!is_invalidation && GSConfig.UserHacks_TextureInsideRt < GSTextureInRtMode::MergeTargets) + { + DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d src fmt %d dst fmt %d src_rect %d, %d, %d, %d draw %d", sbp, tbp, src_pgw, dst_pgw, spsm, tpsm, in_rect.x, in_rect.y, in_rect.z, in_rect.w, GSState::s_n); + return GSVector4i::zero(); + } + + const GSVector2i start_page = GSVector2i(page_offset % dst_pgw, page_offset / dst_pgw); + int page_count = (in_rect.height() / dst_page_size.y) * src_pgw; + + // Round up to a whole row, it's better than the alternative. + // Busin 0 - Wizardry Alternative Neo moves with non even rows. + const int horizontal_offset = (page_count % dst_pgw); + if (horizontal_offset) + page_count += dst_pgw - horizontal_offset; + + int new_height = (page_count / dst_pgw) * dst_page_size.y; + new_rect.x = 0; + new_rect.z = dst_pgw * dst_page_size.x; + new_rect.y = start_page.y * dst_page_size.y; + new_rect.w = new_rect.y + new_height; + } + else + { + //TODO: Maybe control dirty blocks directly and add them page at a time for better granularity. + const GSVector2i start_page = GSVector2i((page_offset + rect_offset) % dst_pgw, page_offset / dst_pgw); + DevCon.Warning("Fudging start position"); + // Not easily translatable full pages and make sure the height is rounded up to encompass the half row. 
+ new_rect.x = start_page.x * dst_page_size.x; + new_rect.z = new_rect.x + in_rect.z; + new_rect.y = start_page.y * dst_page_size.y; + new_rect.w = new_rect.y + in_rect.w; + } + } + } + else // Widths match + { + const int horizontal_dst_page_offset = page_offset % clamped_tbw; + const int vertical_dst_page_offset = page_offset / clamped_tbw; + GSVector4i offset_rect(horizontal_dst_page_offset * t_psm.pgs.x, vertical_dst_page_offset * t_psm.pgs.y); + new_rect = in_rect + offset_rect.xyxy(); + } + + return new_rect; +} + +/* GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw, u32 tpsm, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation) { const GSVector2i src_page_size = GSLocalMemory::m_psm[spsm].pgs; @@ -410,7 +567,7 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw } return new_rect; -} +}*/ GSVector4i GSTextureCache::TranslateAlignedRectByPage(Target* t, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation) { @@ -860,6 +1017,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c bool inside_target = false; GSVector4i target_rc(r); + GSVector4i block_boundary_rect = target_rc; + block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); + block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); + // Round up to the nearest block boundary for lookup to avoid problems due to bilinear and inclusive rects. 
+ block_boundary_rect.z = std::max(target_rc.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(target_rc.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); + for (auto t : m_dst[DepthStencil]) { if (!t->m_used || (!t->m_dirty.empty() && !is_depth)) @@ -887,53 +1051,30 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c const GSVector2i page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs; const bool can_translate = CanTranslate(bp, TEX0.TBW, psm, r, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW); const bool swizzle_match = psm_s.depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth; - GSVector4i new_rect = r; - - if (linear) - { - new_rect.z -= 1; - new_rect.w -= 1; - } + GSVector4i new_rect = block_boundary_rect; if (can_translate) { if (swizzle_match) { - target_rc = TranslateAlignedRectByPage(t, bp, psm, TEX0.TBW, new_rect); + block_boundary_rect = TranslateAlignedRectByPage(t, bp, psm, TEX0.TBW, new_rect); } else { - // If it's not page aligned, grab the whole pages it covers, to be safe. 
- if (psm_s.bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) - { - const GSVector2i dst_page_size = psm_s.pgs; - target_rc = GSVector4i(target_rc.x / page_size.x, target_rc.y / page_size.y, - (target_rc.z + (page_size.x - 1)) / page_size.x, - (target_rc.w + (page_size.y - 1)) / page_size.y); - target_rc = GSVector4i(target_rc.x * dst_page_size.x, target_rc.y * dst_page_size.y, - target_rc.z * dst_page_size.x, target_rc.w * dst_page_size.y); - } - else - { - target_rc.x &= ~(page_size.x - 1); - target_rc.y &= ~(page_size.y - 1); - target_rc.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1); - target_rc.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1); - } - target_rc = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, TEX0.TBW, target_rc); + const GSVector2i src_page_size = psm_s.pgs; + new_rect.x &= ~(src_page_size.x - 1); + new_rect.y &= ~(src_page_size.y - 1); + new_rect.z = (new_rect.z + (src_page_size.x - 1)) & ~(src_page_size.x - 1); + new_rect.w = (new_rect.w + (src_page_size.y - 1)) & ~(src_page_size.y - 1); + block_boundary_rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, TEX0.TBW, new_rect); } - if (!target_rc.rempty()) + if (!block_boundary_rect.rempty()) { dst = t; inside_target = true; } } - if (linear) - { - new_rect.z += 1; - new_rect.w += 1; - } } } @@ -1007,8 +1148,8 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c if (inside_target) { // Need to set it up as a region target. 
- src->m_region.SetX(target_rc.x, target_rc.z); - src->m_region.SetY(target_rc.y, target_rc.w); + src->m_region.SetX(block_boundary_rect.x, block_boundary_rect.z); + src->m_region.SetY(block_boundary_rect.y, block_boundary_rect.w); } if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) @@ -1434,7 +1575,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; dst = t; @@ -1459,7 +1600,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Detect half of the render target (fix snow engine game) // Target Page (8KB) have always a width of 64 pixels // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; half_right = true; @@ -1476,10 +1617,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. 
t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { - - if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) - continue; - u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % t->m_TEX0.TBW; if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && ((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) || @@ -1497,7 +1634,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } // PSM equality needed because CreateSource does not handle PSM conversion. // Only inclusive hit to limit false hits. - GSVector4i rect = req_rect; + GSVector4i rect = block_boundary_rect; int src_bw = bw; int src_psm = psm; @@ -1521,12 +1658,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (bp > t->m_TEX0.TBP0) { - GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) ? block_boundary_rect : rect; - if (linear) - { - new_rect.z -= 1; - new_rect.w -= 1; - } + GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (psm & 0x7) != PSMCT16) ? block_boundary_rect : rect; + // Check if it is possible to hit with valid offset on the given Target. // Fixes Jak eyes rendering. // Fixes Xenosaga 3 last dungeon graphic bug. 
@@ -1578,6 +1711,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const continue; } + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + continue; + x_offset = rect.x; y_offset = rect.y; dst = t; @@ -1599,6 +1735,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (so.is_valid) { + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + continue; + dst = t; // Offset from Target to Source in Target coords. x_offset = so.b2a_offset.x; @@ -1635,7 +1774,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); - + + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + continue; + x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX(); y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY(); dst = t; @@ -3809,7 +3951,7 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r // Check exact match first const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp; const u32 page_mask = ((1 << 5) - 1); - const bool exact_mem_match = (read_start & ~page_mask) == (t->m_TEX0.TBP0 & ~page_mask) && ((read_end + page_mask) & ~page_mask) == ((t->m_end_block + page_mask) & ~page_mask); + const bool 
exact_mem_match = (read_start & ~page_mask) == (t->m_TEX0.TBP0 & ~page_mask) && ((read_end + (page_mask - 1)) & ~page_mask) <= t->m_end_block; const bool expecting_this_tex = exact_mem_match || (bpp_match && bw == t->m_TEX0.TBW && (((read_start & ~page_mask) == t->m_TEX0.TBP0) || (bp >= t->m_TEX0.TBP0 && ((read_end + page_mask) & ~page_mask) <= ((t->m_end_block + page_mask) & ~page_mask)))); if (!expecting_this_tex) @@ -6718,7 +6860,7 @@ void GSTextureCache::Target::UpdateValidChannels(u32 psm, u32 fbmsk) m_valid_rgb |= (psm_s.trbpp >= 24 && (fbmsk & 0x00FFFFFF) != 0x00FFFFFF) || (psm_s.trbpp == 16); } -bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha) +bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha, bool width_match) { // Grab validities.. bool alpha_valid = false; @@ -6741,7 +6883,6 @@ bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm, bool req_color, bool if (req_alpha && !alpha_valid && color_valid && (m_TEX0.PSM & 0xF) <= PSMCT24 && (psm & 0xF) == PSMCT32) { - RGBAMask mask; mask._u32 = 0x8; m_TEX0.PSM &= ~PSMCT24; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 9de5db136a1a8..a0a434f448773 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -238,7 +238,7 @@ class GSTextureCache static Target* Create(GIFRegTEX0 TEX0, int w, int h, float scale, int type, bool clear); __fi bool HasValidAlpha() const { return (m_valid_alpha_low | m_valid_alpha_high); } - bool HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha); + bool HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha, bool width_match); void ResizeDrawn(const GSVector4i& rect); void UpdateDrawn(const GSVector4i& rect, bool can_resize = true); From aaca8b26daf1243d2f1eeefec7c1dd28d206dd8c Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Fri, 24 Jan 2025 16:06:13 +0000 Subject: [PATCH 18/28] GS: 
Add CRC hack for Guitar Hero 3 to handle crowds --- bin/resources/GameIndex.yaml | 6 ++++++ pcsx2/GS/Renderers/HW/GSHwHack.cpp | 17 +++++++++++++++++ pcsx2/GS/Renderers/HW/GSHwHack.h | 1 + 3 files changed, 24 insertions(+) diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index c2970377263fd..6a121872c0454 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -27095,6 +27095,7 @@ SLES-54962: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-54963: name: "Tony Hawk's Proving Ground" region: "PAL-E" @@ -27142,6 +27143,7 @@ SLES-54974: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-54975: name: "George Of The Jungle" region: "PAL-E" @@ -31234,6 +31236,7 @@ SLKA-25414: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLKA-25417: name: "Jin Samguk Mussang 4 - Empires" region: "NTSC-K" @@ -59358,6 +59361,7 @@ SLPS-25840: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25841: name: "テイルズ オブ デスティニー ディレクターズカット [プレミアムBOX]" name-sort: "ているず おぶ ですてぃにー でぃれくたーずかっと [ぷれみあむBOX]" @@ -59662,6 +59666,7 @@ SLPS-25890: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25891: name: "乃木坂春香の秘密 こすぷれ、はじめました♥ [限定版]" name-sort: "のぎざかはるかのひみつ こすぷれ はじめました [げんていばん]" @@ -70174,6 +70179,7 @@ SLUS-21672: autoFlush: 1 # Fixes bloom intensity. 
halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLUS-21673: name: "College Hoops 2K8" region: "NTSC-U" diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 19d57af186ca0..de3ab04ef307a 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -131,6 +131,22 @@ bool GSHwHack::GSC_SacredBlaze(GSRendererHW& r, int& skip) return true; } +bool GSHwHack::GSC_GuitarHero(GSRendererHW& r, int& skip) +{ + // Crowd sprite generation is a mess, better done in software. + if (skip == 0) + { + if (RTBW <= 4 && RTME && RFBW <= 4 && (r.m_context->TEX1.MMIN & 1) == 0) + { + r.ClearGSLocalMemory(r.m_context->offset.zb, r.m_r, 0); + r.SwPrimRender(r, RFBP != 0x2DC0, false); + skip = 1; + } + } + + return true; +} + bool GSHwHack::GSC_SFEX3(GSRendererHW& r, int& skip) { if (skip == 0) @@ -1467,6 +1483,7 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function CRC_F(GSC_Manhunt2), CRC_F(GSC_MidnightClub3), CRC_F(GSC_SacredBlaze), + CRC_F(GSC_GuitarHero), CRC_F(GSC_SakuraWarsSoLongMyLove), CRC_F(GSC_Simple2000Vol114), CRC_F(GSC_SFEX3), diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.h b/pcsx2/GS/Renderers/HW/GSHwHack.h index 2da65232a11fb..6553bc44d1094 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.h +++ b/pcsx2/GS/Renderers/HW/GSHwHack.h @@ -10,6 +10,7 @@ class GSHwHack static bool GSC_GiTS(GSRendererHW& r, int& skip); static bool GSC_Manhunt2(GSRendererHW& r, int& skip); static bool GSC_SacredBlaze(GSRendererHW& r, int& skip); + static bool GSC_GuitarHero(GSRendererHW& r, int& skip); static bool GSC_SFEX3(GSRendererHW& r, int& skip); static bool GSC_Tekken5(GSRendererHW& r, int& skip); static bool GSC_BurnoutGames(GSRendererHW& r, int& skip); From 6d66eb85353c0992ace5f3f2bdae5d628a1c8266 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Fri, 24 Jan 2025 17:44:47 +0000 Subject: [PATCH 19/28] GS/HW: Remove no 
longer required CRCs --- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 89 ------------------------------ pcsx2/GS/Renderers/HW/GSHwHack.h | 3 - 2 files changed, 92 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index de3ab04ef307a..e34bea9cc8066 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -35,52 +35,6 @@ static bool s_nativeres; // Partial level, broken on all renderers. //////////////////////////////////////////////////////////////////////////////// -bool GSHwHack::GSC_DeathByDegreesTekkenNinaWilliams(GSRendererHW& r, int& skip) -{ - // Note: Game also has issues with texture shuffle not supported on strange clamp mode. - // See https://forums.pcsx2.net/Thread-GSDX-Texture-Cache-Bug-Report-Death-By-Degrees-SLUS-20934-NTSC - if (skip == 0) - { - if (!s_nativeres && RTME && RFBP == 0 && RTBP0 == 0x34a0 && RTPSM == PSMCT32) - { - // Don't enable hack on native res if crc is below aggressive. - // Upscaling issue similar to Tekken 5. - skip = 1; // Animation pane - } -#if 0 - else if (RFBP == 0x3500 && RTPSM == PSMT8 && RFBMSK == 0xFFFF00FF) - { - // Needs to be further tested so put it on Aggressive for now, likely channel shuffle. - skip = 4; // Underwater white fog - } -#endif - } - else - { - if (!s_nativeres && RTME && (RFBP | RTBP0 | RFPSM | RTPSM) && RFBMSK == 0x00FFFFFF) - { - // Needs to be further tested so assume it's related with the upscaling hack. 
- skip = 1; // Animation speed - } - } - - return true; -} - -bool GSHwHack::GSC_GiTS(GSRendererHW& r, int& skip) -{ - if (skip == 0) - { - if (RTME && RFBP == 0x03000 && RFPSM == PSMCT32 && RTPSM == PSMT8) - { - // Channel effect not properly supported yet - skip = 9; - } - } - - return true; -} - // Channel effect not properly supported yet bool GSHwHack::GSC_Manhunt2(GSRendererHW& r, int& skip) { @@ -873,44 +827,6 @@ bool GSHwHack::GSC_MetalGearSolid3(GSRendererHW& r, int& skip) return true; } -bool GSHwHack::GSC_BigMuthaTruckers(GSRendererHW& r, int& skip) -{ - // Rendering pattern: - // CRTC frontbuffer at 0x0 is interlaced (half vertical resolution), - // game needs to do a depth effect (so green channel to alpha), - // but there is a vram limitation so green is pushed into the alpha channel of the CRCT buffer, - // vertical resolution is half so only half is processed at once - // We, however, don't have this limitation so we'll replace the draw with a full-screen TS. - - const GIFRegTEX0& Texture = RTEX0; - - GIFRegTEX0 Frame = {}; - Frame.TBW = RFRAME.FBW; - Frame.TBP0 = RFRAME.Block(); - const int frame_offset_pal = GSLocalMemory::GetEndBlockAddress(0xa00, 10, PSMCT32, GSVector4i(0, 0, 640, 256)) + 1; - const int frame_offset_ntsc = GSLocalMemory::GetEndBlockAddress(0xa00, 10, PSMCT32, GSVector4i(0, 0, 640, 224)) + 1; - const GSVector4i rect = GSVector4i(r.m_vt.m_min.p.x, r.m_vt.m_min.p.y, r.m_vt.m_max.p.x, r.m_vt.m_max.p.y); - - if (RPRIM->TME && Frame.TBW == 10 && Texture.TBW == 10 && Texture.PSM == PSMCT16 && ((rect.w == 512 && Frame.TBP0 == frame_offset_pal) || (Frame.TBP0 == frame_offset_ntsc && rect.w == 448))) - { - // 224 ntsc, 256 pal. 
- GL_INS("GSC_BigMuthaTruckers half bottom offset %d", r.m_context->XYOFFSET.OFX >> 4); - - const size_t count = r.m_vertex.next; - GSVertex* v = &r.m_vertex.buff[0]; - const u16 offset = (u16)rect.w * 16; - - for (size_t i = 0; i < count; i++) - v[i].XYZ.Y += offset; - - r.m_vt.m_min.p.y += rect.w; - r.m_vt.m_max.p.y += rect.w; - r.m_cached_ctx.FRAME.FBP = 0x50; // 0xA00 >> 5 - } - - return true; -} - bool GSHwHack::GSC_HitmanBloodMoney(GSRendererHW& r, int& skip) { // The game does a stupid thing where it backs up the last 2 pages of the framebuffer with shuffles, uploads a CT32 texture to it @@ -1500,7 +1416,6 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function CRC_F(GSC_HitmanBloodMoney), // Channel Effect - CRC_F(GSC_GiTS), CRC_F(GSC_SteambotChronicles), // Depth Issue @@ -1509,10 +1424,6 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function // Half Screen bottom issue CRC_F(GSC_Tekken5), - // Texture shuffle - CRC_F(GSC_DeathByDegreesTekkenNinaWilliams), // + Upscaling issues - CRC_F(GSC_BigMuthaTruckers), - // Upscaling hacks CRC_F(GSC_UltramanFightingEvolution), }; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.h b/pcsx2/GS/Renderers/HW/GSHwHack.h index 6553bc44d1094..b604c7561d737 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.h +++ b/pcsx2/GS/Renderers/HW/GSHwHack.h @@ -6,8 +6,6 @@ class GSHwHack { public: - static bool GSC_DeathByDegreesTekkenNinaWilliams(GSRendererHW& r, int& skip); - static bool GSC_GiTS(GSRendererHW& r, int& skip); static bool GSC_Manhunt2(GSRendererHW& r, int& skip); static bool GSC_SacredBlaze(GSRendererHW& r, int& skip); static bool GSC_GuitarHero(GSRendererHW& r, int& skip); @@ -30,7 +28,6 @@ class GSHwHack static bool GSC_NFSUndercover(GSRendererHW& r, int& skip); static bool GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip); static bool GSC_MetalGearSolid3(GSRendererHW& r, int& skip); - static bool GSC_BigMuthaTruckers(GSRendererHW& r, int& skip); static bool GSC_HitmanBloodMoney(GSRendererHW& r, int& skip); static 
bool OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); From 21bf2d2cffc70cca996b45b0f482e45a211b37b2 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Fri, 24 Jan 2025 22:23:27 +0000 Subject: [PATCH 20/28] GS/HW: Don't update TBP on targets + make target src's temporary --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 4 ++++ pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 137266f5f47e3..fbddb612aaaec 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -3493,6 +3493,7 @@ void GSRendererHW::Draw() } if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) { + FRAME_TEX0.TBP0 = rt->m_TEX0.TBP0; rt->m_TEX0 = FRAME_TEX0; } @@ -3501,7 +3502,10 @@ void GSRendererHW::Draw() if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) { if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { + ZBUF_TEX0.TBP0 = ds->m_TEX0.TBP0; ds->m_TEX0 = ZBUF_TEX0; + } } } else if (!m_texture_shuffle) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index bb2b60b3fb0e9..4f1f9ed4b3eb7 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2073,7 +2073,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // 2. Preserved data will be in the correct place (in most cases) // 3. Less deleting sources/targets // 4. 
We can basically do clears in hardware, if they aren't insane ones - if (can_use && !is_shuffle && ((preserve_alpha && preserve_rgb) || (draw_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y && !possible_clear)) && TEX0.TBW != t->m_TEX0.TBW && t->m_dirty.size() >= 1) + if (can_use && ((!is_shuffle && t->m_dirty.size() >= 1) || (is_shuffle && src && GSLocalMemory::m_psm[src->m_TEX0.PSM].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 16)) && ((preserve_alpha && preserve_rgb) || (draw_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y && !possible_clear)) && TEX0.TBW != t->m_TEX0.TBW) { can_use = false; } @@ -5321,6 +5321,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con sTex, sRectF, dTex, GSVector4(destX, destY, new_size.x, new_size.y), shader, false); } + m_temporary_source = src; + g_perfmon.Put(GSPerfMon::TextureCopies, 1); #ifdef PCSX2_DEVBUILD From a43cb2ba064f7b1dd55f4dc6d550b4f2e4ac58a7 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 25 Jan 2025 13:21:35 +0000 Subject: [PATCH 21/28] GameDB: Adjust fixes for games affected by RT in RT --- bin/resources/GameIndex.yaml | 228 +++++++++++++++++++++++------------ 1 file changed, 152 insertions(+), 76 deletions(-) diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index 6a121872c0454..445e0b99a4891 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -174,9 +174,8 @@ PAPX-90020: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" PAPX-90201: name: "ファンタビジョン [体験版]" name-sort: "ふぁんたびじょん [たいけんばん]" @@ -523,7 +522,7 @@ PAPX-90524: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. 
- cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. PBGP-0061: @@ -1248,26 +1247,31 @@ SCAJ-10011: region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10012: name: "Taiko Drum Master" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10013: name: "Taiko no Tatsujin - Tobikkiri! Anime Special" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10014: name: "Taiko no Tatsujin - Wai Wai Happy! Rokudaime" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10015: name: "Taiko no Tatsujin - Doka! to Oomori Nanadaime" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-20001: name: "Ratchet & Clank" region: "NTSC-Unk" @@ -1327,7 +1331,6 @@ SCAJ-20010: region: "NTSC-Unk" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SCAJ-20011: name: "Armored Core 3 - Silent Line" region: "NTSC-HK" @@ -1505,6 +1508,7 @@ SCAJ-20047: region: "NTSC-Unk" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCAJ-20048: name: "R:RACING EVOLUTION" name-sort: "Rれーしんぐえぼりゅーしょん" @@ -1554,6 +1558,7 @@ SCAJ-20060: region: "NTSC-Unk" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCAJ-20061: name: "Seven Samurai 20XX" region: "NTSC-Unk" @@ -1625,9 +1630,9 @@ SCAJ-20072: recommendedBlendingLevel: 4 # Improves banding and effect emulation. 
autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. SCAJ-20073: name: "Jak and Daxter II" region: "NTSC-Unk" @@ -1891,9 +1896,8 @@ SCAJ-20116: region: "NTSC-C-J" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCAJ-20117: name: "Fu-un Bakumatsu-den" region: "NTSC-Unk" @@ -2367,7 +2371,7 @@ SCAJ-20183: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SCAJ-20184: @@ -3564,6 +3568,7 @@ SCED-52137: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCED-52141: name: "WRC 3 [Demo]" region: "PAL-E" @@ -3575,6 +3580,7 @@ SCED-52141: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCED-52147: name: "EyeToy - Christmas Wishi Washi" region: "PAL-E" @@ -4552,8 +4558,7 @@ SCES-50000: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. 
gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -5276,6 +5281,7 @@ SCES-51684: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. patches: 80802EA9: content: |- @@ -5318,6 +5324,7 @@ SCES-51844: region: "PAL-M5" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCES-51895: name: "EyeToy - Groove" region: "PAL-M11" @@ -5412,6 +5419,7 @@ SCES-52137: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCES-52154: name: "EyeToy - Chat" region: "PAL-M11" @@ -5619,9 +5627,8 @@ SCES-52586: compat: 5 gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCES-52596: name: "This is Football 2005" region: "PAL-Unk" @@ -5641,6 +5648,7 @@ SCES-52684: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCES-52748: name: "EyeToy - Play 2" region: "PAL-M12" @@ -5719,18 +5727,16 @@ SCES-53053: region: "PAL-F-I" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. 
+ textureInsideRT: 1 # Fixes post shuffles. SCES-53054: name: "Death by Degrees" region: "PAL-E-G" compat: 5 gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCES-53055: name: "Eyetoy - Antigrav" region: "PAL-M5" @@ -6974,6 +6980,7 @@ SCKA-20015: compat: 5 gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCKA-20016: name: "SoulCalibur II" region: "NTSC-K" @@ -7047,9 +7054,9 @@ SCKA-20027: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. SCKA-20028: name: "Ico [PlayStation2 Big Hit Series]" region: "NTSC-K" @@ -7101,6 +7108,7 @@ SCKA-20034: region: "NTSC-K" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCKA-20035: name: "Hot Shots Golf 3 [PlayStation 2 Big Hit Series]" region: "NTSC-K" @@ -7131,9 +7139,8 @@ SCKA-20039: region: "NTSC-K" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCKA-20040: name: "Jak 3" region: "NTSC-K" @@ -8617,9 +8624,8 @@ SCPS-15064: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. 
estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" patches: A5768F53: content: |- @@ -9111,7 +9117,7 @@ SCPS-15118: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SCPS-15119: @@ -9812,7 +9818,7 @@ SCPS-19333: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SCPS-19335: @@ -13288,6 +13294,9 @@ SLED-52736: SLED-52851: name: "TOCA Race Driver 2" region: "PAL-Unk" + gsHWFixes: + alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLED-52852: name: "Forgotten Realms - Demon Stone [Demo]" region: "PAL-E" @@ -13374,6 +13383,10 @@ SLED-53109: SLED-53137: name: "Stolen" region: "PAL-Unk" + gsHWFixes: + halfPixelOffset: 2 # Fixes misaligned bloom effects. + texturePreloading: 1 # Performs much better with partial preload. + textureInsideRT: 1 # Fixes post effects. SLED-53198: name: "Rugby 2005" region: "PAL-Unk" @@ -13531,6 +13544,7 @@ SLED-53723: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. 
SLED-53731: name: "Battlefield 2 - Modern Combat [Demo]" region: "PAL-E" @@ -15829,8 +15843,6 @@ SLES-50876: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -17008,7 +17020,6 @@ SLES-51355: compat: 5 gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLES-51356: name: "Road Trip Adventure" region: "PAL-M3" @@ -18485,6 +18496,8 @@ SLES-51997: compat: 5 clampModes: eeClampMode: 3 # For grey screen ingame. + gsHWFixes: + textureInsideRT: 1 # Fixes post effects on player 2. SLES-51998: name: "Kao the Kangaroo - Round 2" region: "PAL-M5" @@ -18696,7 +18709,7 @@ SLES-52101: name: "Wrath Unleashed" region: "PAL-M5" gsHWFixes: - textureInsideRT: 1 # Fixes flashing and some models still very broken in hardware mode. + textureInsideRT: 1 # Fixes colors. SLES-52102: name: "Hugo Bukkazoom!" region: "PAL-M12" @@ -18759,6 +18772,7 @@ SLES-52132: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52133: @@ -18767,6 +18781,7 @@ SLES-52133: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52134: @@ -18776,6 +18791,7 @@ SLES-52134: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. 
roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52135: @@ -18784,6 +18800,7 @@ SLES-52135: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52136: @@ -18792,6 +18809,7 @@ SLES-52136: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52143: @@ -18828,8 +18846,6 @@ SLES-52153: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -19169,6 +19185,8 @@ SLES-52322: texturePreloading: 1 # Performs better with partial preload because it is slow on locations outside gameplay foremost. mergeSprite: 1 # Fixes misaligned white lines. PCRTCOverscan: 1 # Fixes missing HUD. + textureInsideRT: 1 # Fixes post shuffle effect. + halfPixelOffset: 2 # Fixes boxes around shuffle effect pages. SLES-52323: name: "Richard Burns Rally" region: "PAL-M5" @@ -20044,6 +20062,7 @@ SLES-52637: region: "PAL-M5" gsHWFixes: alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLES-52638: name: "DTM Race Driver 2" region: "PAL-M5" @@ -20760,9 +20779,9 @@ SLES-52882: region: "PAL-M5" compat: 4 gsHWFixes: - disablePartialInvalidation: 1 # Improves performance. halfPixelOffset: 2 # Fixes misaligned bloom effects. texturePreloading: 1 # Performs much better with partial preload. 
+ textureInsideRT: 1 # Fixes post effects. SLES-52884: name: "Duel Masters" region: "PAL-M5" @@ -20940,6 +20959,8 @@ SLES-52942: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: EBE1972D: @@ -21204,9 +21225,9 @@ SLES-53020: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. patches: BF6F101F: content: |- @@ -23344,6 +23365,7 @@ SLES-53703: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLES-53704: name: "Peter Jackson's King Kong - The Official Game of the Movie" name-sort: "King Kong, Peter Jackson's - The Official Game of the Movie" @@ -23353,6 +23375,7 @@ SLES-53704: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLES-53705: name: "Peter Jackson's King Kong - The Official Game of the Movie" name-sort: "King Kong, Peter Jackson's - The Official Game of the Movie" @@ -23362,6 +23385,7 @@ SLES-53705: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. 
SLES-53706: name: "The Chronicles of Narnia - The Lion, the Witch and the Wardrobe" name-sort: "Chronicles of Narnia, The - The Lion, the Witch and the Wardrobe" @@ -23415,6 +23439,8 @@ SLES-53717: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: 208183AF: @@ -25722,12 +25748,18 @@ SLES-54510: SLES-54511: name: "UEFA Champions League 2006-2007" region: "PAL-E" + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. SLES-54512: name: "UEFA Champions League 2006-2007" region: "PAL-F-G" + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. SLES-54513: name: "UEFA Champions League 2006-2007" region: "PAL-I-S" + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. SLES-54516: name: "Thrillville" region: "PAL-F-G" @@ -26662,6 +26694,7 @@ SLES-54819: gsHWFixes: getSkipCount: "GSC_Manhunt2" autoFlush: 1 # Fixes missing lights and light intensity. + textureInsideRT: 1 # Fixes post lighting. SLES-54820: name: "Stuntman - Ignition" region: "PAL-M5" @@ -27127,7 +27160,7 @@ SLES-54972: region: "PAL-M3" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SLES-54973: @@ -27564,6 +27597,8 @@ SLES-55109: region: "PAL-E" roundModes: vu0RoundMode: 0 # Fixes invisible wall collision in bedroom. + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. SLES-55110: name: "Odin Sphere" region: "PAL-M5" @@ -27771,6 +27806,7 @@ SLES-55191: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. 
+ getSkipCount: "GSC_GuitarHero" SLES-55192: name: "Steam Express" region: "PAL-M5" @@ -27832,6 +27868,7 @@ SLES-55200: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-55201: name: "Riding Star" region: "PAL-M4" @@ -30204,8 +30241,6 @@ SLKA-25196: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -30330,6 +30365,7 @@ SLKA-25218: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLKA-25219: @@ -30876,6 +30912,7 @@ SLKA-25337: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLKA-25338: name: "The Godfather" name-sort: "Godfather, The" @@ -31292,6 +31329,7 @@ SLKA-25434: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLKA-25435: name: "Sengoku Basara X" region: "NTSC-J-K" @@ -33008,8 +33046,7 @@ SLPM-60109: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. 
gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -33513,6 +33550,7 @@ SLPM-60217: region: "NTSC-J" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLPM-60218: name: "GUNGRAVE O.D. [体験版]" name-sort: "がんぐれいぶ おーでぃー [たいけんばん]" @@ -33685,9 +33723,8 @@ SLPM-60257: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SLPM-60258: name: "THE TYPING OF THE DEAD ZOMBIE PANIC [体験版]" name-sort: "ざ たいぴんぐおぶ ざ でっど ぞんび ぱにっく [たいけんばん]" @@ -34241,8 +34278,6 @@ SLPM-61092: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -36528,7 +36563,6 @@ SLPM-62378: region: "NTSC-J" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLPM-62379: name: "カラオケレボリューション J-POPベストVol.2" name-sort: "からおけれぼりゅーしょん J-POPべすとVol.2" @@ -39153,6 +39187,8 @@ SLPM-65073: name-sort: "げんそうすいこでん3 [しょかいせいさんぶん:とくしゅしよう]" name-en: "Gensou Suikoden III [Limited Edition]" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: # This looks like a mess because it includes all serials for Suikoden 3, Suikoden 2, Suikogaiden 1, and Suikogaiden 2. A lot of these probably aren't actually required but it's not really hurting anything to have them here. 
- "SLPM-65073" - "SLPM-65074" @@ -39172,6 +39208,8 @@ SLPM-65074: name-sort: "げんそうすいこでん3" name-en: "Gensou Suikoden III" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: - "SLPM-65073" - "SLPM-65074" @@ -40087,7 +40125,6 @@ SLPM-65234: region: "NTSC-J" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLPM-65235: name: "ニュールーマニア ポロリ青春" name-sort: "にゅーるーまにあ ぽろりせいしゅん" @@ -40508,6 +40545,8 @@ SLPM-65305: name-sort: "げんそうすいこでん3 [KONAMI THE BEST]" name-en: "Gensou Suikoden 3" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: - "SLPM-65073" - "SLPM-65074" @@ -42051,6 +42090,7 @@ SLPM-65583: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SLPM-65584: name: "真・三國無双3 猛将伝" name-sort: "しんさんごくむそう3 もうしょうでん" @@ -42673,6 +42713,8 @@ SLPM-65694: name-sort: "げんそうすいこでん3 [こなみでんどうせれくしょん]" name-en: "Gensou Suikoden 3 [KONAMI Dendou Selection]" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: - "SLPM-65073" - "SLPM-65074" @@ -42945,8 +42987,6 @@ SLPM-65741: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -45083,6 +45123,7 @@ SLPM-66103: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SLPM-66104: name: "ぷよぷよフィーバー2 [チュー!]" name-sort: "ぷよぷよふぃーばー2" @@ -45790,6 +45831,7 @@ SLPM-66211: autoFlush: 1 # Corrects vignette to match software. 
estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLPM-66212: name: "SEGA RALLY 2006" name-sort: "せが らりー 2006" @@ -47629,6 +47671,7 @@ SLPM-66498: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLPM-66499: name: "神様家族 応援願望" name-sort: "かみさまかぞく おうえんがんぼう" @@ -51002,6 +51045,8 @@ SLPM-68505: name-sort: "げんそうすいこでん3" name-en: "Gensou Suikoden III" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. SLPM-68509: name: "頭文字D Special Stage [講談社懸賞品]" name-sort: "いにしゃるD すぺしゃる すてーじ [こうだんしゃけんしょうひん]" @@ -51886,8 +51931,7 @@ SLPS-20001: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -53710,6 +53754,7 @@ SLPS-20382: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20383: name: "太鼓の達人 あつまれ!祭りだ!!四代目" name-sort: "たいこのたつじん あつまれ!まつりだ!!よんだいめ" @@ -53717,6 +53762,7 @@ SLPS-20383: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20384: name: "流行り神 警視庁怪異事件ファイル [初回限定版]" name-sort: "はやりがみ けいしちょうかいいじけんふぁいる [しょかいげんていばん]" @@ -53784,6 +53830,7 @@ SLPS-20399: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20400: name: "太鼓の達人 ゴー!ゴー!五代目 [ソフト単体]" name-sort: "たいこのたつじん ごー!ごー!ごだいめ [そふとたんたい]" @@ -53791,6 +53838,7 @@ SLPS-20400: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. 
SLPS-20401: name: "テクモ ヒットパレード" name-sort: "てくも ひっとぱれーど" @@ -53862,6 +53910,7 @@ SLPS-20413: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20414: name: "太鼓の達人 TAIKO DRUM MASTER" name-sort: "たいこのたつじん TAIKO DRUM MASTER" @@ -53870,6 +53919,7 @@ SLPS-20414: compat: 5 gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20416: name: "陰陽大戦記 白虎演舞 [“EyeToy”カメラ同梱版]" name-sort: "おんみょうたいせんき びゃっこえんぶ [あいとーいかめらどうこんばん]" @@ -53921,6 +53971,7 @@ SLPS-20424: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20425: name: "太鼓の達人 とびっきり!アニメスペシャル [ソフト単体]" name-sort: "たいこのたつじん とびっきり!あにめすぺしゃる [そふとたんたい]" @@ -53928,6 +53979,7 @@ SLPS-20425: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20426: name: "マダガスカル" name-sort: "まだかすかる" @@ -54037,6 +54089,7 @@ SLPS-20450: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20451: name: "太鼓の達人 わいわいハッピー!六代目" name-sort: "たいこのたつじん わいわいはっぴー!ろくだいめ" @@ -54044,6 +54097,7 @@ SLPS-20451: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20452: name: "SIMPLE2000シリーズ Ultimate Vol.30 降臨!族車ゴッド~仏恥義理★愛羅武勇~" name-sort: "しんぷる2000しりーず あるてぃめっと Vol.30 こうりん!ぞくしゃごっど ぶっちぎりあいらぶゆう" @@ -54241,6 +54295,7 @@ SLPS-20485: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20486: name: "太鼓の達人 ドカッ!と大盛り七代目 [ソフト単体]" name-sort: "たいこのたつじん どかっ!とおおもりななだいめ [そふとたんたい]" @@ -54248,6 +54303,7 @@ SLPS-20486: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20487: name: "パチスロキング! 科学忍者隊ガッチャマン" name-sort: "ぱちすろきんぐ! 
かがくにんじゃたいがっちゃまん" @@ -56037,6 +56093,7 @@ SLPS-25289: region: "NTSC-J" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLPS-25290: name: "タイムクライシス3 [ソフト単体]" name-sort: "たいむくらいしす3 [そふとたんたい]" @@ -56044,6 +56101,7 @@ SLPS-25290: region: "NTSC-J" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLPS-25291: name: "Baldur's Gate - DARK ALLIANCE - [PCCW Japan The BEST]" name-sort: "ばるだーずげーと だーくあらいあんす [PCCW Japan The BEST]" @@ -56766,6 +56824,7 @@ SLPS-25406: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLPS-25407: @@ -56893,9 +56952,8 @@ SLPS-25422: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SLPS-25423: name: "怪盗アプリコット 完全版 [限定版]" name-sort: "かいとうあぷりこっと かんぜんばん [げんていばん]" @@ -57668,6 +57726,7 @@ SLPS-25563: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLPS-25564: @@ -59618,6 +59677,7 @@ SLPS-25886: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25887: name: "スーパーロボット大戦Z" name-sort: "すーぱーろぼっとたいせんZ" @@ -59654,6 +59714,7 @@ SLPS-25889: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. 
nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25890: name: "ギターヒーロー3 レジェンドオブロック [ソフト単体]" name-sort: "ぎたーひーろー3 れじぇんどおぶろっく [そふとたんたい]" @@ -60239,8 +60300,7 @@ SLPS-71502: name-en: "Ridge Racer V [MEGA HITS!]" region: "NTSC-J" gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -61158,8 +61218,7 @@ SLUS-20002: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -62340,7 +62399,6 @@ SLUS-20291: compat: 5 gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLUS-20292: name: "Tsugunai - Atonement" region: "NTSC-U" @@ -62816,6 +62874,8 @@ SLUS-20387: name: "Suikoden III" region: "NTSC-U" compat: 5 + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: # Allows import of Suikoden II clear data. - "SLUS-20387" - "SLUS-00958" @@ -63062,6 +63122,8 @@ SLUS-20433: compat: 5 clampModes: eeClampMode: 3 # For grey screen ingame. + gsHWFixes: + textureInsideRT: 1 # Fixes post effects on player 2. SLUS-20434: name: "Myst III - Exile" region: "NTSC-U" @@ -63882,8 +63944,6 @@ SLUS-20587: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. 
@@ -63980,7 +64040,6 @@ SLUS-20605: region: "NTSC-U" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLUS-20606: name: "Bounty Hunter - Seek & Destroy" region: "NTSC-U" @@ -64170,6 +64229,7 @@ SLUS-20645: compat: 5 gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLUS-20646: name: "Mark Davis Pro Bass Challenge" region: "NTSC-U" @@ -64638,6 +64698,8 @@ SLUS-20732: texturePreloading: 1 # Performs better with partial preload because it is slow on locations outside gameplay foremost. mergeSprite: 1 # Fixes misaligned white lines. PCRTCOverscan: 1 # Fixes missing HUD. + textureInsideRT: 1 # Fixes post shuffle effect. + halfPixelOffset: 2 # Fixes boxes around shuffle effect pages. SLUS-20733: name: "Castlevania - Lament of Innocence" region: "NTSC-U" @@ -65181,7 +65243,7 @@ SLUS-20840: region: "NTSC-U" compat: 5 gsHWFixes: - textureInsideRT: 1 # Fixes flashing and some models still very broken in hardware mode. + textureInsideRT: 1 # Fixes colors. SLUS-20841: name: "NFL Street" region: "NTSC-U" @@ -65435,6 +65497,7 @@ SLUS-20882: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLUS-20883: @@ -65762,9 +65825,8 @@ SLUS-20934: compat: 5 gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SLUS-20935: name: "IHRA Professional Drag Racing 2005" region: "NTSC-U" @@ -66198,9 +66260,9 @@ SLUS-21006: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. 
estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. patches: default: content: |- @@ -66365,6 +66427,8 @@ SLUS-21029: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: 0DD3417A: @@ -66439,6 +66503,7 @@ SLUS-21039: compat: 5 gsHWFixes: alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLUS-21040: name: "The Shield" name-sort: "Shield, The" @@ -66748,9 +66813,9 @@ SLUS-21099: region: "NTSC-U" compat: 4 gsHWFixes: - disablePartialInvalidation: 1 # Improves performance. halfPixelOffset: 2 # Fixes misaligned bloom effects. texturePreloading: 1 # Performs much better with partial preload. + textureInsideRT: 1 # Fixes post effects. SLUS-21100: name: "NCAA March Madness 2005" region: "NTSC-U" @@ -68149,6 +68214,7 @@ SLUS-21311: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLUS-21312: name: "Wallace & Gromit - The Curse of the Were-Rabbit" region: "NTSC-U" @@ -68433,6 +68499,8 @@ SLUS-21355: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: 60A42FF5: @@ -69634,6 +69702,8 @@ SLUS-21581: name: "UEFA Champions League 2006-2007" region: "NTSC-U" compat: 5 + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. 
SLUS-21582: name: "MVP '07 - NCAA Baseball" region: "NTSC-U" @@ -69831,6 +69901,7 @@ SLUS-21613: gsHWFixes: getSkipCount: "GSC_Manhunt2" autoFlush: 1 # Fixes missing lights and light intensity. + textureInsideRT: 1 # Fixes post lighting. SLUS-21614: name: "Star Wars - The Force Unleashed" region: "NTSC-U" @@ -69854,7 +69925,7 @@ SLUS-21615: compat: 5 gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SLUS-21616: @@ -70403,6 +70474,8 @@ SLUS-21716: compat: 5 roundModes: vu0RoundMode: 0 # Fixes invisible wall collision in bedroom. + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. SLUS-21717: name: "Dora the Explorer - Dora Saves the Mermaids" region: "NTSC-U" @@ -70550,6 +70623,7 @@ SLUS-21740: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLUS-21741: name: "Sea Monsters - A Prehistoric Adventure" region: "NTSC-U" @@ -72117,6 +72191,8 @@ SLUS-29087: eeClampMode: 3 # Characters are visible in-game. gsHWFixes: texturePreloading: 1 # Performs better with partial preload because it is slow on locations outside gameplay foremost. + textureInsideRT: 1 # Fixes post shuffle effect. + halfPixelOffset: 2 # Fixes boxes around shuffle effect pages. SLUS-29088: name: "Champions of Norrath - Realms of EverQuest [Demo]" region: "NTSC-U" @@ -72231,9 +72307,9 @@ SLUS-29123: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. 
nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. SLUS-29124: name: "Fight Club [Demo]" region: "NTSC-U" From 34a0d50c7b2af73fbec72a98f5b98250fbab6a2c Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 25 Jan 2025 20:34:06 +0000 Subject: [PATCH 22/28] GS/HW: Check all overlapping pages when clearing sources --- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 46 +++++++++++++----------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 4f1f9ed4b3eb7..2c2d0d78f903d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -3602,38 +3602,42 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (!target) { + const int pages = (end_bp + ((1<<5)-1) - start_bp) >> 5; // Remove Source that have same BP as the render target (color&dss) - // rendering will dirty the copy - auto& list = m_src.m_map[bp >> 5]; - for (auto i = list.begin(); i != list.end();) - { - Source* s = *i; - ++i; - - if ((GSUtil::HasSharedBits(psm, s->m_TEX0.PSM) && (bp >= start_bp && bp < end_bp)) || - (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) - { - m_src.RemoveAt(s); - } - } - - u32 bbp = bp + bw * 0x10; - if (bw >= 16 && bbp < 16384) + /// rendering will dirty the copy + for (int pgs = 0; pgs < pages; pgs++) { - // Detect half of the render target (fix snow engine game) - // Target Page (8KB) have always a width of 64 pixels - // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - auto& list = m_src.m_map[bbp >> 5]; + auto& list = m_src.m_map[((bp >> 5) + pgs) & 0x1ff]; for (auto i = list.begin(); i != list.end();) { Source* s = *i; ++i; - if (GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) + if ((GSUtil::HasSharedBits(psm, s->m_TEX0.PSM) && (end_bp > s->m_TEX0.TBP0 && start_bp < 
s->UnwrappedEndBlock()) && !s->m_target) || + (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) { m_src.RemoveAt(s); } } + + u32 bbp = bp + bw * 0x10; + if (bw >= 16 && bbp < 16384) + { + // Detect half of the render target (fix snow engine game) + // Target Page (8KB) have always a width of 64 pixels + // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 + auto& list = m_src.m_map[bbp >> 5]; + for (auto i = list.begin(); i != list.end();) + { + Source* s = *i; + ++i; + + if (GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) + { + m_src.RemoveAt(s); + } + } + } } } From 25338fb4f29631ee60608757023a0fc3a4636336 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 25 Jan 2025 20:35:32 +0000 Subject: [PATCH 23/28] GS/HW: Predict valid sizes based on repeated draws and scissor - this should be okay/limited to certain situations like Battlefield 2. Scissor isn't 100% guaranteed to be right, but it's probably better than nothing. --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index fbddb612aaaec..953d26ca38d69 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -950,6 +950,12 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // e.g. Burnout 3, God of War II, etc. int height = std::min(m_context->scissor.in.w, m_r.w); + // We can check if the next draw is doing the same from the next page, and assume it's a per line clear. + // Battlefield 2 does this. 
+ int pages = ((GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1) - m_cached_ctx.FRAME.Block()) >> 5; + if (m_cached_ctx.FRAME.FBW > 1 && m_r.height() <= 64 && (pages % m_cached_ctx.FRAME.FBW) == 0 && m_env.CTXT[m_backed_up_ctx].FRAME.FBP == (m_cached_ctx.FRAME.FBP + pages) && NextDrawMatchesShuffle()) + height = std::max(m_context->scissor.in.w, height); + // If the draw is less than a page high, FBW=0 is the same as FBW=1. const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; int width = std::min(std::max(m_cached_ctx.FRAME.FBW, 1) * 64, m_context->scissor.in.z); From b372cd9654f70d7399303224f0c15b319a484364 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 25 Jan 2025 20:36:32 +0000 Subject: [PATCH 24/28] GameDB-GS/HW: Remove Battlefield 2 CRC hacks, add Tex Inside RT instead --- bin/resources/GameIndex.yaml | 46 +++++++++++++----------------- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 39 ------------------------- pcsx2/GS/Renderers/HW/GSHwHack.h | 2 -- 3 files changed, 20 insertions(+), 67 deletions(-) diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index 445e0b99a4891..4d0118972bce4 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -3870,9 +3870,13 @@ SCED-52899: SCED-52932: name: "Bonus Demo 8" region: "PAL-M5" + gsHWFixes: + textureInsideRT: 1 # Fixes shadows in TOCA Race Driver 2. SCED-52933: name: "Bonus Demo 8" region: "PAL-M5" + gsHWFixes: + textureInsideRT: 1 # Fixes shadows in TOCA Race Driver 2. SCED-52935: name: "SingStar Party [Demo]" region: "PAL-E" @@ -3946,6 +3950,8 @@ SCED-52997: SCED-53018: name: "Bonus Demo 8 (Geu)" region: "PAL-G" + gsHWFixes: + textureInsideRT: 1 # Fixes shadows in TOCA Race Driver 2. SCED-53043: name: "Magazine Ufficiale PlayStation 2 Speciale Platinum Italia" region: "PAL-I" @@ -13553,8 +13559,7 @@ SLED-53731: autoFlush: 1 # Post-processing. 
halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLED-53732: name: "Spartan - Total Warrior [Demo]" region: "PAL" @@ -23492,8 +23497,7 @@ SLES-53729: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLES-53730: name: "Battlefield 2 - Modern Combat" region: "PAL-M3" @@ -23502,8 +23506,7 @@ SLES-53730: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLES-53734: name: "50 Cent - Bulletproof" region: "PAL-E" @@ -30871,13 +30874,11 @@ SLKA-25330: name: "Battlefield 2 - Modern Combat" region: "NTSC-K" gsHWFixes: - autoFlush: 2 # Post-processing. + minimumBlendingLevel: 4 # Fixes ground texture rendering. + autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. - texturePreloading: 1 # Spikes all over the place otherwise. - textureInsideRT: 1 # Fixes light shinging through objects. - cpuCLUTRender: 1 # Fixes light shining through objects. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + texturePreloading: 1 # Improves performance. + textureInsideRT: 1 # Fixes per line drawing. 
SLKA-25331: name: "Marc Ecko's Getting Up - Contents Under Pressure" region: "NTSC-K" @@ -31742,8 +31743,7 @@ SLPM-55034: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLPM-55035: name: "ファイトナイト ラウンド2 [EA:SY! 1980]" name-sort: "ふぁいとないと らうんど2 [EA:SY! 1980]" @@ -45797,8 +45797,7 @@ SLPM-66206: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLPM-66207: name: "どろろ [SEGA THE BEST 2800]" name-sort: "どろろ [SEGA THE BEST 2800]" @@ -48574,8 +48573,7 @@ SLPM-66651: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLPM-66652: name: "バーンアウト リベンジ [EA BEST HITS]" name-sort: "ばーんあうと りべんじ [EA BEST HITS]" @@ -66406,8 +66404,7 @@ SLUS-21026: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-21027: name: "The Lord of the Rings - The Third Age" name-sort: "Lord of the Rings, The - The Third Age" @@ -72291,8 +72288,7 @@ SLUS-29117: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. 
texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-29118: name: "Need for Speed - Underground 2 [Demo]" region: "NTSC-U" @@ -72444,8 +72440,7 @@ SLUS-29152: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-29153: name: "Burnout Revenge [Demo]" region: "NTSC-U" @@ -72560,8 +72555,7 @@ SLUS-29172: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. 
SLUS-29173: name: "The Sims 2 [Demo]" name-sort: "Sims 2, The [Demo]" diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index e34bea9cc8066..c705d1b709a4e 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1078,43 +1078,6 @@ bool GSHwHack::OI_BurnoutGames(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GS return false; } -bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, int& skip) -{ - if (skip == 0) - { - if (RZBP >= RFBP && RFBP >= 0x2000 && RZBP >= 0x2700 && ((RZBP - RFBP) == 0x700)) - { - skip = 7; - - GIFRegTEX0 TEX0 = {}; - TEX0.TBP0 = RFBP; - TEX0.TBW = 8; - GSTextureCache::Target* dst = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil); - if (dst) - { - g_gs_device->ClearDepth(dst->m_texture, 0.0f); - } - } - } - - return true; -} - -bool GSHwHack::OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - if (!RPRIM->TME || RFRAME.Block() > 0xD00 || RTEX0.TBP0 > 0x1D00) - return true; - - if (rt && t && RFRAME.Block() == 0 && RTEX0.TBP0 == 0x1000) - { - const GSVector4i rc(0, 0, std::min(rt->GetWidth(), t->m_texture->GetWidth()), std::min(rt->GetHeight(), t->m_texture->GetHeight())); - g_gs_device->CopyRect(t->m_texture, rt, rc, 0, 0); - } - - g_texture_cache->InvalidateTemporarySource(); - return false; -} - bool GSHwHack::OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { // Haunting Ground clears two targets by doing a direct colour write at 0x3000, covering a target at 0x3380. 
@@ -1409,7 +1372,6 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function CRC_F(GSC_ZettaiZetsumeiToshi2), CRC_F(GSC_BlackAndBurnoutSky), CRC_F(GSC_BlueTongueGames), - CRC_F(GSC_Battlefield2), CRC_F(GSC_NFSUndercover), CRC_F(GSC_PolyphonyDigitalGames), CRC_F(GSC_MetalGearSolid3), @@ -1435,7 +1397,6 @@ const GSHwHack::Entry GSHwHack::s_before_draw_functions[] CRC_F(OI_SonicUnleashed), CRC_F(OI_ArTonelico2), CRC_F(OI_BurnoutGames), - CRC_F(OI_Battlefield2), CRC_F(OI_HauntingGround), }; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.h b/pcsx2/GS/Renderers/HW/GSHwHack.h index b604c7561d737..f32ecbc7efccb 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.h +++ b/pcsx2/GS/Renderers/HW/GSHwHack.h @@ -24,7 +24,6 @@ class GSHwHack static bool GSC_UrbanReign(GSRendererHW& r, int& skip); static bool GSC_SteambotChronicles(GSRendererHW& r, int& skip); static bool GSC_BlueTongueGames(GSRendererHW& r, int& skip); - static bool GSC_Battlefield2(GSRendererHW& r, int& skip); static bool GSC_NFSUndercover(GSRendererHW& r, int& skip); static bool GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip); static bool GSC_MetalGearSolid3(GSRendererHW& r, int& skip); @@ -36,7 +35,6 @@ class GSHwHack static bool OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_ArTonelico2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_BurnoutGames(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - static bool OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool MV_Growlanser(GSRendererHW& r); From 07c87e36daae41f2431f41b8dc347fd8bc786045 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sun, 26 Jan 2025 16:35:49 +0000 Subject: [PATCH 25/28] GS/HW: Allow offsetting into a target if fully contained.
--- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 25 +++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 2c2d0d78f903d..e82fc972fb65f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1189,7 +1189,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const const u32* const clut = g_gs_renderer->m_mem.m_clut; GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr; - const SourceRegion region = SourceRegion::Create(TEX0, CLAMP); + SourceRegion region = SourceRegion::Create(TEX0, CLAMP); // Prevent everything going to rubbish if a game somehow sends a TW/TH above 10, and region isn't being used. if ((TEX0.TW > 10 && !region.HasX()) || (TEX0.TH > 10 && !region.HasY())) @@ -1817,6 +1817,29 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const else continue; } + // Else read it back, might be our only choice. Ridge Racer writes to the right side of 0x1a40 for headlights, then tries to access it with the base of 0x9a0 + // naturally, it misses here. But let's make sure the formats match well enough. 
+ else if (bw == t->m_TEX0.TBW && GSLocalMemory::m_psm[psm].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && t->Inside(bp, bw, psm, r)) + { + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, true)) + continue; + + GIFRegCLAMP fake_CLAMP; + fake_CLAMP.WMS = CLAMP_REGION_CLAMP; + fake_CLAMP.WMT = CLAMP_REGION_CLAMP; + fake_CLAMP.MINU = 0; + fake_CLAMP.MINV = 0; + fake_CLAMP.MAXV = std::min(static_cast(1u << TEX0.TH), 1022u); + fake_CLAMP.MAXU = std::min(static_cast(1u << TEX0.TW), 1022u); + region = SourceRegion::Create(TEX0, fake_CLAMP); + + const GSVector4i custom_offset_rect = TranslateAlignedRectByPage(t, bp, psm, bw, block_boundary_rect); + x_offset = custom_offset_rect.x; + y_offset = custom_offset_rect.y; + dst = t; + tex_merge_rt = false; + found_t = true; + } } } } From 14a9a5f7b148736f18d84d11fa02c69142c58865 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 30 Jan 2025 18:53:09 +0000 Subject: [PATCH 26/28] GS/HW: Intercept excessively large clears --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 953d26ca38d69..7438f9fc5822b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2988,8 +2988,8 @@ void GSRendererHW::Draw() // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. 
- const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; - const bool is_clear = is_possible_mem_clear && is_square; + const bool is_large_rect = (t_size.y >= t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; + const bool is_clear = is_possible_mem_clear && is_large_rect; // Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. @@ -2998,7 +2998,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, ds, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); - + // Draw skipped because it was a clear and there was no target. if (!rt) { @@ -3664,6 +3664,7 @@ void GSRendererHW::Draw() // Limit to 2x the vertical height of the resolution (for double buffering) rt->UpdateValidity(update_rect, !frame_masked && (rt_update || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); rt->UpdateDrawn(update_rect, !frame_masked && (rt_update || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. 
// Grandia Xtreme, Onimusha Warlord. @@ -3712,7 +3713,7 @@ void GSRendererHW::Draw() const bool z_masked = m_cached_ctx.ZBUF.ZMSK; ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); - ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); + ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); if (!new_rect && new_height && old_end_block != ds->m_end_block) { From 5ecac0d5ce07743a53234f90a8ab1ceffc4f3546 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 30 Jan 2025 18:53:29 +0000 Subject: [PATCH 27/28] GS/HW: Don't allow Tex in RT if not contained --- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index e82fc972fb65f..dbb37cac65575 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1714,6 +1714,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; + if (!t->Inside(bp, bw, psm, block_boundary_rect)) + continue; + x_offset = rect.x; y_offset = rect.y; dst = t; From 031f7241b8b035850312a95e5c23a0bff84e6d24 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 30 Jan 2025 22:54:13 +0000 Subject: [PATCH 28/28] GS: Code cleanup at the behest of Const-Man --- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 2 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 38 +++++++++++------------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 8 ++--- pcsx2/ShaderCacheVersion.h | 2 +- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index c705d1b709a4e..6a7f92ad272df 100644 --- 
a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -872,7 +872,7 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds && r.m_cached_ctx.FRAME.FBMSK == 0 // No frame buffer masking. ) { - int mask = (r.m_vt.m_max.p.xyxy() == r.m_vt.m_min.p.xyxy()).mask(); + const int mask = (r.m_vt.m_max.p.xyxy() == r.m_vt.m_min.p.xyxy()).mask(); if (mask == 0xf) return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 7438f9fc5822b..9be2db4415505 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -952,7 +952,7 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // We can check if the next draw is doing the same from the next page, and assume it's a per line clear. // Battlefield 2 does this. - int pages = ((GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1) - m_cached_ctx.FRAME.Block()) >> 5; + const int pages = ((GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1) - m_cached_ctx.FRAME.Block()) >> 5; if (m_cached_ctx.FRAME.FBW > 1 && m_r.height() <= 64 && (pages % m_cached_ctx.FRAME.FBW) == 0 && m_env.CTXT[m_backed_up_ctx].FRAME.FBP == (m_cached_ctx.FRAME.FBP + pages) && NextDrawMatchesShuffle()) height = std::max(m_context->scissor.in.w, height); @@ -3057,7 +3057,7 @@ void GSRendererHW::Draw() rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); GSVector2i new_size = rt->m_unscaled_size; // Make sure to use the original format for the offset. 
- int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + const int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); texture_offset = new_offset; new_size.y += new_offset; @@ -3089,11 +3089,10 @@ void GSRendererHW::Draw() if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) { - int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; - int z_offset = vertical_offset; - GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); - GSVector4i dRect = GSVector4i(0, z_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_offset + m_r.w + 1, z_offset + ds->m_unscaled_size.y) * ds->m_scale); - int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); + const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset); + GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(vertical_offset + m_r.w + 1, vertical_offset + ds->m_unscaled_size.y) * ds->m_scale); + const int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast(ds->m_unscaled_size.y)), tex, 
GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); @@ -3148,11 +3147,11 @@ void GSRendererHW::Draw() { if (m_r.w > rt->m_unscaled_size.y || m_r.z > rt->m_unscaled_size.x) { - u32 new_height = std::max(m_r.w, rt->m_unscaled_size.y); - u32 new_width = std::max(m_r.z, rt->m_unscaled_size.x); + const u32 new_height = std::max(m_r.w, rt->m_unscaled_size.y); + const u32 new_width = std::max(m_r.z, rt->m_unscaled_size.x); //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); - rt->ResizeTexture(new_height, new_height); + rt->ResizeTexture(new_width, new_height); const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); @@ -3591,9 +3590,7 @@ void GSRendererHW::Draw() // Ignore single page/0 page stuff, that's just gonna get silly else if (buffer_width > 64 && update_rect.z > buffer_width) { - float multifactor = static_cast(update_rect.z) / static_cast(buffer_width); - - update_rect.w *= multifactor; + update_rect.w *= static_cast(update_rect.z) / static_cast(buffer_width); update_rect.z = buffer_width; } @@ -3935,10 +3932,11 @@ void GSRendererHW::Draw() { const int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; - const int z_offset = vertical_offset; - GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); - GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); - g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), 
GSVector4(0.0f, z_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + const GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); + + GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, vertical_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, + std::min(real_rect.w + 1, ds->m_unscaled_size.y + vertical_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); } } } @@ -5901,9 +5899,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c if (m_channel_shuffle && (tex_diff || frame_diff)) { - u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; - u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; - u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + const u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; + const u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; + const u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; copy_range.y += vertical_offset; copy_range.x += horizontal_offset; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index dbb37cac65575..e0c26738f317b 100644 --- 
a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -379,7 +379,7 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw if (horizontal_offset) page_count += dst_pgw - horizontal_offset; - int new_height = (page_count / dst_pgw) * dst_page_size.y; + const int new_height = (page_count / dst_pgw) * dst_page_size.y; new_rect.x = 0; new_rect.z = dst_pgw * dst_page_size.x; new_rect.y = start_page.y * dst_page_size.y; @@ -3646,7 +3646,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r } } - u32 bbp = bp + bw * 0x10; + const u32 bbp = bp + bw * 0x10; if (bw >= 16 && bbp < 16384) { // Detect half of the render target (fix snow engine game) @@ -4216,7 +4216,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); } - + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. // We use dx/dy == 0 and the TBW check as a safeguard to make sure these go through to local memory. // We can also recreate the target if it's previously been created in the height cache with a valid size. @@ -7571,7 +7571,7 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet { // Palette is unused it = map.erase(it); // Erase element from map - // The palette object should now be gone as the shared pointer to the object in the map is deleted + // The palette object should now be gone as the shared pointer to the object in the map is deleted } else { diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index b8eda02966a62..af07babbaec96 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. 
-static constexpr u32 SHADER_CACHE_VERSION = 58; +static constexpr u32 SHADER_CACHE_VERSION = 59;