diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index b91c1e33d702a..4d0118972bce4 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -174,9 +174,8 @@ PAPX-90020: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" PAPX-90201: name: "ファンタビジョン [体験版]" name-sort: "ふぁんたびじょん [たいけんばん]" @@ -523,7 +522,7 @@ PAPX-90524: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. PBGP-0061: @@ -1248,26 +1247,31 @@ SCAJ-10011: region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10012: name: "Taiko Drum Master" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10013: name: "Taiko no Tatsujin - Tobikkiri! Anime Special" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10014: name: "Taiko no Tatsujin - Wai Wai Happy! Rokudaime" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SCAJ-10015: name: "Taiko no Tatsujin - Doka! to Oomori Nanadaime" region: "NTSC-Unk" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. 
SCAJ-20001: name: "Ratchet & Clank" region: "NTSC-Unk" @@ -1327,7 +1331,6 @@ SCAJ-20010: region: "NTSC-Unk" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SCAJ-20011: name: "Armored Core 3 - Silent Line" region: "NTSC-HK" @@ -1505,6 +1508,7 @@ SCAJ-20047: region: "NTSC-Unk" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCAJ-20048: name: "R:RACING EVOLUTION" name-sort: "Rれーしんぐえぼりゅーしょん" @@ -1554,6 +1558,7 @@ SCAJ-20060: region: "NTSC-Unk" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCAJ-20061: name: "Seven Samurai 20XX" region: "NTSC-Unk" @@ -1625,9 +1630,9 @@ SCAJ-20072: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. SCAJ-20073: name: "Jak and Daxter II" region: "NTSC-Unk" @@ -1891,9 +1896,8 @@ SCAJ-20116: region: "NTSC-C-J" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCAJ-20117: name: "Fu-un Bakumatsu-den" region: "NTSC-Unk" @@ -1967,6 +1971,7 @@ SCAJ-20125: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. 
@@ -1977,6 +1982,7 @@ SCAJ-20126: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -2365,7 +2371,7 @@ SCAJ-20183: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SCAJ-20184: @@ -2454,6 +2460,7 @@ SCAJ-20199: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -3561,6 +3568,7 @@ SCED-52137: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCED-52141: name: "WRC 3 [Demo]" region: "PAL-E" @@ -3572,6 +3580,7 @@ SCED-52141: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCED-52147: name: "EyeToy - Christmas Wishi Washi" region: "PAL-E" @@ -3861,9 +3870,13 @@ SCED-52899: SCED-52932: name: "Bonus Demo 8" region: "PAL-M5" + gsHWFixes: + textureInsideRT: 1 # Fixes shadows in TOCA Race Driver 2. SCED-52933: name: "Bonus Demo 8" region: "PAL-M5" + gsHWFixes: + textureInsideRT: 1 # Fixes shadows in TOCA Race Driver 2. 
SCED-52935: name: "SingStar Party [Demo]" region: "PAL-E" @@ -3937,6 +3950,8 @@ SCED-52997: SCED-53018: name: "Bonus Demo 8 (Geu)" region: "PAL-G" + gsHWFixes: + textureInsideRT: 1 # Fixes shadows in TOCA Race Driver 2. SCED-53043: name: "Magazine Ufficiale PlayStation 2 Speciale Platinum Italia" region: "PAL-I" @@ -4149,6 +4164,7 @@ SCED-53538: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -4548,8 +4564,7 @@ SCES-50000: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -5272,6 +5287,7 @@ SCES-51684: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. patches: 80802EA9: content: |- @@ -5314,6 +5330,7 @@ SCES-51844: region: "PAL-M5" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCES-51895: name: "EyeToy - Groove" region: "PAL-M11" @@ -5408,6 +5425,7 @@ SCES-52137: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCES-52154: name: "EyeToy - Chat" region: "PAL-M11" @@ -5615,9 +5633,8 @@ SCES-52586: compat: 5 gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. 
- getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCES-52596: name: "This is Football 2005" region: "PAL-Unk" @@ -5637,6 +5654,7 @@ SCES-52684: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SCES-52748: name: "EyeToy - Play 2" region: "PAL-M12" @@ -5715,18 +5733,16 @@ SCES-53053: region: "PAL-F-I" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCES-53054: name: "Death by Degrees" region: "PAL-E-G" compat: 5 gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCES-53055: name: "Eyetoy - Antigrav" region: "PAL-M5" @@ -5769,6 +5785,7 @@ SCES-53202: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -6969,6 +6986,7 @@ SCKA-20015: compat: 5 gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCKA-20016: name: "SoulCalibur II" region: "NTSC-K" @@ -7042,9 +7060,9 @@ SCKA-20027: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. 
- halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. SCKA-20028: name: "Ico [PlayStation2 Big Hit Series]" region: "NTSC-K" @@ -7096,6 +7114,7 @@ SCKA-20034: region: "NTSC-K" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SCKA-20035: name: "Hot Shots Golf 3 [PlayStation 2 Big Hit Series]" region: "NTSC-K" @@ -7126,9 +7145,8 @@ SCKA-20039: region: "NTSC-K" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SCKA-20040: name: "Jak 3" region: "NTSC-K" @@ -7212,6 +7230,7 @@ SCKA-20049: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -7435,6 +7454,7 @@ SCKA-20081: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -8610,9 +8630,8 @@ SCPS-15064: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. 
- getSkipCount: "GSC_GiTS" patches: A5768F53: content: |- @@ -9104,7 +9123,7 @@ SCPS-15118: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SCPS-15119: @@ -9805,7 +9824,7 @@ SCPS-19333: region: "NTSC-J" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SCPS-19335: @@ -13281,6 +13300,9 @@ SLED-52736: SLED-52851: name: "TOCA Race Driver 2" region: "PAL-Unk" + gsHWFixes: + alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLED-52852: name: "Forgotten Realms - Demon Stone [Demo]" region: "PAL-E" @@ -13367,6 +13389,10 @@ SLED-53109: SLED-53137: name: "Stolen" region: "PAL-Unk" + gsHWFixes: + halfPixelOffset: 2 # Fixes misaligned bloom effects. + texturePreloading: 1 # Performs much better with partial preload. + textureInsideRT: 1 # Fixes post effects. SLED-53198: name: "Rugby 2005" region: "PAL-Unk" @@ -13524,6 +13550,7 @@ SLED-53723: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLED-53731: name: "Battlefield 2 - Modern Combat [Demo]" region: "PAL-E" @@ -13532,8 +13559,7 @@ SLED-53731: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. 
- getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLED-53732: name: "Spartan - Total Warrior [Demo]" region: "PAL" @@ -15822,8 +15848,6 @@ SLES-50876: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -17001,7 +17025,6 @@ SLES-51355: compat: 5 gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLES-51356: name: "Road Trip Adventure" region: "PAL-M3" @@ -18478,6 +18501,8 @@ SLES-51997: compat: 5 clampModes: eeClampMode: 3 # For grey screen ingame. + gsHWFixes: + textureInsideRT: 1 # Fixes post effects on player 2. SLES-51998: name: "Kao the Kangaroo - Round 2" region: "PAL-M5" @@ -18689,7 +18714,7 @@ SLES-52101: name: "Wrath Unleashed" region: "PAL-M5" gsHWFixes: - textureInsideRT: 1 # Fixes flashing and some models still very broken in hardware mode. + textureInsideRT: 1 # Fixes colors. SLES-52102: name: "Hugo Bukkazoom!" region: "PAL-M12" @@ -18752,6 +18777,7 @@ SLES-52132: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52133: @@ -18760,6 +18786,7 @@ SLES-52133: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. 
SLES-52134: @@ -18769,6 +18796,7 @@ SLES-52134: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52135: @@ -18777,6 +18805,7 @@ SLES-52135: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52136: @@ -18785,6 +18814,7 @@ SLES-52136: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLES-52143: @@ -18821,8 +18851,6 @@ SLES-52153: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -19162,6 +19190,8 @@ SLES-52322: texturePreloading: 1 # Performs better with partial preload because it is slow on locations outside gameplay foremost. mergeSprite: 1 # Fixes misaligned white lines. PCRTCOverscan: 1 # Fixes missing HUD. + textureInsideRT: 1 # Fixes post shuffle effect. + halfPixelOffset: 2 # Fixes boxes around shuffle effect pages. SLES-52323: name: "Richard Burns Rally" region: "PAL-M5" @@ -20037,6 +20067,7 @@ SLES-52637: region: "PAL-M5" gsHWFixes: alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. 
SLES-52638: name: "DTM Race Driver 2" region: "PAL-M5" @@ -20753,9 +20784,9 @@ SLES-52882: region: "PAL-M5" compat: 4 gsHWFixes: - disablePartialInvalidation: 1 # Improves performance. halfPixelOffset: 2 # Fixes misaligned bloom effects. texturePreloading: 1 # Performs much better with partial preload. + textureInsideRT: 1 # Fixes post effects. SLES-52884: name: "Duel Masters" region: "PAL-M5" @@ -20933,6 +20964,8 @@ SLES-52942: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: EBE1972D: @@ -21197,9 +21230,9 @@ SLES-53020: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. patches: BF6F101F: content: |- @@ -23337,6 +23370,7 @@ SLES-53703: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLES-53704: name: "Peter Jackson's King Kong - The Official Game of the Movie" name-sort: "King Kong, Peter Jackson's - The Official Game of the Movie" @@ -23346,6 +23380,7 @@ SLES-53704: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. 
SLES-53705: name: "Peter Jackson's King Kong - The Official Game of the Movie" name-sort: "King Kong, Peter Jackson's - The Official Game of the Movie" @@ -23355,6 +23390,7 @@ SLES-53705: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLES-53706: name: "The Chronicles of Narnia - The Lion, the Witch and the Wardrobe" name-sort: "Chronicles of Narnia, The - The Lion, the Witch and the Wardrobe" @@ -23408,6 +23444,8 @@ SLES-53717: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: 208183AF: @@ -23459,8 +23497,7 @@ SLES-53729: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLES-53730: name: "Battlefield 2 - Modern Combat" region: "PAL-M3" @@ -23469,8 +23506,7 @@ SLES-53730: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLES-53734: name: "50 Cent - Bulletproof" region: "PAL-E" @@ -25715,12 +25751,18 @@ SLES-54510: SLES-54511: name: "UEFA Champions League 2006-2007" region: "PAL-E" + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. SLES-54512: name: "UEFA Champions League 2006-2007" region: "PAL-F-G" + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. 
SLES-54513: name: "UEFA Champions League 2006-2007" region: "PAL-I-S" + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. SLES-54516: name: "Thrillville" region: "PAL-F-G" @@ -26655,6 +26697,7 @@ SLES-54819: gsHWFixes: getSkipCount: "GSC_Manhunt2" autoFlush: 1 # Fixes missing lights and light intensity. + textureInsideRT: 1 # Fixes post lighting. SLES-54820: name: "Stuntman - Ignition" region: "PAL-M5" @@ -27088,6 +27131,7 @@ SLES-54962: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-54963: name: "Tony Hawk's Proving Ground" region: "PAL-E" @@ -27119,7 +27163,7 @@ SLES-54972: region: "PAL-M3" gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SLES-54973: @@ -27135,6 +27179,7 @@ SLES-54974: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-54975: name: "George Of The Jungle" region: "PAL-E" @@ -27555,6 +27600,8 @@ SLES-55109: region: "PAL-E" roundModes: vu0RoundMode: 0 # Fixes invisible wall collision in bedroom. + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. SLES-55110: name: "Odin Sphere" region: "PAL-M5" @@ -27762,6 +27809,7 @@ SLES-55191: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-55192: name: "Steam Express" region: "PAL-M5" @@ -27823,6 +27871,7 @@ SLES-55200: autoFlush: 1 # Fixes bloom intensity. 
halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLES-55201: name: "Riding Star" region: "PAL-M4" @@ -30195,8 +30244,6 @@ SLKA-25196: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -30321,6 +30368,7 @@ SLKA-25218: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLKA-25219: @@ -30826,13 +30874,11 @@ SLKA-25330: name: "Battlefield 2 - Modern Combat" region: "NTSC-K" gsHWFixes: - autoFlush: 2 # Post-processing. + minimumBlendingLevel: 4 # Fixes ground texture rendering. + autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. - texturePreloading: 1 # Spikes all over the place otherwise. - textureInsideRT: 1 # Fixes light shinging through objects. - cpuCLUTRender: 1 # Fixes light shining through objects. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + texturePreloading: 1 # Improves performance. + textureInsideRT: 1 # Fixes per line drawing. SLKA-25331: name: "Marc Ecko's Getting Up - Contents Under Pressure" region: "NTSC-K" @@ -30867,6 +30913,7 @@ SLKA-25337: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. 
SLKA-25338: name: "The Godfather" name-sort: "Godfather, The" @@ -31227,6 +31274,7 @@ SLKA-25414: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLKA-25417: name: "Jin Samguk Mussang 4 - Empires" region: "NTSC-K" @@ -31282,6 +31330,7 @@ SLKA-25434: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLKA-25435: name: "Sengoku Basara X" region: "NTSC-J-K" @@ -31694,8 +31743,7 @@ SLPM-55034: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLPM-55035: name: "ファイトナイト ラウンド2 [EA:SY! 1980]" name-sort: "ふぁいとないと らうんど2 [EA:SY! 1980]" @@ -32998,8 +33046,7 @@ SLPM-60109: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -33503,6 +33550,7 @@ SLPM-60217: region: "NTSC-J" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLPM-60218: name: "GUNGRAVE O.D. [体験版]" name-sort: "がんぐれいぶ おーでぃー [たいけんばん]" @@ -33675,9 +33723,8 @@ SLPM-60257: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. 
- getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SLPM-60258: name: "THE TYPING OF THE DEAD ZOMBIE PANIC [体験版]" name-sort: "ざ たいぴんぐおぶ ざ でっど ぞんび ぱにっく [たいけんばん]" @@ -34231,8 +34278,6 @@ SLPM-61092: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -36518,7 +36563,6 @@ SLPM-62378: region: "NTSC-J" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLPM-62379: name: "カラオケレボリューション J-POPベストVol.2" name-sort: "からおけれぼりゅーしょん J-POPべすとVol.2" @@ -39143,6 +39187,8 @@ SLPM-65073: name-sort: "げんそうすいこでん3 [しょかいせいさんぶん:とくしゅしよう]" name-en: "Gensou Suikoden III [Limited Edition]" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: # This looks like a mess because it includes all serials for Suikoden 3, Suikoden 2, Suikogaiden 1, and Suikogaiden 2. A lot of these probably aren't actually required but it's not really hurting anything to have them here. - "SLPM-65073" - "SLPM-65074" @@ -39162,6 +39208,8 @@ SLPM-65074: name-sort: "げんそうすいこでん3" name-en: "Gensou Suikoden III" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: - "SLPM-65073" - "SLPM-65074" @@ -40077,7 +40125,6 @@ SLPM-65234: region: "NTSC-J" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLPM-65235: name: "ニュールーマニア ポロリ青春" name-sort: "にゅーるーまにあ ぽろりせいしゅん" @@ -40498,6 +40545,8 @@ SLPM-65305: name-sort: "げんそうすいこでん3 [KONAMI THE BEST]" name-en: "Gensou Suikoden 3" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. 
memcardFilters: - "SLPM-65073" - "SLPM-65074" @@ -42041,6 +42090,7 @@ SLPM-65583: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SLPM-65584: name: "真・三國無双3 猛将伝" name-sort: "しんさんごくむそう3 もうしょうでん" @@ -42663,6 +42713,8 @@ SLPM-65694: name-sort: "げんそうすいこでん3 [こなみでんどうせれくしょん]" name-en: "Gensou Suikoden 3 [KONAMI Dendou Selection]" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: - "SLPM-65073" - "SLPM-65074" @@ -42935,8 +42987,6 @@ SLPM-65741: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -45073,6 +45123,7 @@ SLPM-66103: autoFlush: 2 # Fixes sun luminosity. halfPixelOffset: 2 # Aligns Depth of Field. nativeScaling: 2 # Fixes Depth of Field effect. + textureInsideRT: 1 # Fixes half screen fog effect. SLPM-66104: name: "ぷよぷよフィーバー2 [チュー!]" name-sort: "ぷよぷよふぃーばー2" @@ -45746,8 +45797,7 @@ SLPM-66206: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLPM-66207: name: "どろろ [SEGA THE BEST 2800]" name-sort: "どろろ [SEGA THE BEST 2800]" @@ -45780,6 +45830,7 @@ SLPM-66211: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. 
SLPM-66212: name: "SEGA RALLY 2006" name-sort: "せが らりー 2006" @@ -47619,6 +47670,7 @@ SLPM-66498: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLPM-66499: name: "神様家族 応援願望" name-sort: "かみさまかぞく おうえんがんぼう" @@ -48521,8 +48573,7 @@ SLPM-66651: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLPM-66652: name: "バーンアウト リベンジ [EA BEST HITS]" name-sort: "ばーんあうと りべんじ [EA BEST HITS]" @@ -50992,6 +51043,8 @@ SLPM-68505: name-sort: "げんそうすいこでん3" name-en: "Gensou Suikoden III" region: "NTSC-J" + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. SLPM-68509: name: "頭文字D Special Stage [講談社懸賞品]" name-sort: "いにしゃるD すぺしゃる すてーじ [こうだんしゃけんしょうひん]" @@ -51876,8 +51929,7 @@ SLPS-20001: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -53700,6 +53752,7 @@ SLPS-20382: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20383: name: "太鼓の達人 あつまれ!祭りだ!!四代目" name-sort: "たいこのたつじん あつまれ!まつりだ!!よんだいめ" @@ -53707,6 +53760,7 @@ SLPS-20383: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20384: name: "流行り神 警視庁怪異事件ファイル [初回限定版]" name-sort: "はやりがみ けいしちょうかいいじけんふぁいる [しょかいげんていばん]" @@ -53774,6 +53828,7 @@ SLPS-20399: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. 
+ textureInsideRT: 1 # Fixes post effects. SLPS-20400: name: "太鼓の達人 ゴー!ゴー!五代目 [ソフト単体]" name-sort: "たいこのたつじん ごー!ごー!ごだいめ [そふとたんたい]" @@ -53781,6 +53836,7 @@ SLPS-20400: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20401: name: "テクモ ヒットパレード" name-sort: "てくも ひっとぱれーど" @@ -53852,6 +53908,7 @@ SLPS-20413: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20414: name: "太鼓の達人 TAIKO DRUM MASTER" name-sort: "たいこのたつじん TAIKO DRUM MASTER" @@ -53860,6 +53917,7 @@ SLPS-20414: compat: 5 gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20416: name: "陰陽大戦記 白虎演舞 [“EyeToy”カメラ同梱版]" name-sort: "おんみょうたいせんき びゃっこえんぶ [あいとーいかめらどうこんばん]" @@ -53911,6 +53969,7 @@ SLPS-20424: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20425: name: "太鼓の達人 とびっきり!アニメスペシャル [ソフト単体]" name-sort: "たいこのたつじん とびっきり!あにめすぺしゃる [そふとたんたい]" @@ -53918,6 +53977,7 @@ SLPS-20425: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20426: name: "マダガスカル" name-sort: "まだかすかる" @@ -54027,6 +54087,7 @@ SLPS-20450: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20451: name: "太鼓の達人 わいわいハッピー!六代目" name-sort: "たいこのたつじん わいわいはっぴー!ろくだいめ" @@ -54034,6 +54095,7 @@ SLPS-20451: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20452: name: "SIMPLE2000シリーズ Ultimate Vol.30 降臨!族車ゴッド~仏恥義理★愛羅武勇~" name-sort: "しんぷる2000しりーず あるてぃめっと Vol.30 こうりん!ぞくしゃごっど ぶっちぎりあいらぶゆう" @@ -54231,6 +54293,7 @@ SLPS-20485: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. 
SLPS-20486: name: "太鼓の達人 ドカッ!と大盛り七代目 [ソフト単体]" name-sort: "たいこのたつじん どかっ!とおおもりななだいめ [そふとたんたい]" @@ -54238,6 +54301,7 @@ SLPS-20486: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes vertical lines. + textureInsideRT: 1 # Fixes post effects. SLPS-20487: name: "パチスロキング! 科学忍者隊ガッチャマン" name-sort: "ぱちすろきんぐ! かがくにんじゃたいがっちゃまん" @@ -56027,6 +56091,7 @@ SLPS-25289: region: "NTSC-J" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLPS-25290: name: "タイムクライシス3 [ソフト単体]" name-sort: "たいむくらいしす3 [そふとたんたい]" @@ -56034,6 +56099,7 @@ SLPS-25290: region: "NTSC-J" gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLPS-25291: name: "Baldur's Gate - DARK ALLIANCE - [PCCW Japan The BEST]" name-sort: "ばるだーずげーと だーくあらいあんす [PCCW Japan The BEST]" @@ -56756,6 +56822,7 @@ SLPS-25406: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLPS-25407: @@ -56883,9 +56950,8 @@ SLPS-25422: region: "NTSC-J" gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SLPS-25423: name: "怪盗アプリコット 完全版 [限定版]" name-sort: "かいとうあぷりこっと かんぜんばん [げんていばん]" @@ -57371,6 +57437,7 @@ SLPS-25510: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. 
@@ -57657,6 +57724,7 @@ SLPS-25563: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLPS-25564: @@ -59350,6 +59418,7 @@ SLPS-25840: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25841: name: "テイルズ オブ デスティニー ディレクターズカット [プレミアムBOX]" name-sort: "ているず おぶ ですてぃにー でぃれくたーずかっと [ぷれみあむBOX]" @@ -59606,6 +59675,7 @@ SLPS-25886: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25887: name: "スーパーロボット大戦Z" name-sort: "すーぱーろぼっとたいせんZ" @@ -59642,6 +59712,7 @@ SLPS-25889: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25890: name: "ギターヒーロー3 レジェンドオブロック [ソフト単体]" name-sort: "ぎたーひーろー3 れじぇんどおぶろっく [そふとたんたい]" @@ -59654,6 +59725,7 @@ SLPS-25890: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLPS-25891: name: "乃木坂春香の秘密 こすぷれ、はじめました♥ [限定版]" name-sort: "のぎざかはるかのひみつ こすぷれ はじめました [げんていばん]" @@ -60226,8 +60298,7 @@ SLPS-71502: name-en: "Ridge Racer V [MEGA HITS!]" region: "NTSC-J" gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. 
@@ -60540,6 +60611,7 @@ SLPS-73223: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -61144,8 +61216,7 @@ SLUS-20002: clampModes: vuClampMode: 2 # Fixes texture rendering in the intro. gsHWFixes: - cpuFramebufferConversion: 1 - textureInsideRT: 1 + textureInsideRT: 1 # Fixes post effects halfPixelOffset: 2 # Fixes title screen and some intro post processing alignment. roundSprite: 1 # Fixes ui and hud alignment. gpuPaletteConversion: 2 # Lots of CLUTs in large textures. @@ -62326,7 +62397,6 @@ SLUS-20291: compat: 5 gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. - getSkipCount: "GSC_BigMuthaTruckers" SLUS-20292: name: "Tsugunai - Atonement" region: "NTSC-U" @@ -62802,6 +62872,8 @@ SLUS-20387: name: "Suikoden III" region: "NTSC-U" compat: 5 + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. memcardFilters: # Allows import of Suikoden II clear data. - "SLUS-20387" - "SLUS-00958" @@ -63048,6 +63120,8 @@ SLUS-20433: compat: 5 clampModes: eeClampMode: 3 # For grey screen ingame. + gsHWFixes: + textureInsideRT: 1 # Fixes post effects on player 2. SLUS-20434: name: "Myst III - Exile" region: "NTSC-U" @@ -63868,8 +63942,6 @@ SLUS-20587: gsHWFixes: autoFlush: 1 # Aligns and corrects shadows. halfPixelOffset: 2 # Fixes sun and depth line. - cpuCLUTRender: 1 # Fixes janky coloured cars. - gpuTargetCLUT: 1 # Fixes janky coloured cars. textureInsideRT: 1 # Fixes car textures. cpuSpriteRenderBW: 4 # Alleviates text and sky rendering issues. cpuSpriteRenderLevel: 2 # Needed for above. @@ -63966,7 +64038,6 @@ SLUS-20605: region: "NTSC-U" gsHWFixes: textureInsideRT: 1 # Fixes inside RT shuffling. 
- getSkipCount: "GSC_BigMuthaTruckers" SLUS-20606: name: "Bounty Hunter - Seek & Destroy" region: "NTSC-U" @@ -64156,6 +64227,7 @@ SLUS-20645: compat: 5 gsHWFixes: texturePreloading: 0 # Performs much better with no preload. + textureInsideRT: 1 # Fixes post processing. SLUS-20646: name: "Mark Davis Pro Bass Challenge" region: "NTSC-U" @@ -64624,6 +64696,8 @@ SLUS-20732: texturePreloading: 1 # Performs better with partial preload because it is slow on locations outside gameplay foremost. mergeSprite: 1 # Fixes misaligned white lines. PCRTCOverscan: 1 # Fixes missing HUD. + textureInsideRT: 1 # Fixes post shuffle effect. + halfPixelOffset: 2 # Fixes boxes around shuffle effect pages. SLUS-20733: name: "Castlevania - Lament of Innocence" region: "NTSC-U" @@ -65167,7 +65241,7 @@ SLUS-20840: region: "NTSC-U" compat: 5 gsHWFixes: - textureInsideRT: 1 # Fixes flashing and some models still very broken in hardware mode. + textureInsideRT: 1 # Fixes colors. SLUS-20841: name: "NFL Street" region: "NTSC-U" @@ -65421,6 +65495,7 @@ SLUS-20882: gsHWFixes: texturePreloading: 1 # Performs much better with partial preload. autoFlush: 1 # Fixes bloom rendering. + textureInsideRT: 1 # Fixes post processing. roundModes: vu1RoundMode: 0 # Fixes missing light cones curtains and certain effects. SLUS-20883: @@ -65748,9 +65823,8 @@ SLUS-20934: compat: 5 gsHWFixes: alignSprite: 1 # Fixes FMV lines. - cpuSpriteRenderBW: 2 # Prevents Death By VRAM explosion. - cpuSpriteRenderLevel: 2 # Needed for above. - getSkipCount: "GSC_DeathByDegreesTekkenNinaWilliams" + halfPixelOffset: 2 # Aligns post effects. + textureInsideRT: 1 # Fixes post shuffles. SLUS-20935: name: "IHRA Professional Drag Racing 2005" region: "NTSC-U" @@ -66184,9 +66258,9 @@ SLUS-21006: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. 
+ halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. patches: default: content: |- @@ -66330,8 +66404,7 @@ SLUS-21026: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-21027: name: "The Lord of the Rings - The Third Age" name-sort: "Lord of the Rings, The - The Third Age" @@ -66351,6 +66424,8 @@ SLUS-21029: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: 0DD3417A: @@ -66425,6 +66500,7 @@ SLUS-21039: compat: 5 gsHWFixes: alignSprite: 1 # Fixes lighting and vertical lines, also works with normal vertex. + textureInsideRT: 1 # Fixes shadows. SLUS-21040: name: "The Shield" name-sort: "Shield, The" @@ -66541,6 +66617,7 @@ SLUS-21059: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -66733,9 +66810,9 @@ SLUS-21099: region: "NTSC-U" compat: 4 gsHWFixes: - disablePartialInvalidation: 1 # Improves performance. halfPixelOffset: 2 # Fixes misaligned bloom effects. texturePreloading: 1 # Performs much better with partial preload. + textureInsideRT: 1 # Fixes post effects. SLUS-21100: name: "NCAA March Madness 2005" region: "NTSC-U" @@ -67085,6 +67162,7 @@ SLUS-21160: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. 
gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -68133,6 +68211,7 @@ SLUS-21311: autoFlush: 1 # Corrects vignette to match software. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. halfPixelOffset: 4 # Aligns blur more correctly to match software. + textureInsideRT: 1 # Fixes post processing effects. SLUS-21312: name: "Wallace & Gromit - The Curse of the Were-Rabbit" region: "NTSC-U" @@ -68417,6 +68496,8 @@ SLUS-21355: gsHWFixes: halfPixelOffset: 2 # Aligns post bloom. nativeScaling: 2 # Fixes light blooms. + textureInsideRT: 1 # Improves performance. + preloadFrameData: 1 # Fixes light trails. getSkipCount: "GSC_MidnightClub3" patches: 60A42FF5: @@ -69618,6 +69699,8 @@ SLUS-21581: name: "UEFA Champions League 2006-2007" region: "NTSC-U" compat: 5 + gsHWFixes: + textureInsideRT: 1 # Fixes weird glitches. SLUS-21582: name: "MVP '07 - NCAA Baseball" region: "NTSC-U" @@ -69815,6 +69898,7 @@ SLUS-21613: gsHWFixes: getSkipCount: "GSC_Manhunt2" autoFlush: 1 # Fixes missing lights and light intensity. + textureInsideRT: 1 # Fixes post lighting. SLUS-21614: name: "Star Wars - The Force Unleashed" region: "NTSC-U" @@ -69838,7 +69922,7 @@ SLUS-21615: compat: 5 gsHWFixes: roundSprite: 1 # Fixes font sizes and lines in UI. - cpuFramebufferConversion: 1 # Fixes sepia-tone flashback sequences. + textureInsideRT: 1 # Fixes sepia-tone flashback sequences. forceEvenSpritePosition: 1 # Fixes font artifacts and out-of-bound 2D textures. gpuPaletteConversion: 2 # Fixes micro-stuttering and reduces HC size. SLUS-21616: @@ -70163,6 +70247,7 @@ SLUS-21672: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. 
+ getSkipCount: "GSC_GuitarHero" SLUS-21673: name: "College Hoops 2K8" region: "NTSC-U" @@ -70386,6 +70471,8 @@ SLUS-21716: compat: 5 roundModes: vu0RoundMode: 0 # Fixes invisible wall collision in bedroom. + gsHWFixes: + textureInsideRT: 1 # Fixes half screen. SLUS-21717: name: "Dora the Explorer - Dora Saves the Mermaids" region: "NTSC-U" @@ -70533,6 +70620,7 @@ SLUS-21740: autoFlush: 1 # Fixes bloom intensity. halfPixelOffset: 4 # Mostly aligns post processing. nativeScaling: 1 # Fixes post processing smoothness and position. + getSkipCount: "GSC_GuitarHero" SLUS-21741: name: "Sea Monsters - A Prehistoric Adventure" region: "NTSC-U" @@ -72100,6 +72188,8 @@ SLUS-29087: eeClampMode: 3 # Characters are visible in-game. gsHWFixes: texturePreloading: 1 # Performs better with partial preload because it is slow on locations outside gameplay foremost. + textureInsideRT: 1 # Fixes post shuffle effect. + halfPixelOffset: 2 # Fixes boxes around shuffle effect pages. SLUS-29088: name: "Champions of Norrath - Realms of EverQuest [Demo]" region: "NTSC-U" @@ -72198,8 +72288,7 @@ SLUS-29117: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-29118: name: "Need for Speed - Underground 2 [Demo]" region: "NTSC-U" @@ -72214,9 +72303,9 @@ SLUS-29123: recommendedBlendingLevel: 4 # Improves banding and effect emulation. autoFlush: 1 # Fixes light bloom intensity. estimateTextureRegion: 1 # Improves performance and reduces hash cache size. - halfPixelOffset: 4 # Aligns post effects. + halfPixelOffset: 2 # Aligns post effects. nativeScaling: 1 # Fixes post effects. - getSkipCount: "GSC_GiTS" + textureInsideRT: 1 # Fixes post shuffles. 
SLUS-29124: name: "Fight Club [Demo]" region: "NTSC-U" @@ -72351,8 +72440,7 @@ SLUS-29152: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-29153: name: "Burnout Revenge [Demo]" region: "NTSC-U" @@ -72467,8 +72555,7 @@ SLUS-29172: autoFlush: 1 # Post-processing. halfPixelOffset: 2 # Offset post-processing. texturePreloading: 1 # Improves performance. - getSkipCount: "GSC_Battlefield2" # Depth clear. - beforeDraw: "OI_Battlefield2" # Framebuffer copy, fixes rendering for bottom part of screen. + textureInsideRT: 1 # Fixes per line drawing. SLUS-29173: name: "The Sims 2 [Demo]" name-sort: "Sims 2, The [Demo]" diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 7a722cdca1e61..d7cf69fa7253b 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,11 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index c641a6743fd5b..47443659f2af0 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,11 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff 
--git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 69c4345db9ca7..f7a7133bab925 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -945,7 +945,7 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8u); @@ -1320,7 +1320,7 @@ void main() ps_blend(C, alpha_blend); #if PS_SHUFFLE - #if !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u)); @@ -1350,11 +1350,8 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index c664204864483..d886bd3ba9923 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -119,6 +119,10 @@ GSState::~GSState() _aligned_free(m_vertex.buff); if (m_index.buff) _aligned_free(m_index.buff); + if (m_draw_vertex.buff) + _aligned_free(m_draw_vertex.buff); + if (m_draw_index.buff) + _aligned_free(m_draw_index.buff); } std::string GSState::GetDrawDumpPath(const char* format, ...) 
@@ -467,7 +471,8 @@ void GSState::DumpVertices(const std::string& filename) file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.R) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.G) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.B) << DEL; - file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A); + file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A) << DEL; + file << "FOG: " << std::setfill('0') << std::setw(3) << unsigned(v.FOG); file << std::endl; } @@ -849,7 +854,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) // Urban Chaos writes to the memory backing the CLUT in the middle of a shuffle, and // it's unclear whether the CLUT would actually get reloaded in that case. if (TEX0.CBP != m_mem.m_clut.GetCLUTCBP()) - m_channel_shuffle = false; + m_channel_shuffle_abort = true; } TEX0.CPSM &= 0xa; // 1010b @@ -1674,7 +1679,8 @@ void GSState::FlushPrim() Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM); } #endif - + // Update scissor, it may have been modified by a previous draw + m_env.CTXT[PRIM->CTXT].UpdateScissor(); m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); // Texel coordinate rounding @@ -2794,8 +2800,10 @@ void GSState::GrowVertexBuffer() const u32 maxcount = std::max<u32>(m_vertex.maxcount * 3 / 2, 10000); GSVertex* vertex = static_cast<GSVertex*>(_aligned_malloc(sizeof(GSVertex) * maxcount, 32)); + GSVertex* draw_vertex = static_cast<GSVertex*>(_aligned_malloc(sizeof(GSVertex) * maxcount, 32)); // Worst case index list is a list of points with vs expansion, 6 indices per point u16* index = static_cast<u16*>(_aligned_malloc(sizeof(u16) * maxcount * 6, 32)); + u16* draw_index = static_cast<u16*>(_aligned_malloc(sizeof(u16) * maxcount * 6, 32)); - if (!vertex || !index) + if (!vertex || !index || !draw_vertex || !draw_index) { @@ -2821,6 +2829,22 @@ void GSState::GrowVertexBuffer() _aligned_free(m_index.buff); } + if (m_draw_vertex.buff) + { + 
std::memcpy(draw_vertex, m_draw_vertex.buff, sizeof(GSVertex) * m_vertex.tail); + + _aligned_free(m_draw_vertex.buff); + } + + if (m_draw_index.buff) + { + std::memcpy(draw_index, m_draw_index.buff, sizeof(u16) * m_index.tail); + + _aligned_free(m_draw_index.buff); + } + + m_draw_vertex.buff = draw_vertex; + m_draw_index.buff = draw_index; m_vertex.buff = vertex; m_vertex.maxcount = maxcount - 3; // -3 to have some space at the end of the buffer before DrawingKick can grow it m_index.buff = index; @@ -3094,6 +3118,16 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)))) return false; + // Try to detect shuffles, because these will not autoflush, they by design clash. + if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16) + { + // Pretty confident here... + GSVertex* buffer = &m_vertex.buff[0]; + const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64; + + if (const_spacing) + return false; + } const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. @@ -3859,7 +3893,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL const GSVector2 grad(uv_range / pos_range); // Adjust texture range when sprites get scissor clipped. 
Since we linearly interpolate, this // optimization doesn't work when perspective correction is enabled. - if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && m_primitive_covers_without_gaps != NoGapsType::GapsFound) + // Allowing for quads when the gradient is 1. It's not guaranteed (would need to check the gradient on each vector), but should be close enough. + if (m_primitive_covers_without_gaps != NoGapsType::GapsFound && (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && grad.x == 1.0f && grad.y == 1.0f && TrianglesAreQuads(false)))) { // When coordinates are fractional, GS appears to draw to the right/bottom (effectively // taking the ceiling), not to the top/left (taking the floor). @@ -3870,11 +3905,24 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]]; const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]]; + const GSVertex* vert_third = &m_vertex.buff[m_index.buff[2]]; GSVector4 new_st = st; + bool u_forward_check = false; + bool x_forward_check = false; + if (m_vt.m_primclass == GS_TRIANGLE_CLASS) + { + u_forward_check = PRIM->FST ? ((vert_first->U < vert_second->U) || (vert_first->U < vert_third->U)) : (((vert_first->ST.S / vert_first->RGBAQ.Q) < (vert_second->ST.S / vert_second->RGBAQ.Q)) || ((vert_first->ST.S / vert_first->RGBAQ.Q) < (vert_third->ST.S / vert_third->RGBAQ.Q))); + x_forward_check = (vert_first->XYZ.X < vert_second->XYZ.X) || (vert_first->XYZ.X < vert_third->XYZ.X); + } + else + { + u_forward_check = PRIM->FST ? 
(vert_first->U < vert_second->U) : ((vert_first->ST.S / vert_first->RGBAQ.Q) < (vert_second->ST.S / vert_first->RGBAQ.Q)); + x_forward_check = vert_first->XYZ.X < vert_second->XYZ.X; + } // Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap - const bool u_forward = vert_first->U < vert_second->U; - const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X; + const bool u_forward = u_forward_check; + const bool x_forward = x_forward_check; const bool swap_x = u_forward != x_forward; if (int_rc.left < scissored_rc.left) @@ -3897,9 +3945,20 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL st.x = new_st.x; st.z = new_st.z; } - - const bool v_forward = vert_first->V < vert_second->V; - const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y; + bool v_forward_check = false; + bool y_forward_check = false; + if (m_vt.m_primclass == GS_TRIANGLE_CLASS) + { + v_forward_check = PRIM->FST ? ((vert_first->V < vert_second->V) || (vert_first->V < vert_third->V)) : (((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_second->ST.T / vert_second->RGBAQ.Q)) || ((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_third->ST.T / vert_third->RGBAQ.Q))); + y_forward_check = (vert_first->XYZ.Y < vert_second->XYZ.Y) || (vert_first->XYZ.Y < vert_third->XYZ.Y); + } + else + { + v_forward_check = PRIM->FST ? 
(vert_first->V < vert_second->V) : ((vert_first->ST.T / vert_first->RGBAQ.Q) < (vert_second->ST.T / vert_first->RGBAQ.Q)); + y_forward_check = vert_first->XYZ.Y < vert_second->XYZ.Y; + } + const bool v_forward = v_forward_check; + const bool y_forward = y_forward_check; const bool swap_y = v_forward != y_forward; if (int_rc.top < scissored_rc.top) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 75416d86d5228..cf09202b40c13 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -145,6 +145,21 @@ class GSState : public GSAlignedClass<32> u32 tail; } m_index = {}; + struct + { + GSVertex* buff; + u32 head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 + u32 xy_tail; + GSVector4i xy[4]; + GSVector4i xyhead; + } m_draw_vertex = {}; + + struct + { + u16* buff; + u32 tail; + } m_draw_index = {}; + void UpdateContext(); void UpdateScissor(); @@ -224,6 +239,10 @@ class GSState : public GSAlignedClass<32> bool m_texflush_flag = false; bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; + bool m_in_target_draw = false; + bool m_channel_shuffle_abort = false; + + u32 m_target_offset = 0; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/GSVector4i.h b/pcsx2/GS/GSVector4i.h index 53bfc2c6d5e37..63c9cc1f98520 100644 --- a/pcsx2/GS/GSVector4i.h +++ b/pcsx2/GS/GSVector4i.h @@ -1599,6 +1599,11 @@ class alignas(16) GSVector4i return loadh(&v); } + __forceinline static GSVector4i loadl(const GSVector2i& v) + { + return loadl(&v); + } + __forceinline static GSVector4i load(const void* pl, const void* ph) { return loadh(ph, loadl(pl)); diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 2d7239692d331..6a7f92ad272df 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -35,52 +35,6 @@ static bool s_nativeres; // Partial level, broken on all renderers. 
//////////////////////////////////////////////////////////////////////////////// -bool GSHwHack::GSC_DeathByDegreesTekkenNinaWilliams(GSRendererHW& r, int& skip) -{ - // Note: Game also has issues with texture shuffle not supported on strange clamp mode. - // See https://forums.pcsx2.net/Thread-GSDX-Texture-Cache-Bug-Report-Death-By-Degrees-SLUS-20934-NTSC - if (skip == 0) - { - if (!s_nativeres && RTME && RFBP == 0 && RTBP0 == 0x34a0 && RTPSM == PSMCT32) - { - // Don't enable hack on native res if crc is below aggressive. - // Upscaling issue similar to Tekken 5. - skip = 1; // Animation pane - } -#if 0 - else if (RFBP == 0x3500 && RTPSM == PSMT8 && RFBMSK == 0xFFFF00FF) - { - // Needs to be further tested so put it on Aggressive for now, likely channel shuffle. - skip = 4; // Underwater white fog - } -#endif - } - else - { - if (!s_nativeres && RTME && (RFBP | RTBP0 | RFPSM | RTPSM) && RFBMSK == 0x00FFFFFF) - { - // Needs to be further tested so assume it's related with the upscaling hack. - skip = 1; // Animation speed - } - } - - return true; -} - -bool GSHwHack::GSC_GiTS(GSRendererHW& r, int& skip) -{ - if (skip == 0) - { - if (RTME && RFBP == 0x03000 && RFPSM == PSMCT32 && RTPSM == PSMT8) - { - // Channel effect not properly supported yet - skip = 9; - } - } - - return true; -} - // Channel effect not properly supported yet bool GSHwHack::GSC_Manhunt2(GSRendererHW& r, int& skip) { @@ -131,6 +85,22 @@ bool GSHwHack::GSC_SacredBlaze(GSRendererHW& r, int& skip) return true; } +bool GSHwHack::GSC_GuitarHero(GSRendererHW& r, int& skip) +{ + // Crowd sprite generation is a mess, better done in software. 
+ if (skip == 0) + { + if (RTBW <= 4 && RTME && RFBW <= 4 && (r.m_context->TEX1.MMIN & 1) == 0) + { + r.ClearGSLocalMemory(r.m_context->offset.zb, r.m_r, 0); + r.SwPrimRender(r, RFBP != 0x2DC0, false); + skip = 1; + } + } + + return true; +} + bool GSHwHack::GSC_SFEX3(GSRendererHW& r, int& skip) { if (skip == 0) @@ -194,7 +164,7 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) return true; } - if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) + if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && !r.PRIM->ABE && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) { // Don't enable hack on native res. // Fixes ghosting/blur effect and white lines appearing in stages: Moonfit Wilderness, Acid Rain - caused by upscaling. @@ -204,12 +174,6 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) const GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y, r.m_vt.m_max.t.x + 0.5f, r.m_vt.m_max.t.y + 0.5f); r.ReplaceVerticesWithSprite(draw_size, read_size, GSVector2i(read_size.width(), read_size.height()), draw_size); } - else if (RZTST == 1 && RTME && (RFBP == 0x02bc0 || RFBP == 0x02be0 || RFBP == 0x02d00 || RFBP == 0x03480 || RFBP == 0x034a0) && RFPSM == RTPSM && RTBP0 == 0x00000 && RTPSM == PSMCT32) - { - // The moving display effect(flames) is not emulated properly in the entire screen so let's remove the effect in the stage: Burning Temple. Related to half screen bottom issue. - // Fixes black lines in the stage: Burning Temple - caused by upscaling. Note the black lines can also be fixed with Merge Sprite hack. 
- skip = 2; - } } return true; @@ -863,44 +827,6 @@ bool GSHwHack::GSC_MetalGearSolid3(GSRendererHW& r, int& skip) return true; } -bool GSHwHack::GSC_BigMuthaTruckers(GSRendererHW& r, int& skip) -{ - // Rendering pattern: - // CRTC frontbuffer at 0x0 is interlaced (half vertical resolution), - // game needs to do a depth effect (so green channel to alpha), - // but there is a vram limitation so green is pushed into the alpha channel of the CRCT buffer, - // vertical resolution is half so only half is processed at once - // We, however, don't have this limitation so we'll replace the draw with a full-screen TS. - - const GIFRegTEX0& Texture = RTEX0; - - GIFRegTEX0 Frame = {}; - Frame.TBW = RFRAME.FBW; - Frame.TBP0 = RFRAME.Block(); - const int frame_offset_pal = GSLocalMemory::GetEndBlockAddress(0xa00, 10, PSMCT32, GSVector4i(0, 0, 640, 256)) + 1; - const int frame_offset_ntsc = GSLocalMemory::GetEndBlockAddress(0xa00, 10, PSMCT32, GSVector4i(0, 0, 640, 224)) + 1; - const GSVector4i rect = GSVector4i(r.m_vt.m_min.p.x, r.m_vt.m_min.p.y, r.m_vt.m_max.p.x, r.m_vt.m_max.p.y); - - if (RPRIM->TME && Frame.TBW == 10 && Texture.TBW == 10 && Texture.PSM == PSMCT16 && ((rect.w == 512 && Frame.TBP0 == frame_offset_pal) || (Frame.TBP0 == frame_offset_ntsc && rect.w == 448))) - { - // 224 ntsc, 256 pal. 
- GL_INS("GSC_BigMuthaTruckers half bottom offset %d", r.m_context->XYOFFSET.OFX >> 4); - - const size_t count = r.m_vertex.next; - GSVertex* v = &r.m_vertex.buff[0]; - const u16 offset = (u16)rect.w * 16; - - for (size_t i = 0; i < count; i++) - v[i].XYZ.Y += offset; - - r.m_vt.m_min.p.y += rect.w; - r.m_vt.m_max.p.y += rect.w; - r.m_cached_ctx.FRAME.FBP = 0x50; // 0xA00 >> 5 - } - - return true; -} - bool GSHwHack::GSC_HitmanBloodMoney(GSRendererHW& r, int& skip) { // The game does a stupid thing where it backs up the last 2 pages of the framebuffer with shuffles, uploads a CT32 texture to it @@ -946,6 +872,10 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds && r.m_cached_ctx.FRAME.FBMSK == 0 // No frame buffer masking. ) { + const int mask = (r.m_vt.m_max.p.xyxy() == r.m_vt.m_min.p.xyxy()).mask(); + if (mask == 0xf) + return true; + const u32 FBP = r.m_cached_ctx.FRAME.Block(); const u32 FBW = r.m_cached_ctx.FRAME.FBW; GL_INS("PointListPalette - m_r = <%d, %d => %d, %d>, n_vertices = %u, FBP = 0x%x, FBW = %u", r.m_r.x, r.m_r.y, r.m_r.z, r.m_r.w, n_vertices, FBP, FBW); @@ -1047,7 +977,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, // compute shadow in RG, // save result in alpha with a TS, // Restore RG channel that we previously copied to render shadows. - + // Important note: The game downsizes the target to half height, then later expands it back up to full size, that's why PCSX2 doesn't like it, we don't support that behaviour. 
const GIFRegTEX0& Texture = RTEX0; GIFRegTEX0 Frame = {}; @@ -1058,9 +988,9 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, if ((!rt) || (!RPRIM->TME) || (GSLocalMemory::m_psm[Texture.PSM].bpp != 16) || (GSLocalMemory::m_psm[Frame.PSM].bpp != 16) || (Texture.TBP0 == Frame.TBP0) || (Frame.TBW != 16 && Texture.TBW != 16)) return true; - GL_INS("OI_SonicUnleashed replace draw by a copy"); + GL_INS("OI_SonicUnleashed replace draw by a copy draw %d", r.s_n); - GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget); + GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, 0, false, false, true, true, GSVector4i::zero(), true); if (!src) return true; @@ -1086,6 +1016,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, const GSVector2i copy_size(std::min(rt_size.x, src_size.x), std::min(rt_size.y, src_size.y)); const GSVector4 sRect(0.0f, 0.0f, static_cast(copy_size.x) / static_cast(src_size.x), static_cast(copy_size.y) / static_cast(src_size.y)); + // This is kind of a bodge because the game confuses everything since the source is really 16bit and it assumes it's really drawing 16bit on the copy back, resizing the target. 
const GSVector4 dRect(0, 0, copy_size.x, copy_size.y); g_gs_device->StretchRect(src->m_texture, sRect, rt, dRect, true, true, true, false); @@ -1147,43 +1078,6 @@ bool GSHwHack::OI_BurnoutGames(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GS return false; } -bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, int& skip) -{ - if (skip == 0) - { - if (RZBP >= RFBP && RFBP >= 0x2000 && RZBP >= 0x2700 && ((RZBP - RFBP) == 0x700)) - { - skip = 7; - - GIFRegTEX0 TEX0 = {}; - TEX0.TBP0 = RFBP; - TEX0.TBW = 8; - GSTextureCache::Target* dst = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil); - if (dst) - { - g_gs_device->ClearDepth(dst->m_texture, 0.0f); - } - } - } - - return true; -} - -bool GSHwHack::OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - if (!RPRIM->TME || RFRAME.Block() > 0xD00 || RTEX0.TBP0 > 0x1D00) - return true; - - if (rt && t && RFRAME.Block() == 0 && RTEX0.TBP0 == 0x1000) - { - const GSVector4i rc(0, 0, std::min(rt->GetWidth(), t->m_texture->GetWidth()), std::min(rt->GetHeight(), t->m_texture->GetHeight())); - g_gs_device->CopyRect(t->m_texture, rt, rc, 0, 0); - } - - g_texture_cache->InvalidateTemporarySource(); - return false; -} - bool GSHwHack::OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { // Haunting Ground clears two targets by doing a direct colour write at 0x3000, covering a target at 0x3380. 
@@ -1468,6 +1362,7 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function CRC_F(GSC_Manhunt2), CRC_F(GSC_MidnightClub3), CRC_F(GSC_SacredBlaze), + CRC_F(GSC_GuitarHero), CRC_F(GSC_SakuraWarsSoLongMyLove), CRC_F(GSC_Simple2000Vol114), CRC_F(GSC_SFEX3), @@ -1477,14 +1372,12 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function CRC_F(GSC_ZettaiZetsumeiToshi2), CRC_F(GSC_BlackAndBurnoutSky), CRC_F(GSC_BlueTongueGames), - CRC_F(GSC_Battlefield2), CRC_F(GSC_NFSUndercover), CRC_F(GSC_PolyphonyDigitalGames), CRC_F(GSC_MetalGearSolid3), CRC_F(GSC_HitmanBloodMoney), // Channel Effect - CRC_F(GSC_GiTS), CRC_F(GSC_SteambotChronicles), // Depth Issue @@ -1493,10 +1386,6 @@ const GSHwHack::Entry GSHwHack::s_get_skip_count_function // Half Screen bottom issue CRC_F(GSC_Tekken5), - // Texture shuffle - CRC_F(GSC_DeathByDegreesTekkenNinaWilliams), // + Upscaling issues - CRC_F(GSC_BigMuthaTruckers), - // Upscaling hacks CRC_F(GSC_UltramanFightingEvolution), }; @@ -1508,7 +1397,6 @@ const GSHwHack::Entry GSHwHack::s_before_draw_functions[] CRC_F(OI_SonicUnleashed), CRC_F(OI_ArTonelico2), CRC_F(OI_BurnoutGames), - CRC_F(OI_Battlefield2), CRC_F(OI_HauntingGround), }; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.h b/pcsx2/GS/Renderers/HW/GSHwHack.h index 2da65232a11fb..f32ecbc7efccb 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.h +++ b/pcsx2/GS/Renderers/HW/GSHwHack.h @@ -6,10 +6,9 @@ class GSHwHack { public: - static bool GSC_DeathByDegreesTekkenNinaWilliams(GSRendererHW& r, int& skip); - static bool GSC_GiTS(GSRendererHW& r, int& skip); static bool GSC_Manhunt2(GSRendererHW& r, int& skip); static bool GSC_SacredBlaze(GSRendererHW& r, int& skip); + static bool GSC_GuitarHero(GSRendererHW& r, int& skip); static bool GSC_SFEX3(GSRendererHW& r, int& skip); static bool GSC_Tekken5(GSRendererHW& r, int& skip); static bool GSC_BurnoutGames(GSRendererHW& r, int& skip); @@ -25,11 +24,9 @@ class GSHwHack static bool GSC_UrbanReign(GSRendererHW& r, int& skip); static bool 
GSC_SteambotChronicles(GSRendererHW& r, int& skip); static bool GSC_BlueTongueGames(GSRendererHW& r, int& skip); - static bool GSC_Battlefield2(GSRendererHW& r, int& skip); static bool GSC_NFSUndercover(GSRendererHW& r, int& skip); static bool GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip); static bool GSC_MetalGearSolid3(GSRendererHW& r, int& skip); - static bool GSC_BigMuthaTruckers(GSRendererHW& r, int& skip); static bool GSC_HitmanBloodMoney(GSRendererHW& r, int& skip); static bool OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); @@ -38,7 +35,6 @@ class GSHwHack static bool OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_ArTonelico2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_BurnoutGames(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - static bool OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool MV_Growlanser(GSRendererHW& r); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index eb3acb08e4951..9be2db4415505 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8 && tex && tex->m_from_target && rt == tex->m_from_target; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? 
SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -471,7 +471,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, GSVector4::storeh(&v[1].ST.S, st); } } - m_r = fpr; + m_r = r; m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; return; @@ -489,7 +489,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, // Dogs will reuse the Z in a different size format for a completely unrelated draw with an FBW of 2, then go back to using it in full width const bool size_is_wrong = tex->m_target ? (static_cast(tex->m_from_target_TEX0.TBW * 64) < tex->m_from_target->m_valid.z / 2) : false; const u32 draw_page_width = std::max(static_cast(m_vt.m_max.p.x + (!(process_ba & SHUFFLE_WRITE) ? 8.9f : 0.9f)) / 64, 1); - const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z); + const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z) || (IsSinglePageDraw() && m_r.height() > 32); if (size_is_wrong || (rt && ((rt->m_TEX0.TBW % draw_page_width) == 0 || single_direction_doubled))) { @@ -554,7 +554,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } else { - if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) + if (((m_r.width() + 8) & ~(GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x - 1)) != GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x && (floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) { half_bottom_vert = false; half_bottom_uv = false; @@ -587,6 +587,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, else v[i + 1 - reversed_U].U += 128u; } + else + { + if (((pos + 8) >> 4) & 0x8) + { + 
v[i + reversed_pos].XYZ.X -= 128u; + v[i + 1 - reversed_pos].XYZ.X -= 128u; + } + } if (half_bottom_vert) { @@ -704,6 +712,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.t.x += 8.0f; } } + else + { + if (fmod(std::floor(m_vt.m_min.p.x), 64.0f) == 8.0f) + { + m_vt.m_min.p.x -= 8.0f; + m_vt.m_max.p.x -= 8.0f; + } + } if (half_right_vert) { @@ -717,10 +733,25 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.p.y = floor(m_vt.m_max.p.y + 1.9f) / 2.0f; } - m_context->scissor.in.x = m_vt.m_min.p.x; - m_context->scissor.in.z = m_vt.m_max.p.x + 0.9f; - m_context->scissor.in.y = m_vt.m_min.p.y; - m_context->scissor.in.w = m_vt.m_max.p.y + 0.9f; + if (m_context->scissor.in.x & 8) + { + m_context->scissor.in.x &= ~0xf; //m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.x /= 2; + } + if (m_context->scissor.in.z & 8) + { + m_context->scissor.in.z += 8; //m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.z /= 2; + } + if (half_bottom_vert) + { + m_context->scissor.in.y /= 2; + m_context->scissor.in.w /= 2; + } // Only do this is the source is being interpreted as 16bit if (half_bottom_uv) @@ -734,6 +765,38 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_min.t.x /= 2.0f; m_vt.m_max.t.x = (m_vt.m_max.t.x + 1.9f) / 2.0f; } + + // Special case used in Call of Duty - World at War where it doubles the height and halves the width, but the height is double doubled. + // Check the height of the original texture, if it's half of the draw height, then make it wide instead. 
+ if (half_bottom_uv && tex->m_from_target && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW && + tex->m_from_target->m_TEX0.TBW == (m_cached_ctx.TEX0.TBW * 2) && (m_cached_ctx.TEX0.TBW * 64) == floor(m_vt.m_max.t.x)) + { + m_r.z *= 2; + m_r.w /= 2; + + m_vt.m_max.t.y /= 2; + m_vt.m_max.t.x *= 2; + m_vt.m_max.p.y /= 2; + m_vt.m_max.p.x *= 2; + m_context->scissor.in.w /= 2; + m_context->scissor.in.z *= 2; + + v[1].XYZ.X = ((v[m_index.buff[m_index.tail - 1]].XYZ.X - m_context->XYOFFSET.OFX) * 2) + m_context->XYOFFSET.OFX; + v[1].XYZ.Y = ((v[m_index.buff[m_index.tail - 1]].XYZ.Y - m_context->XYOFFSET.OFY) / 2) + m_context->XYOFFSET.OFY; + + v[1].U = v[m_index.buff[m_index.tail - 1]].U * 2; + v[1].V = v[m_index.buff[m_index.tail - 1]].V / 2; + + v[1].ST.S = v[m_index.buff[m_index.tail - 1]].ST.S * 2; + v[1].ST.T = v[m_index.buff[m_index.tail - 1]].ST.T / 2; + + m_vertex.head = m_vertex.tail = m_vertex.next = 2; + m_index.tail = 2; + + m_cached_ctx.TEX0.TBW *= 2; + m_cached_ctx.FRAME.FBW *= 2; + GL_CACHE("Half width/double height shuffle detected, width changed to %d", m_cached_ctx.FRAME.FBW); + } } GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex) @@ -887,6 +950,12 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // e.g. Burnout 3, God of War II, etc. int height = std::min(m_context->scissor.in.w, m_r.w); + // We can check if the next draw is doing the same from the next page, and assume it's a per line clear. + // Battlefield 2 does this. 
+ const int pages = ((GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1) - m_cached_ctx.FRAME.Block()) >> 5; + if (m_cached_ctx.FRAME.FBW > 1 && m_r.height() <= 64 && (pages % m_cached_ctx.FRAME.FBW) == 0 && m_env.CTXT[m_backed_up_ctx].FRAME.FBP == (m_cached_ctx.FRAME.FBP + pages) && NextDrawMatchesShuffle()) + height = std::max(m_context->scissor.in.w, height); + // If the draw is less than a page high, FBW=0 is the same as FBW=1. const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; int width = std::min(std::max(m_cached_ctx.FRAME.FBW, 1) * 64, m_context->scissor.in.z); @@ -897,7 +966,7 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) } // If it's a channel shuffle, it'll likely be just a single page, so assume full screen. - if (m_channel_shuffle) + if (m_channel_shuffle || (tex && IsPageCopy())) { const int page_x = frame_psm.pgs.x - 1; const int page_y = frame_psm.pgs.y - 1; @@ -905,8 +974,8 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // Round up the page as channel shuffles are generally done in pages at a time // Keep in mind the source might be an 8bit texture - int src_width = tex->GetUnscaledWidth(); - int src_height = tex->GetUnscaledHeight(); + int src_width = tex->m_from_target ? tex->m_from_target->m_valid.width() : tex->GetUnscaledWidth(); + int src_height = tex->m_from_target ? 
tex->m_from_target->m_valid.height() : tex->GetUnscaledHeight(); if (!tex->m_from_target && GSLocalMemory::m_psm[tex->m_TEX0.PSM].bpp == 8) { @@ -973,7 +1042,8 @@ GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex, const bool GSRendererHW::IsPossibleChannelShuffle() const { if (!PRIM->TME || m_cached_ctx.TEX0.PSM != PSMT8 || // 8-bit texture draw - m_vt.m_primclass != GS_SPRITE_CLASS) // draw_sprite_tex + m_vt.m_primclass != GS_SPRITE_CLASS || // draw_sprite_tex + (m_vertex.tail <= 2 && (((m_vt.m_max.p - m_vt.m_min.p) <= GSVector4(8.0f)).mask() & 0x3) == 0x3)) // Powerdrome does a tiny shuffle on a couple of pixels, can't reliably translate this. { return false; } @@ -987,6 +1057,7 @@ bool GSRendererHW::IsPossibleChannelShuffle() const const int draw_height = std::abs(v[1].XYZ.Y - v[0].XYZ.Y) >> 4; const bool mask_clamp = (m_cached_ctx.CLAMP.WMS | m_cached_ctx.CLAMP.WMT) & 0x2; + const bool draw_match = (draw_height == 2) || (draw_width == 8); if (draw_match || mask_clamp) @@ -1019,6 +1090,26 @@ bool GSRendererHW::IsPossibleChannelShuffle() const return false; } +bool GSRendererHW::IsPageCopy() const +{ + if (!PRIM->TME) + return false; + + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) + return false; + + if (next_ctx.FRAME.FBP != (m_cached_ctx.FRAME.FBP + 0x1)) + return false; + + if (!NextDrawMatchesShuffle()) + return false; + + return true; +} + bool GSRendererHW::NextDrawMatchesShuffle() const { // Make sure nothing unexpected has changed. 
@@ -1038,7 +1129,7 @@ bool GSRendererHW::NextDrawMatchesShuffle() const return true; } -bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) +bool GSRendererHW::IsSplitTextureShuffle(GIFRegTEX0& rt_TEX0, GSVector4i& valid_area) { // For this to work, we're peeking into the next draw, therefore we need dirty registers. if (m_dirty_gs_regs == 0) @@ -1081,7 +1172,7 @@ bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) const u32 pages_high = static_cast(aligned_rc.height()) / frame_psm.pgs.y; const u32 num_pages = m_context->FRAME.FBW * pages_high; // Jurassic - The Hunted will do a split shuffle with a height of 512 (256) when it's supposed to be 448, so it redoes one row of the shuffle. - const u32 rt_half = (((rt->m_valid.height() / GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y) / 2) * rt->m_TEX0.TBW) + (rt->m_TEX0.TBP0 >> 5); + const u32 rt_half = (((valid_area.height() / GSLocalMemory::m_psm[rt_TEX0.PSM].pgs.y) / 2) * rt_TEX0.TBW) + (rt_TEX0.TBP0 >> 5); // If this is a split texture shuffle, the next draw's FRAME/TEX0 should line up. // Re-add the offset we subtracted in Draw() to get the original FBP/TBP0.. this won't handle wrapping. Oh well. // "Potential" ones are for Jak3 which does a split shuffle on a 128x128 texture with a width of 256, writing to the lower half then offsetting 2 pages. @@ -1117,7 +1208,7 @@ bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) // If the game has changed the texture width to 1 we need to retanslate it to whatever the rt has so the final rect is correct. 
if (m_cached_ctx.FRAME.FBW == 1) - m_split_texture_shuffle_fbw = rt->m_TEX0.TBW; + m_split_texture_shuffle_fbw = rt_TEX0.TBW; else m_split_texture_shuffle_fbw = m_cached_ctx.FRAME.FBW; } @@ -1126,10 +1217,10 @@ bool GSRendererHW::IsSplitTextureShuffle(GSTextureCache::Target* rt) u32 total_pages = num_pages; // If the current draw is further than the half way point and the next draw is the half way point, then we can assume it's just overdrawing. - if (next_ctx.FRAME.FBP == rt_half && num_pages > (rt_half - (rt->m_TEX0.TBP0 >> 5))) + if (next_ctx.FRAME.FBP == rt_half && num_pages > (rt_half - (rt_TEX0.TBP0 >> 5))) { - vertical_pages = (rt->m_valid.height() / GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y) / 2; - total_pages = vertical_pages * rt->m_TEX0.TBW; + vertical_pages = (valid_area.height() / GSLocalMemory::m_psm[rt_TEX0.PSM].pgs.y) / 2; + total_pages = vertical_pages * rt_TEX0.TBW; } if ((m_split_texture_shuffle_pages % m_split_texture_shuffle_fbw) == 0) @@ -1176,6 +1267,16 @@ GSVector4i GSRendererHW::GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages) return GSVector4i::loadh(size); } +bool GSRendererHW::IsSinglePageDraw() const +{ + const GSVector2i& frame_pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs; + + if (m_r.width() <= frame_pgs.x && m_r.height() <= frame_pgs.y) + return true; + + return false; +} + bool GSRendererHW::TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw) { const u32 start_bp = FRAME.Block(); @@ -1586,7 +1687,11 @@ void GSRendererHW::Move() const int w = m_env.TRXREG.RRW; const int h = m_env.TRXREG.RRH; - + GL_CACHE("Starting Move! 
0x%x W:%d F:%s => 0x%x W:%d F:%s (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d) draw %d", + m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, psm_str(m_env.BITBLTBUF.SPSM), + m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, psm_str(m_env.BITBLTBUF.DPSM), + m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, + sx, sy, dx, dy, w, h, s_n); if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h)) { @@ -1999,9 +2104,7 @@ void GSRendererHW::Draw() DumpVertices(s); } -#ifdef ENABLE_OGL_DEBUG static u32 num_skipped_channel_shuffle_draws = 0; -#endif // We mess with this state as an optimization, so take a copy and use that instead. const GSDrawingContext* context = m_context; @@ -2024,25 +2127,80 @@ void GSRendererHW::Draw() // Fortunately, it seems to change the FBMSK along the way, so this check alone is sufficient. // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. - m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && - m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block(); + m_channel_shuffle = !m_channel_shuffle_abort && IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block() && + m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; -#ifdef ENABLE_OGL_DEBUG if (m_channel_shuffle) { + // Tombraider does vertical strips 2 pages at a time, then puts them horizontally, it's a mess, so let it do the full screen shuffle. + m_full_screen_shuffle |= !IsPageCopy() && NextDrawMatchesShuffle(); + // These HLE's skip several channel shuffles in a row which change blends etc. Let's not break the flow, it gets upset. 
+ if (!m_conf.ps.urban_chaos_hle && !m_conf.ps.tales_of_abyss_hle) + { + m_last_channel_shuffle_fbp = m_context->FRAME.Block(); + m_last_channel_shuffle_tbp = m_context->TEX0.TBP0; + } + num_skipped_channel_shuffle_draws++; return; } + if (m_channel_shuffle_width) + { + if (m_last_rt) + { + //DevCon.Warning("Skipped %d draw %d was abort %d", num_skipped_channel_shuffle_draws, s_n, (int)m_channel_shuffle_abort); + // Some games like Tomb raider abort early, we're never going to know the real height, and the system doesn't work right for partials. + // But it's good enough for games like Hitman Blood Money which only shuffle part of the screen + + if (!m_full_screen_shuffle) + { + const u32 width_pages = ((num_skipped_channel_shuffle_draws + 1) % std::max(1U, m_channel_shuffle_width) % std::max(1U, m_channel_shuffle_width)) * 64;; + m_conf.scissor.w = m_conf.scissor.y + (((num_skipped_channel_shuffle_draws + 1 + (m_channel_shuffle_width - 1)) / std::max(1U, m_channel_shuffle_width)) * 32) * m_conf.cb_ps.ScaleFactor.z; + if (width_pages) + m_conf.scissor.z = m_conf.scissor.x + (((num_skipped_channel_shuffle_draws + 1) % std::max(1U, m_channel_shuffle_width) % std::max(1U, m_channel_shuffle_width)) * 64) * m_conf.cb_ps.ScaleFactor.z; + } + g_gs_device->RenderHW(m_conf); + + if (GSConfig.DumpGSData) + { + const u64 frame = g_perfmon.GetFrame(); + + std::string s; + + if (GSConfig.SaveRT && (s_n - 1) >= GSConfig.SaveN) + { + s = GetDrawDumpPath("%05d_f%lld_rt1_%05x_(%05x)_%s.bmp", s_n - 1, frame, m_cached_ctx.FRAME.Block(), m_last_rt-> m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); + + m_last_rt->m_texture->Save(s); + } + + if (GSConfig.SaveL > 0 && ((s_n - 1) - GSConfig.SaveN) > GSConfig.SaveL) + { + GSConfig.DumpGSData = 0; + } + } + g_texture_cache->InvalidateTemporarySource(); + CleanupDraw(false); + } + } +#ifdef ENABLE_OGL_DEBUG if (num_skipped_channel_shuffle_draws > 0) - GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); - 
num_skipped_channel_shuffle_draws = 0; -#else - if (m_channel_shuffle) - return; + GL_CACHE("Skipped %d channel shuffle draws ending at %d", num_skipped_channel_shuffle_draws, s_n); #endif + num_skipped_channel_shuffle_draws = 0; + + m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_tbp = 0xffff; + m_last_channel_shuffle_end_block = 0xffff; } + m_last_rt = nullptr; + m_channel_shuffle_width = 0; + m_full_screen_shuffle = false; + m_channel_shuffle_abort = false; + GL_PUSH("HW Draw %d (Context %u)", s_n, PRIM->CTXT); GL_INS("FLUSH REASON: %s%s", GetFlushReasonString(m_state_flush_reason), (m_state_flush_reason != GSFlushReason::CONTEXTCHANGE && m_dirty_gs_regs) ? " AND POSSIBLE CONTEXT CHANGE" : @@ -2152,14 +2310,6 @@ void GSRendererHW::Draw() const bool draw_sprite_tex = PRIM->TME && (m_vt.m_primclass == GS_SPRITE_CLASS); - // We trigger the sw prim render here super early, to avoid creating superfluous render targets. - if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true)) - { - GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)", - m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW); - return; - } - // GS doesn't fill the right or bottom edges of sprites/triangles, and for a pixel to be shaded, the vertex // must cross the center. In other words, the range is equal to the floor of coordinates +0.5. Except for // the case where the minimum equals the maximum, because at least one pixel is filled per line. @@ -2187,6 +2337,14 @@ void GSRendererHW::Draw() return; } + // We trigger the sw prim render here super early, to avoid creating superfluous render targets. 
+ if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true)) + { + GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)", + m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW); + return; + } + // We want to fix up the context if we're doing a double half clear, regardless of whether we do the CPU fill. const ClearType is_possible_mem_clear = IsConstantDirectWriteMemClear(); if (!GSConfig.UserHacks_DisableSafeFeatures && is_possible_mem_clear) @@ -2410,7 +2568,7 @@ void GSRendererHW::Draw() GIFRegTEX0 TEX0 = {}; GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; - + bool possible_shuffle = false; // Disable texture mapping if the blend is black and using alpha from vertex. if (m_process_texture) { @@ -2527,9 +2685,33 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && + (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true) && m_index.tail > 6))) { - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) + { + const GSVertex* v = &m_vertex.buff[0]; + + const int first_x = std::clamp((static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4, 0, 2048); + const bool offset_last = PRIM->FST ? (v[1].U > v[0].U) : ((v[1].ST.S / v[1].RGBAQ.Q) > (v[0].ST.S / v[1].RGBAQ.Q)); + const int first_u = PRIM->FST ? ((v[0].U + (offset_last ? 0 : 9)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.0f : 0.6f)), 0, 2048); + const int second_u = PRIM->FST ? ((v[1].U + (offset_last ? 
9 : 0)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.6f : 0.0f)), 0, 2048); + // offset coordinates swap around RG/BA. (Ace Combat) + const u32 minv = m_cached_ctx.CLAMP.MINV; + const u32 minu = m_cached_ctx.CLAMP.MINU; + // Make sure minu or minv are actually a mask on some bits, false positives of games setting 512 (0x1ff) are not masks used for shuffles. + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv && ((minu + 1 & minu) || (minv + 1 & minv)))); + const bool shuffle_coords = ((first_x ^ first_u) & 0xF) == 8 || rgba_shuffle; + + // Round up half of second coord, it can sometimes be slightly under. + const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; + const int read_width = std::abs(second_u - first_u); + + // m_skip check is just mainly for NFS Undercover, but should hopefully pick up any other games which rewrite shuffles. + shuffle_target = shuffle_coords && (((draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1) || m_skip > 50); + } + + if (!shuffle_target) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; @@ -2538,28 +2720,16 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, - fm); + fm, false, false, false, false, GSVector4i::zero(), true); if (tgt) shuffle_target = tgt->m_32_bits_fmt; tgt = nullptr; } - if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) - { - const GSVertex* v = &m_vertex.buff[0]; - - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; - const int first_u = PRIM->FST ? 
((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const int second_u = PRIM->FST ? ((v[1].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const bool shuffle_coords = (first_x ^ first_u) & 8; - const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; - const int read_width = std::abs(second_u - first_u); - - shuffle_target = shuffle_coords && draw_width == 8 && draw_width == read_width; - } } - const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle()); + + possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM; const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); @@ -2585,6 +2755,7 @@ void GSRendererHW::Draw() return; } + possible_shuffle &= src && (src->m_from_target != nullptr || (m_skip && possible_shuffle)); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. 
@@ -2626,18 +2797,15 @@ void GSRendererHW::Draw() // Urban Reign trolls by scissoring a draw to a target at 0x0-0x117F to 378x449 which ends up the size being rounded up to 640x480 // causing the buffer to expand to around 0x1400, which makes a later framebuffer at 0x1180 to fail to be created correctly. // We can cheese this by checking if the Z is masked and the resultant colour is going to be black anyway. - const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 && m_context->ALPHA.B == 0 && GetAlphaMinMax().min >= 128) || m_context->ALPHA.IsBlack()) && m_draw_env->COLCLAMP.CLAMP == 1; + const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 && m_context->ALPHA.D > 1) || (m_context->ALPHA.IsBlack() && m_context->ALPHA.D != 1)) && m_draw_env->COLCLAMP.CLAMP == 1; const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. - const GSVector2i t_size = GetTargetSize(src, can_expand); + GSVector2i t_size = GetTargetSize(src, can_expand); const GSVector4i t_size_rect = GSVector4i::loadh(t_size); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. const GSVector4i unclamped_draw_rect = m_r; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. 
- if (!m_texture_shuffle && m_split_texture_shuffle_pages == 0) - m_r = m_r.rintersect(t_size_rect); float target_scale = GetTextureScaleFactor(); int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); @@ -2694,29 +2862,142 @@ void GSRendererHW::Draw() GSTextureCache::Target* rt = nullptr; GIFRegTEX0 FRAME_TEX0; + const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; + + m_in_target_draw = false; + m_target_offset = 0; + + GSTextureCache::Target* ds = nullptr; + GIFRegTEX0 ZBUF_TEX0; + if (!no_ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, nullptr, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. 
+ if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (!no_rt) { + possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + + const bool possible_horizontal_texture_shuffle = possible_shuffle && src && src->m_from_target && m_r.w <= src->m_from_target->m_valid.w && m_r.z > src->m_from_target->m_valid.z && m_cached_ctx.FRAME.FBW > src->m_from_target_TEX0.TBW; + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = (possible_horizontal_texture_shuffle || (possible_shuffle && src && src->m_from_target && IsPossibleChannelShuffle())) ? 
src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; + // Don't clamp on shuffle, the height cache may troll us with the REAL height. + if (!possible_shuffle && m_split_texture_shuffle_pages == 0) + m_r = m_r.rintersect(t_size_rect); + + GSVector4i lookup_rect = unclamped_draw_rect; + // Do the lookup with the real format on a shuffle, if possible. + if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + { + // Creating a new target on a shuffle, possible temp buffer, but let's try to get the real format. + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.FRAME.Block() == FRAME_TEX0.TBP0 && next_ctx.FRAME.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.FRAME.PSM; + else if (next_ctx.TEX0.TBP0 == FRAME_TEX0.TBP0 && next_ctx.TEX0.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.TEX0.PSM; + else + FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + + // This is just for overlap detection, it doesn't matter which direction we do this in + if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32 && src && src->m_from_target) + { + // Shuffling with a double width (Sonic Unleashed for example which does a wierd shuffle/not shuffle green backup/restore). + if (std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) + { + lookup_rect.x /= 2; + lookup_rect.z /= 2; + } + else + { + lookup_rect.y /= 2; + lookup_rect.w /= 2; + } + } + } + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. 
- const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; - const bool is_clear = is_possible_mem_clear && is_square; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); + const bool is_large_rect = (t_size.y >= t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; + const bool is_clear = is_possible_mem_clear && is_large_rect; // Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? 
GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear); + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, ds, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2737,9 +3018,25 @@ void GSRendererHW::Draw() CleanupDraw(true); return; } + else if (IsPageCopy() && src->m_from_target && m_cached_ctx.TEX0.TBP0 >= src->m_from_target->m_TEX0.TBP0) + { + FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW; + } + + if (possible_shuffle && IsSplitTextureShuffle(FRAME_TEX0, lookup_rect)) + { + // If TEX0 == FBP, we're going to have a source left in the TC. + // That source will get used in the actual draw unsafely, so kick it out. + if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); + + CleanupDraw(true); + return; + } - rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_color || possible_shuffle, m_r, src); + rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? 
src->m_from_target->GetScale() : target_scale, + GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src); + if (!rt) [[unlikely]] { GL_INS("ERROR: Failed to create FRAME target, skipping."); @@ -2747,12 +3044,129 @@ void GSRendererHW::Draw() return; } } + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) + { + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. + int texture_offset = 0; + int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; + // Used to reduce the offset made later in channel shuffles + m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); + + if (vertical_offset < 0) + { + rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + GSVector2i new_size = rt->m_unscaled_size; + // Make sure to use the original format for the offset. + const int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + texture_offset = new_offset; + new_size.y += new_offset; + + rt->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i::loadh(new_size * rt->m_scale).loadl(GSVector2i(0, new_offset * rt->m_scale))); + + if (src && src->m_from_target && src->m_from_target == rt && src->m_target_direct) + { + src->m_texture = rt->m_texture; + } + + rt->m_valid.y += new_offset; + rt->m_valid.w += new_offset; + rt->m_drawn_since_read.y += new_offset; + rt->m_drawn_since_read.w += new_offset; + + t_size.y += std::abs(vertical_offset); + vertical_offset = 0; + } + + if (horizontal_offset < 0) + { + // Thankfully this doesn't really happen, but catwoman moves the framebuffer backwards 1 page with a channel shuffle, which is really messy and not easy to deal with. 
+ // Hopefully the quick channel shuffle will just guess this and run with it. + rt->m_TEX0.TBP0 += horizontal_offset; + horizontal_offset = 0; + } + // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? + if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) + { + + const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset); + GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(vertical_offset + m_r.w + 1, vertical_offset + ds->m_unscaled_size.y) * ds->m_scale); + const int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); + GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + + g_texture_cache->SetTemporaryZ(tex); + } + + GSVertex* v = &m_vertex.buff[0]; + + for (u32 i = 0; i < m_vertex.tail; i++) + { + v[i].XYZ.X += horizontal_offset << 4; + v[i].XYZ.Y += vertical_offset << 4; + } + + if (texture_offset && src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) + { + GSVector4i src_region = src->GetRegionRect(); + + if (src_region.rempty()) + { + src_region = GSVector4i::loadh(rt->m_unscaled_size); + src_region.y += texture_offset; + } + else + { + src_region.y += texture_offset; + src_region.w += texture_offset; + } + 
src->m_region.SetX(src_region.x, src_region.z); + src->m_region.SetY(src_region.y, src_region.w); + } + + m_context->scissor.in.x += horizontal_offset; + m_context->scissor.in.z += horizontal_offset; + m_context->scissor.in.y += vertical_offset; + m_context->scissor.in.w += vertical_offset; + m_r.y += vertical_offset; + m_r.w += vertical_offset; + m_r.x += horizontal_offset; + m_r.z += horizontal_offset; + m_in_target_draw = rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block(); + m_vt.m_min.p.x += horizontal_offset; + m_vt.m_max.p.x += horizontal_offset; + m_vt.m_min.p.y += vertical_offset; + m_vt.m_max.p.y += vertical_offset; + + t_size.x = rt->m_unscaled_size.x - horizontal_offset; + t_size.y = rt->m_unscaled_size.y - vertical_offset; + + // Don't resize if the BPP don't match. + if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) + { + if (m_r.w > rt->m_unscaled_size.y || m_r.z > rt->m_unscaled_size.x) + { + const u32 new_height = std::max(m_r.w, rt->m_unscaled_size.y); + const u32 new_width = std::max(m_r.z, rt->m_unscaled_size.x); + + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(new_width, new_height); + + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); + + rt->UpdateValidity(m_r, !frame_masked); + rt->UpdateDrawn(m_r, !frame_masked); + } + } + } + if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) { src->m_texture = rt->m_texture; src->m_scale = rt->GetScale(); src->m_unscaled_size = rt->m_unscaled_size; + } target_scale = rt->GetScale(); @@ -2764,35 +3178,40 @@ void GSRendererHW::Draw() if (m_channel_shuffle) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do 
every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); } + else + m_last_channel_shuffle_end_block = 0xFFFF; } - GSTextureCache::Target* ds = nullptr; - GIFRegTEX0 ZBUF_TEX0; - if (!no_ds) + // Only run if DS was new and matched the framebuffer. + if (!no_ds && !ds) { ZBUF_TEX0.U64 = 0; ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, - m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, nullptr, -1); + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + // This should never happen, but just to be safe.. 
if (!ds) { - - ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, - true, 0, false, force_preload, preserve_depth, m_r, src); - if (!ds) [[unlikely]] - { - GL_INS("ERROR: Failed to create ZBUF target, skipping."); - CleanupDraw(true); - return; - } + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } } else { @@ -2802,7 +3221,7 @@ void GSRendererHW::Draw() if (ds->m_alpha_max != 0) { const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; - + switch (m_cached_ctx.TEST.ZTST) { case ZTST_GEQUAL: @@ -2846,11 +3265,13 @@ void GSRendererHW::Draw() { // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. // Initially code also tested the RT but it gives too much false-positive - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; + const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; + const int first_x = (((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4) - horizontal_offset; const int first_u = PRIM->FST ? 
((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const bool shuffle_coords = (first_x ^ first_u) & 8; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; - const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() && + const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); + const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= draw_start && src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) || (m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0)); @@ -2867,17 +3288,17 @@ void GSRendererHW::Draw() (shuffle_coords || rt->m_32_bits_fmt)) && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle) && (draw_sprite_tex || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true))); - }; - if (m_texture_shuffle && IsSplitTextureShuffle(rt)) - { - // If TEX0 == FBP, we're going to have a source left in the TC. - // That source will get used in the actual draw unsafely, so kick it out. - if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) - g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); + if (m_texture_shuffle && IsSplitTextureShuffle(rt->m_TEX0, rt->m_valid)) + { + // If TEX0 == FBP, we're going to have a source left in the TC. + // That source will get used in the actual draw unsafely, so kick it out. 
+ if (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); - CleanupDraw(true); - return; + CleanupDraw(true); + return; + } } if ((src->m_target || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0)) && IsPossibleChannelShuffle()) @@ -2900,6 +3321,7 @@ void GSRendererHW::Draw() if (rt) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // Urban Chaos goes from Z16 to C32, so let's just use the rt's original end block. if (!src->m_from_target || GSLocalMemory::m_psm[src->m_from_target_TEX0.PSM].bpp != GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) m_last_channel_shuffle_end_block = rt->m_end_block; @@ -3022,7 +3444,9 @@ void GSRendererHW::Draw() // Deferred update of TEX0. We don't want to change it when we're doing a shuffle/clear, because it // may increase the buffer width, or change PSM, which breaks P8 conversion amongst other things. + // Some texture shuffles can be to new targets (or reused ones) so they may need their valid rects adjusting. const bool can_update_size = !is_possible_mem_clear && !m_texture_shuffle && !m_channel_shuffle; + if (!m_texture_shuffle && !m_channel_shuffle) { // Try to turn blits in to single sprites, saves upscaling problems when striped clears/blits. 
@@ -3061,7 +3485,7 @@ void GSRendererHW::Draw() } } const bool blending_cd = PRIM->ABE && !m_context->ALPHA.IsOpaque(); - if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM)) + if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM) && !m_in_target_draw) { if (rt->m_TEX0.TBW != FRAME_TEX0.TBW && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000)) { @@ -3072,11 +3496,22 @@ void GSRendererHW::Draw() if (m_cached_ctx.FRAME.FBMSK & 0xF0000000) rt->m_valid_alpha_high = false; } - rt->m_TEX0 = FRAME_TEX0; + if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { + FRAME_TEX0.TBP0 = rt->m_TEX0.TBP0; + rt->m_TEX0 = FRAME_TEX0; + + } } - if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW))) - ds->m_TEX0 = ZBUF_TEX0; + if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) + { + if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { + ZBUF_TEX0.TBP0 = ds->m_TEX0.TBP0; + ds->m_TEX0 = ZBUF_TEX0; + } + } } else if (!m_texture_shuffle) { @@ -3084,8 +3519,8 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. if (rt) { - const bool update_fbw = (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); - rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); + const bool update_fbw = !m_in_target_draw && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + rt->m_TEX0.TBW = update_fbw ? ((src && src->m_from_target && src->m_32_bits_fmt) ? 
src->m_from_target->m_TEX0.TBW : FRAME_TEX0.TBW) : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } if (ds) @@ -3094,6 +3529,11 @@ void GSRendererHW::Draw() ds->m_TEX0.PSM = ZBUF_TEX0.PSM; } } + // Probably grabbed an old 16bit target (Band Hero) + /*else if (m_texture_shuffle && GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp == 16) + { + rt->m_TEX0.PSM = PSMCT32; + }*/ // Figure out which channels we're writing. if (rt) @@ -3106,25 +3546,66 @@ void GSRendererHW::Draw() GSTextureCache::Target* old_ds = nullptr; // If the draw is dated, we're going to expand in to black, so it's just a pointless rescale which will mess up our valid rects and end blocks. - if(!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) + if (!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) { GSVector2i new_size = t_size; - + GSVector4i update_rect = m_r; + const GIFRegTEX0& draw_TEX0 = rt ? rt->m_TEX0 : ds->m_TEX0; + const int buffer_width = std::max(draw_TEX0.TBW, 1U) * 64; // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. - if (src && m_texture_shuffle && m_split_texture_shuffle_pages == 0) + if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle) { if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) { if (new_size.y <= src->m_valid_rect.w && (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)) + { new_size.x /= 2; + } else + { new_size.y /= 2; + } } + + if (update_rect.z > src->m_valid_rect.z && (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)) + { + // This is a case for Superman Shadow of Apokalypse where it is *nearly* double height and slightly wider, but the page count adds up. 
+ if (update_rect.w > src->m_valid_rect.w) + { + update_rect = src->m_valid_rect; + } + else + { + update_rect.x /= 2; + update_rect.z /= 2; + } + } + else + { + update_rect.y /= 2; + update_rect.w /= 2; + } + } + // NFS Undercover does a draw with double width of the actual width 1280x240, which functions the same as doubling the height. + // Ignore single page/0 page stuff, that's just gonna get silly + else if (buffer_width > 64 && update_rect.z > buffer_width) + { + update_rect.w *= static_cast(update_rect.z) / static_cast(buffer_width); + update_rect.z = buffer_width; + } + + if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0) + { + new_size.y = std::max(new_size.y, static_cast((((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) >> 5) / rt->m_TEX0.TBW) * frame_psm.pgs.y) * 2); + GSVector4i new_valid = rt->m_valid; + new_valid.w = new_size.y; + rt->UpdateValidity(new_valid, true); } // We still need to make sure the dimensions of the targets match. - const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); - const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); + // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes. + const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0))); + const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? 
ds->m_unscaled_size.y : 0))); if (rt) { const u32 old_end_block = rt->m_end_block; @@ -3136,6 +3617,25 @@ void GSRendererHW::Draw() if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); + // May not be needed/could cause problems with garbage loaded from GS memory + /*if (preserve_rt_color) + { + RGBAMask mask; + mask._u32 = 0xF; + + if (new_w > rt->m_unscaled_size.x) + { + GSVector4i width_dirty_rect = GSVector4i(rt->m_unscaled_size.x, 0, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, width_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + + if (new_h > rt->m_unscaled_size.y) + { + GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + }*/ + rt->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3154,10 +3654,14 @@ void GSRendererHW::Draw() rt->ResizeDrawn(rt->GetUnscaledRect()); } - const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(new_size)); + const bool rt_update = can_update_size || (m_texture_shuffle && (src && rt && src->m_from_target != rt)); + + // if frame is masked or afailing always to never write frame, wanna make sure we don't touch it. This might happen if DATE or Alpha Test is being used to write to Z. 
+ const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); - rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(update_rect, !frame_masked && (rt_update || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateDrawn(update_rect, !frame_masked && (rt_update || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // Grandia Xtreme, Onimusha Warlord. @@ -3191,6 +3695,7 @@ void GSRendererHW::Draw() pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); + ds->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3200,8 +3705,12 @@ void GSRendererHW::Draw() } // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); - ds->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); + // Dark cloud writes to 424 when the buffer is only 416 high, but masks the Z. + // Updating the valid causes the Z to overlap the framebuffer, which is obviously incorrect. 
+ const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + + ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); + ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); if (!new_rect && new_height && old_end_block != ds->m_end_block) { @@ -3290,7 +3799,7 @@ void GSRendererHW::Draw() if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), psm_str(m_cached_ctx.FRAME.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), rt->m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); if (rt->m_texture) rt->m_texture->Save(s); @@ -3298,9 +3807,11 @@ void GSRendererHW::Draw() if (ds && GSConfig.SaveDepth && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), ds->m_TEX0.TBP0, psm_str(m_cached_ctx.ZBUF.PSM)); - if (ds->m_texture) + if (g_texture_cache->GetTemporaryZ()) + g_texture_cache->GetTemporaryZ()->Save(s); + else if (ds->m_texture) ds->m_texture->Save(s); } } @@ -3376,7 +3887,6 @@ void GSRendererHW::Draw() if (!skip_draw) DrawPrims(rt, ds, src, tmm); - // // Temporary source *must* be invalidated before normal, because otherwise it'll be double freed. 
g_texture_cache->InvalidateTemporarySource(); @@ -3389,9 +3899,10 @@ void GSRendererHW::Draw() if ((fm & fm_mask) != fm_mask && rt) { + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); //rt->m_valid = rt->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false); @@ -3402,15 +3913,32 @@ void GSRendererHW::Draw() if (zm != 0xffffffff && ds) { + const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + //ds->m_valid = ds->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + ds->UpdateValidity(real_rect, !z_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false); // Remove overwritten RTs at the ZBP. 
g_texture_cache->InvalidateVideoMemType( GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); + + + if (ds && g_texture_cache->GetTemporaryZ()) + { + if (m_cached_ctx.DepthWrite()) + { + const int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; + const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + const GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); + + GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, vertical_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, + std::min(real_rect.w + 1, ds->m_unscaled_size.y + vertical_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + } + } } // @@ -3421,7 +3949,7 @@ void GSRendererHW::Draw() std::string s; - if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN) + if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN && !m_last_rt) { s = GetDrawDumpPath("%05d_f%lld_rt1_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), rt->m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); @@ -3435,7 +3963,7 @@ void GSRendererHW::Draw() ds->m_texture->Save(s); } - if (GSConfig.SaveL > 0 && (s_n - GSConfig.SaveN) > GSConfig.SaveL) + if (GSConfig.SaveL > 0 && (s_n - GSConfig.SaveN) > GSConfig.SaveL && !m_last_rt) { GSConfig.DumpGSData = 0; } @@ -3505,7 +4033,7 @@ bool GSRendererHW::VerifyIndices() return true; } -void GSRendererHW::SetupIA(float target_scale, float sx, float sy) +void 
GSRendererHW::SetupIA(float target_scale, float sx, float sy, bool req_vert_backup) { GL_PUSH("IA"); @@ -3586,7 +4114,20 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy) { m_conf.topology = GSHWDrawConfig::Topology::Triangle; m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite; - m_conf.verts = m_vertex.buff; + + if (req_vert_backup) + { + memcpy(m_draw_vertex.buff, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); + memcpy(m_draw_index.buff, m_index.buff, sizeof(u16) * m_index.tail); + + m_conf.verts = m_draw_vertex.buff; + m_conf.indices = m_draw_index.buff; + } + else + { + m_conf.verts = m_vertex.buff; + m_conf.indices = m_index.buff; + } m_conf.nverts = m_vertex.next; m_conf.nindices = m_index.tail * 3; m_conf.indices_per_prim = 6; @@ -3627,9 +4168,20 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy) ASSUME(0); } - m_conf.verts = m_vertex.buff; + if (req_vert_backup) + { + memcpy(m_draw_vertex.buff, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); + memcpy(m_draw_index.buff, m_index.buff, sizeof(u16) * m_index.tail); + + m_conf.verts = m_draw_vertex.buff; + m_conf.indices = m_draw_index.buff; + } + else + { + m_conf.verts = m_vertex.buff; + m_conf.indices = m_index.buff; + } m_conf.nverts = m_vertex.next; - m_conf.indices = m_index.buff; m_conf.nindices = m_index.tail; } @@ -3875,7 +4427,7 @@ bool GSRendererHW::TestChannelShuffle(GSTextureCache::Target* src) return m_channel_shuffle; } -__ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only) +__ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only, GSTextureCache::Target* rt) { if ((src->m_texture->GetType() == GSTexture::Type::DepthStencil) && !src->m_32_bits_fmt) { @@ -3900,7 +4452,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail <= 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail <= 64 && 
!IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. So @@ -4008,8 +4560,8 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool min_uv.x -= block_offset.x * t_psm.bs.x; min_uv.y -= block_offset.y * t_psm.bs.y; - if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) && - block_offset.eq(m_r_block_offset)) + //if (/*GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) &&*/ + // block_offset.eq(m_r_block_offset)) { if (min_uv.eq(GSVector4i::cxpr(0, 0, 0, 0))) channel = ChannelFetch_RED; @@ -4057,17 +4609,75 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - GSVertex* s = &m_vertex.buff[0]; - s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); - s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); - s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); - s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); + const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + m_full_screen_shuffle = (m_r.height() > frame_psm.pgs.y) || (m_r.width() > frame_psm.pgs.x) || GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled; + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy()) || m_conf.ps.urban_chaos_hle || m_conf.ps.tales_of_abyss_hle) + { + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); + + s[0].U = 0; + s[1].U = 16384; + s[0].V = 0; + s[1].V = 16384; + + m_r = GSVector4i(0, 0, 1024, 1024); + + // We need to count the pages that get shuffled to, some games 
(like Hitman Blood Money dialogue blur effects) only do half the screen. + if (!m_full_screen_shuffle && !m_conf.ps.urban_chaos_hle && !m_conf.ps.tales_of_abyss_hle && src) + m_channel_shuffle_width = src->m_TEX0.TBW; + } + else + { + const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * rt->m_TEX0.TBW)), 0); + m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); + + // Hitman suffers from this, not sure on the exact scenario at the moment, but we need the barrier. + if (PRIM->ABE && m_context->ALPHA.IsCdInBlend()) + { + if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().texture_barrier) + m_conf.require_one_barrier = true; + else + m_conf.require_full_barrier = true; + } + + // This is for offsetting the texture, however if the texture has a region clamp, we don't want to move it. 
+ // A good two test games for this is Ghost in the Shell (no region clamp) and Tekken 5 (offset clamp on shadows) + if (rt && rt->m_TEX0.TBP0 == m_cached_ctx.FRAME.Block()) + { + const bool req_offset = (m_cached_ctx.CLAMP.WMS != 3 || (m_cached_ctx.CLAMP.MAXU & ~0xF) == 0) && + (m_cached_ctx.CLAMP.WMT != 3 || (m_cached_ctx.CLAMP.MAXV & ~0x3) == 0); + //DevCon.Warning("Draw %d offset %d", s_n, frame_page_offset); + // Offset the frame but clear the draw offset + if (req_offset) + m_cached_ctx.FRAME.FBP += frame_page_offset; + } + + m_in_target_draw |= frame_page_offset > 0; + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.w << 4)); + + const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); - m_r = GSVector4i(0, 0, 1024, 1024); + s[0].U = m_r.x << 4; + s[1].U = m_r.z << 4; + s[0].V = m_r.y << 4; + s[1].V = m_r.w << 4; + m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; + } + m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; m_primitive_covers_without_gaps = NoGapsType::FullCover; + m_channel_shuffle_abort = false; return true; } @@ -4885,7 +5495,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, // Hazard handling (i.e. reading from the current RT/DS). GSTextureCache::SourceRegion source_region = tex->GetRegion(); - bool target_region = (tex->IsFromTarget() && source_region.HasEither()); + bool target_region = tex->IsFromTarget() && source_region.HasEither(); GSVector2i unscaled_size = target_region ? 
tex->GetRegionSize() : tex->GetUnscaledSize(); float scale = tex->GetScale(); HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy); @@ -5211,9 +5821,13 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) { + + const int tex_diff = tex->m_from_target ? static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_TEX0.TBP0); + const int frame_diff = rt ? static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; + // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) { // Can we read the framebuffer directly? (i.e. sample location matches up). if (CanUseTexIsFB(rt, tex, tmm)) @@ -5233,7 +5847,8 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is render target, taking copy."); src_target = rt; } - else if (m_conf.tex == m_conf.ds) + // Be careful of single page channel shuffles where depth is the source but it's not going to the same place, we can't read this directly. + else if (m_conf.tex == m_conf.ds && (!m_channel_shuffle || static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) == static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0))) { // GL, Vulkan (in General layout), not DirectX! 
const bool can_read_current_depth_buffer = g_gs_device->Features().test_and_sample_depth; @@ -5253,6 +5868,10 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is depth buffer, unsafe to read, taking copy."); src_target = ds; } + else if (m_channel_shuffle && tex->m_from_target && tex_diff != frame_diff) + { + src_target = tex->m_from_target; + } else if (!m_downscale_source) { // No match. @@ -5267,7 +5886,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GSVector4i copy_range; GSVector2i copy_size; GSVector2i copy_dst_offset; - + bool copied_rt = false; // Shuffles take the whole target. This should've already been halved. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // Restricting it also breaks Tom and Jerry... @@ -5275,7 +5894,37 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c { copy_range = src_bounds; copy_size = src_unscaled_size; + GSVector4i::storel(©_dst_offset, copy_range); + if (m_channel_shuffle && (tex_diff || frame_diff)) + { + + const u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; + const u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; + const u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + + copy_range.y += vertical_offset; + copy_range.x += horizontal_offset; + copy_size.y -= vertical_offset; + copy_size.x -= horizontal_offset; + target_region = false; + source_region.bits = 0; + //copied_rt = tex->m_from_target != nullptr; + if (m_in_target_draw && (page_offset || frame_diff)) + { + copy_size.x = m_r.width(); + copy_size.y = m_r.height(); + copy_range.w = copy_range.y + copy_size.y; + copy_range.z = copy_range.x + copy_size.x; + + if (tex_diff != frame_diff) + { + GSVector4i::storel(©_dst_offset, m_r); + 
copy_size.x += copy_dst_offset.x; + copy_size.y += copy_dst_offset.y; + } + } + } } else { @@ -5285,7 +5934,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c copy_size.y = std::min(tex_size.y, src_unscaled_size.y); // Use the texture min/max to get the copy range if not reinterpreted. - if (m_texture_shuffle) + if (m_texture_shuffle || m_channel_shuffle) copy_range = GSVector4i::loadh(copy_size); else copy_range = tmm.coverage; @@ -5356,12 +6005,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c static_cast(std::ceil(static_cast(copy_dst_offset.y) * scale))); src_copy.reset(src_target->m_texture->IsDepthStencil() ? - g_gs_device->CreateDepthStencil( - scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : - (m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, - true) : - g_gs_device->CreateTexture( - scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true))); + g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : + (m_downscale_source || copied_rt) ? 
g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) : + g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)); if (!src_copy) [[unlikely]] { Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); @@ -5369,6 +6015,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c m_conf.ps.tfx = 4; return; } + if (m_downscale_source) { g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -5415,6 +6062,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return false; } + // the texture is offset, and the frame isn't also offset, we can't do this. + if (tex->GetRegion().HasX() || tex->GetRegion().HasY()) + { + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + return false; + } + // If we're a shuffle, tex-is-fb is always fine. if (m_texture_shuffle || m_channel_shuffle) { @@ -5564,6 +6218,7 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src) if (invalidate_temp_src) g_texture_cache->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporaryZ(); // Restore Scissor. m_context->UpdateScissor(); @@ -5603,7 +6258,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.cb_vs.texture_offset = {}; m_conf.ps.scanmsk = env.SCANMSK.MSK; m_conf.rt = rt ? rt->m_texture : nullptr; - m_conf.ds = ds ? ds->m_texture : nullptr; + m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? 
g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr; // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -5614,7 +6269,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // vertex list (it will interact with PrimitiveOverlap and accurate // blending) if (m_channel_shuffle && tex && tex->m_from_target) - EmulateChannelShuffle(tex->m_from_target, false); + EmulateChannelShuffle(tex->m_from_target, false, rt); // Upscaling hack to avoid various line/grid issues MergeSprite(tex); @@ -5974,7 +6629,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing()); // Restrict this to only when we're overwriting the whole target. - new_scale_rt_alpha = full_cover; + new_scale_rt_alpha = full_cover || rt->m_last_draw >= s_n; } } @@ -6117,7 +6772,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta float sx, sy, ox2, oy2; const float ox = static_cast(static_cast(m_context->XYOFFSET.OFX)); const float oy = static_cast(static_cast(m_context->XYOFFSET.OFY)); - if (GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native && rtscale > 1.0f) + if ((GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native || m_channel_shuffle) && rtscale > 1.0f) { sx = 2.0f * rtscale / (rtsize.x << 4); sy = 2.0f * rtscale / (rtsize.y << 4); @@ -6260,7 +6915,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtsize, rtscale)); m_conf.scissor = (DATE && !DATE_BARRIER) ? 
m_conf.drawarea : scissor; - SetupIA(rtscale, sx, sy); + SetupIA(rtscale, sx, sy, m_channel_shuffle_width != 0); if (ate_second_pass) { @@ -6348,7 +7003,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.drawlist = (m_conf.require_full_barrier && m_vt.m_primclass == GS_SPRITE_CLASS) ? &m_drawlist : nullptr; - g_gs_device->RenderHW(m_conf); + if (!m_channel_shuffle_width) + g_gs_device->RenderHW(m_conf); + else + m_last_rt = rt; } // If the EE uploaded a new CLUT since the last draw, use that. @@ -6608,7 +7266,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t { GSTextureCache::Target* rt = g_texture_cache->GetTargetWithSharedBits(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.PSM); - if (!rt) + if (!rt || (!rt->m_dirty.empty() && rt->m_dirty.GetTotalRect(rt->m_TEX0, rt->m_unscaled_size).rintersect(m_r).eq(m_r))) return true; rt = nullptr; @@ -7059,7 +7717,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r), - rt_end_bp, m_cached_ctx.FRAME.PSM); + rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW); GSUploadQueue clear_queue; clear_queue.draw = s_n; @@ -7082,7 +7740,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r), - ds_end_bp, m_cached_ctx.ZBUF.PSM); + ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW); } } @@ -7207,7 +7865,9 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && 
(m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + // Not required when using Tex in RT + if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && + tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0 && GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled) { GL_PUSH("OI_BlitFMV"); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 02dce7ece7759..70c7bd3c39fbe 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -92,9 +92,9 @@ class GSRendererHW : public GSRenderer void DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex, const TextureMinMaxResult& tmm); void ResetStates(); - void SetupIA(float target_scale, float sx, float sy); + void SetupIA(float target_scale, float sx, float sy, bool req_vert_backup); void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex); - bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only); + bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only, GSTextureCache::Target* rt = nullptr); void EmulateBlending(int rt_alpha_min, int rt_alpha_max, const bool DATE, bool& DATE_PRIMID, bool& DATE_BARRIER, GSTextureCache::Target* rt, bool can_scale_rt_alpha, bool& new_rt_alpha_scale); void CleanupDraw(bool invalidate_temp_src); @@ -113,12 +113,14 @@ class GSRendererHW : public GSRenderer void SetTCOffset(); bool IsPossibleChannelShuffle() const; + bool IsPageCopy() const; bool NextDrawMatchesShuffle() const; - bool IsSplitTextureShuffle(GSTextureCache::Target* rt); + bool IsSplitTextureShuffle(GIFRegTEX0& rt_TEX0, GSVector4i& valid_area); GSVector4i GetSplitTextureShuffleDrawRect() const; u32 GetEffectiveTextureShuffleFbmsk() const; static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages); + bool 
IsSinglePageDraw() const; bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw); bool IsSplitClearActive() const; @@ -172,7 +174,12 @@ class GSRendererHW : public GSRenderer u32 m_last_channel_shuffle_fbmsk = 0; u32 m_last_channel_shuffle_fbp = 0; + u32 m_last_channel_shuffle_tbp = 0; u32 m_last_channel_shuffle_end_block = 0; + u32 m_channel_shuffle_width = 0; + bool m_full_screen_shuffle = false; + + GSTextureCache::Target* m_last_rt; GIFRegFRAME m_split_clear_start = {}; GIFRegZBUF m_split_clear_start_Z = {}; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index ffcc7b14169b6..e0c26738f317b 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -18,6 +18,7 @@ #include "fmt/format.h" #include +#include #ifdef __APPLE__ #include @@ -234,7 +235,7 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db // The page width matches. // The rect width is less than the width of the destination texture and the height is less than or equal to 1 page high. // The rect width and height is equal to the page size and it covers the width of the incoming bw, so lines are sequential. 
- const bool page_aligned_rect = masked_rect.eq(r); + const bool page_aligned_rect = masked_rect.xyxy().eq(r.xyxy()); const bool width_match = ((bw * 64) / src_page_size.x) == ((dbw * 64) / dst_page_size.x); const bool sequential_pages = page_aligned_rect && r.x == 0 && r.z == src_pixel_width; const bool single_row = (((bw * 64) / src_page_size.x) <= ((dbw * 64) / dst_page_size.x)) && r.z <= src_pixel_width && r.w <= src_page_size.y; @@ -252,6 +253,163 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db } +GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw, u32 tpsm, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation) +{ + const GSVector2i src_page_size = GSLocalMemory::m_psm[spsm].pgs; + const GSVector2i dst_page_size = GSLocalMemory::m_psm[tpsm].pgs; + const int clamped_sbw = static_cast(std::max(1U, sbw)); + const int clamped_tbw = static_cast(std::max(1U, tbw)); + const int src_bw = clamped_sbw * 64; + const int dst_bw = clamped_tbw * 64; + const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[spsm]; + const GSLocalMemory::psm_t& t_psm = GSLocalMemory::m_psm[tpsm]; + const int src_pgw = std::max(1, src_bw / src_page_size.x); + const int dst_pgw = std::max(1, dst_bw / dst_page_size.x); + GSVector4i in_rect = src_r; + + if (sbp < tebp && tebp < tbp) + sbp += 0x4000; + // DST = the target we're trying to fit in to. + // SRC = the format being requested, so we want to from SRC to DST. + int page_offset = (static_cast(sbp) - static_cast(tbp)) >> 5; + int block_offset = (static_cast(sbp) - static_cast(tbp)) & 0x1F; + + if (!(s_psm.bpp == t_psm.bpp)) + { + const int src_bpp = s_psm.bpp; + + if (block_offset) + in_rect = in_rect.ralign(s_psm.bs); + else + in_rect = in_rect.ralign(s_psm.pgs); + + // Convert rect down in to pages and blocks. 
+ const GSVector4i in_pages = GSVector4i(in_rect.x / s_psm.pgs.x, in_rect.y / s_psm.pgs.y, in_rect.z / s_psm.pgs.x, in_rect.w / s_psm.pgs.y); + in_rect -= GSVector4i(in_pages.x * s_psm.pgs.x, in_pages.y * s_psm.pgs.y, in_pages.z * s_psm.pgs.x, in_pages.w * s_psm.pgs.y); + // Handle a minimum of 1 block, they are a different shape between 16 and 32bit. 8x8 vs 16x8. + // FIXME: Block layouts are different between 32bit/8bit and other formats (8x4 instead of 4x8), so this could be a problem if the game invalidates too much. + const GSVector4i in_blocks = GSVector4i(in_rect.x / s_psm.bs.x, in_rect.y / s_psm.bs.y, (in_rect.z + (s_psm.bs.x - 1)) / s_psm.bs.x, (in_rect.w + (s_psm.bs.y - 1)) / s_psm.bs.y); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. + in_rect = GSVector4i(in_pages.x * t_psm.pgs.x, in_pages.y * t_psm.pgs.y, in_pages.z * t_psm.pgs.x, in_pages.w * t_psm.pgs.y); + in_rect += GSVector4i(in_blocks.x * t_psm.bs.x, in_blocks.y * t_psm.bs.y, in_blocks.z * t_psm.bs.x, in_blocks.w * t_psm.bs.y); + + if (in_rect.rempty()) + { + DevCon.Warning("Error translating rect"); + return GSVector4i::zero(); + } + } + + GSVector4i new_rect = GSVector4i::zero(); + + if (src_pgw != dst_pgw) + { + const int horizontal_dst_page_offset = page_offset % clamped_tbw; + const bool single_row = ((src_pgw + horizontal_dst_page_offset) <= clamped_tbw) && (in_rect.height() <= dst_page_size.y); + const bool single_page = (in_rect.width() <= t_psm.pgs.x) && (in_rect.height() <= t_psm.pgs.y); + const int vertical_offset = in_rect.y / t_psm.pgs.y; + const int horizontal_offset = in_rect.x / t_psm.pgs.x; + const int rect_offset = horizontal_offset + (vertical_offset * src_pgw); + const int rect_pages = ((in_rect.width() / t_psm.pgs.x) % src_pgw) + ((in_rect.height() / t_psm.pgs.y) * src_pgw); + page_offset += rect_offset; + in_rect -= GSVector4i(horizontal_offset * t_psm.pgs.x, vertical_offset * 
t_psm.pgs.y).xyxy(); + + if (sbw == 0) // Intentionally check this separately + { + // BW == 0 loops vertically on the first page. So just copy the whole page vertically. + if (in_rect.z > dst_page_size.x) + { + new_rect.x = 0; + new_rect.z = (dst_page_size.x); + } + else + { + new_rect.x = in_rect.x; + new_rect.z = in_rect.z; + } + if (in_rect.w > dst_page_size.y) + { + new_rect.y = 0; + new_rect.w = dst_page_size.y; + } + else + { + new_rect.y = in_rect.y; + new_rect.w = in_rect.w; + } + } + else if (src_pgw == 1 && (horizontal_dst_page_offset + rect_pages) <= clamped_tbw) // Intentionally check this separately + { + new_rect.x = (horizontal_dst_page_offset * t_psm.pgs.x) + in_rect.x; + new_rect.z = new_rect.x + (rect_pages * t_psm.pgs.x); + new_rect.y = (page_offset / dst_pgw) * t_psm.pgs.y; + new_rect.w = new_rect.y + t_psm.pgs.y; + } + else if (single_row || single_page) // Single page and single row should be handled the same here + { + //The offsets will move this to the right place + const GSVector2i start_page = GSVector2i(page_offset % dst_pgw, page_offset / dst_pgw); + new_rect.x = (start_page.x * t_psm.pgs.x) + in_rect.x; + new_rect.z = (start_page.x * t_psm.pgs.x) + in_rect.z; + new_rect.y = (start_page.y * t_psm.pgs.y) + in_rect.y; + new_rect.w = (start_page.y * t_psm.pgs.y) + in_rect.w; + } + else + { + + + // Fills full length, so count pages based on the width, adjust rect to fill original rect. + // Battle Assault 3 does a move with BW 7 instead of 8 and does 448x512, instead of 512x448. Same result, but confusing for us. + if ((in_rect.width() / dst_page_size.x) == src_pgw) + { + // The width is mismatched to the page. 
+ if (!is_invalidation && GSConfig.UserHacks_TextureInsideRt < GSTextureInRtMode::MergeTargets) + { + DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d src fmt %d dst fmt %d src_rect %d, %d, %d, %d draw %d", sbp, tbp, src_pgw, dst_pgw, spsm, tpsm, in_rect.x, in_rect.y, in_rect.z, in_rect.w, GSState::s_n); + return GSVector4i::zero(); + } + + const GSVector2i start_page = GSVector2i(page_offset % dst_pgw, page_offset / dst_pgw); + int page_count = (in_rect.height() / dst_page_size.y) * src_pgw; + + // Round up to a whole row, it's better than the alternative. + // Busin 0 - Wizardry Alternative Neo moves with non even rows. + const int horizontal_offset = (page_count % dst_pgw); + if (horizontal_offset) + page_count += dst_pgw - horizontal_offset; + + const int new_height = (page_count / dst_pgw) * dst_page_size.y; + new_rect.x = 0; + new_rect.z = dst_pgw * dst_page_size.x; + new_rect.y = start_page.y * dst_page_size.y; + new_rect.w = new_rect.y + new_height; + } + else + { + //TODO: Maybe control dirty blocks directly and add them page at a time for better granularity. + const GSVector2i start_page = GSVector2i((page_offset + rect_offset) % dst_pgw, page_offset / dst_pgw); + DevCon.Warning("Fudging start position"); + // Not easily translatable full pages and make sure the height is rounded upto encompass the half row. 
+ new_rect.x = start_page.x * dst_page_size.x; + new_rect.z = new_rect.x + in_rect.z; + new_rect.y = start_page.y * dst_page_size.y; + new_rect.w = new_rect.y + in_rect.w; + } + } + } + else // Widths match + { + const int horizontal_dst_page_offset = page_offset % clamped_tbw; + const int vertical_dst_page_offset = page_offset / clamped_tbw; + GSVector4i offset_rect(horizontal_dst_page_offset * t_psm.pgs.x, vertical_dst_page_offset * t_psm.pgs.y); + new_rect = in_rect + offset_rect.xyxy(); + } + + return new_rect; +} + +/* GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw, u32 tpsm, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation) { const GSVector2i src_page_size = GSLocalMemory::m_psm[spsm].pgs; @@ -273,6 +431,15 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw const int inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x; in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. 
+ if (in_rect.x >= (src_pgw * src_page_size.x)) + { + in_rect.z -= src_pgw * src_page_size.x; + in_rect.x -= src_pgw * src_page_size.x; + in_rect.y += src_page_size.y; + in_rect.w += src_page_size.y; + } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; } @@ -400,7 +567,7 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw } return new_rect; -} +}*/ GSVector4i GSTextureCache::TranslateAlignedRectByPage(Target* t, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation) { @@ -850,6 +1017,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c bool inside_target = false; GSVector4i target_rc(r); + GSVector4i block_boundary_rect = target_rc; + block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); + block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); + // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. 
+ block_boundary_rect.z = std::max(target_rc.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(target_rc.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); + for (auto t : m_dst[DepthStencil]) { if (!t->m_used || (!t->m_dirty.empty() && !is_depth)) @@ -877,53 +1051,30 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c const GSVector2i page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs; const bool can_translate = CanTranslate(bp, TEX0.TBW, psm, r, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW); const bool swizzle_match = psm_s.depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth; - GSVector4i new_rect = r; - - if (linear) - { - new_rect.z -= 1; - new_rect.w -= 1; - } + GSVector4i new_rect = block_boundary_rect; if (can_translate) { if (swizzle_match) { - target_rc = TranslateAlignedRectByPage(t, bp, psm, TEX0.TBW, new_rect); + block_boundary_rect = TranslateAlignedRectByPage(t, bp, psm, TEX0.TBW, new_rect); } else { - // If it's not page aligned, grab the whole pages it covers, to be safe. 
- if (psm_s.bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) - { - const GSVector2i dst_page_size = psm_s.pgs; - target_rc = GSVector4i(target_rc.x / page_size.x, target_rc.y / page_size.y, - (target_rc.z + (page_size.x - 1)) / page_size.x, - (target_rc.w + (page_size.y - 1)) / page_size.y); - target_rc = GSVector4i(target_rc.x * dst_page_size.x, target_rc.y * dst_page_size.y, - target_rc.z * dst_page_size.x, target_rc.w * dst_page_size.y); - } - else - { - target_rc.x &= ~(page_size.x - 1); - target_rc.y &= ~(page_size.y - 1); - target_rc.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1); - target_rc.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1); - } - target_rc = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, TEX0.TBW, target_rc); + const GSVector2i src_page_size = psm_s.pgs; + new_rect.x &= ~(src_page_size.x - 1); + new_rect.y &= ~(src_page_size.y - 1); + new_rect.z = (new_rect.z + (src_page_size.x - 1)) & ~(src_page_size.x - 1); + new_rect.w = (new_rect.w + (src_page_size.y - 1)) & ~(src_page_size.y - 1); + block_boundary_rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, TEX0.TBW, new_rect); } - if (!target_rc.rempty()) + if (!block_boundary_rect.rempty()) { dst = t; inside_target = true; } } - if (linear) - { - new_rect.z += 1; - new_rect.w += 1; - } } } @@ -945,7 +1096,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c t->ResizeTexture(t->m_unscaled_size.x, t->m_unscaled_size.y); t->m_valid = dst->m_valid; } - + CopyRGBFromDepthToColor(t, dst); } @@ -997,8 +1148,8 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c if (inside_target) { // Need to set it up as a region target. 
- src->m_region.SetX(target_rc.x, target_rc.z); - src->m_region.SetY(target_rc.y, target_rc.w); + src->m_region.SetX(block_boundary_rect.x, block_boundary_rect.z); + src->m_region.SetY(block_boundary_rect.y, block_boundary_rect.w); } if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) @@ -1038,7 +1189,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const const u32* const clut = g_gs_renderer->m_mem.m_clut; GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr; - const SourceRegion region = SourceRegion::Create(TEX0, CLAMP); + SourceRegion region = SourceRegion::Create(TEX0, CLAMP); // Prevent everything going to rubbish if a game somehow sends a TW/TH above 10, and region isn't being used. if ((TEX0.TW > 10 && !region.HasX()) || (TEX0.TH > 10 && !region.HasY())) @@ -1091,9 +1242,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const req_rect.y = region.HasY() ? region.GetMinY() : 0; GSVector4i block_boundary_rect = req_rect; + block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); + block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. - block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); - block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); + block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. 
@@ -1115,12 +1268,13 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) continue; + //const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect); const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); - // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. // Also is we have already found a target which we had to offset in to by using a region or exact address, // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) - if (!overlaps || (found_t && dst->m_TEX0.TBP0 >= bp && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) + // Vakyrie Profile 2 also has some in draws which get done on a different target due to a slight offset, so we need to make sure we have the newer one. + if (!overlaps || (found_t && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) continue; const bool width_match = (std::max(64U, bw * 64U) >> GSLocalMemory::m_psm[psm].info.pageShiftX()) == @@ -1283,7 +1437,21 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!possible_shuffle && frame_fbp != t->m_TEX0.TBP0 && rect_clean && bp == t->m_TEX0.TBP0 && t && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM) && width_match && real_fmt_match) { if (!tex_merge_rt && t->Overlaps(bp, bw, psm, req_rect)) + { + // Resize but be careful of +bilinear in req_rect, as it can screw valid areas. 
+ if (psm_s.bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && !block_boundary_rect.rintersect(t->m_valid).eq(block_boundary_rect)) + { + RGBAMask rgba_mask; + rgba_mask.c.a = req_alpha; + rgba_mask.c.r = rgba_mask.c.g = rgba_mask.c.b = req_color; + if (block_boundary_rect.z > t->m_valid.z) + AddDirtyRectTarget(t, GSVector4i(t->m_valid.z, t->m_valid.y, block_boundary_rect.z, std::max(block_boundary_rect.w, t->m_valid.w)), t->m_TEX0.PSM, t->m_TEX0.TBW, rgba_mask); + if (block_boundary_rect.w > t->m_valid.w) + AddDirtyRectTarget(t, GSVector4i(t->m_valid.x, t->m_valid.w, std::max(block_boundary_rect.z, t->m_valid.z), block_boundary_rect.w), t->m_TEX0.PSM, t->m_TEX0.TBW, rgba_mask); + } + // Resize including the extra pixel for bilinear. ResizeTarget(t, req_rect, bp, psm, bw); + } } } @@ -1360,6 +1528,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const DevCon.Warning("Failed to update dst matched texture"); } t->m_valid_rgb = true; + t->m_TEX0 = dst_match->m_TEX0; break; } } @@ -1406,7 +1575,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; dst = t; @@ -1431,7 +1600,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Detect half of the render target (fix snow engine game) // Target Page (8KB) have always a width of 64 pixels // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + if 
(!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; half_right = true; @@ -1445,16 +1614,27 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. (GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (possible_shuffle && GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. - t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) && CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)) + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. 
+ t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { - - if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % t->m_TEX0.TBW; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + ((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) || + (t->m_TEX0.TBW != bw && block_boundary_rect.w > GSLocalMemory::m_psm[psm].pgs.y)))) + { + DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; - + } + // Keep note that 2 bw is basically 1 normal page, as bw is in 64 pixels, and 8bit pages are 128 pixels wide, aka 2 bw. + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && + !((t->m_TEX0.TBW == (bw / 2)) || (((bw + 1) / 2) <= t->m_TEX0.TBW && (block_boundary_rect.w <= GSLocalMemory::m_psm[psm].pgs.y))))) + { + DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } // PSM equality needed because CreateSource does not handle PSM conversion. // Only inclusive hit to limit false hits. - GSVector4i rect = req_rect; + GSVector4i rect = block_boundary_rect; int src_bw = bw; int src_psm = psm; @@ -1478,12 +1658,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (bp > t->m_TEX0.TBP0) { - GSVector4i new_rect = possible_shuffle ? 
block_boundary_rect : rect; - if (linear) - { - new_rect.z -= 1; - new_rect.w -= 1; - } + GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (psm & 0x7) != PSMCT16) ? block_boundary_rect : rect; + // Check if it is possible to hit with valid offset on the given Target. // Fixes Jak eyes rendering. // Fixes Xenosaga 3 last dungeon graphic bug. @@ -1523,9 +1699,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const rect.y -= new_rect.y & ~(page_size.y - 1); } - rect = rect.rintersect(t->m_valid); + //rect = rect.rintersect(t->m_valid); - if (rect.rempty()) + if (rect.rintersect(t->m_valid).rempty()) continue; if (!t->m_dirty.empty()) @@ -1535,6 +1711,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const continue; } + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + continue; + + if (!t->Inside(bp, bw, psm, block_boundary_rect)) + continue; + x_offset = rect.x; y_offset = rect.y; dst = t; @@ -1556,6 +1738,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (so.is_valid) { + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + continue; + dst = t; // Offset from Target to Source in Target coords. x_offset = so.b2a_offset.x; @@ -1583,15 +1768,21 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Omitting that check here seemed less risky than blowing CS targets out... 
const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs; const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm); + const u32 offset_bp = offset.bn(region.GetMinX(), region.GetMinY()); if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && - offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0) + (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || + ((offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw))) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); - x_offset = -region.GetMinX(); - y_offset = -region.GetMinY(); + + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) + continue; + + x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX(); + y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY(); dst = t; tex_merge_rt = false; found_t = true; @@ -1629,6 +1820,29 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const else continue; } + // Else read it back, might be our only choice. Ridge Racer writes to the right side of 0x1a40 for headlights, then tries to access it with the base of 0x9a0 + // naturally, it misses here. But let's make sure the formats match well enough. 
+ else if (bw == t->m_TEX0.TBW && GSLocalMemory::m_psm[psm].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && t->Inside(bp, bw, psm, r)) + { + if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, true)) + continue; + + GIFRegCLAMP fake_CLAMP; + fake_CLAMP.WMS = CLAMP_REGION_CLAMP; + fake_CLAMP.WMT = CLAMP_REGION_CLAMP; + fake_CLAMP.MINU = 0; + fake_CLAMP.MINV = 0; + fake_CLAMP.MAXV = std::min(static_cast(1u << TEX0.TH), 1022u); + fake_CLAMP.MAXU = std::min(static_cast(1u << TEX0.TW), 1022u); + region = SourceRegion::Create(TEX0, fake_CLAMP); + + const GSVector4i custom_offset_rect = TranslateAlignedRectByPage(t, bp, psm, bw, block_boundary_rect); + x_offset = custom_offset_rect.x; + y_offset = custom_offset_rect.y; + dst = t; + tex_merge_rt = false; + found_t = true; + } } } } @@ -1644,12 +1858,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) { - GSVector4i new_rect = req_rect; - - // Just in case the TextureMinMax trolls us as it does, when checking if inside the target. - new_rect.z -= 2; - new_rect.w -= 2; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1658,7 +1866,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const { for (auto t : m_dst[DepthStencil]) { - if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, new_rect)) + if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, block_boundary_rect)) { GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); // Let's fetch a depth format texture. 
Rational, it will avoid the texture allocation and the @@ -1668,7 +1876,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const GIFRegTEX0 depth_TEX0; depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[1] = TEX0.U32[1]; - src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); + src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); if (src != nullptr) { @@ -1690,7 +1898,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - src = LookupDepthSource(false, TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); + src = LookupDepthSource(false, TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); if (src != nullptr) { @@ -1774,6 +1982,21 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } } + if (src->m_from_target && src->m_target_direct && src->m_region.HasEither()) + { + if (src->m_from_target->m_TEX0.TBP0 == src->m_TEX0.TBP0) + { + src->m_region.SetX(std::min(region.GetMinX(), src->m_region.GetMinX()), std::max(region.GetMaxX(), src->m_region.GetMaxX())); + src->m_region.SetY(std::min(region.GetMinY(), src->m_region.GetMinY()), std::max(region.GetMaxY(), src->m_region.GetMaxY())); + } + else if (src->m_TEX0.TBP0 > src->m_from_target->m_TEX0.TBP0) + { + GSVector4i dst_offset = TranslateAlignedRectByPage(src->m_from_target, src->m_TEX0.TBP0, src->m_TEX0.PSM, src->m_TEX0.TBW, GSVector4i(0, 0, 1, 1), false); + src->m_region.SetX(dst_offset.x + region.GetMinX(), dst_offset.x + region.GetMaxX()); + src->m_region.SetY(dst_offset.y + region.GetMinY(), dst_offset.y + region.GetMaxY()); + } + } + if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else if (src->m_palette && (!src->m_palette_obj || 
!src->ClutMatch({clut, psm_s.pal}))) @@ -1803,7 +2026,8 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale) + bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, + bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, GSTextureCache::Target* ds, int offset) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1812,8 +2036,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe const GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect{}; bool clear = true; - const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) - { + const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) { // TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one. clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y); new_size = size.max(tgt->m_unscaled_size); @@ -1827,16 +2050,25 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe Target* dst = nullptr; auto& list = m_dst[type]; + const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y)); // TODO: Move all frame stuff to its own routine too. 
if (!is_frame) { - for (auto i = list.begin(); i != list.end(); ++i) + for (auto i = list.begin(); i != list.end();) { Target* t = *i; if (bp == t->m_TEX0.TBP0) { bool can_use = true; + + if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + { + DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); + i++; + continue; + } + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. So while we can't use this heuristic for tossing @@ -1867,7 +2099,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // 2. Preserved data will be in the correct place (in most cases) // 3. Less deleting sources/targets // 4. We can basically do clears in hardware, if they aren't insane ones - if (can_use && !is_shuffle && ((preserve_alpha && preserve_rgb) || (draw_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y && !possible_clear)) && TEX0.TBW != t->m_TEX0.TBW && t->m_dirty.size() >= 1) + if (can_use && ((!is_shuffle && t->m_dirty.size() >= 1) || (is_shuffle && src && GSLocalMemory::m_psm[src->m_TEX0.PSM].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 16)) && ((preserve_alpha && preserve_rgb) || (draw_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y && !possible_clear)) && TEX0.TBW != t->m_TEX0.TBW) { can_use = false; } @@ -1880,16 +2112,89 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst = t; dst->m_32_bits_fmt |= (psm_s.bpp != 16); - break; + + /*if (FindOverlappingTarget(dst)) + continue; + else*/ + break; } - else + else if(!(src && src->m_from_target == t)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); i = list.erase(i); delete t; + + continue; + 
} + } + // Probably pointing to half way through the target + else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + { + // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z + /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + { + continue; + }*/ + + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); + /*const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && + ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || + ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || + min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && + (static_cast(min_rect.width()) <= (widthpage_offset * 64))));*/ + const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= static_cast((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0); + const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))); + const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && draw_rect.w <= GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y)); + // if it's a shuffle, some games tend to offset back by a page, such as Tomb Raider, for no disernable reason, but it then causes problems. + // This can also happen horizontally (Catwoman moves everything one page left with shuffles), but this is too messy to deal with right now. 
+ const bool overlaps = t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect) || (is_shuffle && src && GSLocalMemory::m_psm[src->m_TEX0.PSM].bpp == 8 && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect + GSVector4i(0, 0, 0, 32))); + if (no_target_or_newer && is_aligned_ok && width_match && overlaps) + { + const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; + + if (!is_shuffle && (!GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM) || + (widthpage_offset % std::max(t->m_TEX0.TBW, 1U)) != 0 && ((widthpage_offset + (min_rect.width() + (s_psm.pgs.x - 1)) / s_psm.pgs.x)) > t->m_TEX0.TBW)) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + + continue; + } + else if (t->m_dirty.empty()) + { + + if (TEX0.TBW == t->m_TEX0.TBW && !is_shuffle && widthpage_offset == 0 && ((min_rect.w + 63)/ 64) > 1) + { + // Beyond Good and Evil does this awful thing where it puts one framebuffer at 0xf00, with the first row of pages blanked out, and the whole thing goes down to 0x2080 + // which is a problem, because it then puts the Z buffer at 0x1fc0, then offsets THAT by 1 row of pages, so it starts at, you guessed it, 2080. + // So let's check the *real* start. + u32 real_start_address = GSLocalMemory::GetStartBlockAddress(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, t->m_drawn_since_read); + u32 new_end_address = GSLocalMemory::GetEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, min_rect); + + // Not really overlapping. 
+ if (real_start_address > new_end_address) + { + i++; + continue; + } + } + + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; + + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + if (used) + list.MoveFront(i.Index()); + break; + } } } + + i++; } } else @@ -2034,6 +2339,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { calcRescale(dst); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); + if (!tex) + return nullptr; g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_gs_device->Recycle(dst->m_texture); @@ -2042,6 +2349,69 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } + else if (std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + { + dst->Update(false); + + const bool scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp; + new_size = dst->m_unscaled_size; + new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); + + dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16) + { + if (scale_down) + { + if ((new_size.y * 2) < 1024) + { + new_scaled_size.y *= 2; + new_size.y *= 2; + dst->m_valid.y *= 2; + dst->m_valid.w *= 2; + } + dRect.y *= 2; + dRect.w *= 2; + } + else + { + new_scaled_size.y /= 2; + new_size.y /= 2; + dRect.y /= 2; + dRect.w /= 2; + dst->m_valid.y /= 2; + dst->m_valid.w /= 2; + } + } + if (!is_shuffle) + { + 
GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + + dst->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i(dRect), true); + } + else + { + dst->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i(dRect)); + } + } + + // New format or doing a shuffle to a 32bit target that used to be 16bit + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp < GSLocalMemory::m_psm[TEX0.PSM].bpp) + dst->m_TEX0.PSM = TEX0.PSM; + // LEGO Dome Racers does a copy to a target as 8bit in alpha only, this doesn't really work great for us, so let's make it 32bit with invalid RGB. + else if (dst->m_TEX0.PSM == PSMT8H) + { + dst->m_TEX0.PSM = PSMCT32; + dst->m_valid_rgb = false; + } + } // If our RGB was invalidated, we need to pull it from depth. // Terminator 3 will reuse our dst_matched target with the RGB masked, then later use the full ARGB area, so we need to update the depth. @@ -2165,7 +2535,17 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { continue; } - + // If the format is completely different, but it's the same location, it's likely just overwriting it, so get rid. + // Make sure it's not currently in use, that could be bad. 
+ if (!is_shuffle && (!ds || (ds != t)) && + t->m_TEX0.TBW != TEX0.TBW && TEX0.TBW != 1 && !preserve_rgb && min_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) + { + DevCon.Warning("Deleting Z draw %d", GSState::s_n); + InvalidateSourcesFromTarget(t); + i = rev_list.erase(i); + delete t; + continue; + } const GSLocalMemory::psm_t& t_psm_s = GSLocalMemory::m_psm[t->m_TEX0.PSM]; if (t_psm_s.bpp != psm_s.bpp) { @@ -2242,6 +2622,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_alpha_high = dst_match->m_valid_alpha_high; //&& psm_s.trbpp != 24; dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_was_dst_matched = true; + dst_match->m_was_dst_matched = true; + dst_match->m_valid_rgb = preserve_rgb; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). @@ -2471,11 +2853,12 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons RGBAMask rgba; rgba._u32 = GSUtil::GetChannelMask(TEX0.PSM); + dst->UpdateValidity(GSVector4i::loadh(valid_size)); if (!is_frame && !preload && !(src && src->m_TEX0.TBP0 == dst->m_TEX0.TBP0)) { - if ((preserve_target || !draw_rect.eq(dst->m_valid)) && GSRendererHW::GetInstance()->m_draw_transfers.size() > 0) + if ((preserve_target || !draw_rect.eq(GSVector4i::loadh(valid_size))) && GSRendererHW::GetInstance()->m_draw_transfers.size() > 0) { auto& transfers = GSRendererHW::GetInstance()->m_draw_transfers; const int last_draw = transfers.back().draw; @@ -2569,8 +2952,6 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons { const GSVector4i save_rect = preserve_target ? 
newrect : eerect; - if (!hw_clear) - dst->UpdateValidity(save_rect); GL_INS("Preloading the RT DATA from updated GS Memory"); AddDirtyRectTarget(dst, save_rect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16); } @@ -2605,112 +2986,113 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons auto j = i; Target* t = *j; - if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM/* && t->m_TEX0.TBW == dst->m_TEX0.TBW*/) - if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) + if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && + static_cast(((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) / 32) % std::max(dst->m_TEX0.TBW, 1U)) <= std::max(0, static_cast(dst->m_TEX0.TBW - t->m_TEX0.TBW))) + { + const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + + // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. + // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. + if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) { - const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; - // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. - // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. - if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) - { - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; + continue; + } + + // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. 
+ if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + { + GSVector4i new_valid = t->m_valid; + new_valid.w /= 2; + GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); + t->ResizeValidity(new_valid); + return hw_clear.value_or(false); + } + // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. + else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + { + const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; + const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); + const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) + { + // No overlap top copy or the widths don't match. + i++; continue; } - // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. - if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? 
(dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) { - GSVector4i new_valid = t->m_valid; - new_valid.w /= 2; - GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); - t->ResizeValidity(new_valid); - return hw_clear.value_or(false); + // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. + DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); + i++; + continue; } - // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. - else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + + const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; + const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; + const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; + const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); + + if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) { - const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; - const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); - const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? 
(dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; + int copy_height = (texture_height - dst_offset_height) * t->m_scale; - if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) - { - // No overlap top copy or the widths don't match. - i++; - continue; - } + GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); - const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? (dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + // Clear the dirty first + t->Update(); + dst->Update(); - if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) + // Clamp it if it gets too small, shouldn't happen but stranger things have happened. + if (copy_width < 0) { - // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. - DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); - i++; - continue; + copy_width = 0; } - const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; - const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; - const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; - const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); - - if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) + // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. 
+ if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) { - int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; - int copy_height = (texture_height - dst_offset_height) * t->m_scale; - - GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - - // Clear the dirty first - t->Update(); - dst->Update(); - - // Clamp it if it gets too small, shouldn't happen but stranger things have happened. - if (copy_width < 0) - { - copy_width = 0; - } - - // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. - if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) + const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); + const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); + g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); + } + else + { + if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * dst->m_scale)) { - const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); - const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); - g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); 
+ copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); + copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); } - else - { - if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * dst->m_scale)) - { - copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); - copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); - } - g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); - } - } - - // src is using this target, so point it at the new copy. - if (src && src->m_target && src->m_from_target == t) - { - src->m_from_target = dst; - src->m_texture = dst->m_texture; - src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); - src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); + g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); } + } - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; - continue; + // src is using this target, so point it at the new copy. 
+ if (src && src->m_target && src->m_from_target == t) + { + src->m_from_target = dst; + src->m_texture = dst->m_texture; + src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); + src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); } + + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; + continue; } + } i++; } } @@ -2910,7 +3292,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, } // Inject the new size back into the cache. - GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast(needed_height)); + GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast(needed_height)); } float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) @@ -3061,7 +3443,67 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm) +/*void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) +{ + const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); + for (int type = 0; type < 2; type++) + { + auto& list = m_dst[type]; + for (auto i = list.begin(); i != list.end();) + { + Target* const t = *i; + if ((start_bp > t->UnwrappedEndBlock() || end_bp < t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp) && t->m_dirty.empty())) + { + ++i; + continue; + } + + //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; + // Not covering the whole target, and a different format, so just dirty it. 
+ //if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) + //{ + // const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; + // const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + // const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + // GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + // InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); + // ++i; + // continue; + //} + + InvalidateSourcesFromTarget(t); + + t->m_valid_alpha_low &= preserve_alpha; + t->m_valid_alpha_high &= preserve_alpha; + t->m_valid_rgb &= !(t->m_TEX0.TBP0 == start_bp); + + // Don't keep partial depth buffers around. + if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil) + { + auto& rev_list = m_dst[1 - type]; + for (auto j = rev_list.begin(); j != rev_list.end();) + { + Target* const rev_t = *j; + if (rev_t->m_TEX0.TBP0 == t->m_TEX0.TBP0 && GSLocalMemory::m_psm[rev_t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) + { + rev_t->m_was_dst_matched = false; + break; + } + ++j; + } + + GL_CACHE("TC: InvalidateContainedTargets: Remove Target %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + i = list.erase(i); + delete t; + continue; + } + + GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + ++i; + } + } +}*/ +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3178,40 +3620,50 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r 
const u32 bw = off.bw(); const u32 psm = off.psm(); + // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. + // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, + // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. + const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); + const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); + if (!target) { + const int pages = (end_bp + ((1<<5)-1) - start_bp) >> 5; // Remove Source that have same BP as the render target (color&dss) - // rendering will dirty the copy - auto& list = m_src.m_map[bp >> 5]; - for (auto i = list.begin(); i != list.end();) + /// rendering will dirty the copy + for (int pgs = 0; pgs < pages; pgs++) { - Source* s = *i; - ++i; - - if (GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM) || - (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) - { - m_src.RemoveAt(s); - } - } - - u32 bbp = bp + bw * 0x10; - if (bw >= 16 && bbp < 16384) - { - // Detect half of the render target (fix snow engine game) - // Target Page (8KB) have always a width of 64 pixels - // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - auto& list = m_src.m_map[bbp >> 5]; + auto& list = m_src.m_map[((bp >> 5) + pgs) & 0x1ff]; for (auto i = list.begin(); i != list.end();) { Source* s = *i; ++i; - if (GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) + if ((GSUtil::HasSharedBits(psm, s->m_TEX0.PSM) && (end_bp > s->m_TEX0.TBP0 && start_bp < s->UnwrappedEndBlock()) && !s->m_target) || + (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) { m_src.RemoveAt(s); } } + + const u32 bbp = bp + bw * 0x10; + if (bw >= 16 && bbp < 16384) + { 
+ // Detect half of the render target (fix snow engine game) + // Target Page (8KB) have always a width of 64 pixels + // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 + auto& list = m_src.m_map[bbp >> 5]; + for (auto i = list.begin(); i != list.end();) + { + Source* s = *i; + ++i; + + if (GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) + { + m_src.RemoveAt(s); + } + } + } } } @@ -3220,8 +3672,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r // But this causes rects to be too big, especially in WRC games, I don't think there's any need to align them here. GSVector4i r = rect; - off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) - { + off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) { auto& list = m_src.m_map[page]; for (auto i = list.begin(); i != list.end();) { @@ -3286,11 +3737,6 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (!target) return; - // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. - // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, - // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. - const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); - const u32 end_bp = rect.rempty() ? 
start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); RGBAMask rgba; rgba._u32 = GSUtil::GetChannelMask(psm); @@ -3535,7 +3981,7 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r // Check exact match first const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp; const u32 page_mask = ((1 << 5) - 1); - const bool exact_mem_match = (read_start & ~page_mask) == (t->m_TEX0.TBP0 & ~page_mask) && ((read_end + page_mask) & ~page_mask) == ((t->m_end_block + page_mask) & ~page_mask); + const bool exact_mem_match = (read_start & ~page_mask) == (t->m_TEX0.TBP0 & ~page_mask) && ((read_end + (page_mask - 1)) & ~page_mask) <= t->m_end_block; const bool expecting_this_tex = exact_mem_match || (bpp_match && bw == t->m_TEX0.TBW && (((read_start & ~page_mask) == t->m_TEX0.TBP0) || (bp >= t->m_TEX0.TBP0 && ((read_end + page_mask) & ~page_mask) <= ((t->m_end_block + page_mask) & ~page_mask)))); if (!expecting_this_tex) @@ -3844,7 +4290,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - GL_CACHE("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + GL_CACHE("HW Move after draw %d 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", GSState::s_n, SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -3970,6 +4416,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Invalidate any sources that overlap with the target (since they're now stale). 
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false); + return true; } @@ -4155,8 +4602,8 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU. { Target* t = *it; - - if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + const u32 tgt_bw = std::max(t->m_TEX0.TBW, 1U); + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % tgt_bw) == 0)) && tgt_bw == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -4380,7 +4827,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo if (s->m_from_hash_cache) s->m_from_hash_cache->refcount++; else if (!s->m_shared_texture) + { + DevCon.Warning("replace %d", m_source_memory_usage); m_source_memory_usage += s->m_texture->GetMemUsage(); + } } void GSTextureCache::IncAge() @@ -4401,7 +4851,7 @@ void GSTextureCache::IncAge() AgeHashCache(); // As of 04/15/2024 this is s et to 60 (just 1 second of targets), which should be fine now as it doesn't destroy targets which haven't been covered. - // + // // For reference, here are some games sensitive to killing old targets: // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. 
// ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it @@ -4488,9 +4938,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } bool hack = false; - bool channel_shuffle = false; + bool channel_shuffle = dst && (TEX0.PSM == PSMT8) && (GSRendererHW::GetInstance()->TestChannelShuffle(dst)); - if (dst && (x_offset != 0 || y_offset != 0)) + if (dst && (x_offset != 0 || y_offset != 0) && (TEX0.PSM != PSMT8 || channel_shuffle)) { const float scale = dst->m_scale; const int x = static_cast(scale * x_offset); @@ -4516,7 +4966,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + m_target_memory_usage += dTex->GetMemUsage(); // copy the rt in const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); @@ -4554,7 +5004,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; - channel_shuffle = GSRendererHW::GetInstance()->TestChannelShuffle(dst); + if(channel_shuffle) + m_temporary_source = src; } // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. @@ -4646,8 +5097,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_valid_rect = dst->m_valid; src->m_end_block = dst->m_end_block; - // Do this first as we could be adding in alpha from an upgraded 24bit target. - dst->Update(); + // Do this first as we could be adding in alpha from an upgraded 24bit target. if the rect intersects a dirty area. 
+ if (!dst->m_dirty.empty() && !src_range->rintersect(dst->m_dirty.GetTotalRect(dst->m_TEX0, dst->m_unscaled_size)).rempty()) + dst->Update(); src->m_valid_alpha_minmax = true; if ((src->m_TEX0.PSM & 0xf) == PSMCT24) @@ -4810,7 +5262,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } // kill source immediately if it's the RT/DS, because that'll get invalidated immediately - if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0) || channel_shuffle) { GL_CACHE("TC: Source is RT or ZBUF, invalidating after draw."); m_temporary_source = src; @@ -4833,7 +5285,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + src->m_shared_texture = false; + src->m_target_direct = false; + m_target_memory_usage += dTex->GetMemUsage(); src->m_texture = dTex; if (use_texture) @@ -4869,6 +5323,23 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); + + // Adjust the region for the newly translated rect. 
+ u32 const dst_y_height = GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y; + u32 const src_y_height = GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + u32 const dst_page_offset = (y_offset / dst_y_height) * std::max(dst->m_TEX0.TBW, 1U); + y_offset = (dst_page_offset / (std::max(TEX0.TBW / 2U, 1U))) * src_y_height; + + u32 const src_page_width = GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + x_offset = (x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + if (x_offset >= static_cast(std::max(TEX0.TBW, 1U) * src_page_width)) + { + const u32 adjust = x_offset / src_page_width; + y_offset += adjust * GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + x_offset -= src_page_width * adjust; + } + src->m_region.SetX(x_offset, x_offset + tw); + src->m_region.SetY(y_offset, y_offset + th); } else { @@ -4880,6 +5351,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con sTex, sRectF, dTex, GSVector4(destX, destY, new_size.x, new_size.y), shader, false); } + m_temporary_source = src; + g_perfmon.Put(GSPerfMon::TextureCopies, 1); #ifdef PCSX2_DEVBUILD @@ -5020,8 +5493,10 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR { // We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets // TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end. - const int tex_width = std::max(64 * TEX0.TBW, region.GetMaxX()); - const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); + // Round the size up to the next block + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + const int tex_width = (std::max(64 * TEX0.TBW, region.GetMaxX()) + (psm_s.bs.x - 1)) & ~(psm_s.bs.x - 1); + const int tex_height = ((region.HasY() ? 
region.GetHeight() : (1 << TEX0.TH)) + (psm_s.bs.y - 1)) & ~(psm_s.bs.y - 1); const int scaled_width = static_cast(static_cast(tex_width) * scale); const int scaled_height = static_cast(static_cast(tex_height) * scale); @@ -5288,7 +5763,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); return nullptr; } - + DevCon.Warning("Merged %d", m_source_memory_usage); m_source_memory_usage += dtex->GetMemUsage(); // Sort rect list by the texture, we want to batch as many as possible together. @@ -5634,8 +6109,7 @@ std::shared_ptr GSTextureCache::LookupPaletteObject(con void GSTextureCache::Read(Target* t, const GSVector4i& r) { - if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) - || r.width() == 0 || r.height() == 0) + if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) || r.width() == 0 || r.height() == 0) return; const GIFRegTEX0& TEX0 = t->m_TEX0; @@ -5856,7 +6330,10 @@ GSTextureCache::Source::~Source() // to recycle. if (!m_shared_texture && !m_from_hash_cache && m_texture) { - g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); + if(m_from_target) + g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); + else + g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } } @@ -6177,6 +6654,7 @@ GSTextureCache::Target::~Target() { // Targets should never be shared. 
pxAssert(!m_shared_texture); + if (m_texture) { g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); @@ -6340,7 +6818,7 @@ void GSTextureCache::Target::Update(bool cannot_scale) { if (alpha_minmax.second > 128 || (m_TEX0.PSM & 0xf) == PSMCT24) UnscaleRTAlpha(); - else if (!cannot_scale && total_rect.eq(m_valid)) + else if (!cannot_scale && total_rect.rintersect(m_valid).eq(m_valid)) m_rt_alpha_scale = true; } @@ -6414,7 +6892,7 @@ void GSTextureCache::Target::UpdateValidChannels(u32 psm, u32 fbmsk) m_valid_rgb |= (psm_s.trbpp >= 24 && (fbmsk & 0x00FFFFFF) != 0x00FFFFFF) || (psm_s.trbpp == 16); } -bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha) +bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha, bool width_match) { // Grab validities.. bool alpha_valid = false; @@ -6437,7 +6915,6 @@ bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm, bool req_color, bool if (req_alpha && !alpha_valid && color_valid && (m_TEX0.PSM & 0xF) <= PSMCT24 && (psm & 0xF) == PSMCT32) { - RGBAMask mask; mask._u32 = 0x8; m_TEX0.PSM &= ~PSMCT24; @@ -6478,7 +6955,13 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_valid = m_valid.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } + // Else No valid size, so need to resize down. 
// GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6491,17 +6974,25 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } else if (can_resize) { m_valid = m_valid.runion(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } -bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old) +bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old, bool require_new_rect, GSVector4i new_rect, bool keep_old) { if (m_unscaled_size.x == new_unscaled_width && m_unscaled_size.y == new_unscaled_height) return true; @@ -6525,7 +7016,7 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca // Only need to copy if it's been written to. if (m_texture->GetState() == GSTexture::State::Dirty) { - const GSVector4i rc = GSVector4i::loadh(size.min(new_size)); + const GSVector4i rc = require_new_rect ? 
new_rect : GSVector4i::loadh(size.min(new_size)); if (tex->IsDepthStencil()) { // Can't do partial copies in DirectX for depth textures, and it's probably not ideal in other @@ -6534,8 +7025,15 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca } else { - // Fast memcpy()-like path for color targets. - g_gs_device->CopyRect(m_texture, tex, rc, 0, 0); + if (require_new_rect) + { + g_gs_device->StretchRect(m_texture, tex, GSVector4(rc), ShaderConvert::COPY, false); + } + else + { + // Fast memcpy()-like path for color targets. + g_gs_device->CopyRect(m_texture, tex, rc, 0, 0); + } } g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -6553,12 +7051,18 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca g_gs_device->InvalidateRenderTarget(tex); } - g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); - if (recycle_old) - g_gs_device->Recycle(m_texture); + if (!keep_old) + { + g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); + + if (recycle_old) + g_gs_device->Recycle(m_texture); + else + delete m_texture; + } else - delete m_texture; + g_texture_cache->m_target_memory_usage += tex->GetMemUsage(); m_texture = tex; m_unscaled_size = new_unscaled_size; @@ -6587,8 +7091,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0) m_surfaces.insert(s); // The source pointer will be stored/duplicated in all m_map[array of pages] - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { s->m_erase_it[page] = m_map[page].InsertFront(s); }); } @@ -6631,8 +7134,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s", s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM)); - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) 
{ m_map[page].EraseIndex(s->m_erase_it[page]); }); @@ -6869,6 +7371,29 @@ void GSTextureCache::InvalidateTemporarySource() m_temporary_source = nullptr; } +void GSTextureCache::SetTemporaryZ(GSTexture* temp_z) +{ + m_temporary_z = temp_z; +} + +GSTexture* GSTextureCache::GetTemporaryZ() +{ + if (!m_temporary_z) + return nullptr; + + return m_temporary_z; +} + + +void GSTextureCache::InvalidateTemporaryZ() +{ + if (!m_temporary_z) + return; + + g_gs_device->Recycle(m_temporary_z); + m_temporary_z = nullptr; +} + void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax) { // When we insert we update memory usage. Old texture gets removed below. @@ -6962,6 +7487,7 @@ void GSTextureCache::Palette::InitializeTexture() } m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); + g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); } } @@ -7045,7 +7571,7 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet { // Palette is unused it = map.erase(it); // Erase element from map - // The palette object should now be gone as the shared pointer to the object in the map is deleted + // The palette object should now be gone as the shared pointer to the object in the map is deleted } else { @@ -7109,10 +7635,7 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur { const SurfaceOffsetKeyElem& lhs_elem = lhs.elems[i]; const SurfaceOffsetKeyElem& rhs_elem = rhs.elems[i]; - if (lhs_elem.bp != rhs_elem.bp - || lhs_elem.bw != rhs_elem.bw - || lhs_elem.psm != rhs_elem.psm - || !lhs_elem.rect.eq(rhs_elem.rect)) + if (lhs_elem.bp != rhs_elem.bp || lhs_elem.bw != rhs_elem.bw || lhs_elem.psm != rhs_elem.psm || !lhs_elem.rect.eq(rhs_elem.rect)) return false; } return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 3ee9f925b0aaa..a0a434f448773 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ 
b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -238,7 +238,7 @@ class GSTextureCache static Target* Create(GIFRegTEX0 TEX0, int w, int h, float scale, int type, bool clear); __fi bool HasValidAlpha() const { return (m_valid_alpha_low | m_valid_alpha_high); } - bool HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha); + bool HasValidBitsForFormat(u32 psm, bool req_color, bool req_alpha, bool width_match); void ResizeDrawn(const GSVector4i& rect); void UpdateDrawn(const GSVector4i& rect, bool can_resize = true); @@ -257,7 +257,7 @@ class GSTextureCache void UpdateValidChannels(u32 psm, u32 fbmsk); /// Resizes target texture, DOES NOT RESCALE. - bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true); + bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true, bool require_offset = false, GSVector4i offset = GSVector4i::zero(), bool keep_old = false); private: void UpdateTextureDebugName(); @@ -427,6 +427,7 @@ class GSTextureCache std::unordered_map m_surface_offset_cache; Source* m_temporary_source = nullptr; // invalidated after the draw + GSTexture* m_temporary_z = nullptr; // invalidated after the draw std::unique_ptr m_color_download_texture; std::unique_ptr m_uint16_download_texture; @@ -491,7 +492,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, GSTextureCache::Target* ds = nullptr, int offset = -1); 
Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); @@ -508,7 +509,7 @@ class GSTextureCache bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits::max(), bool move_front = true); bool Has32BitTarget(u32 bp); - void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32); + void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1); void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); @@ -517,7 +518,7 @@ class GSTextureCache /// Removes any sources which point to the specified target. void InvalidateSourcesFromTarget(const Target* t); - /// Replaces a source's texture externally. Required for some CRC hacks. + /// Removes any sources which point to the same address as a new target. void ReplaceSourceTexture(Source* s, GSTexture* new_texture, float new_scale, const GSVector2i& new_unscaled_size, HashCacheEntry* hc_entry, bool new_texture_is_shared); @@ -551,6 +552,11 @@ class GSTextureCache /// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current draw. void InvalidateTemporarySource(); + void SetTemporaryZ(GSTexture* temp_z); + GSTexture* GetTemporaryZ(); + + /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is + void InvalidateTemporaryZ(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. 
Ownership of tex is transferred. void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax); diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index b7c2f99c1cd5d..e020e82577923 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,11 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index b8eda02966a62..af07babbaec96 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 58; +static constexpr u32 SHADER_CACHE_VERSION = 59;