From 17f1f24dc2e04f4515331075f35f0ad74b94bb20 Mon Sep 17 00:00:00 2001 From: Near <77224854+near-san@users.noreply.github.com> Date: Thu, 23 Jul 2020 03:48:00 +0000 Subject: [PATCH] Update to ares v114r25 release. - PlayStation: implemented all GTE instructions (though certainly with many serious bugs remaining) - ares: improved Debug class notifications --- ares/ares/debug/debug.cpp | 2 +- ares/ares/debug/debug.hpp | 6 +- ares/ares/information.hpp | 2 +- ares/ps1/cpu/core/core.cpp | 8 + ares/ps1/cpu/core/core.hpp | 8 +- ares/ps1/cpu/core/cpu-instructions.cpp | 17 +- ares/ps1/cpu/core/decoder.hpp | 32 +- ares/ps1/cpu/core/gte-instructions.cpp | 60 ++- ares/ps1/cpu/core/gte.cpp | 583 ++++++++++++------------- ares/ps1/cpu/core/gte.hpp | 142 +++--- ares/ps1/cpu/core/memory.cpp | 8 +- ares/ps1/dma/transfer.cpp | 28 +- ares/ps1/gpu/gp0.cpp | 4 +- ares/ps1/ps1.hpp | 13 + 14 files changed, 502 insertions(+), 411 deletions(-) diff --git a/ares/ares/debug/debug.cpp b/ares/ares/debug/debug.cpp index df1b2bae22..b8b9886459 100644 --- a/ares/ares/debug/debug.cpp +++ b/ares/ares/debug/debug.cpp @@ -1,6 +1,6 @@ namespace ares { -Debug debug; +Debug _debug; auto Debug::reset() -> void { _unimplementedNotices.reset(); diff --git a/ares/ares/debug/debug.hpp b/ares/ares/debug/debug.hpp index 07975bf56b..a53c0d2268 100644 --- a/ares/ares/debug/debug.hpp +++ b/ares/ares/debug/debug.hpp @@ -1,6 +1,8 @@ namespace ares { struct Debug { + static constexpr bool enabled = true; + auto reset() -> void; template auto unimplemented(P&&... p) -> void { @@ -13,6 +15,8 @@ struct Debug { vector _unimplementedNotices; }; -extern Debug debug; +extern Debug _debug; } + +#define debug(function, ...) if constexpr(ares::_debug.enabled) ares::_debug.function(__VA_ARGS__) diff --git a/ares/ares/information.hpp b/ares/ares/information.hpp index 046f21f122..3a04673024 100644 --- a/ares/ares/information.hpp +++ b/ares/ares/information.hpp @@ -2,7 +2,7 @@ namespace ares { static const string Name = "ares"; - static const string Version = "114.24"; + static const string Version = "114.25"; static const string Copyright = "ares team"; static const string License = "BY-NC-ND 4.0"; static const string LicenseURI = "https://creativecommons.org/licenses/by-nc-nd/4.0/"; diff --git a/ares/ps1/cpu/core/core.cpp b/ares/ps1/cpu/core/core.cpp index 41697f6beb..ddcfb5a7ee 100644 --- a/ares/ps1/cpu/core/core.cpp +++ b/ares/ps1/cpu/core/core.cpp @@ -43,6 +43,14 @@ auto CPU::instruction() -> void { } if constexpr(1) { + if constexpr(Accuracy::CPU::AlignmentErrors) { + if(unlikely(address & 3)) { + exception.busInstruction(); + step(2); + return; + } + } + pipeline.address = address; pipeline.instruction = bus.readWord(address); debugger.instruction(); diff --git a/ares/ps1/cpu/core/core.hpp b/ares/ps1/cpu/core/core.hpp index 726a4ae4f2..ba2fdf1276 100644 --- a/ares/ps1/cpu/core/core.hpp +++ b/ares/ps1/cpu/core/core.hpp @@ -8,12 +8,12 @@ auto powerCore(bool reset) -> void; //memory.cpp - auto readByte(u32 address) -> u8; - auto readHalf(u32 address) -> u16; + auto readByte(u32 address) -> u32; + auto readHalf(u32 address) -> u32; auto readWord(u32 address) -> u32; - auto writeByte(u32 address, u8 data) -> void; - auto writeHalf(u32 address, u16 data) -> void; + auto writeByte(u32 address, u32 data) -> void; + auto writeHalf(u32 address, u32 data) -> void; auto writeWord(u32 address, u32 data) -> void; //decoder.cpp diff --git a/ares/ps1/cpu/core/cpu-instructions.cpp b/ares/ps1/cpu/core/cpu-instructions.cpp index b5e7e0f096..fc88689ae2 100644 --- a/ares/ps1/cpu/core/cpu-instructions.cpp +++ b/ares/ps1/cpu/core/cpu-instructions.cpp @@ -117,11 +117,17 @@ auto CPU::instructionLBU(u32& rt, cu32& rs, i16 imm) -> void { } auto CPU::instructionLH(u32& rt, cu32& rs, i16 imm) -> void { + if constexpr(Accuracy::CPU::AlignmentErrors) { + if(unlikely(rs + imm & 1)) return exception.addressLoad(); + } auto data = readHalf(rs + imm); fetch(rt, i16(data)); } auto CPU::instructionLHU(u32& rt, cu32& rs, i16 imm) -> void { + if constexpr(Accuracy::CPU::AlignmentErrors) { + if(unlikely(rs + imm & 1)) return exception.addressLoad(); + } auto data = readHalf(rs + imm); fetch(rt, u16(data)); } @@ -131,6 +137,9 @@ auto CPU::instructionLUI(u32& rt, u16 imm) -> void { } auto CPU::instructionLW(u32& rt, cu32& rs, i16 imm) -> void { + if constexpr(Accuracy::CPU::AlignmentErrors) { + if(unlikely(rs + imm & 3)) return exception.addressLoad(); + } auto data = readWord(rs + imm); fetch(rt, i32(data)); } @@ -196,6 +205,9 @@ auto CPU::instructionSB(cu32& rt, cu32& rs, i16 imm) -> void { } auto CPU::instructionSH(cu32& rt, cu32& rs, i16 imm) -> void { + if constexpr(Accuracy::CPU::AlignmentErrors) { + if(rs + imm & 1) return exception.addressStore(); + } writeHalf(rs + imm, rt); } @@ -240,7 +252,7 @@ auto CPU::instructionSRLV(u32& rd, cu32& rt, cu32& rs) -> void { } auto CPU::instructionSUB(u32& rd, cu32& rs, cu32& rt) -> void { - if((rs ^ rt) & (rs ^ rs + rt) & 0x8000'0000) return exception.arithmeticOverflow(); + if((rs ^ rt) & (rs ^ rs - rt) & 0x8000'0000) return exception.arithmeticOverflow(); write(rd, rs - rt); } @@ -249,6 +261,9 @@ auto CPU::instructionSUBU(u32& rd, cu32& rs, cu32& rt) -> void { } auto CPU::instructionSW(cu32& rt, cu32& rs, i16 imm) -> void { + if constexpr(Accuracy::CPU::AlignmentErrors) { + if(rs + imm & 3) return exception.addressStore(); + } writeWord(rs + imm, rt); } diff --git a/ares/ps1/cpu/core/decoder.hpp b/ares/ps1/cpu/core/decoder.hpp index bcb929fe79..4376ed7c8f 100644 --- a/ares/ps1/cpu/core/decoder.hpp +++ b/ares/ps1/cpu/core/decoder.hpp @@ -250,27 +250,27 @@ op(0x00, RTPS, LM, SF); op(0x01, RTPS, LM, SF); //0x00 mirror? op(0x06, NCLIP); - op(0x0c, OP); - op(0x10, DPCS); - op(0x11, INTPL); - op(0x12, MVMVA, TV, MV, MM); + op(0x0c, OP, LM, SF); + op(0x10, DPCS, LM, SF); + op(0x11, INTPL, LM, SF); + op(0x12, MVMVA, LM, TV, MV, MM, SF); op(0x13, NCDS, LM, SF); - op(0x14, CDP); + op(0x14, CDP, LM, SF); op(0x16, NCDT, LM, SF); - op(0x1a, DCPL); //0x29 mirror? - op(0x1b, NCCS); - op(0x1c, CC); - op(0x1e, NCS); - op(0x20, NCT); - op(0x28, SQR); - op(0x29, DCPL); - op(0x2a, DPCT); + op(0x1a, DCPL, LM, SF); //0x29 mirror? + op(0x1b, NCCS, LM, SF); + op(0x1c, CC, LM, SF); + op(0x1e, NCS, LM, SF); + op(0x20, NCT, LM, SF); + op(0x28, SQR, LM, SF); + op(0x29, DCPL, LM, SF); + op(0x2a, DPCT, LM, SF); op(0x2d, AVSZ3); op(0x2e, AVSZ4); op(0x30, RTPT, LM, SF); - op(0x3d, GPF); - op(0x3e, GPL); - op(0x3f, NCCT); + op(0x3d, GPF, LM, SF); + op(0x3e, GPL, LM, SF); + op(0x3f, NCCT, LM, SF); } #undef LM #undef TV diff --git a/ares/ps1/cpu/core/gte-instructions.cpp b/ares/ps1/cpu/core/gte-instructions.cpp index b4d6b9dc86..08dc010837 100644 --- a/ares/ps1/cpu/core/gte-instructions.cpp +++ b/ares/ps1/cpu/core/gte-instructions.cpp @@ -38,71 +38,93 @@ auto CPU::instructionAVSZ4() -> void { gte.updateError(); } -auto CPU::instructionCC() -> void { +auto CPU::instructionCC(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.cc(); gte.updateError(); } -auto CPU::instructionCDP() -> void { +auto CPU::instructionCDP(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.cdp(); gte.updateError(); } -auto CPU::instructionDCPL() -> void { +auto CPU::instructionDCPL(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.dcpl(); gte.updateError(); } -auto CPU::instructionDPCS() -> void { +auto CPU::instructionDPCS(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.dpcs(); gte.updateError(); } -auto CPU::instructionDPCT() -> void { +auto CPU::instructionDPCT(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.dpct(); gte.updateError(); } -auto CPU::instructionGPF() -> void { +auto CPU::instructionGPF(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.gpf(); gte.updateError(); } -auto CPU::instructionGPL() -> void { +auto CPU::instructionGPL(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.gpl(); gte.updateError(); } -auto CPU::instructionINTPL() -> void { +auto CPU::instructionINTPL(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.intpl(); gte.updateError(); } -auto CPU::instructionMVMVA(u8 tv, u8 mv, u8 mm) -> void { +auto CPU::instructionMVMVA(bool lm, u8 tv, u8 mv, u8 mm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; gte.tv = tv; gte.mv = mv; gte.mm = mm; + gte.sf = sf; gte.mvmva(); gte.updateError(); } -auto CPU::instructionNCCS() -> void { +auto CPU::instructionNCCS(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.nccs(); gte.updateError(); } -auto CPU::instructionNCCT() -> void { +auto CPU::instructionNCCT(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.ncct(); gte.updateError(); } @@ -129,20 +151,26 @@ auto CPU::instructionNCLIP() -> void { gte.updateError(); } -auto CPU::instructionNCS() -> void { +auto CPU::instructionNCS(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.ncs(); gte.updateError(); } -auto CPU::instructionNCT() -> void { +auto CPU::instructionNCT(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.nct(); gte.updateError(); } -auto CPU::instructionOP() -> void { +auto CPU::instructionOP(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.op(); gte.updateError(); } @@ -163,8 +191,10 @@ auto CPU::instructionRTPT(bool lm, u8 sf) -> void { gte.updateError(); } -auto CPU::instructionSQR() -> void { +auto CPU::instructionSQR(bool lm, u8 sf) -> void { gte.clearFlags(); + gte.lm = lm; + gte.sf = sf; gte.sqr(); gte.updateError(); } diff --git a/ares/ps1/cpu/core/gte.cpp b/ares/ps1/cpu/core/gte.cpp index 62a3f66845..36b882454a 100644 --- a/ares/ps1/cpu/core/gte.cpp +++ b/ares/ps1/cpu/core/gte.cpp @@ -18,39 +18,39 @@ auto CPU::GTE::updateError() -> void { auto CPU::GTE::getDataRegister(uint index) -> u32 { u32 data; switch(index) { - case 0: data = u16(vx0) << 0 | u16(vy0) << 16; break; - case 1: data = u16(vz0) << 0; break; - case 2: data = u16(vx1) << 0 | u16(vy1) << 16; break; - case 3: data = u16(vz1) << 0; break; - case 4: data = u16(vx2) << 0 | u16(vy2) << 16; break; - case 5: data = u16(vz2) << 0; break; - case 6: data = rgbc; break; + case 0: data = u16(v.a.x) << 0 | u16(v.a.y) << 16; break; + case 1: data = u16(v.a.z) << 0; break; + case 2: data = u16(v.b.x) << 0 | u16(v.b.y) << 16; break; + case 3: data = u16(v.b.z) << 0; break; + case 4: data = u16(v.c.x) << 0 | u16(v.c.y) << 16; break; + case 5: data = u16(v.c.z) << 0; break; + case 6: data = rgbc.r << 0 | rgbc.g << 8 | rgbc.b << 16 | rgbc.t << 24; break; case 7: data = otz; break; - case 8: data = ir0; break; - case 9: data = ir1; break; - case 10: data = ir2; break; - case 11: data = ir3; break; - case 12: data = u16(sx0) << 0 | u16(sy0) << 16; break; - case 13: data = u16(sx1) << 0 | u16(sy1) << 16; break; - case 14: data = u16(sx2) << 0 | u16(sy2) << 16; break; - case 15: data = u16(sx2) << 0 | u16(sy2) << 16; break; - case 16: data = sz0; break; - case 17: data = sz1; break; - case 18: data = sz2; break; - case 19: data = sz3; break; - case 20: data = rgb0; break; - case 21: data = rgb1; break; - case 22: data = rgb2; break; - case 23: data = res1; break; - case 24: data = mac0; break; - case 25: data = mac1; break; - case 26: data = mac2; break; - case 27: data = mac3; break; + case 8: data = ir.t; break; + case 9: data = ir.x; break; + case 10: data = ir.y; break; + case 11: data = ir.z; break; + case 12: data = u16(screen[0].x) << 0 | u16(screen[0].y) << 16; break; + case 13: data = u16(screen[1].x) << 0 | u16(screen[1].y) << 16; break; + case 14: data = u16(screen[2].x) << 0 | u16(screen[2].y) << 16; break; + case 15: data = u16(screen[2].x) << 0 | u16(screen[2].y) << 16; break; //not screen[3] + case 16: data = screen[0].z; break; + case 17: data = screen[1].z; break; + case 18: data = screen[2].z; break; + case 19: data = screen[3].z; break; + case 20: data = rgb[0]; break; + case 21: data = rgb[1]; break; + case 22: data = rgb[2]; break; + case 23: data = rgb[3]; break; + case 24: data = mac.t; break; + case 25: data = mac.x; break; + case 26: data = mac.y; break; + case 27: data = mac.z; break; case 28: //IRGB case 29: {//ORGB - u8 r = uclamp<5>(ir1 >> 7); - u8 g = uclamp<5>(ir2 >> 7); - u8 b = uclamp<5>(ir3 >> 7); + u8 r = uclamp<5>(ir.x >> 7); + u8 g = uclamp<5>(ir.y >> 7); + u8 b = uclamp<5>(ir.z >> 7); data = r << 0 | g << 5 | b << 10; } break; case 30: data = lzcs; break; @@ -61,38 +61,38 @@ auto CPU::GTE::getDataRegister(uint index) -> u32 { auto CPU::GTE::setDataRegister(uint index, u32 data) -> void { switch(index) { - case 0: vx0 = data >> 0; vy0 = data >> 16; break; - case 1: vz0 = data >> 0; break; - case 2: vx1 = data >> 0; vy1 = data >> 16; break; - case 3: vz1 = data >> 0; break; - case 4: vx2 = data >> 0; vy2 = data >> 16; break; - case 5: vz2 = data >> 0; break; - case 6: rgbc = data; break; + case 0: v.a.x = data >> 0; v.a.y = data >> 16; break; + case 1: v.a.z = data >> 0; break; + case 2: v.b.x = data >> 0; v.b.y = data >> 16; break; + case 3: v.b.z = data >> 0; break; + case 4: v.c.x = data >> 0; v.c.y = data >> 16; break; + case 5: v.c.z = data >> 0; break; + case 6: rgbc.r = data >> 0; rgbc.g = data >> 8; rgbc.b = data >> 16; rgbc.t = data >> 24; break; case 7: otz = data; break; - case 8: ir0 = data; break; - case 9: ir1 = data; break; - case 10: ir2 = data; break; - case 11: ir3 = data; break; - case 12: sx0 = data >> 0; sy0 = data >> 16; break; - case 13: sx1 = data >> 0; sy1 = data >> 16; break; - case 14: sx2 = data >> 0; sy2 = data >> 16; break; + case 8: ir.t = data; break; + case 9: ir.x = data; break; + case 10: ir.y = data; break; + case 11: ir.z = data; break; + case 12: screen[0].x = data >> 0; screen[0].y = data >> 16; break; + case 13: screen[1].x = data >> 0; screen[1].y = data >> 16; break; + case 14: screen[2].x = data >> 0; screen[2].y = data >> 16; break; case 15: {//SXP - sx0 = sx1; sy0 = sy1; - sx1 = sx2; sy1 = sy2; - sx2 = data >> 0; sy2 = data >> 16; + screen[0].x = screen[1].x; screen[0].y = screen[1].y; + screen[1].x = screen[2].x; screen[1].y = screen[2].y; + screen[2].x = data >> 0; screen[2].y = data >> 16; } break; - case 16: sz0 = data; break; - case 17: sz1 = data; break; - case 18: sz2 = data; break; - case 19: sz3 = data; break; - case 20: rgb0 = data; break; - case 21: rgb1 = data; break; - case 22: rgb2 = data; break; - case 23: res1 = data; break; - case 24: mac0 = data; break; - case 25: mac1 = data; break; - case 26: mac2 = data; break; - case 27: mac3 = data; break; + case 16: screen[0].z = data; break; + case 17: screen[1].z = data; break; + case 18: screen[2].z = data; break; + case 19: screen[3].z = data; break; + case 20: rgb[0] = data; break; + case 21: rgb[1] = data; break; + case 22: rgb[2] = data; break; + case 23: rgb[3] = data; break; + case 24: mac.t = data; break; + case 25: mac.x = data; break; + case 26: mac.y = data; break; + case 27: mac.z = data; break; case 28: irgb = data; break; case 29: orgb = data; break; case 30: lzcs = data; break; @@ -103,30 +103,30 @@ auto CPU::GTE::setDataRegister(uint index, u32 data) -> void { auto CPU::GTE::getControlRegister(uint index) -> u32 { u32 data; switch(index) { - case 0: data = u16(rt11) << 0 | u16(rt12) << 16; break; - case 1: data = u16(rt13) << 0 | u16(rt21) << 16; break; - case 2: data = u16(rt22) << 0 | u16(rt23) << 16; break; - case 3: data = u16(rt31) << 0 | u16(rt32) << 16; break; - case 4: data = u16(rt33) << 0; break; - case 5: data = trx; break; - case 6: data = trY; break; - case 7: data = trz; break; - case 8: data = u16(l11) << 0 | u16(l12) << 16; break; - case 9: data = u16(l13) << 0 | u16(l21) << 16; break; - case 10: data = u16(l22) << 0 | u16(l23) << 16; break; - case 11: data = u16(l31) << 0 | u16(l32) << 16; break; - case 12: data = u16(l33) << 0; break; - case 13: data = rbk; break; - case 14: data = gbk; break; - case 15: data = bbk; break; - case 16: data = u16(lr1) << 0 | u16(lr2) << 16; break; - case 17: data = u16(lr3) << 0 | u16(lg1) << 16; break; - case 18: data = u16(lg2) << 0 | u16(lg3) << 16; break; - case 19: data = u16(lb1) << 0 | u16(lb2) << 16; break; - case 20: data = u16(lb3) << 0; break; - case 21: data = rfc; break; - case 22: data = gfc; break; - case 23: data = bfc; break; + case 0: data = u16(rotation.a.x) << 0 | u16(rotation.a.y) << 16; break; + case 1: data = u16(rotation.a.z) << 0 | u16(rotation.b.x) << 16; break; + case 2: data = u16(rotation.b.y) << 0 | u16(rotation.b.z) << 16; break; + case 3: data = u16(rotation.c.x) << 0 | u16(rotation.c.y) << 16; break; + case 4: data = u16(rotation.c.z) << 0; break; + case 5: data = translation.x; break; + case 6: data = translation.y; break; + case 7: data = translation.z; break; + case 8: data = u16(light.a.x) << 0 | u16(light.a.y) << 16; break; + case 9: data = u16(light.a.z) << 0 | u16(light.b.x) << 16; break; + case 10: data = u16(light.b.y) << 0 | u16(light.b.z) << 16; break; + case 11: data = u16(light.c.x) << 0 | u16(light.c.y) << 16; break; + case 12: data = u16(light.c.z) << 0; break; + case 13: data = backgroundColor.r; break; + case 14: data = backgroundColor.g; break; + case 15: data = backgroundColor.b; break; + case 16: data = u16(color.a.x) << 0 | u16(color.a.y) << 16; break; + case 17: data = u16(color.a.z) << 0 | u16(color.b.x) << 16; break; + case 18: data = u16(color.b.y) << 0 | u16(color.b.z) << 16; break; + case 19: data = u16(color.c.x) << 0 | u16(color.c.y) << 16; break; + case 20: data = u16(color.c.z) << 0; break; + case 21: data = farColor.r; break; + case 22: data = farColor.g; break; + case 23: data = farColor.b; break; case 24: data = ofx; break; case 25: data = ofy; break; case 26: data = h; break; @@ -141,30 +141,30 @@ auto CPU::GTE::getControlRegister(uint index) -> u32 { auto CPU::GTE::setControlRegister(uint index, u32 data) -> void { switch(index) { - case 0: rt11 = data >> 0; rt12 = data >> 16; break; - case 1: rt13 = data >> 0; rt21 = data >> 16; break; - case 2: rt22 = data >> 0; rt23 = data >> 16; break; - case 3: rt31 = data >> 0; rt32 = data >> 16; break; - case 4: rt33 = data >> 0; break; - case 5: trx = data; break; - case 6: trY = data; break; - case 7: trz = data; break; - case 8: l11 = data >> 0; l12 = data >> 16; break; - case 9: l13 = data >> 0; l21 = data >> 16; break; - case 10: l22 = data >> 0; l23 = data >> 16; break; - case 11: l31 = data >> 0; l32 = data >> 16; break; - case 12: l33 = data >> 0; break; - case 13: rbk = data; break; - case 14: gbk = data; break; - case 15: bbk = data; break; - case 16: lr1 = data >> 0; lr2 = data >> 16; break; - case 17: lr3 = data >> 0; lg1 = data >> 16; break; - case 18: lg2 = data >> 0; lg3 = data >> 16; break; - case 19: lb1 = data >> 0; lb2 = data >> 16; break; - case 20: lb3 = data >> 0; break; - case 21: rfc = data; break; - case 22: gfc = data; break; - case 23: bfc = data; break; + case 0: rotation.a.x = data >> 0; rotation.a.y = data >> 16; break; + case 1: rotation.a.z = data >> 0; rotation.b.x = data >> 16; break; + case 2: rotation.b.y = data >> 0; rotation.b.z = data >> 16; break; + case 3: rotation.c.x = data >> 0; rotation.c.y = data >> 16; break; + case 4: rotation.c.z = data >> 0; break; + case 5: translation.x = data; break; + case 6: translation.y = data; break; + case 7: translation.z = data; break; + case 8: light.a.x = data >> 0; light.a.y = data >> 16; break; + case 9: light.a.z = data >> 0; light.b.x = data >> 16; break; + case 10: light.b.y = data >> 0; light.b.z = data >> 16; break; + case 11: light.c.x = data >> 0; light.c.y = data >> 16; break; + case 12: light.c.z = data >> 0; break; + case 13: backgroundColor.r = data; break; + case 14: backgroundColor.g = data; break; + case 15: backgroundColor.b = data; break; + case 16: color.a.x = data >> 0; color.a.y = data >> 16; break; + case 17: color.a.z = data >> 0; color.b.x = data >> 16; break; + case 18: color.b.y = data >> 0; color.b.z = data >> 16; break; + case 19: color.c.x = data >> 0; color.c.y = data >> 16; break; + case 20: color.c.z = data >> 0; break; + case 21: farColor.r = data; break; + case 22: farColor.g = data; break; + case 23: farColor.b = data; break; case 24: ofx = data; break; case 25: ofy = data; break; case 26: h = data; break; @@ -178,10 +178,6 @@ auto CPU::GTE::setControlRegister(uint index, u32 data) -> void { // -#define R (u8(rgbc >> 0) << 4) -#define G (u8(rgbc >> 8) << 4) -#define B (u8(rgbc >> 16) << 4) - template auto CPU::GTE::check(i64 value) -> i64 { static constexpr i64 min = -(i64(1) << (id == 0 ? 31 : 43)); @@ -249,18 +245,18 @@ auto CPU::GTE::saturateRGB(i32 value) -> u32 { template auto CPU::GTE::setMac(i64 value) -> i64 { check(value); - if constexpr(id == 0) { mac0 = value; return value; } - if constexpr(id == 1) { mac1 = value >> sf; return value >> sf; } - if constexpr(id == 2) { mac2 = value >> sf; return value >> sf; } - if constexpr(id == 3) { mac3 = value >> sf; return value >> sf; } + if constexpr(id == 0) { mac.t = value; return value; } + if constexpr(id == 1) { mac.x = value >> sf; return value >> sf; } + if constexpr(id == 2) { mac.y = value >> sf; return value >> sf; } + if constexpr(id == 3) { mac.z = value >> sf; return value >> sf; } } template auto CPU::GTE::setIr(i32 value, bool lm) -> void { - if constexpr(id == 0) ir0 = saturate<0>(value, lm); - if constexpr(id == 1) ir1 = saturate<1>(value, lm); - if constexpr(id == 2) ir2 = saturate<2>(value, lm); - if constexpr(id == 3) ir3 = saturate<3>(value, lm); + if constexpr(id == 0) ir.t = saturate<0>(value, lm); + if constexpr(id == 1) ir.x = saturate<1>(value, lm); + if constexpr(id == 2) ir.y = saturate<2>(value, lm); + if constexpr(id == 3) ir.z = saturate<3>(value, lm); } template @@ -268,6 +264,12 @@ auto CPU::GTE::setMacAndIr(i64 value, bool lm) -> void { setIr(setMac(value), lm); } +auto CPU::GTE::setMacAndIr(const v64& vector) -> void { + setMacAndIr<1>(vector.x, lm); + setMacAndIr<2>(vector.y, lm); + setMacAndIr<3>(vector.z, lm); +} + auto CPU::GTE::setOtz(i64 value) -> void { static constexpr i64 min = 0x0000; static constexpr i64 max = 0xffff; @@ -279,6 +281,20 @@ auto CPU::GTE::setOtz(i64 value) -> void { // +auto CPU::GTE::matrixMultiply(const m16& matrix, const v16& vector, const v32& translation) -> v64 { + i64 x = extend<1>(extend<1>(extend<1>((i64(translation.x) << 12) + matrix.a.x * vector.x) + matrix.a.y * vector.y) + matrix.a.z * vector.z); + i64 y = extend<1>(extend<1>(extend<1>((i64(translation.y) << 12) + matrix.b.x * vector.x) + matrix.b.y * vector.y) + matrix.b.z * vector.z); + i64 z = extend<1>(extend<1>(extend<1>((i64(translation.z) << 12) + matrix.c.x * vector.x) + matrix.c.y * vector.y) + matrix.c.z * vector.z); + return {x, y, z}; +} + +auto CPU::GTE::vectorMultiply(const v16& vector1, const v16& vector2, const v16& translation) -> v64 { + i64 x = (i64(translation.x) << 12) + vector1.x * vector2.x; + i64 y = (i64(translation.y) << 12) + vector1.y * vector2.y; + i64 z = (i64(translation.z) << 12) + vector1.z * vector2.z; + return {x, y, z}; +} + auto CPU::GTE::divide(u32 lhs, u32 rhs) -> u32 { if(rhs * 2 <= lhs) { flag.divide_overflow = 1; @@ -297,29 +313,29 @@ auto CPU::GTE::divide(u32 lhs, u32 rhs) -> u32 { return min(0x1ffff, result); } -auto CPU::GTE::pushSX(i32 sx) -> void { +auto CPU::GTE::pushScreenX(i32 sx) -> void { if(sx < -1024) sx = -1024, flag.sx2_saturated = 1; if(sx > +1023) sx = +1023, flag.sx2_saturated = 1; - sx0 = sx1; - sx1 = sx2; - sx2 = sx; + screen[0].x = screen[1].x; + screen[1].x = screen[2].x; + screen[2].x = sx; } -auto CPU::GTE::pushSY(i32 sy) -> void { +auto CPU::GTE::pushScreenY(i32 sy) -> void { if(sy < -1024) sy = -1024, flag.sy2_saturated = 1; if(sy > +1023) sy = +1023, flag.sy2_saturated = 1; - sy0 = sy1; - sy1 = sy2; - sy2 = sy; + screen[0].y = screen[1].y; + screen[1].y = screen[2].y; + screen[2].y = sy; } -auto CPU::GTE::pushSZ(i32 sz) -> void { +auto CPU::GTE::pushScreenZ(i32 sz) -> void { if(sz < 0x0000) sz = 0x0000, flag.sz3_saturated = 1; if(sz > 0xffff) sz = 0xffff, flag.sz3_saturated = 1; - sz0 = sz1; - sz1 = sz2; - sz2 = sz3; - sz3 = sz; + screen[0].z = screen[1].z; + screen[1].z = screen[2].z; + screen[2].z = screen[3].z; + screen[3].z = sz; } auto CPU::GTE::pushColor(u32 r, u32 g, u32 b) -> void { @@ -327,281 +343,252 @@ auto CPU::GTE::pushColor(u32 r, u32 g, u32 b) -> void { g = saturateRGB<2>(g); b = saturateRGB<3>(b); - rgb0 = rgb1; - rgb1 = rgb2; - rgb2 = r << 0 | g << 8 | b << 16 | u8(rgbc >> 24) << 24; + rgb[0] = rgb[1]; + rgb[1] = rgb[2]; + rgb[2] = r << 0 | g << 8 | b << 16 | rgbc.t << 24; } auto CPU::GTE::pushColor() -> void { - pushColor(mac1 >> 4, mac2 >> 4, mac3 >> 4); + pushColor(mac.r >> 4, mac.g >> 4, mac.b >> 4); } // auto CPU::GTE::avsz3() -> void { - setOtz(setMac<0>(i64(zsf3) * (sz1 + sz2 + sz3))); + setOtz(setMac<0>(i64(zsf3) * (screen[1].z + screen[2].z + screen[3].z))); } auto CPU::GTE::avsz4() -> void { - setOtz(setMac<0>(i64(zsf4) * (sz0 + sz1 + sz2 + sz3))); + setOtz(setMac<0>(i64(zsf4) * (screen[0].z + screen[1].z + screen[2].z + screen[3].z))); } auto CPU::GTE::cc() -> void { - print("CC\n"); + setMacAndIr(matrixMultiply(color, ir, backgroundColor)); + setMacAndIr(vectorMultiply({rgbc.r << 4, rgbc.g << 4, rgbc.b << 4}, ir)); + pushColor(); } auto CPU::GTE::cdp() -> void { - print("CDP\n"); -} - -auto CPU::GTE::dpc(i16 r, i16 g, i16 b) -> void { - setMacAndIr<1>((i64(rfc) << 12) - (r << 12)); - setMacAndIr<2>((i64(gfc) << 12) - (g << 12)); - setMacAndIr<3>((i64(bfc) << 12) - (b << 12)); + setMacAndIr(matrixMultiply(color, ir, backgroundColor)); - setMacAndIr<1>((i64(ir0) << 12) + ir1 * r); - setMacAndIr<2>((i64(ir0) << 12) + ir2 * g); - setMacAndIr<3>((i64(ir0) << 12) + ir3 * b); + v16 i = ir; + setMacAndIr<1>((i64(farColor.r) << 12) - ((rgbc.r << 4) * ir.x)); + setMacAndIr<2>((i64(farColor.g) << 12) - ((rgbc.g << 4) * ir.y)); + setMacAndIr<3>((i64(farColor.b) << 12) - ((rgbc.b << 4) * ir.z)); + setMacAndIr<1>(((rgbc.r << 4) * i.x) + ir.t * ir.x, lm); + setMacAndIr<2>(((rgbc.g << 4) * i.y) + ir.t * ir.y, lm); + setMacAndIr<3>(((rgbc.b << 4) * i.z) + ir.t * ir.z, lm); pushColor(); } -auto CPU::GTE::dcpl() -> void { - i16 i1 = ir1; - i16 i2 = ir2; - i16 i3 = ir3; - - setMacAndIr<1>((i64(rfc) << 12) - R * i1); - setMacAndIr<2>((i64(gfc) << 12) - G * i2); - setMacAndIr<3>((i64(bfc) << 12) - B * i3); +auto CPU::GTE::dpc(const v16& color) -> void { + setMacAndIr<1>((i64(farColor.r) << 12) - (color.r << 12)); + setMacAndIr<2>((i64(farColor.g) << 12) - (color.g << 12)); + setMacAndIr<3>((i64(farColor.b) << 12) - (color.b << 12)); - setMacAndIr<1>(R * i1 + ir0 * ir1, lm); - setMacAndIr<2>(G * i2 + ir0 * ir2, lm); - setMacAndIr<3>(B * i3 + ir0 * ir3, lm); + setMacAndIr(vectorMultiply({ir.t, ir.t, ir.t}, ir, color)); + pushColor(); +} +auto CPU::GTE::dcpl() -> void { + v16 i = ir; + v16 color = {rgbc.r << 4, rgbc.g << 4, rgbc.b << 4}; + setMacAndIr<1>((i64(farColor.r) << 12) - color.r * i.x); + setMacAndIr<2>((i64(farColor.g) << 12) - color.g * i.y); + setMacAndIr<3>((i64(farColor.b) << 12) - color.b * i.z); + + setMacAndIr<1>(color.r * i.x + ir.t * ir.x, lm); + setMacAndIr<2>(color.g * i.y + ir.t * ir.y, lm); + setMacAndIr<3>(color.b * i.z + ir.t * ir.z, lm); pushColor(); } auto CPU::GTE::dpcs() -> void { - dpc(R, G, B); + dpc({rgbc.r << 4, rgbc.g << 4, rgbc.b << 4}); } auto CPU::GTE::dpct() -> void { - dpc(rgb0 << 4, rgb1 << 4, rgb2 << 4); - dpc(rgb0 << 4, rgb1 << 4, rgb2 << 4); - dpc(rgb0 << 4, rgb1 << 4, rgb2 << 4); + dpc({rgb[0] << 4, rgb[1] << 4, rgb[2] << 4}); + dpc({rgb[0] << 4, rgb[1] << 4, rgb[2] << 4}); + dpc({rgb[0] << 4, rgb[1] << 4, rgb[2] << 4}); } auto CPU::GTE::gpf() -> void { - print("GPF\n"); + setMacAndIr(vectorMultiply(ir, {ir.t, ir.t, ir.t})); + pushColor(); } auto CPU::GTE::gpl() -> void { - print("GPL\n"); + setMacAndIr<1>((i64(mac.x) << sf) + ir.t * ir.x, lm); + setMacAndIr<2>((i64(mac.y) << sf) + ir.t * ir.y, lm); + setMacAndIr<3>((i64(mac.z) << sf) + ir.t * ir.z, lm); + pushColor(); } auto CPU::GTE::intpl() -> void { - i16 i1 = ir1; - i16 i2 = ir2; - i16 i3 = ir3; + v16 i = ir; + setMacAndIr<1>((i64(farColor.r) << 12) - (i64(i.x) << 12)); + setMacAndIr<2>((i64(farColor.g) << 12) - (i64(i.y) << 12)); + setMacAndIr<3>((i64(farColor.b) << 12) - (i64(i.z) << 12)); - setMacAndIr<1>((i64(rfc) << 12) - (i64(i1) << 12)); - setMacAndIr<2>((i64(gfc) << 12) - (i64(i2) << 12)); - setMacAndIr<3>((i64(bfc) << 12) - (i64(i3) << 12)); - - setMacAndIr<1>((i64(ir0) << 12) + ir1 * i1, lm); - setMacAndIr<2>((i64(ir0) << 12) + ir2 * i2, lm); - setMacAndIr<3>((i64(ir0) << 12) + ir3 * i3, lm); + setMacAndIr<1>((i64(ir.t) << 12) + ir.x * i.x, lm); + setMacAndIr<2>((i64(ir.t) << 12) + ir.y * i.y, lm); + setMacAndIr<3>((i64(ir.t) << 12) + ir.z * i.z, lm); pushColor(); } auto CPU::GTE::mvmva() -> void { - i32 tx, ty, tz; + v32 tr; switch(tv) { - case 0: tx = trx; ty = trY; tz = trz; break; - case 1: tx = rbk; ty = gbk; tz = bbk; break; - case 2: tx = rfc; ty = gfc; tz = bfc; break; - case 3: tx = 0; ty = 0; tz = 0; break; + case 0: tr = translation; break; + case 1: tr = backgroundColor; break; + case 2: tr = farColor; break; + case 3: tr = {0, 0, 0}; break; } - i16 vx, vy, vz; + v16 vector; switch(mv) { - case 0: vx = vx0; vy = vy0; vz = vz0; break; - case 1: vx = vx1; vy = vy1; vz = vz1; break; - case 2: vx = vx2; vy = vy2; vz = vz2; break; - case 3: vx = ir1; vy = ir2; vz = ir3; break; + case 0: vector = v.a; break; + case 1: vector = v.b; break; + case 2: vector = v.c; break; + case 3: vector = ir; break; } - i16 m11, m12, m13; - i16 m21, m22, m23; - i16 m31, m32, m33; + m16 matrix; switch(mm) { - case 0: //rotation - m11 = rt11; m12 = rt12; m13 = rt13; - m21 = rt21; m22 = rt22; m23 = rt23; - m31 = rt31; m32 = rt32; m33 = rt33; - break; - case 1: //light - m11 = l11; m12 = l12; m13 = l13; - m21 = l21; m22 = l22; m23 = l23; - m31 = l31; m32 = l32; m33 = l33; - break; - case 2: //color - m11 = lr1; m12 = lr2; m13 = lr3; - m21 = lg1; m22 = lg2; m23 = lg3; - m31 = lb1; m32 = lb2; m33 = lb3; - break; + case 0: matrix = rotation; break; + case 1: matrix = light; break; + case 2: matrix = color; break; case 3: //reserved - m11 = -R; m12 = +R; m13 = ir0; - m21 = rt13; m22 = rt13; m23 = rt13; - m31 = rt22; m32 = rt22; m33 = rt22; + matrix.a.x = -(rgbc.r << 4); matrix.a.y = +(rgbc.r << 4); matrix.a.z = ir.t; + matrix.b.x = rotation.a.z; matrix.b.y = rotation.a.z; matrix.b.z = rotation.a.z; + matrix.c.x = rotation.b.y; matrix.c.y = rotation.b.y; matrix.c.z = rotation.b.y; break; } - i64 x, y, z; if(tv != 2) { - x = extend<1>(extend<1>(extend<1>((i64(tx) << 12) + m11 * vx) + m12 * vy) + m13 * vz); - y = extend<2>(extend<2>(extend<2>((i64(ty) << 12) + m21 * vx) + m22 * vy) + m23 * vz); - z = extend<3>(extend<3>(extend<3>((i64(tz) << 12) + m31 * vx) + m32 * vy) + m33 * vz); + setMacAndIr(matrixMultiply(matrix, vector, tr)); } else { - setIr<1>(extend<1>((i64(tx) << 12) + m11 * vx) >> sf); - setIr<2>(extend<2>((i64(ty) << 12) + m21 * vx) >> sf); - setIr<3>(extend<3>((i64(tz) << 12) + m31 * vx) >> sf); - - x = extend<1>(extend<1>(m12 * vy) + m13 * vz); - y = extend<1>(extend<1>(m22 * vy) + m23 * vz); - z = extend<1>(extend<1>(m32 * vy) + m33 * vz); + setIr<1>(extend<1>((i64(tr.x) << 12) + matrix.a.x * vector.x) >> sf); + setIr<2>(extend<2>((i64(tr.y) << 12) + matrix.b.x * vector.x) >> sf); + setIr<3>(extend<3>((i64(tr.z) << 12) + matrix.c.x * vector.x) >> sf); + + v64 result; + result.x = extend<1>(extend<1>(matrix.a.y * vector.y) + matrix.a.z * vector.z); + result.y = extend<1>(extend<1>(matrix.b.y * vector.y) + matrix.b.z * vector.z); + result.z = extend<1>(extend<1>(matrix.c.y * vector.y) + matrix.c.z * vector.z); + setMacAndIr(result); } - - setMacAndIr<1>(x, lm); - setMacAndIr<2>(y, lm); - setMacAndIr<3>(z, lm); } template -auto CPU::GTE::nc(i32 vx, i32 vy, i32 vz) -> void { - i64 x = extend<1>(extend<1>(extend<1>(l11 * vx) + l12 * vy) + l13 * vz); - i64 y = extend<2>(extend<2>(extend<2>(l21 * vx) + l22 * vy) + l23 * vz); - i64 z = extend<3>(extend<3>(extend<3>(l31 * vx) + l32 * vy) + l33 * vz); - - setMacAndIr<1>(x, lm); - setMacAndIr<2>(y, lm); - setMacAndIr<3>(z, lm); - - x = extend<1>(extend<1>(extend<1>((i64(rbk) << 12) + lr1 * ir1) + lr2 * ir2) + lr3 * ir3); - y = extend<2>(extend<2>(extend<2>((i64(gbk) << 12) + lg1 * ir1) + lg2 * ir2) + lg3 * ir3); - z = extend<3>(extend<3>(extend<3>((i64(bbk) << 12) + lb1 * ir1) + lb2 * ir2) + lb3 * ir3); - - setMacAndIr<1>(x, lm); - setMacAndIr<2>(y, lm); - setMacAndIr<3>(z, lm); +auto CPU::GTE::nc(const v16& vector) -> void { + setMacAndIr(matrixMultiply(light, vector)); + setMacAndIr(matrixMultiply(color, ir, backgroundColor)); if constexpr(m == 1) { - setMacAndIr<1>(R * ir1); - setMacAndIr<2>(G * ir2); - setMacAndIr<3>(B * ir3); + setMacAndIr<1>((rgbc.r << 4) * ir.x); + setMacAndIr<2>((rgbc.g << 4) * ir.y); + setMacAndIr<3>((rgbc.b << 4) * ir.z); } if constexpr(m == 2) { - i16 i1 = ir1; - i16 i2 = ir2; - i16 i3 = ir3; - - setMacAndIr<1>((i64(rfc) << 12) - R * ir1); - setMacAndIr<2>((i64(gfc) << 12) - G * ir2); - setMacAndIr<3>((i64(bfc) << 12) - B * ir3); - - setMacAndIr<1>(R * i1 + ir0 * ir1, lm); - setMacAndIr<2>(G * i2 + ir0 * ir2, lm); - setMacAndIr<3>(B * i3 + ir0 * ir3, lm); + v16 i = ir; + setMacAndIr<1>((i64(farColor.r) << 12) - (rgbc.r << 4) * i.x); + setMacAndIr<2>((i64(farColor.g) << 12) - (rgbc.g << 4) * i.y); + setMacAndIr<3>((i64(farColor.b) << 12) - (rgbc.b << 4) * i.z); + + setMacAndIr<1>((rgbc.r << 4) * i.x + ir.t * ir.x, lm); + setMacAndIr<2>((rgbc.g << 4) * i.y + ir.t * ir.y, lm); + setMacAndIr<3>((rgbc.b << 4) * i.z + ir.t * ir.z, lm); } pushColor(); } auto CPU::GTE::nccs() -> void { - nc<1>(vx0, vy0, vz0); + nc<1>(v.a); } auto CPU::GTE::ncct() -> void { - nc<1>(vx0, vy0, vz0); - nc<1>(vx1, vy1, vz1); - nc<1>(vx2, vy2, vz2); + nc<1>(v.a); + nc<1>(v.b); + nc<1>(v.c); } auto CPU::GTE::ncds() -> void { - nc<2>(vx0, vy0, vz0); + nc<2>(v.a); } auto CPU::GTE::ncdt() -> void { - nc<2>(vx0, vy0, vz0); - nc<2>(vx1, vy1, vz1); - nc<2>(vx2, vy2, vz2); + nc<2>(v.a); + nc<2>(v.b); + nc<2>(v.c); } auto CPU::GTE::nclip() -> void { - i64 p0 = i64(sx0) * i64(sy1); - i64 p1 = i64(sx1) * i64(sy2); - i64 p2 = i64(sx2) * i64(sy0); - i64 p3 = i64(sx0) * i64(sy2); - i64 p4 = i64(sx1) * i64(sy0); - i64 p5 = i64(sx2) * i64(sy1); + i64 p0 = i64(screen[0].x) * i64(screen[1].y); + i64 p1 = i64(screen[1].x) * i64(screen[2].y); + i64 p2 = i64(screen[2].x) * i64(screen[0].y); + i64 p3 = i64(screen[0].x) * i64(screen[2].y); + i64 p4 = i64(screen[1].x) * i64(screen[0].y); + i64 p5 = i64(screen[2].x) * i64(screen[1].y); setMac<0>(p0 + p1 + p2 - p3 - p4 - p5); } auto CPU::GTE::ncs() -> void { - nc<0>(vx0, vy0, vz0); + nc<0>(v.a); } auto CPU::GTE::nct() -> void { - nc<0>(vx0, vy0, vz0); - nc<0>(vx1, vy1, vz1); - nc<0>(vx2, vy2, vz2); + nc<0>(v.a); + nc<0>(v.b); + nc<0>(v.c); } auto CPU::GTE::op() -> void { - print("OP\n"); + setMac<1>(rotation.b.y * ir.z - rotation.c.z * ir.y); + setMac<2>(rotation.c.z * ir.x - rotation.a.x * ir.z); + setMac<3>(rotation.a.x * ir.y - rotation.b.y * ir.x); + + setIr<1>(mac.x, lm); + setIr<2>(mac.y, lm); + setIr<3>(mac.z, lm); } //rotation, translation, and perspective transformation -auto CPU::GTE::rtp(i32 vx, i32 vy, i32 vz, bool last) -> void { - i64 x = extend<1>(extend<1>(extend<1>((i64(trx) << 12) + rt11 * vx) + rt12 * vy) + rt13 * vz); - i64 y = extend<2>(extend<2>(extend<2>((i64(trY) << 12) + rt21 * vx) + rt22 * vy) + rt23 * vz); - i64 z = extend<3>(extend<3>(extend<3>((i64(trz) << 12) + rt31 * vx) + rt32 * vy) + rt33 * vz); - - setMacAndIr<1>(x, lm); - setMacAndIr<2>(y, lm); - setMac<3>(z); - saturate<3>(z >> 12); - ir3 = std::clamp(mac3, lm ? 0 : -0x8000, +0x7fff); - - pushSZ(z >> 12); - i64 dv = divide(h, sz3); - i32 sx = setMac<0>(dv * ir1 + ofx); - i32 sy = setMac<0>(dv * ir2 + ofy); - pushSX(sx >> 16); - pushSY(sy >> 16); +auto CPU::GTE::rtp(v16 vector, bool last) -> void { + v64 result = matrixMultiply(rotation, vector, translation); + setMacAndIr<1>(result.x, lm); + setMacAndIr<2>(result.y, lm); + setMac<3>(result.z); + saturate<3>(result.z >> 12); + ir.z = std::clamp(mac.z, lm ? 0 : -0x8000, +0x7fff); + + pushScreenZ(result.z >> 12); + i64 dv = divide(h, screen[3].z); + i32 sx = setMac<0>(dv * ir.x + ofx); + i32 sy = setMac<0>(dv * ir.y + ofy); + pushScreenX(sx >> 16); + pushScreenY(sy >> 16); if(!last) return; i64 sz = setMac<0>(dv * dqa + dqb); - ir0 = saturate<0>(sz >> 12); + ir.t = saturate<0>(sz >> 12); } auto CPU::GTE::rtps() -> void { - rtp(vx0, vy0, vz0, 1); + rtp(v.a, 1); } auto CPU::GTE::rtpt() -> void { - rtp(vx0, vy0, vz0, 0); - rtp(vx1, vy1, vz1, 0); - rtp(vx2, vy2, vz2, 1); + rtp(v.a, 0); + rtp(v.b, 0); + rtp(v.c, 1); } auto CPU::GTE::sqr() -> void { - print("SQR\n"); + setMacAndIr(vectorMultiply(ir, ir)); } - -#undef R -#undef G -#undef B diff --git a/ares/ps1/cpu/core/gte.hpp b/ares/ps1/cpu/core/gte.hpp index 07b5c1bcab..b65d93b42b 100644 --- a/ares/ps1/cpu/core/gte.hpp +++ b/ares/ps1/cpu/core/gte.hpp @@ -1,6 +1,49 @@ //{ //Geometry Transformation Engine struct GTE { + //color + struct c32 { + u8 r, g, b, t; + }; + + //screen + struct s16 { + i16 x, y; + u16 z; + }; + + //16-bit vector + struct v16 { union { + struct { i16 x, y, z; }; + struct { i16 r, g, b; }; + };}; + + //32-bit vector + struct v32 { union { + struct { i32 x, y, z; }; + struct { i32 r, g, b; }; + };}; + + //64-bit vector + struct v64 { + i64 x, y, z; + }; + + //16-bit vector with temporary + struct v16t : v16 { + i16 t; + }; + + //32-bit vector with temporary + struct v32t : v32 { + i32 t; + }; + + //16-bit matrix + struct m16 { + v16 a, b, c; + }; + //gte.cpp auto constructTable() -> void; @@ -21,12 +64,15 @@ template auto setMac(i64 value) -> i64; template auto setIr(i32 value, bool lm = 0) -> void; template auto setMacAndIr(i64 value, bool lm = 0) -> void; + auto setMacAndIr(const v64& vector) -> void; auto setOtz(i64 value) -> void; + auto matrixMultiply(const m16&, const v16&, const v32& = {0, 0, 0}) -> v64; + auto vectorMultiply(const v16&, const v16&, const v16& = {0, 0, 0}) -> v64; auto divide(u32 lhs, u32 rhs) -> u32; - auto pushSX(i32 sx) -> void; - auto pushSY(i32 sy) -> void; - auto pushSZ(i32 sz) -> void; + auto pushScreenX(i32 sx) -> void; + auto pushScreenY(i32 sy) -> void; + auto pushScreenZ(i32 sz) -> void; auto pushColor(u32 r, u32 g, u32 b) -> void; auto pushColor() -> void; @@ -35,14 +81,14 @@ auto cc() -> void; auto cdp() -> void; auto dcpl() -> void; - auto dpc(i16 r, i16 g, i16 b) -> void; + auto dpc(const v16&) -> void; auto dpcs() -> void; auto dpct() -> void; auto gpf() -> void; auto gpl() -> void; auto intpl() -> void; auto mvmva() -> void; - template auto nc(i32 vx, i32 vy, i32 vz) -> void; + template auto nc(const v16&) -> void; auto nccs() -> void; auto ncct() -> void; auto ncds() -> void; @@ -51,43 +97,31 @@ auto ncs() -> void; auto nct() -> void; auto op() -> void; - auto rtp(i32 vx, i32 vy, i32 vz, bool last) -> void; + auto rtp(v16, bool last) -> void; auto rtps() -> void; auto rtpt() -> void; auto sqr() -> void; - i16 vx0, vy0, vz0; - i16 vx1, vy1, vz1; - i16 vx2, vy2, vz2; - u32 rgbc; - u16 otz; - i16 ir0, ir1, ir2, ir3; - i16 sx0, sy0; - i16 sx1, sy1; - i16 sx2, sy2; - u16 sz0, sz1, sz2, sz3; - u32 rgb0, rgb1, rgb2; - u32 res1; - i32 mac0, mac1, mac2, mac3; - u32 irgb, orgb; - u32 lzcs, lzcr; - i16 rt11, rt12, rt13; - i16 rt21, rt22, rt23; - i16 rt31, rt32, rt33; - i32 trx, trY, trz; - i16 l11, l12, l13; - i16 l21, l22, l23; - i16 l31, l32, l33; - i32 rbk, gbk, bbk; - i16 lr1, lr2, lr3; - i16 lg1, lg2, lg3; - i16 lb1, lb2, lb3; - i32 rfc, gfc, bfc; - i32 ofx, ofy; - u16 h; - i16 dqa; - i32 dqb; - i16 zsf3, zsf4; + m16 v; //VX, VY, VZ + c32 rgbc; + u16 otz; + v16t ir; + s16 screen[4]; //SX, SY, SZ (screen[3].{x,y} do not exist) + u32 rgb[4]; //RGB3 is reserved + v32t mac; + u32 irgb, orgb; + u32 lzcs, lzcr; + m16 rotation; //RT1, RT2, RT3 + v32 translation; //TRX, TRY, TRZ + m16 light; //L1, L2, L3 + v32 backgroundColor; //RBK, GBK, BBK + m16 color; //LR, LG, LB + v32 farColor; //RFC, GFC, BFC + i32 ofx, ofy; + u16 h; + i16 dqa; + i32 dqb; + i16 zsf3, zsf4; struct Flag { u32 value; BitField<32, 12> ir0_saturated {&value}; @@ -97,7 +131,7 @@ BitField<32, 16> mac0_overflow {&value}; BitField<32, 17> divide_overflow{&value}; BitField<32, 18> sz3_saturated {&value}; - BitField<32, 19> otz_saturated {&value}; + BitField<32, 18> otz_saturated {&value}; BitField<32, 19> b_saturated {&value}; BitField<32, 20> g_saturated {&value}; BitField<32, 21> r_saturated {&value}; @@ -135,24 +169,24 @@ auto instructionAVSZ3() -> void; auto instructionAVSZ4() -> void; - auto instructionCC() -> void; - auto instructionCDP() -> void; - auto instructionDCPL() -> void; - auto instructionDPCS() -> void; - auto instructionDPCT() -> void; - auto instructionGPF() -> void; - auto instructionGPL() -> void; - auto instructionINTPL() -> void; - auto instructionMVMVA(u8 tv, u8 mv, u8 mm) -> void; - auto instructionNCCS() -> void; - auto instructionNCCT() -> void; + auto instructionCC(bool lm, u8 sf) -> void; + auto instructionCDP(bool lm, u8 sf) -> void; + auto instructionDCPL(bool lm, u8 sf) -> void; + auto instructionDPCS(bool lm, u8 sf) -> void; + auto instructionDPCT(bool lm, u8 sf) -> void; + auto instructionGPF(bool lm, u8 sf) -> void; + auto instructionGPL(bool lm, u8 sf) -> void; + auto instructionINTPL(bool lm, u8 sf) -> void; + auto instructionMVMVA(bool lm, u8 tv, u8 mv, u8 mm, u8 sf) -> void; + auto instructionNCCS(bool lm, u8 sf) -> void; + auto instructionNCCT(bool lm, u8 sf) -> void; auto instructionNCDS(bool lm, u8 sf) -> void; auto instructionNCDT(bool lm, u8 sf) -> void; auto instructionNCLIP() -> void; - auto instructionNCS() -> void; - auto instructionNCT() -> void; - auto instructionOP() -> void; + auto instructionNCS(bool lm, u8 sf) -> void; + auto instructionNCT(bool lm, u8 sf) -> void; + auto instructionOP(bool lm, u8 sf) -> void; auto instructionRTPS(bool lm, u8 sf) -> void; auto instructionRTPT(bool lm, u8 sf) -> void; - auto instructionSQR() -> void; + auto instructionSQR(bool lm, u8 sf) -> void; //}; diff --git a/ares/ps1/cpu/core/memory.cpp b/ares/ps1/cpu/core/memory.cpp index 9573be322d..779b1c0384 100644 --- a/ares/ps1/cpu/core/memory.cpp +++ b/ares/ps1/cpu/core/memory.cpp @@ -1,9 +1,9 @@ -inline auto CPU::readByte(u32 address) -> u8 { +inline auto CPU::readByte(u32 address) -> u32 { if(scc.status.cache.isolate) return cache.readByte(address); return bus.readByte(address); } -inline auto CPU::readHalf(u32 address) -> u16 { +inline auto CPU::readHalf(u32 address) -> u32 { if(scc.status.cache.isolate) return cache.readHalf(address); return bus.readHalf(address); } @@ -13,12 +13,12 @@ inline auto CPU::readWord(u32 address) -> u32 { return bus.readWord(address); } -inline auto CPU::writeByte(u32 address, u8 data) -> void { +inline auto CPU::writeByte(u32 address, u32 data) -> void { if(scc.status.cache.isolate) return cache.writeByte(address, data); return bus.writeByte(address, data); } -inline auto CPU::writeHalf(u32 address, u16 data) -> void { +inline auto CPU::writeHalf(u32 address, u32 data) -> void { if(scc.status.cache.isolate) return cache.writeHalf(address, data); return bus.writeHalf(address, data); } diff --git a/ares/ps1/dma/transfer.cpp b/ares/ps1/dma/transfer.cpp index 57f89406ee..b259ed293f 100644 --- a/ares/ps1/dma/transfer.cpp +++ b/ares/ps1/dma/transfer.cpp @@ -12,10 +12,10 @@ auto DMA::transferLinear(uint c) -> void { if(channel[c].direction == 0) { u32 data = 0; if(c == 0) { - debug.unimplemented("DMA MDECin read"); + debug(unimplemented, "DMA MDECin read"); } if(c == 1) { - debug.unimplemented("DMA MDECout read"); + debug(unimplemented, "DMA MDECout read"); } if(c == 2) { data = gpu.readGP0(); @@ -27,7 +27,7 @@ auto DMA::transferLinear(uint c) -> void { data = spu.readDMA(); } if(c == 5) { - debug.unimplemented("DMA PIO read"); + debug(unimplemented, "DMA PIO read"); } if(c == 6) { data = address - 4 & 0xfffffc; //point to previous entry @@ -39,25 +39,25 @@ auto DMA::transferLinear(uint c) -> void { if(channel[c].direction == 1) { u32 data = bus.readWord(address); if(c == 0) { - debug.unimplemented("DMA MDECin write"); + debug(unimplemented, "DMA MDECin write"); } if(c == 1) { - debug.unimplemented("DMA MDECout write"); + debug(unimplemented, "DMA MDECout write"); } if(c == 2) { gpu.writeGP0(data); } if(c == 3) { - debug.unimplemented("DMA CDROM write"); + debug(unimplemented, "DMA CDROM write"); } if(c == 4) { spu.writeDMA(data); } if(c == 5) { - debug.unimplemented("DMA PIO write"); + debug(unimplemented, "DMA PIO write"); } if(c == 6) { - debug.unimplemented("DMA OTC write"); + debug(unimplemented, "DMA OTC write"); } } @@ -89,25 +89,25 @@ auto DMA::transferLinked(uint c) -> void { u32 data = bus.readWord(address); if(c == 0) { - debug.unimplemented("DMA MDECin linked"); + debug(unimplemented, "DMA MDECin linked"); } if(c == 1) { - debug.unimplemented("DMA MDECout linked"); + debug(unimplemented, "DMA MDECout linked"); } if(c == 2) { gpu.writeGP0(data); } if(c == 3) { - debug.unimplemented("DMA CDROM linked"); + debug(unimplemented, "DMA CDROM linked"); } if(c == 4) { - debug.unimplemented("DMA SPU linked"); + debug(unimplemented, "DMA SPU linked"); } if(c == 5) { - debug.unimplemented("DMA PIO linked"); + debug(unimplemented, "DMA PIO linked"); } if(c == 6) { - debug.unimplemented("DMA OTC linked"); + debug(unimplemented, "DMA OTC linked"); } } diff --git a/ares/ps1/gpu/gp0.cpp b/ares/ps1/gpu/gp0.cpp index 3fc478de05..c7a2408da1 100644 --- a/ares/ps1/gpu/gp0.cpp +++ b/ares/ps1/gpu/gp0.cpp @@ -291,7 +291,7 @@ auto GPU::writeGP0(u32 value) -> void { //copy rectangle (VRAM to VRAM) if(command == 0x80) { if(queue.write(value) < 4) return; - debug.unimplemented("GPU copy VRAM to VRAM"); + debug(unimplemented, "GPU copy VRAM to VRAM"); return queue.reset(); } @@ -372,5 +372,5 @@ auto GPU::writeGP0(u32 value) -> void { return; } - debug.unimplemented("GP0(", hex(command, 2L), ") = ", hex(value, 6L)); + debug(unimplemented, "GP0(", hex(command, 2L), ") = ", hex(value, 6L)); } diff --git a/ares/ps1/ps1.hpp b/ares/ps1/ps1.hpp index f0d176d916..f6700cf63d 100644 --- a/ares/ps1/ps1.hpp +++ b/ares/ps1/ps1.hpp @@ -6,6 +6,19 @@ #include namespace ares::PlayStation { + struct Accuracy { + //enable all accuracy flags + static constexpr bool Reference = 1; + + struct CPU { + //0 = dynamic recompiler; 1 = interpreter + static constexpr bool Interpreter = 1 | Reference; + + //exceptions when the CPU accesses unaligned memory addresses + static constexpr bool AlignmentErrors = 0 | Reference; + }; + }; + struct Region { inline static auto NTSCJ() -> bool; inline static auto NTSCU() -> bool;