From bbc11edd7dc2c07d6505659188fbac16a9ada456 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Thu, 9 Jun 2022 18:32:21 -0400
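Subject: [PATCH 01/20] Rework WriteD3 based on hardware testing

Raw writes through 0xFFD3 no longer depend on the sample format. A write
is accepted only when the uppermost bit of the current address is set; it
is then always stored as a 16-bit value (high byte first) at
current address * 2, and the current address is incremented. Otherwise
the write is dropped and an error is logged.
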
---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index bdfcb65943b..b039d275a5c 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -45,16 +45,20 @@ void Accelerator::WriteD3(u16 value)
   // Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too)
   // Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too)
 
-  switch (m_sample_format)
+  // Writes only seem to be accepted when the upper most bit of the address is set
+  if (m_current_address & 0x80000000)
   {
-  case 0xA:  // u16 writes
+    // The format doesn't matter for D3 writes, all writes are u16 and the address is treated as if
+    // we are in a 16-bit format
     WriteMemory(m_current_address * 2, value >> 8);
     WriteMemory(m_current_address * 2 + 1, value & 0xFF);
     m_current_address++;
-    break;
-  default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_write_aram_d3() - unknown format {:#x}", m_sample_format);
-    break;
+  }
+  else
+  {
+    ERROR_LOG_FMT(DSPLLE,
+                  "dsp_write_aram_d3() - tried to write to address {:#x} without high bit set",
+                  m_current_address);
   }
 }
 

From 256d9f870240bb245b43eba1d3553f32574eee29 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Fri, 10 Jun 2022 21:38:26 -0400
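Subject: [PATCH 02/20] Improve ReadD3 emulation

The access size is now taken from the low two bits of the sample format.
This adds 4-bit reads and handles the invalid size 3, which logs an error
and advances only the low two bits of the current address.

The current address is reset to the start address, and the end exception
is raised, only when the address that was just read equals the end
address. The remaining hardware edge cases in 4-bit and 8-bit mode are
described in comments but are not emulated yet.
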
---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 55 +++++++++++++++++++++----
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index b039d275a5c..f487f97ca6b 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -15,26 +15,67 @@ u16 Accelerator::ReadD3()
 {
   u16 val = 0;
 
-  switch (m_sample_format)
+  // The lower two bits of the sample format indicate the access size
+  switch (m_sample_format & 3)
   {
-  case 0x5:  // u8 reads
+  case 0x0:  // u4 reads
+    val = ReadMemory(m_current_address / 2);
+    if (m_current_address & 1)
+      val &= 0xf;
+    else
+      val >>= 4;
+    m_current_address++;
+    break;
+  case 0x1:  // u8 reads
     val = ReadMemory(m_current_address);
     m_current_address++;
     break;
-  case 0x6:  // u16 reads
+  case 0x2:  // u16 reads
     val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
     m_current_address++;
     break;
-  default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_d3() - unknown format {:#x}", m_sample_format);
+  case 0x3:  // produces garbage, but affects the current address
+    ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_d3() - bad format {:#x}", m_sample_format);
+    m_current_address = (m_current_address & ~3) | ((m_current_address + 1) & 3);
     break;
   }
 
-  if (m_current_address >= m_end_address)
+  // There are edge cases that are currently not handled here in u4 and u8 mode
+  // In u8 mode, if ea & 1 == 0 and ca == ea + 1, the accelerator can be read twice. Upon the second
+  // read, the data returned appears to be the other half of the u16 from the first read, and the
+  // DSP resets the current address and throws exception 3
+
+  // During these reads, ca is not incremented normally.
+
+  // Instead, incrementing ca works like this: ca = (ca & ~ 3) | ((ca + 1) & 3)
+  // u4 mode extends this further.
+
+  // When ea & 3 == 0, and ca in [ea + 1, ea + 3], the accelerator can be read (4 - (ca - ea - 1))
+  // times. On the last read, the data returned appears to be the remaining nibble of the u16 from
+  // the first read, and the DSP resets the current address and throws exception 3
+
+  // When ea & 3 == 1, and ca in [ea + 1, ea + 2], the accelerator can be read (4 - (ca - ea - 1))
+  // times. On the last read, the data returned appears to be the remaining nibble of the u16 from
+  // the first read, and the DSP resets the current address and throws exception 3
+
+  // When ea & 3 == 2, and ca == ea + 1, the accelerator can be read 4 times. On the last read, the
+  // data returned appears to be the remaining nibble of the u16 from the first read, and the DSP
+  // resets the current address and throws exception 3
+
+  // There are extra extra edge cases if ca, ea, and potentially other registers are adjusted during
+  // this pre-reset phase
+
+  // The cleanest way to emulate the normal non-edge behavior is to only reset things if we just
+  // read the end address If the current address is larger than the end address (and not in the edge
+  // range), it ignores the end address
+  if (m_current_address - 1 == m_end_address)
   {
-    // Set address back to start address. (never seen this here!)
+    // Set address back to start address (confirmed on hardware)
     m_current_address = m_start_address;
+    OnEndException();
   }
+
+  SetCurrentAddress(m_current_address);
   return val;
 }
 

From 04c7c1a4a1f4198c9d862081a3cac8b5ac31a7da Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sat, 18 Jun 2022 16:46:51 -0400
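Subject: [PATCH 03/20] Rename accelerator accesses to 'raw' and 'sample'

ReadD3/WriteD3 become ReadRaw/WriteRaw and Read becomes ReadSample.
DSP_ACDATA1 (0xd3) becomes DSP_ACDRAW and DSP_ACCELERATOR (0xdd) becomes
DSP_ACDSAMP; the register table in the DSP manual is updated to match.
Only names, comments and log message text change.
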
---
 Source/Core/Core/DSP/DSPAccelerator.cpp            | 14 +++++++-------
 Source/Core/Core/DSP/DSPAccelerator.h              |  6 +++---
 Source/Core/Core/DSP/DSPCore.h                     | 10 +++++-----
 Source/Core/Core/DSP/DSPHWInterface.cpp            | 12 ++++++------
 Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h        |  2 +-
 .../GameCube_DSP_Users_Manual.tex                  |  4 ++--
 6 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index f487f97ca6b..b64e0f6fa6f 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -11,7 +11,7 @@
 
 namespace DSP
 {
-u16 Accelerator::ReadD3()
+u16 Accelerator::ReadRaw()
 {
   u16 val = 0;
 
@@ -35,7 +35,7 @@ u16 Accelerator::ReadD3()
     m_current_address++;
     break;
   case 0x3:  // produces garbage, but affects the current address
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_d3() - bad format {:#x}", m_sample_format);
+    ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_raw() - bad format {:#x}", m_sample_format);
     m_current_address = (m_current_address & ~3) | ((m_current_address + 1) & 3);
     break;
   }
@@ -79,7 +79,7 @@ u16 Accelerator::ReadD3()
   return val;
 }
 
-void Accelerator::WriteD3(u16 value)
+void Accelerator::WriteRaw(u16 value)
 {
   // Zelda ucode writes a bunch of zeros to ARAM through d3 during
   // initialization.  Don't know if it ever does it later, too.
@@ -89,7 +89,7 @@ void Accelerator::WriteD3(u16 value)
   // Writes only seem to be accepted when the upper most bit of the address is set
   if (m_current_address & 0x80000000)
   {
-    // The format doesn't matter for D3 writes, all writes are u16 and the address is treated as if
+    // The format doesn't matter for raw writes, all writes are u16 and the address is treated as if
     // we are in a 16-bit format
     WriteMemory(m_current_address * 2, value >> 8);
     WriteMemory(m_current_address * 2 + 1, value & 0xFF);
@@ -98,12 +98,12 @@ void Accelerator::WriteD3(u16 value)
   else
   {
     ERROR_LOG_FMT(DSPLLE,
-                  "dsp_write_aram_d3() - tried to write to address {:#x} without high bit set",
+                  "dsp_write_aram_raw() - tried to write to address {:#x} without high bit set",
                   m_current_address);
   }
 }
 
-u16 Accelerator::Read(const s16* coefs)
+u16 Accelerator::ReadSample(const s16* coefs)
 {
   if (m_reads_stopped)
     return 0x0000;
@@ -177,7 +177,7 @@ u16 Accelerator::Read(const s16* coefs)
     m_current_address += 1;
     break;
   default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator() - unknown format {:#x}", m_sample_format);
+    ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() - unknown format {:#x}", m_sample_format);
     step_size_bytes = 2;
     m_current_address += 1;
     val = 0;
diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index 0a762f09da3..76368bb705e 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -14,10 +14,10 @@ class Accelerator
 public:
   virtual ~Accelerator() = default;
 
-  u16 Read(const s16* coefs);
+  u16 ReadSample(const s16* coefs);
   // Zelda ucode reads ARAM through 0xffd3.
-  u16 ReadD3();
-  void WriteD3(u16 value);
+  u16 ReadRaw();
+  void WriteRaw(u16 value);
 
   u32 GetStartAddress() const { return m_start_address; }
   u32 GetEndAddress() const { return m_end_address; }
diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h
index a12f49270d7..da21e008375 100644
--- a/Source/Core/Core/DSP/DSPCore.h
+++ b/Source/Core/Core/DSP/DSPCore.h
@@ -152,10 +152,10 @@ enum : u32
   DSP_DSMAH = 0xce,  // DSP DMA Address High (External)
   DSP_DSMAL = 0xcf,  // DSP DMA Address Low (External)
 
-  DSP_FORMAT = 0xd1,   // Sample format
-  DSP_ACUNK = 0xd2,    // Set to 3 on my dumps
-  DSP_ACDATA1 = 0xd3,  // Used only by Zelda ucodes
-  DSP_ACSAH = 0xd4,    // Start of loop
+  DSP_FORMAT = 0xd1,  // Sample format
+  DSP_ACUNK = 0xd2,   // Set to 3 on my dumps
+  DSP_ACDRAW = 0xd3,  // Raw accelerator accesses
+  DSP_ACSAH = 0xd4,   // Start of loop
   DSP_ACSAL = 0xd5,
   DSP_ACEAH = 0xd6,  // End of sample (and loop)
   DSP_ACEAL = 0xd7,
@@ -164,7 +164,7 @@ enum : u32
   DSP_PRED_SCALE = 0xda,  // ADPCM predictor and scale
   DSP_YN1 = 0xdb,
   DSP_YN2 = 0xdc,
-  DSP_ACCELERATOR = 0xdd,  // ADPCM accelerator read. Used by AX.
+  DSP_ACDSAMP = 0xdd,  // Accelerator sample reads, processed differently depending on FORMAT
   DSP_GAIN = 0xde,
   DSP_ACUNK2 = 0xdf,  // Set to 0xc on my dumps
 
diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp
index 3866dd1bde8..49aff51392e 100644
--- a/Source/Core/Core/DSP/DSPHWInterface.cpp
+++ b/Source/Core/Core/DSP/DSPHWInterface.cpp
@@ -170,8 +170,8 @@ void SDSP::WriteIFX(u32 address, u16 value)
   case DSP_PRED_SCALE:
     m_accelerator->SetPredScale(value);
     break;
-  case DSP_ACDATA1:  // Accelerator write (Zelda type) - "UnkZelda"
-    m_accelerator->WriteD3(value);
+  case DSP_ACDRAW:  // Raw accelerator write
+    m_accelerator->WriteRaw(value);
     break;
 
   default:
@@ -237,10 +237,10 @@ u16 SDSP::ReadIFXImpl(u16 address)
     return m_accelerator->GetYn2();
   case DSP_PRED_SCALE:
     return m_accelerator->GetPredScale();
-  case DSP_ACCELERATOR:  // ADPCM Accelerator reads
-    return m_accelerator->Read(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0]));
-  case DSP_ACDATA1:  // Accelerator reads (Zelda type) - "UnkZelda"
-    return m_accelerator->ReadD3();
+  case DSP_ACDSAMP:  // Processed sample accelerator read
+    return m_accelerator->ReadSample(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0]));
+  case DSP_ACDRAW:  // Raw accelerator read
+    return m_accelerator->ReadRaw();
 
   default:
   {
diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
index ee85f8fb4bd..c7d070750fc 100644
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
@@ -189,7 +189,7 @@ void AcceleratorSetup(HLEAccelerator* accelerator, PB_TYPE* pb)
 // by the accelerator on real hardware).
 u16 AcceleratorGetSample(HLEAccelerator* accelerator)
 {
-  return accelerator->Read(accelerator->acc_pb->adpcm.coefs);
+  return accelerator->ReadSample(accelerator->acc_pb->adpcm.coefs);
 }
 
 // Reads samples from the input callback, resamples them to <count> samples at
diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index be426f57907..3c1f7018436 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -695,7 +695,7 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \multicolumn{3}{|l|}{\textit{Accelerator}}                            \\ \hline
 \Address{0xFFD1} & \Register{FORMAT}  & Accelerator sample format     \\ \hline
 \Address{0xFFD2} & \Register{ACUNK1}  & Unknown, usually 3            \\ \hline
-\Address{0xFFD3} & \Register{ACDATA1} & Alternative ARAM interface    \\ \hline
+\Address{0xFFD3} & \Register{ACDRAW}  & Accelerator raw data          \\ \hline
 \Address{0xFFD4} & \Register{ACSAH}   & Accelerator start address H   \\ \hline
 \Address{0xFFD5} & \Register{ACSAL}   & Accelerator start address L   \\ \hline
 \Address{0xFFD6} & \Register{ACEAH}   & Accelerator end address H     \\ \hline
@@ -705,7 +705,7 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \Address{0xFFDA} & \Register{SCALE}   & ADPCM predictor and scale     \\ \hline
 \Address{0xFFDB} & \Register{YN1}     & ADPCM YN1                     \\ \hline
 \Address{0xFFDC} & \Register{YN2}     & ADPCM YN2                     \\ \hline
-\Address{0xFFDD} & \Register{ACDAT}   & Accelerator data              \\ \hline
+\Address{0xFFDD} & \Register{ACDSAMP} & Accelerator processed sample  \\ \hline
 \Address{0xFFDE} & \Register{GAIN}    & Gain                          \\ \hline
 \Address{0xFFDF} & \Register{ACUNK2}  & Unknown, usually \Value{0x0C} \\ \hline
 \Address{0xFFED} & \Register{AMDM}    & ARAM DMA Request Mask         \\ \hline

From ac2fdefcb4177f778daa5986b6452e7a62e8da1b Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sat, 18 Jun 2022 18:38:31 -0400
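Subject: [PATCH 04/20] Refactor accelerator format as a bitfield, use
 gain/yn1/yn2 in PCM mode

The sample format is now decoded as a bitfield: bits 0-1 select the
access size, bit 2 is "raw only", bit 3 selects ADPCM or PCM decoding,
bits 4-5 select the gain shift (11, 0 or 16) and bits 6-15 are unknown.
Raw and sample reads share GetCurrentSample() to fetch the value at the
current address.

Sample reads with the raw-only bit set return 0 and log an error, as do
ADPCM reads with a non-4-bit size. PCM sample reads now compute
((gain * sample) >> shift) + ((coef1 * yn1) >> shift) +
((coef2 * yn2) >> shift) instead of returning the raw sample.

Writes and reads of DSP_GAIN are routed to the accelerator through
SetGain()/GetGain().
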
---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 151 +++++++++++++++---------
 Source/Core/Core/DSP/DSPAccelerator.h   |  44 ++++++-
 Source/Core/Core/DSP/DSPHWInterface.cpp |  11 +-
 3 files changed, 140 insertions(+), 66 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index b64e0f6fa6f..67663b77957 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -11,33 +11,42 @@
 
 namespace DSP
 {
-u16 Accelerator::ReadRaw()
+u16 Accelerator::GetCurrentSample()
 {
   u16 val = 0;
-
   // The lower two bits of the sample format indicate the access size
-  switch (m_sample_format & 3)
+  switch (m_sample_format.size)
   {
-  case 0x0:  // u4 reads
-    val = ReadMemory(m_current_address / 2);
+  case FormatSize::Size4Bit:
+    val = ReadMemory(m_current_address >> 1);
     if (m_current_address & 1)
       val &= 0xf;
     else
       val >>= 4;
-    m_current_address++;
     break;
-  case 0x1:  // u8 reads
+  case FormatSize::Size8Bit:
     val = ReadMemory(m_current_address);
-    m_current_address++;
     break;
-  case 0x2:  // u16 reads
+  case FormatSize::Size16Bit:
     val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
+    break;
+  default:  // produces garbage, but affects the current address
+    ERROR_LOG_FMT(DSPLLE, "dsp_get_current_sample() - bad format {:#x}", m_sample_format.hex);
+    break;
+  }
+  return val;
+}
+
+u16 Accelerator::ReadRaw()
+{
+  u16 val = GetCurrentSample();
+  if (m_sample_format.size != FormatSize::SizeInvalid)
+  {
     m_current_address++;
-    break;
-  case 0x3:  // produces garbage, but affects the current address
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_raw() - bad format {:#x}", m_sample_format);
+  }
+  else
+  {
     m_current_address = (m_current_address & ~3) | ((m_current_address + 1) & 3);
-    break;
   }
 
   // There are edge cases that are currently not handled here in u4 and u8 mode
@@ -108,34 +117,69 @@ u16 Accelerator::ReadSample(const s16* coefs)
   if (m_reads_stopped)
     return 0x0000;
 
-  u16 val;
-  u8 step_size_bytes = 0;
+  if (m_sample_format.raw_only)
+  {
+    // Seems to return garbage on hardware
+    ERROR_LOG_FMT(
+        DSPLLE,
+        "dsp_read_accelerator_sample() tried sample read with raw only bit set for format {:#x}",
+        m_sample_format.hex);
+    return 0x0000;
+  }
 
-  // let's do the "hardware" decode DSP_FORMAT is interesting - the Zelda
-  // ucode seems to indicate that the bottom two bits specify the "read size"
-  // and the address multiplier.  The bits above that may be things like sign
-  // extension and do/do not use ADPCM.  It also remains to be figured out
-  // whether there's a difference between the usual accelerator "read
-  // address" and 0xd3.
-  switch (m_sample_format)
+  if (m_sample_format.unk != 0)
   {
-  case 0x00:  // ADPCM audio
+    WARN_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() format {:#x} has unknown upper bits set",
+                 m_sample_format.hex);
+  }
+
+  u16 val = 0;
+  u8 step_size = 0;
+
+  s16 raw_sample = GetCurrentSample();
+  int coef_idx = (m_pred_scale >> 4) & 0x7;
+
+  s32 coef1 = coefs[coef_idx * 2 + 0];
+  s32 coef2 = coefs[coef_idx * 2 + 1];
+
+  u8 gain_shift = 0;
+  switch (m_sample_format.gain_cfg)
   {
+  case FormatGainCfg::GainShift11:
+    gain_shift = 11;
+    break;
+  case FormatGainCfg::GainShift0:
+    gain_shift = 0;
+    break;
+  case FormatGainCfg::GainShift16:
+    gain_shift = 16;
+    break;
+  default:
+    ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() invalid gain mode in format {:#x}",
+                  m_sample_format.hex);
+    break;
+  }
+
+  switch (m_sample_format.decode)
+  {
+  case FormatDecode::ADPCM:  // ADPCM audio
+  {
+    if (m_sample_format.size != FormatSize::Size4Bit)
+    {
+      ERROR_LOG_FMT(
+          DSPLLE, "dsp_read_accelerator_sample() tried to read ADPCM with bad size in format {:#x}",
+          m_sample_format.hex);
+      break;
+    }
     int scale = 1 << (m_pred_scale & 0xF);
-    int coef_idx = (m_pred_scale >> 4) & 0x7;
 
-    s32 coef1 = coefs[coef_idx * 2 + 0];
-    s32 coef2 = coefs[coef_idx * 2 + 1];
+    if (raw_sample >= 8)
+      raw_sample -= 16;
 
-    int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) :
-                                         (ReadMemory(m_current_address >> 1) >> 4);
-
-    if (temp >= 8)
-      temp -= 16;
-
-    s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
+    // Not sure if GAIN is applied for ADPCM or not
+    s32 val32 = (scale * raw_sample) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
     val = static_cast<s16>(std::clamp<s32>(val32, -0x7FFF, 0x7FFF));
-    step_size_bytes = 2;
+    step_size = 2;
 
     m_yn2 = m_yn1;
     m_yn1 = val;
@@ -158,42 +202,28 @@ u16 Accelerator::ReadSample(const s16* coefs)
     {
       m_pred_scale = ReadMemory((m_current_address & ~15) >> 1);
       m_current_address += 2;
-      step_size_bytes += 2;
+      step_size += 2;
     }
     break;
   }
-  case 0x0A:  // 16-bit PCM audio
-    val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
+  case FormatDecode::PCM:  // 16-bit PCM audio
+  {
+    s32 val32 = ((static_cast<s32>(m_gain) * raw_sample) >> gain_shift) +
+                (((coef1 * m_yn1) >> gain_shift) + ((coef2 * m_yn2) >> gain_shift));
+    val = static_cast<s16>(val32);
     m_yn2 = m_yn1;
     m_yn1 = val;
-    step_size_bytes = 2;
+    step_size = 2;
     m_current_address += 1;
     break;
-  case 0x19:  // 8-bit PCM audio
-    val = ReadMemory(m_current_address) << 8;
-    m_yn2 = m_yn1;
-    m_yn1 = val;
-    step_size_bytes = 2;
-    m_current_address += 1;
-    break;
-  default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() - unknown format {:#x}", m_sample_format);
-    step_size_bytes = 2;
-    m_current_address += 1;
-    val = 0;
-    break;
   }
-
-  // TODO: Take GAIN into account
-  // adpcm = 0, pcm8 = 0x100, pcm16 = 0x800
-  // games using pcm8 : Phoenix Wright Ace Attorney (WiiWare), Megaman 9-10 (WiiWare)
-  // games using pcm16: GC Sega games, ...
+  }
 
   // Check for loop.
   // Somehow, YN1 and YN2 must be initialized with their "loop" values,
   // so yeah, it seems likely that we should raise an exception to let
   // the DSP program do that, at least if DSP_FORMAT == 0x0A.
-  if (m_current_address == (m_end_address + step_size_bytes - 1))
+  if (m_current_address == (m_end_address + step_size - 1))
   {
     // Set address back to start address.
     m_current_address = m_start_address;
@@ -237,7 +267,12 @@ void Accelerator::SetCurrentAddress(u32 address)
 
 void Accelerator::SetSampleFormat(u16 format)
 {
-  m_sample_format = format;
+  m_sample_format.hex = format;
+}
+
+void Accelerator::SetGain(s16 gain)
+{
+  m_gain = gain;
 }
 
 void Accelerator::SetYn1(s16 yn1)
diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index 76368bb705e..c85c6db7402 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "Common/BitField.h"
 #include "Common/CommonTypes.h"
 
 class PointerWrap;
@@ -22,7 +23,8 @@ public:
   u32 GetStartAddress() const { return m_start_address; }
   u32 GetEndAddress() const { return m_end_address; }
   u32 GetCurrentAddress() const { return m_current_address; }
-  u16 GetSampleFormat() const { return m_sample_format; }
+  u16 GetSampleFormat() const { return m_sample_format.hex; }
+  s16 GetGain() const { return m_gain; }
   s16 GetYn1() const { return m_yn1; }
   s16 GetYn2() const { return m_yn2; }
   u16 GetPredScale() const { return m_pred_scale; }
@@ -30,6 +32,7 @@ public:
   void SetEndAddress(u32 address);
   void SetCurrentAddress(u32 address);
   void SetSampleFormat(u16 format);
+  void SetGain(s16 gain);
   void SetYn1(s16 yn1);
   void SetYn2(s16 yn2);
   void SetPredScale(u16 pred_scale);
@@ -40,12 +43,49 @@ protected:
   virtual void OnEndException() = 0;
   virtual u8 ReadMemory(u32 address) = 0;
   virtual void WriteMemory(u32 address, u8 value) = 0;
+  u16 GetCurrentSample();
 
   // DSP accelerator registers.
   u32 m_start_address = 0;
   u32 m_end_address = 0;
   u32 m_current_address = 0;
-  u16 m_sample_format = 0;
+
+  enum class FormatSize : u16
+  {
+    Size4Bit = 0,
+    Size8Bit = 1,
+    Size16Bit = 2,
+    SizeInvalid = 3
+  };
+
+  enum class FormatDecode : u16
+  {
+    ADPCM = 0,
+    PCM = 1,
+  };
+
+  // When reading samples (at least in PCM mode), they are multiplied by the gain, then shifted
+  // right by an amount dependent on this config
+  enum class FormatGainCfg : u16
+  {
+    GainShift11 = 0,
+    GainShift0 = 1,
+    GainShift16 = 2,
+    GainInvalid = 3
+  };
+
+  union SampleFormat
+  {
+    u16 hex;
+    BitField<0, 2, FormatSize> size;
+    BitField<2, 1, bool, u16>
+        raw_only;  // When this bit is set, sample reads seem broken, while raw accesses work
+    BitField<3, 1, FormatDecode> decode;
+    BitField<4, 2, FormatGainCfg> gain_cfg;
+    BitField<6, 10, u16> unk;
+  } m_sample_format;
+
+  s16 m_gain = 0;
   s16 m_yn1 = 0;
   s16 m_yn2 = 0;
   u16 m_pred_scale = 0;
diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp
index 49aff51392e..a812f181b1c 100644
--- a/Source/Core/Core/DSP/DSPHWInterface.cpp
+++ b/Source/Core/Core/DSP/DSPHWInterface.cpp
@@ -122,12 +122,6 @@ void SDSP::WriteIFX(u32 address, u16 value)
     m_ifx_regs[DSP_DSBL] = 0;
     break;
 
-  case DSP_GAIN:
-    if (value != 0)
-    {
-      DEBUG_LOG_FMT(DSPLLE, "Gain Written: {:#06x}", value);
-    }
-    [[fallthrough]];
   case DSP_DSPA:
   case DSP_DSMAH:
   case DSP_DSMAL:
@@ -161,6 +155,9 @@ void SDSP::WriteIFX(u32 address, u16 value)
   case DSP_FORMAT:
     m_accelerator->SetSampleFormat(value);
     break;
+  case DSP_GAIN:
+    m_accelerator->SetGain(value);
+    break;
   case DSP_YN1:
     m_accelerator->SetYn1(value);
     break;
@@ -231,6 +228,8 @@ u16 SDSP::ReadIFXImpl(u16 address)
     return static_cast<u16>(m_accelerator->GetCurrentAddress());
   case DSP_FORMAT:
     return m_accelerator->GetSampleFormat();
+  case DSP_GAIN:
+    return m_accelerator->GetGain();
   case DSP_YN1:
     return m_accelerator->GetYn1();
   case DSP_YN2:

From 0dd282f626abd3c59a679e1c6a9494891d774473 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sat, 18 Jun 2022 19:29:36 -0400
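Subject: [PATCH 05/20] Fix DSPAcceleratorTest

Zero-initialize m_sample_format and make the unit test call the renamed
ReadSample() instead of Read().
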
---
 Source/Core/Core/DSP/DSPAccelerator.h            | 2 +-
 Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index c85c6db7402..723feaf52e3 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -83,7 +83,7 @@ protected:
     BitField<3, 1, FormatDecode> decode;
     BitField<4, 2, FormatGainCfg> gain_cfg;
     BitField<6, 10, u16> unk;
-  } m_sample_format;
+  } m_sample_format{0};
 
   s16 m_gain = 0;
   s16 m_yn1 = 0;
diff --git a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
index a5e33027466..a78a09f5ad7 100644
--- a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
+++ b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
@@ -17,7 +17,7 @@ public:
   {
     std::array<s16, 16> coefs{};
     m_accov_raised = false;
-    return Read(coefs.data());
+    return ReadSample(coefs.data());
   }
 
   bool EndExceptionRaised() const { return m_accov_raised; }

From c9bb258e88aa288f1bfbdb313f53afe36792a61b Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sat, 18 Jun 2022 19:41:15 -0400
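Subject: [PATCH 06/20] Add accelerator raw d3 test

Adds a DSPSpy test that performs raw reads and raw writes through 0xFFD3
in a four-iteration loop for each of the formats 0x00 (4-bit), 0x01
(8-bit) and 0x02 (16-bit). The accelerator registers are sent back
before the first access and after every access so that the start, end
and current addresses can be checked.
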
---
 Source/DSPSpy/tests/accelerator_raw_test.ds | 112 ++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 Source/DSPSpy/tests/accelerator_raw_test.ds

diff --git a/Source/DSPSpy/tests/accelerator_raw_test.ds b/Source/DSPSpy/tests/accelerator_raw_test.ds
new file mode 100644
index 00000000000..6c91ef958ef
--- /dev/null
+++ b/Source/DSPSpy/tests/accelerator_raw_test.ds
@@ -0,0 +1,112 @@
+incdir  "tests"
+include "dsp_base.inc"
+
+; To use: set up a buffer in main_spy,
+; then modify the start, current, and ending addresses
+; and verify things look correct
+loop_read_test:
+  ; Set the sample format
+  sr @0xffd1, $AC0.H
+
+  ; Test parameters
+  lri $AC0.M, #0x0000 ; start
+  lri $AC0.L, #0x0000 ; start
+  lri $AC1.M, #0x0000 ; end
+  lri $AC1.L, #0x0011 ; end
+
+  ; pred scale, coefs, etc do not matter for raw
+
+  ; Set the starting and current address
+  srs @ACSAH, $AC0.M
+  srs @ACCAH, $AC0.M
+  srs @ACSAL, $AC0.L
+  srs @ACCAL, $AC0.L
+  ; Set the ending address
+  srs @ACEAH, $AC1.M
+  srs @ACEAL, $AC1.L
+
+  call load_hw_reg_to_regs
+  call send_back ; check the accelerator regs before a read
+
+  bloopi #4, end_of_read_loop
+    lr $IX3, @0xffd3 ; Raw D3 reads
+    call load_hw_reg_to_regs
+    call send_back ; after a read
+    nop
+end_of_read_loop:
+    nop
+    ret
+
+loop_write_test:
+  ; Set the sample format
+  sr @0xffd1, $AC0.H
+
+  ; Test parameters
+  lri $AC0.M, #0x0000 ; start
+  lri $AC0.L, #0x0000 ; start
+  lri $AC1.M, #0x0000 ; end
+  lri $AC1.L, #0x0011 ; end
+
+  ; pred scale, coefs, etc do not matter for raw
+
+  ; Set the starting and current address
+  srs @ACSAH, $AC0.M
+  srs @ACCAH, $AC0.M
+  srs @ACSAL, $AC0.L
+  srs @ACCAL, $AC0.L
+  ; Set the ending address
+  srs @ACEAH, $AC1.M
+  srs @ACEAL, $AC1.L
+
+  call load_hw_reg_to_regs
+  call send_back ; check the accelerator regs before a write
+
+  bloopi #4, end_of_write_loop
+    sr @0xffd3, $IX3 ; Raw D3 writes
+    call load_hw_reg_to_regs
+    call send_back ; after a write
+    nop
+end_of_write_loop:
+    nop
+    ret
+
+test_main:
+lri $AC0.H, #0x00 ; 4-bit
+call loop_read_test
+lri $AC0.H, #0x01 ; 8-bit
+call loop_read_test
+lri $AC0.H, #0x02 ; 16-bit
+call loop_read_test
+lri $AC0.H, #0x00 ; "4-bit", but all writes are 16-bits
+call loop_write_test
+lri $AC0.H, #0x01 ; "8-bit", but all writes are 16-bits
+call loop_write_test
+lri $AC0.H, #0x02 ; 16-bit
+call loop_write_test
+jmp end_of_test
+
+load_hw_reg_to_regs:
+  lr $AR0, @0xffd1 ; format
+  lr $AR1, @0xffd2 ; unknown
+  lr $AR2, @0xffda ; pred scale
+  lr $AR3, @0xffdb ; yn1
+  lr $IX0, @0xffdc ; yn2
+  lr $IX1, @0xffdf ; unknown accelerator register
+
+  lri $AC0.H, #0
+  lrs $AC0.M, @ACSAH
+  lrs $AC0.L, @ACSAL
+
+  lri $AC1.H, #0
+  lrs $AC1.M, @ACEAH
+  lrs $AC1.L, @ACEAL
+
+  lrs $AX0.H, @ACCAH
+  lrs $AX0.L, @ACCAL
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  ret

From c7d8afc5a7b5fc312791eb751f36233f12a242db Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sun, 19 Jun 2022 05:22:12 -0400
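Subject: [PATCH 07/20] Use correct exceptions for d3 reads/writes

The single accelerator overflow exception (5) is split into three: raw
read overflow (3), raw write overflow (4) and sample read overflow (5).
ReadRaw() and ReadSample() raise their own exception when the current
address wraps back to the start address, and WriteRaw() raises the raw
write exception after every accepted write.
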
---
 Source/Core/Core/DSP/DSPAccelerator.cpp          |  5 +++--
 Source/Core/Core/DSP/DSPAccelerator.h            |  4 +++-
 Source/Core/Core/DSP/DSPCore.cpp                 | 13 ++++++++++++-
 Source/Core/Core/DSP/DSPCore.h                   | 14 +++++++-------
 Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp      | 14 +++++++-------
 Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h        |  4 +++-
 Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h      |  4 +++-
 Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp |  4 +++-
 8 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index 67663b77957..457c37cda6e 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -81,7 +81,7 @@ u16 Accelerator::ReadRaw()
   {
     // Set address back to start address (confirmed on hardware)
     m_current_address = m_start_address;
-    OnEndException();
+    OnRawReadEndException();
   }
 
   SetCurrentAddress(m_current_address);
@@ -103,6 +103,7 @@ void Accelerator::WriteRaw(u16 value)
     WriteMemory(m_current_address * 2, value >> 8);
     WriteMemory(m_current_address * 2 + 1, value & 0xFF);
     m_current_address++;
+    OnRawWriteEndException();
   }
   else
   {
@@ -228,7 +229,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
     // Set address back to start address.
     m_current_address = m_start_address;
     m_reads_stopped = true;
-    OnEndException();
+    OnSampleReadEndException();
   }
 
   SetCurrentAddress(m_current_address);
diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index 723feaf52e3..c0411fe94ab 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -40,7 +40,9 @@ public:
   void DoState(PointerWrap& p);
 
 protected:
-  virtual void OnEndException() = 0;
+  virtual void OnRawReadEndException() = 0;
+  virtual void OnRawWriteEndException() = 0;
+  virtual void OnSampleReadEndException() = 0;
   virtual u8 ReadMemory(u32 address) = 0;
   virtual void WriteMemory(u32 address, u8 value) = 0;
   u16 GetCurrentSample();
diff --git a/Source/Core/Core/DSP/DSPCore.cpp b/Source/Core/Core/DSP/DSPCore.cpp
index b6d89064b56..731d71e023d 100644
--- a/Source/Core/Core/DSP/DSPCore.cpp
+++ b/Source/Core/Core/DSP/DSPCore.cpp
@@ -109,7 +109,18 @@ public:
 protected:
   u8 ReadMemory(u32 address) override { return Host::ReadHostMemory(address); }
   void WriteMemory(u32 address, u8 value) override { Host::WriteHostMemory(value, address); }
-  void OnEndException() override { m_dsp.SetException(ExceptionType::AcceleratorOverflow); }
+  void OnRawReadEndException() override
+  {
+    m_dsp.SetException(ExceptionType::AcceleratorRawReadOverflow);
+  }
+  void OnRawWriteEndException() override
+  {
+    m_dsp.SetException(ExceptionType::AcceleratorRawWriteOverflow);
+  }
+  void OnSampleReadEndException() override
+  {
+    m_dsp.SetException(ExceptionType::AcceleratorSampleReadOverflow);
+  }
 
 private:
   SDSP& m_dsp;
diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h
index da21e008375..b8fa5683c4e 100644
--- a/Source/Core/Core/DSP/DSPCore.h
+++ b/Source/Core/Core/DSP/DSPCore.h
@@ -226,13 +226,13 @@ enum : u16
 // Exception vectors
 enum class ExceptionType
 {
-  StackOverflow = 1,        // 0x0002 stack under/over flow
-  EXP_2 = 2,                // 0x0004
-  EXP_3 = 3,                // 0x0006
-  EXP_4 = 4,                // 0x0008
-  AcceleratorOverflow = 5,  // 0x000a accelerator address overflow
-  EXP_6 = 6,                // 0x000c
-  ExternalInterrupt = 7     // 0x000e external int (message from CPU)
+  StackOverflow = 1,                  // 0x0002 stack under/over flow
+  EXP_2 = 2,                          // 0x0004
+  AcceleratorRawReadOverflow = 3,     // 0x0006 accelerator raw read address overflow
+  AcceleratorRawWriteOverflow = 4,    // 0x0008 accelerator raw write address overflow
+  AcceleratorSampleReadOverflow = 5,  // 0x000a accelerator sample reads address overflow
+  EXP_6 = 6,                          // 0x000c
+  ExternalInterrupt = 7               // 0x000e external int (message from CPU)
 };
 
 enum class Mailbox
diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
index 5084a6ed24b..0f8467f693e 100644
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
@@ -246,7 +246,7 @@ AESndAccelerator::AESndAccelerator(DSP::DSPManager& dsp) : m_dsp(dsp)
 
 AESndAccelerator::~AESndAccelerator() = default;
 
-void AESndAccelerator::OnEndException()
+void AESndAccelerator::OnSampleReadEndException()
 {
   // exception5 - this updates internal state
   SetYn1(GetYn1());
@@ -372,7 +372,7 @@ void AESndUCode::DoMixing()
           while (counter_h >= 1)
           {
             counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
             new_l = new_r;
           }
           break;
@@ -383,8 +383,8 @@ void AESndUCode::DoMixing()
           while (counter_h >= 1)
           {
             counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
-            new_l = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
+            new_l = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
           }
           break;  // falls through to mix_samples normally
 
@@ -394,7 +394,7 @@ void AESndUCode::DoMixing()
           while (counter_h >= 1)
           {
             counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
             new_l = new_r;
           }
           new_r ^= 0x8000;
@@ -407,8 +407,8 @@ void AESndUCode::DoMixing()
           while (counter_h >= 1)
           {
             counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
-            new_l = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
+            new_l = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
           }
           new_r ^= 0x8000;
           new_l ^= 0x8000;
diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h
index 874e0440db5..ebd644b757e 100644
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h
@@ -30,7 +30,9 @@ public:
   ~AESndAccelerator();
 
 protected:
-  void OnEndException() override;
+  void OnRawReadEndException() override {}
+  void OnRawWriteEndException() override {}
+  void OnSampleReadEndException() override;
   u8 ReadMemory(u32 address) override;
   void WriteMemory(u32 address, u8 value) override;
 
diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
index c7d070750fc..e5aefd0c770 100644
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
@@ -134,7 +134,9 @@ public:
   PB_TYPE* acc_pb = nullptr;
 
 protected:
-  void OnEndException() override
+  void OnRawReadEndException() override {}
+  void OnRawWriteEndException() override {}
+  void OnSampleReadEndException() override
   {
     if (acc_pb->audio_addr.looping)
     {
diff --git a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
index a78a09f5ad7..77963d7df0e 100644
--- a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
+++ b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
@@ -23,7 +23,9 @@ public:
   bool EndExceptionRaised() const { return m_accov_raised; }
 
 protected:
-  void OnEndException() override
+  void OnRawReadEndException() override {}
+  void OnRawWriteEndException() override {}
+  void OnSampleReadEndException() override
   {
     EXPECT_TRUE(m_reads_stopped);
     m_accov_raised = true;

From 14fadbbe56c37abd65b41fa44a568009be97cce2 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sun, 19 Jun 2022 15:23:05 -0400
Subject: [PATCH 08/20] ADPCM mode doesn't entirely die with larger accesses,
 gain is PCM only

---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 49 +++++++++++--------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index 457c37cda6e..16be268bf6e 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -143,41 +143,18 @@ u16 Accelerator::ReadSample(const s16* coefs)
   s32 coef1 = coefs[coef_idx * 2 + 0];
   s32 coef2 = coefs[coef_idx * 2 + 1];
 
-  u8 gain_shift = 0;
-  switch (m_sample_format.gain_cfg)
-  {
-  case FormatGainCfg::GainShift11:
-    gain_shift = 11;
-    break;
-  case FormatGainCfg::GainShift0:
-    gain_shift = 0;
-    break;
-  case FormatGainCfg::GainShift16:
-    gain_shift = 16;
-    break;
-  default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() invalid gain mode in format {:#x}",
-                  m_sample_format.hex);
-    break;
-  }
-
   switch (m_sample_format.decode)
   {
   case FormatDecode::ADPCM:  // ADPCM audio
   {
-    if (m_sample_format.size != FormatSize::Size4Bit)
-    {
-      ERROR_LOG_FMT(
-          DSPLLE, "dsp_read_accelerator_sample() tried to read ADPCM with bad size in format {:#x}",
-          m_sample_format.hex);
-      break;
-    }
+    // ADPCM really only supports 4-bit decoding, but for larger values on hardware, it just ignores
+    // the upper 12 bits
+    raw_sample &= 0xF;
     int scale = 1 << (m_pred_scale & 0xF);
 
     if (raw_sample >= 8)
       raw_sample -= 16;
 
-    // Not sure if GAIN is applied for ADPCM or not
     s32 val32 = (scale * raw_sample) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
     val = static_cast<s16>(std::clamp<s32>(val32, -0x7FFF, 0x7FFF));
     step_size = 2;
@@ -189,7 +166,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
     // These two cases are handled in a special way, separate from normal overflow handling:
     // the ACCOV exception does not fire at all, the predscale register is not updated,
     // and if the end address is 16-byte aligned, the DSP loops to start_address + 1
-    // instead of start_address.
+    // instead of start_address. This probably needs to be adjusted when using 8 or 16-bit accesses.
     if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address)
     {
       m_current_address = m_start_address + 1;
@@ -209,6 +186,24 @@ u16 Accelerator::ReadSample(const s16* coefs)
   }
   case FormatDecode::PCM:  // 16-bit PCM audio
   {
+    // Gain seems to only apply for PCM decoding
+    u8 gain_shift = 0;
+    switch (m_sample_format.gain_cfg)
+    {
+    case FormatGainCfg::GainShift11:
+      gain_shift = 11;
+      break;
+    case FormatGainCfg::GainShift0:
+      gain_shift = 0;
+      break;
+    case FormatGainCfg::GainShift16:
+      gain_shift = 16;
+      break;
+    default:
+      ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() invalid gain mode in format {:#x}",
+                    m_sample_format.hex);
+      break;
+    }
     s32 val32 = ((static_cast<s32>(m_gain) * raw_sample) >> gain_shift) +
                 (((coef1 * m_yn1) >> gain_shift) + ((coef2 * m_yn2) >> gain_shift));
     val = static_cast<s16>(val32);

From d517fe25f1b64891beacf2e5be7d8cd5b7759925 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sun, 19 Jun 2022 16:51:29 -0400
Subject: [PATCH 09/20] Add accelerator input MMIO register, fix MMIO PCM modes

---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 35 ++++++++++++++++---------
 Source/Core/Core/DSP/DSPAccelerator.h   | 13 +++++----
 Source/Core/Core/DSP/DSPCore.h          |  2 +-
 Source/Core/Core/DSP/DSPHWInterface.cpp |  5 ++++
 Source/Core/Core/DSP/DSPTables.cpp      |  2 +-
 5 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index 16be268bf6e..5553a5a7b06 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -118,16 +118,6 @@ u16 Accelerator::ReadSample(const s16* coefs)
   if (m_reads_stopped)
     return 0x0000;
 
-  if (m_sample_format.raw_only)
-  {
-    // Seems to return garbage on hardware
-    ERROR_LOG_FMT(
-        DSPLLE,
-        "dsp_read_accelerator_sample() tried sample read with raw only bit set for format {:#x}",
-        m_sample_format.hex);
-    return 0x0000;
-  }
-
   if (m_sample_format.unk != 0)
   {
     WARN_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() format {:#x} has unknown upper bits set",
@@ -136,8 +126,18 @@ u16 Accelerator::ReadSample(const s16* coefs)
 
   u16 val = 0;
   u8 step_size = 0;
+  s16 raw_sample;
+  if (m_sample_format.decode == FormatDecode::MMIOPCMHalt ||
+      m_sample_format.decode == FormatDecode::MMIOPCMInc)
+  {
+    // The addresses can be complete nonsense in either of these modes
+    raw_sample = m_input;
+  }
+  else
+  {
+    raw_sample = GetCurrentSample();
+  }
 
-  s16 raw_sample = GetCurrentSample();
   int coef_idx = (m_pred_scale >> 4) & 0x7;
 
   s32 coef1 = coefs[coef_idx * 2 + 0];
@@ -184,7 +184,9 @@ u16 Accelerator::ReadSample(const s16* coefs)
     }
     break;
   }
+  case FormatDecode::MMIOPCMHalt:
   case FormatDecode::PCM:  // 16-bit PCM audio
+  case FormatDecode::MMIOPCMInc:
   {
     // Gain seems to only apply for PCM decoding
     u8 gain_shift = 0;
@@ -210,7 +212,10 @@ u16 Accelerator::ReadSample(const s16* coefs)
     m_yn2 = m_yn1;
     m_yn1 = val;
     step_size = 2;
-    m_current_address += 1;
+    if (m_sample_format.decode != FormatDecode::MMIOPCMHalt)
+    {
+      m_current_address += 1;
+    }
     break;
   }
   }
@@ -286,4 +291,10 @@ void Accelerator::SetPredScale(u16 pred_scale)
 {
   m_pred_scale = pred_scale & 0x7f;
 }
+
+void Accelerator::SetInput(u16 input)
+{
+  m_input = input;
+}
+
 }  // namespace DSP
diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index c0411fe94ab..54fc88baaa8 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -28,6 +28,7 @@ public:
   s16 GetYn1() const { return m_yn1; }
   s16 GetYn2() const { return m_yn2; }
   u16 GetPredScale() const { return m_pred_scale; }
+  u16 GetInput() const { return m_input; }
   void SetStartAddress(u32 address);
   void SetEndAddress(u32 address);
   void SetCurrentAddress(u32 address);
@@ -36,6 +37,7 @@ public:
   void SetYn1(s16 yn1);
   void SetYn2(s16 yn2);
   void SetPredScale(u16 pred_scale);
+  void SetInput(u16 input);
 
   void DoState(PointerWrap& p);
 
@@ -62,8 +64,10 @@ protected:
 
   enum class FormatDecode : u16
   {
-    ADPCM = 0,
-    PCM = 1,
+    ADPCM = 0,        // ADPCM reads from ARAM, ACCA increments
+    MMIOPCMHalt = 1,  // PCM Reads from ACIN, ACCA doesn't increment
+    PCM = 2,          // PCM reads from ARAM, ACCA increments
+    MMIOPCMInc = 3    // PCM reads from ACIN, ACCA increments
   };
 
   // When reading samples (at least in PCM mode), they are multiplied by the gain, then shifted
@@ -80,9 +84,7 @@ protected:
   {
     u16 hex;
     BitField<0, 2, FormatSize> size;
-    BitField<2, 1, bool, u16>
-        raw_only;  // When this bit is set, sample reads seem broken, while raw accesses work
-    BitField<3, 1, FormatDecode> decode;
+    BitField<2, 2, FormatDecode> decode;
     BitField<4, 2, FormatGainCfg> gain_cfg;
     BitField<6, 10, u16> unk;
   } m_sample_format{0};
@@ -91,6 +93,7 @@ protected:
   s16 m_yn1 = 0;
   s16 m_yn2 = 0;
   u16 m_pred_scale = 0;
+  u16 m_input = 0;
 
   // When an ACCOV is triggered, the accelerator stops reading back anything
   // and updating the current address register, unless the YN2 register is written to.
diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h
index b8fa5683c4e..c0a5c0dba7b 100644
--- a/Source/Core/Core/DSP/DSPCore.h
+++ b/Source/Core/Core/DSP/DSPCore.h
@@ -166,7 +166,7 @@ enum : u32
   DSP_YN2 = 0xdc,
   DSP_ACDSAMP = 0xdd,  // Accelerator sample reads, processed differently depending on FORMAT
   DSP_GAIN = 0xde,
-  DSP_ACUNK2 = 0xdf,  // Set to 0xc on my dumps
+  DSP_ACIN = 0xdf,  // Feeds PCM samples written here
 
   DSP_AMDM = 0xef,  // ARAM DMA Request Mask 0: DMA with ARAM unmasked 1: masked
 
diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp
index a812f181b1c..8e1170f5cd9 100644
--- a/Source/Core/Core/DSP/DSPHWInterface.cpp
+++ b/Source/Core/Core/DSP/DSPHWInterface.cpp
@@ -170,6 +170,9 @@ void SDSP::WriteIFX(u32 address, u16 value)
   case DSP_ACDRAW:  // Raw accelerator write
     m_accelerator->WriteRaw(value);
     break;
+  case DSP_ACIN:
+    m_accelerator->SetInput(value);
+    break;
 
   default:
     if ((address & 0xff) >= 0xa0)
@@ -240,6 +243,8 @@ u16 SDSP::ReadIFXImpl(u16 address)
     return m_accelerator->ReadSample(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0]));
   case DSP_ACDRAW:  // Raw accelerator read
     return m_accelerator->ReadRaw();
+  case DSP_ACIN:
+    return m_accelerator->GetInput();
 
   default:
   {
diff --git a/Source/Core/Core/DSP/DSPTables.cpp b/Source/Core/Core/DSP/DSPTables.cpp
index 21502fe0c8e..e691c909579 100644
--- a/Source/Core/Core/DSP/DSPTables.cpp
+++ b/Source/Core/Core/DSP/DSPTables.cpp
@@ -414,7 +414,7 @@ const std::array<pdlabel_t, 96> pdlabels =
   {0xffdc, "yn2", "yn2",},
   {0xffdd, "ARAM", "Direct Read from ARAM (uses ADPCM)",},
   {0xffde, "GAIN", "Gain",},
-  {0xffdf, "0xffdf", nullptr,},
+  {0xffdf, "ACIN", "Accelerator MMIO PCM input value",},
 
   {0xffe0, "0xffe0",nullptr,},
   {0xffe1, "0xffe1",nullptr,},

From c460cafecfec3744a597dac1c9003c9d5573ccaa Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Sun, 19 Jun 2022 16:57:35 -0400
Subject: [PATCH 10/20] Update ACDRAW/ACDSAMP reg names in DSPTables and DSPSpy
 tests

---
 Source/Core/Core/DSP/DSPTables.cpp           | 4 ++--
 Source/DSPSpy/tests/accelerator_loop_test.ds | 2 +-
 Source/DSPSpy/tests/accelerator_raw_test.ds  | 4 ++--
 Source/DSPSpy/tests/accelerator_test.ds      | 4 ++--
 Source/DSPSpy/tests/dsp_base_noirq.inc       | 2 +-
 Source/DSPSpy/tests/mul_test.ds              | 2 +-
 Source/DSPSpy/tests/rti_test.ds              | 6 +++---
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPTables.cpp b/Source/Core/Core/DSP/DSPTables.cpp
index e691c909579..12e5d319189 100644
--- a/Source/Core/Core/DSP/DSPTables.cpp
+++ b/Source/Core/Core/DSP/DSPTables.cpp
@@ -402,7 +402,7 @@ const std::array<pdlabel_t, 96> pdlabels =
   {0xffd0, "0xffd0",nullptr,},
   {0xffd1, "SampleFormat", "SampleFormat",},
   {0xffd2, "0xffd2",nullptr,},
-  {0xffd3, "UnkZelda", "Unk Zelda reads/writes from/to it",},
+  {0xffd3, "ACDRAW", "Accelerator raw read/write from ARAM",},
   {0xffd4, "ACSAH", "Accelerator start address H",},
   {0xffd5, "ACSAL", "Accelerator start address L",},
   {0xffd6, "ACEAH", "Accelerator end address H",},
@@ -412,7 +412,7 @@ const std::array<pdlabel_t, 96> pdlabels =
   {0xffda, "pred_scale", "pred_scale",},
   {0xffdb, "yn1", "yn1",},
   {0xffdc, "yn2", "yn2",},
-  {0xffdd, "ARAM", "Direct Read from ARAM (uses ADPCM)",},
+  {0xffdd, "ACDSAMP", "Accelerator processed sample read from ARAM or ACIN",},
   {0xffde, "GAIN", "Gain",},
   {0xffdf, "ACIN", "Accelerator MMIO PCM input value",},
 
diff --git a/Source/DSPSpy/tests/accelerator_loop_test.ds b/Source/DSPSpy/tests/accelerator_loop_test.ds
index 6348f52efe2..82ec2c12832 100644
--- a/Source/DSPSpy/tests/accelerator_loop_test.ds
+++ b/Source/DSPSpy/tests/accelerator_loop_test.ds
@@ -31,7 +31,7 @@ call load_hw_reg_to_regs
 call send_back ; check the accelerator regs before a read
 
 bloopi #40, end_of_loop
-  lr $IX3, @ARAM
+  lr $IX3, @ACDSAMP
   call load_hw_reg_to_regs
   call send_back ; after a read
 end_of_loop:
diff --git a/Source/DSPSpy/tests/accelerator_raw_test.ds b/Source/DSPSpy/tests/accelerator_raw_test.ds
index 6c91ef958ef..3eeedaae67a 100644
--- a/Source/DSPSpy/tests/accelerator_raw_test.ds
+++ b/Source/DSPSpy/tests/accelerator_raw_test.ds
@@ -29,7 +29,7 @@ loop_read_test:
   call send_back ; check the accelerator regs before a read
 
   bloopi #4, end_of_read_loop
-    lr $IX3, @0xffd3 ; Raw D3 reads
+    lr $IX3, @ACDRAW ; Raw reads
     call load_hw_reg_to_regs
     call send_back ; after a read
     nop
@@ -62,7 +62,7 @@ loop_write_test:
   call send_back ; check the accelerator regs before a write
 
   bloopi #4, end_of_write_loop
-    sr @0xffd3, $IX3 ; Raw D3 writes
+    sr @ACDRAW, $IX3 ; Raw writes
     call load_hw_reg_to_regs
     call send_back ; after a write
     nop
diff --git a/Source/DSPSpy/tests/accelerator_test.ds b/Source/DSPSpy/tests/accelerator_test.ds
index a05866d5bbe..86049a600ed 100644
--- a/Source/DSPSpy/tests/accelerator_test.ds
+++ b/Source/DSPSpy/tests/accelerator_test.ds
@@ -35,8 +35,8 @@ test_accelerator_addrs_ex:
   lrs $AX0.L, @ACCAL
 
   ; Make the accelerator read memory
-  lrs $AX1.H, @ARAM
-  lrs $AX1.H, @ARAM
+  lrs $AX1.H, @ACDSAMP
+  lrs $AX1.H, @ACDSAMP
   ; AX1 -> new current position after read
   lrs $AX1.H, @ACCAH
   lrs $AX1.L, @ACCAL
diff --git a/Source/DSPSpy/tests/dsp_base_noirq.inc b/Source/DSPSpy/tests/dsp_base_noirq.inc
index 1558c7c5ca5..1b59b090e07 100644
--- a/Source/DSPSpy/tests/dsp_base_noirq.inc
+++ b/Source/DSPSpy/tests/dsp_base_noirq.inc
@@ -144,7 +144,7 @@ irq5:
 	lri		$ac0.m, #0xbbbb
 	sr		@0xffda, $ac0.m ; pred scale
 	sr		@0xffdb, $ac0.m ; yn1
-	lr		$ix2, @ARAM
+	lr		$ix2, @ACDSAMP
 	sr		@0xffdc, $ac0.m ; yn2
 	rti
 irq6:
diff --git a/Source/DSPSpy/tests/mul_test.ds b/Source/DSPSpy/tests/mul_test.ds
index 8f5ca9810f8..706488e35f4 100644
--- a/Source/DSPSpy/tests/mul_test.ds
+++ b/Source/DSPSpy/tests/mul_test.ds
@@ -214,7 +214,7 @@ LRI $AC1.M, #0xFFFF
 SRS @GAIN, $AC1.M
 
 ; Let's now load a sample through the accelerator.
-LRS $AC1.M, @ARAM
+LRS $AC1.M, @ACDSAMP
 call send_back
 
 jmp end_of_test
diff --git a/Source/DSPSpy/tests/rti_test.ds b/Source/DSPSpy/tests/rti_test.ds
index 81acd6af472..3dff950a79d 100644
--- a/Source/DSPSpy/tests/rti_test.ds
+++ b/Source/DSPSpy/tests/rti_test.ds
@@ -27,15 +27,15 @@ test_main:
 
 
 	LRI $AX1.H, #0x0000
-	LRS $AX0.L, @ARAM ; Trigger interrupt
+	LRS $AX0.L, @ACDSAMP ; Trigger interrupt
 	CALL send_back
 
 	LRI $AX1.H, #0x0001
-	LRS $AX0.L, @ARAM ; Trigger interrupt
+	LRS $AX0.L, @ACDSAMP ; Trigger interrupt
 	CALL send_back
 
 	LRI $AX1.H, #0x0000
-	LRS $AX0.L, @ARAM ; Trigger interrupt
+	LRS $AX0.L, @ACDSAMP ; Trigger interrupt
 	CALL send_back
 
 	jmp end_of_test

From 512da86b1a0be2501974729d505e8a2cff0edb91 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Mon, 20 Jun 2022 17:42:39 -0400
Subject: [PATCH 11/20] Update DSP docs with accelerator info, fix ANDC/ORC
 instruction description typos

---
 .../GameCube_DSP_Users_Manual.tex             | 90 ++++++++++---------
 1 file changed, 50 insertions(+), 40 deletions(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index 3c1f7018436..c601fa889ef 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -46,7 +46,7 @@
 % Document front page material
 \title{\textbf{\Huge GameCube DSP User's Manual}}
 \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
-\date{\today\\v0.1.5}
+\date{\today\\v0.1.6}
 
 % Title formatting commands
 \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@@ -263,6 +263,7 @@ The purpose of this documentation is purely academic and it aims at understandin
 0.1.3            & 2022.05.27    & Pokechu22       & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH}                                    \\ \hline
 0.1.4            & 2022.06.02    & Pokechu22       & Fixed typos; added sections on 16-bit and 40-bit modes and on main and extended opcode writing to the same register. \\ \hline
 0.1.5            & 2022.09.29    & vpelletier      & Fixed \texttt{BLOOP} and \texttt{BLOOPI} suboperation order \\ \hline
+0.1.6            & 2022.06.20    & xperia64        & Accelerator documentation updates, fix register typo in ANDC and ORC descriptions        \\ \hline
 \end{tabular}
 \end{table}
 
@@ -387,7 +388,7 @@ You may not copy, modify, sublicense, or distribute the Document except as expre
 
 The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See https://www.gnu.org/licenses/.
 
-Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License "or any later version" applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation. 
+Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License "or any later version" applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation.
 
 \pagebreak{}
 
@@ -655,15 +656,15 @@ Exception vectors are located at address \Address{0x0000} in Instruction RAM.
 \centering
 \begin{tabular}{|l|l|l|l|}
 \hline
-\textbf{Level} & \textbf{Address} & \textbf{Name}  & \textbf{Description}          \\ \hline
-0              & \Address{0x0000} & \texttt{RESET} &                               \\ \hline
-1              & \Address{0x0002} & \texttt{STOVF} & Stack under/overflow          \\ \hline
-2              & \Address{0x0004} &                &                               \\ \hline
-3              & \Address{0x0006} &                &                               \\ \hline
-4              & \Address{0x0008} &                &                               \\ \hline
-5              & \Address{0x000A} & \texttt{ACCOV} & Accelerator address overflow  \\ \hline
-6              & \Address{0x000C} &                &                               \\ \hline
-7              & \Address{0x000E} & \texttt{INT}   & External interrupt (from CPU) \\ \hline
+\textbf{Level} & \textbf{Address} & \textbf{Name}   & \textbf{Description}                      \\ \hline
+0              & \Address{0x0000} & \texttt{RESET}  &                                           \\ \hline
+1              & \Address{0x0002} & \texttt{STOVF}  & Stack under/overflow                      \\ \hline
+2              & \Address{0x0004} &                 &                                           \\ \hline
+3              & \Address{0x0006} & \texttt{ACRROV} & Accelerator raw read address overflow     \\ \hline
+4              & \Address{0x0008} & \texttt{ACRWOV} & Accelerator raw write address overflow    \\ \hline
+5              & \Address{0x000A} & \texttt{ACSOV}  & Accelerator sample read address overflow  \\ \hline
+6              & \Address{0x000C} &                 &                                           \\ \hline
+7              & \Address{0x000E} & \texttt{INT}    & External interrupt (from CPU)             \\ \hline
 \end{tabular}
 \end{table}
 
@@ -681,11 +682,11 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \hline
 \textbf{Address} & \textbf{Name}      & \textbf{Description}          \\ \hline
 \multicolumn{3}{|l|}{\textit{ADPCM Coefficients}}                     \\ \hline
-\Address{0xFFA0} & \Register{COEF\_A1\_0} & A1 Coefficient \# 0 \\ \hline
-\Address{0xFFA1} & \Register{COEF\_A2\_0} & A2 Coefficient \# 0 \\ \hline
-\multicolumn{3}{|c|}{$\vdots$}                                  \\ \hline
-\Address{0xFFAE} & \Register{COEF\_A1\_7} & A1 Coefficient \# 7 \\ \hline
-\Address{0xFFAF} & \Register{COEF\_A2\_7} & A2 Coefficient \# 7 \\ \hline
+\Address{0xFFA0} & \Register{COEF\_A1\_0} & A1 Coefficient \# 0       \\ \hline
+\Address{0xFFA1} & \Register{COEF\_A2\_0} & A2 Coefficient \# 0       \\ \hline
+\multicolumn{3}{|c|}{$\vdots$}                                        \\ \hline
+\Address{0xFFAE} & \Register{COEF\_A1\_7} & A1 Coefficient \# 7       \\ \hline
+\Address{0xFFAF} & \Register{COEF\_A2\_7} & A2 Coefficient \# 7       \\ \hline
 \multicolumn{3}{|l|}{\textit{DMA Interface}}                          \\ \hline
 \Address{0xFFC9} & \Register{DSCR}    & DMA control                   \\ \hline
 \Address{0xFFCB} & \Register{DSBL}    & Block length                  \\ \hline
@@ -707,7 +708,7 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \Address{0xFFDC} & \Register{YN2}     & ADPCM YN2                     \\ \hline
 \Address{0xFFDD} & \Register{ACDSAMP} & Accelerator processed sample  \\ \hline
 \Address{0xFFDE} & \Register{GAIN}    & Gain                          \\ \hline
-\Address{0xFFDF} & \Register{ACUNK2}  & Unknown, usually \Value{0x0C} \\ \hline
+\Address{0xFFDF} & \Register{ACIN}    & Accelerator input             \\ \hline
 \Address{0xFFED} & \Register{AMDM}    & ARAM DMA Request Mask         \\ \hline
 \multicolumn{3}{|l|}{\textit{Interrupts}}                             \\ \hline
 \Address{0xFFFB} & \Register{DIRQ}    & IRQ request                   \\ \hline
@@ -762,20 +763,29 @@ The GameCube DSP is connected to the memory bus through a DMA channel. DMA can b
 \section{Accelerator}
 
 The accelerator is used to transfer data from accelerator memory (ARAM) to DSP memory. The accelerator area can be marked with \Register{ACSA} (start) and \Register{ACEA} (end) addresses.
-Current address for the accelerator can be set or read from the \Register{ACCA} register. Reading from accelerator memory is done by reading from the \Register{ACDAT} register.
-This register contains data from ARAM pointed to by the \Register{ACCA} register.
+Current address for the accelerator can be set or read from the \Register{ACCA} register. Accessing accelerator memory is done by reading or writing the \Register{ACDRAW} register for raw data, or reading the \Register{ACDSAMP} register for processed sample data.
+These registers contain raw or processed sample data from ARAM pointed to by the \Register{ACCA} register.
 After reading the data, \Register{ACCA} is incremented by one.
-After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and the \Exception{ACCOV} interrupt is generated.
+After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and an exception is generated. Raw reads generate exception \Exception{ACRROV}, raw writes generate exception \Exception{ACRWOV}, and sample reads generate exception \Exception{ACSOV}.
 
-\RegisterBitOverview{0xFFD1}{FORMAT}{Accelerator sample format}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFD1}{FORMAT}{Accelerator sample format}{---- ---- --gg ddss}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{\begin{tabular}[c]{@{}l@{}}
-\Value{0x00} - ADPCM audio \\
-\Value{0x05} - u8 reads (D3) \\
-\Value{0x06} - u16 reads (D3) \\
-\Value{0x0A} - 16-bit PCM audio, u16 writes (D3) \\
-\Value{0x19} - 8-bit PCM audio
+\RegisterBitDescription{5--4}{g}{R/W}{\begin{tabular}[c]{@{}l@{}}
+\Value{0} - PCM gain/coef scaling = 1/2048 \\
+\Value{1} - PCM gain/coef scaling = 1/1 \\
+\Value{2} - PCM gain/coef scaling = 1/65536 \\
+\end{tabular}}
+\RegisterBitDescription{3--2}{d}{R/W}{\begin{tabular}[c]{@{}l@{}}
+\Value{0} - ADPCM decoding from ARAM \\
+\Value{1} - PCM decoding from ACIN, ACCA doesn't increment \\
+\Value{2} - PCM decoding from ARAM \\
+\Value{3} - PCM decoding from ACIN, ACCA increments \\
+\end{tabular}}
+\RegisterBitDescription{1--0}{s}{R/W}{\begin{tabular}[c]{@{}l@{}}
+\Value{0} - 4-bit \\
+\Value{1} - 8-bit \\
+\Value{2} - 16-bit \\
 \end{tabular}}
 \end{RegisterBitDescriptions}
 
@@ -785,10 +795,10 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitDescription{15--0}{d}{R/W}{Usually 3}
 \end{RegisterBitDescriptions}
 
-\RegisterBitOverview{0xFFD3}{ACDATA1}{Alternative ARAM interface}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFD3}{ACDRAW}{Raw ARAM Access}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to data pointed to by current accelerator address, and then increments the current address.  It is unclear whether this respects the start and end addresses.}
+\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to raw data pointed to by current accelerator address, and then increments the current address.  Reads respect the FORMAT size, writes are always 16-bit and treat the addresses as such.  Writes require that the upper bit of the current address is set.  Reads that overflow the end address throw exception 3.  Writes that overflow throw exception 4.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFD4}{ACSAH}{Accelerator Start Address H}{dddd dddd dddd dddd}
@@ -841,31 +851,31 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitOverview{0xFFDB}{YN1}{ADPCM YN1}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDAT} when \Register{ACDAT} is read.  Used when calculating ADPCM, but updated for all sample formats.}
+\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDSAMP} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A1 coefficient selected by SCALE and scaled per FORMAT.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFDC}{YN1}{ADPCM YN2}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDAT} is read.  Used when calculating ADPCM, but updated for all sample formats.  Writing this value starts the accelerator.}
+\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A2 coefficient selected by SCALE and scaled per FORMAT.  Writing this value starts the accelerator.}
 \end{RegisterBitDescriptions}
 
-\RegisterBitOverview{0xFFDD}{ACDAT}{Accelerator data}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFDD}{ACDSAMP}{Accelerator data}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R}{Reads new data from the accelerator.  When there is no data left, returns 0.}
+\RegisterBitDescription{15--0}{d}{R}{Reads new processed sample data from the accelerator.  Data is processed per FORMAT.  When there is no data left, returns 0.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFDE}{GAIN}{Gain}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Exact behavior unknown}
+\RegisterBitDescription{15--0}{d}{R/W}{Applied in PCM FORMATs.  Raw sample is multiplied by GAIN, then scaled per the gain scale bits of FORMAT.}
 \end{RegisterBitDescriptions}
 
-\RegisterBitOverview{0xFFDF}{ACUNK2}{Unknown 2}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFDF}{ACIN}{Accelerator Input}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Usually \Value{0x0C}}
+\RegisterBitDescription{15--0}{d}{R/W}{Used as the sample input in place of ARAM reads when FORMAT specifies it.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFEF}{AMDM}{ARAM DMA Request Mask}{---- ---- ---- ---m}
@@ -1006,7 +1016,7 @@ Functions used for describing opcode operation.
 \begin{description}
   \item \Function{PUSH\_STACK(\$stR)}
   \begin{description}
-	\item \textbf{Description:} \\ 
+	\item \textbf{Description:} \\
       Pushes value onto given stack referenced by stack register \Register{\$stR}. Operation moves values down in internal stack.
 
     \item \textbf{Operation:} \\
@@ -1017,7 +1027,7 @@ Functions used for describing opcode operation.
 \begin{description}
   \item \Function{POP\_STACK(\$stR)}
   \begin{description}
-	\item \textbf{Description:} \\ 
+	\item \textbf{Description:} \\
       Pops value from stack referenced by stack register \Register{\$stR}. Operation moves values up in internal stack.
 
     \item \textbf{Operation:} \\
@@ -1358,7 +1368,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeFormat}
 
   \begin{DSPOpcodeDescription}
-    \item Logic AND middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ax(1-D)}.m.
+    \item Logic AND middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ac(1-D)}.m.
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -3640,7 +3650,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeFormat}
 
   \begin{DSPOpcodeDescription}
-    \item Logic OR middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ax(1-D).m}.
+    \item Logic OR middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ac(1-D).m}.
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}

From cd77e682cac9a63520026f9cccd720eb7e48d49c Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Mon, 20 Jun 2022 17:49:23 -0400
Subject: [PATCH 12/20] Name gain shift/scaling better

---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 14 +++++++-------
 Source/Core/Core/DSP/DSPAccelerator.h   | 16 ++++++++--------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index 5553a5a7b06..bfe772083b5 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -190,16 +190,16 @@ u16 Accelerator::ReadSample(const s16* coefs)
   {
     // Gain seems to only apply for PCM decoding
     u8 gain_shift = 0;
-    switch (m_sample_format.gain_cfg)
+    switch (m_sample_format.gain_scale)
     {
-    case FormatGainCfg::GainShift11:
-      gain_shift = 11;
+    case FormatGainScale::GainScale2048:
+      gain_shift = 11;  // x / 2048 = x >> 11
       break;
-    case FormatGainCfg::GainShift0:
-      gain_shift = 0;
+    case FormatGainScale::GainScale1:
+      gain_shift = 0;  // x / 1 = x >> 0
       break;
-    case FormatGainCfg::GainShift16:
-      gain_shift = 16;
+    case FormatGainScale::GainScale65536:
+      gain_shift = 16;  // x / 65536 = x >> 16
       break;
     default:
       ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() invalid gain mode in format {:#x}",
diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index 54fc88baaa8..ac3ed641afc 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -70,14 +70,14 @@ protected:
     MMIOPCMInc = 3    // PCM reads from ACIN, ACCA increments
   };
 
-  // When reading samples (at least in PCM mode), they are multiplied by the gain, then shifted
-  // right by an amount dependent on this config
-  enum class FormatGainCfg : u16
+  // When reading samples (at least in PCM mode), they are multiplied by the gain, then divided by
+  // the value specified here
+  enum class FormatGainScale : u16
   {
-    GainShift11 = 0,
-    GainShift0 = 1,
-    GainShift16 = 2,
-    GainInvalid = 3
+    GainScale2048 = 0,
+    GainScale1 = 1,
+    GainScale65536 = 2,
+    GainScaleInvalid = 3
   };
 
   union SampleFormat
@@ -85,7 +85,7 @@ protected:
     u16 hex;
     BitField<0, 2, FormatSize> size;
     BitField<2, 2, FormatDecode> decode;
-    BitField<4, 2, FormatGainCfg> gain_cfg;
+    BitField<4, 2, FormatGainScale> gain_scale;
     BitField<6, 10, u16> unk;
   } m_sample_format{0};
 

From 21d5e3182d27eb09ad31456140d8699516027c58 Mon Sep 17 00:00:00 2001
From: xperia64 <xperiancedapps@gmail.com>
Date: Mon, 20 Jun 2022 18:16:11 -0400
Subject: [PATCH 13/20] Fix DSP loop test init order, add DSP pcm test

---
 Source/DSPSpy/tests/accelerator_loop_test.ds | 13 ++--
 Source/DSPSpy/tests/accelerator_pcm_test.ds  | 79 ++++++++++++++++++++
 2 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 Source/DSPSpy/tests/accelerator_pcm_test.ds

diff --git a/Source/DSPSpy/tests/accelerator_loop_test.ds b/Source/DSPSpy/tests/accelerator_loop_test.ds
index 82ec2c12832..d3c37e92d16 100644
--- a/Source/DSPSpy/tests/accelerator_loop_test.ds
+++ b/Source/DSPSpy/tests/accelerator_loop_test.ds
@@ -9,12 +9,6 @@ lri $AC0.L, #0x0000 ; start
 lri $AC1.M, #0x0000 ; end
 lri $AC1.L, #0x0011 ; end
 
-; Reset some registers
-lri $AC0.H, #0xffff
-sr @0xffda, $AC0.H ; pred scale
-sr @0xffdb, $AC0.H ; yn1
-sr @0xffdc, $AC0.H ; yn2
-
 ; Set the sample format
 lri $AC0.H, #0x0
 sr @0xffd1, $AC0.H
@@ -27,6 +21,12 @@ srs @ACCAL, $AC0.L
 srs @ACEAH, $AC1.M
 srs @ACEAL, $AC1.L
 
+; Reset some registers (these must be reset after setting FORMAT)
+lri $AC0.H, #0xffff
+sr @0xffda, $AC0.H ; pred scale
+sr @0xffdb, $AC0.H ; yn1
+sr @0xffdc, $AC0.H ; yn2
+
 call load_hw_reg_to_regs
 call send_back ; check the accelerator regs before a read
 
@@ -34,6 +34,7 @@ bloopi #40, end_of_loop
   lr $IX3, @ACDSAMP
   call load_hw_reg_to_regs
   call send_back ; after a read
+  nop ; Loops that end at a return of a call are buggy on hw
 end_of_loop:
   nop
 
diff --git a/Source/DSPSpy/tests/accelerator_pcm_test.ds b/Source/DSPSpy/tests/accelerator_pcm_test.ds
new file mode 100644
index 00000000000..24b4eae8739
--- /dev/null
+++ b/Source/DSPSpy/tests/accelerator_pcm_test.ds
@@ -0,0 +1,79 @@
+incdir  "tests"
+include "dsp_base.inc"
+
+test_main:
+
+; Test parameters
+lri $AC0.M, #0x0000 ; start
+lri $AC0.L, #0x0000 ; start
+lri $AC1.M, #0x0000 ; end
+lri $AC1.L, #0x0011 ; end
+
+; Set the sample format
+lri $AC0.H, #0x08 ; 4-bit PCM, gain scaling = x / 2048
+sr @0xffd1, $AC0.H
+; Set the starting and current address
+srs @ACSAH, $AC0.M
+srs @ACCAH, $AC0.M
+srs @ACSAL, $AC0.L
+srs @ACCAL, $AC0.L
+; Set the ending address
+srs @ACEAH, $AC1.M
+srs @ACEAL, $AC1.L
+
+; Set the gains
+si @GAIN, #0x0800 ; 2048 / 2048 = 1.0
+si @COEF_A1_0, #0x0400 ; 1024 / 2048 = 0.5
+si @COEF_A2_0, #0x0200 ; 512 / 2048 = 0.25
+
+; Reset some registers (these must be reset after setting FORMAT)
+lri $AC0.H, #0x0000
+sr @0xffda, $AC0.H ; pred scale, use 0th coefficients
+sr @0xffdb, $AC0.H ; yn1
+sr @0xffdc, $AC0.H ; yn2
+
+call load_hw_reg_to_regs
+call send_back ; check the accelerator regs before a read
+
+; Expected read sequence
+; r[0] = (data[0] >> 4) + (0/2) + (0/4)
+; r[1] = (data[0] & 0xf) + (r[0]/2) + (0/4)
+; r[2] = (data[1] >> 4) + (r[1]/2) + (r[0]/4)
+; r[3] = (data[1] & 0xf) + (r[2]/2) + (r[1]/4)
+; ...
+
+bloopi #40, end_of_loop
+  lr $IX3, @ACDSAMP
+  call load_hw_reg_to_regs
+  call send_back ; after a read
+  nop ; Loops that end at a return of a call are buggy on hw
+end_of_loop:
+  nop
+
+jmp end_of_test
+
+load_hw_reg_to_regs:
+  lr $AR0, @0xffd1 ; format
+  lr $AR1, @0xffd2 ; unknown
+  lr $AR2, @0xffda ; pred scale
+  lr $AR3, @0xffdb ; yn1
+  lr $IX0, @0xffdc ; yn2
+  lr $IX1, @0xffdf ; unknown accelerator register
+
+  lri $AC0.H, #0
+  lrs $AC0.M, @ACSAH
+  lrs $AC0.L, @ACSAL
+
+  lri $AC1.H, #0
+  lrs $AC1.M, @ACEAH
+  lrs $AC1.L, @ACEAL
+
+  lrs $AX0.H, @ACCAH
+  lrs $AX0.L, @ACCAL
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  ret

From 05381c5b6dc20b2fc923cf8675779faeefecfe69 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Wed, 19 Mar 2025 00:01:44 +0000
Subject: [PATCH 14/20] Address Pokechu22's feedback

---
 Source/Core/Core/DSP/DSPAccelerator.cpp       | 19 +++++++++----------
 .../GameCube_DSP_Users_Manual.tex             | 19 ++++++++++---------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index bfe772083b5..7e57499d546 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -31,7 +31,7 @@ u16 Accelerator::GetCurrentSample()
     val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
     break;
   default:  // produces garbage, but affects the current address
-    ERROR_LOG_FMT(DSPLLE, "dsp_get_current_sample() - bad format {:#x}", m_sample_format.hex);
+    ERROR_LOG_FMT(DSPLLE, "GetCurrentSample() - bad format {:#x}", m_sample_format.hex);
     break;
   }
   return val;
@@ -75,8 +75,8 @@ u16 Accelerator::ReadRaw()
   // this pre-reset phase
 
   // The cleanest way to emulate the normal non-edge behavior is to only reset things if we just
-  // read the end address If the current address is larger than the end address (and not in the edge
-  // range), it ignores the end address
+  // read the end address. If the current address is larger than the end address (and not in the
+  // edge range), it ignores the end address
   if (m_current_address - 1 == m_end_address)
   {
     // Set address back to start address (confirmed on hardware)
@@ -98,7 +98,7 @@ void Accelerator::WriteRaw(u16 value)
   // Writes only seem to be accepted when the upper most bit of the address is set
   if (m_current_address & 0x80000000)
   {
-    // The format doesn't matter for raw writes, all writes are u16 and the address is treated as if
+    // The format doesn't matter for raw writes; all writes are u16 and the address is treated as if
     // we are in a 16-bit format
     WriteMemory(m_current_address * 2, value >> 8);
     WriteMemory(m_current_address * 2 + 1, value & 0xFF);
@@ -107,8 +107,7 @@ void Accelerator::WriteRaw(u16 value)
   }
   else
   {
-    ERROR_LOG_FMT(DSPLLE,
-                  "dsp_write_aram_raw() - tried to write to address {:#x} without high bit set",
+    ERROR_LOG_FMT(DSPLLE, "WriteRaw() - tried to write to address {:#x} without high bit set",
                   m_current_address);
   }
 }
@@ -120,7 +119,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
 
   if (m_sample_format.unk != 0)
   {
-    WARN_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() format {:#x} has unknown upper bits set",
+    WARN_LOG_FMT(DSPLLE, "ReadSample() format {:#x} has unknown upper bits set",
                  m_sample_format.hex);
   }
 
@@ -166,7 +165,8 @@ u16 Accelerator::ReadSample(const s16* coefs)
     // These two cases are handled in a special way, separate from normal overflow handling:
     // the ACCOV exception does not fire at all, the predscale register is not updated,
     // and if the end address is 16-byte aligned, the DSP loops to start_address + 1
-    // instead of start_address. This probably needs to be adjusted when using 8 or 16-bit accesses.
+    // instead of start_address.
+    // TODO: This probably needs to be adjusted when using 8 or 16-bit accesses.
     if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address)
     {
       m_current_address = m_start_address + 1;
@@ -202,8 +202,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
       gain_shift = 16;  // x / 65536 = x >> 16
       break;
     default:
-      ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator_sample() invalid gain mode in format {:#x}",
-                    m_sample_format.hex);
+      ERROR_LOG_FMT(DSPLLE, "ReadSample() invalid gain mode in format {:#x}", m_sample_format.hex);
       break;
     }
     s32 val32 = ((static_cast<s32>(m_gain) * raw_sample) >> gain_shift) +
diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index c601fa889ef..52a600415ef 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -46,7 +46,7 @@
 % Document front page material
 \title{\textbf{\Huge GameCube DSP User's Manual}}
 \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
-\date{\today\\v0.1.6}
+\date{\today\\v0.1.7}
 
 % Title formatting commands
 \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@@ -263,7 +263,8 @@ The purpose of this documentation is purely academic and it aims at understandin
 0.1.3            & 2022.05.27    & Pokechu22       & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH}                                    \\ \hline
 0.1.4            & 2022.06.02    & Pokechu22       & Fixed typos; added sections on 16-bit and 40-bit modes and on main and extended opcode writing to the same register. \\ \hline
 0.1.5            & 2022.09.29    & vpelletier      & Fixed \texttt{BLOOP} and \texttt{BLOOPI} suboperation order \\ \hline
-0.1.6            & 2022.06.20    & xperia64        & Acclerator documentation updates, fix register typo in ANDC and ORC descriptions         \\ \hline
+0.1.6            & 2022.06.20    & xperia64        & Accelerator documentation updates, fix register typo in ANDC and ORC descriptions        \\ \hline
+0.1.7            & 2025.04.21    & Tilka           & Fixed typos and complained about GFDL                                                    \\ \hline
 \end{tabular}
 \end{table}
 
@@ -704,8 +705,8 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \Address{0xFFD8} & \Register{ACCAH}   & Accelerator current address H \\ \hline
 \Address{0xFFD9} & \Register{ACCAL}   & Accelerator current address L \\ \hline
 \Address{0xFFDA} & \Register{SCALE}   & ADPCM predictor and scale     \\ \hline
-\Address{0xFFDB} & \Register{YN1}     & ADPCM YN1                     \\ \hline
-\Address{0xFFDC} & \Register{YN2}     & ADPCM YN2                     \\ \hline
+\Address{0xFFDB} & \Register{YN1}     & ADPCM output history Y[N - 1] \\ \hline
+\Address{0xFFDC} & \Register{YN2}     & ADPCM output history Y[N - 2] \\ \hline
 \Address{0xFFDD} & \Register{ACDSAMP} & Accelerator processed sample  \\ \hline
 \Address{0xFFDE} & \Register{GAIN}    & Gain                          \\ \hline
 \Address{0xFFDF} & \Register{ACIN}    & Accelerator input             \\ \hline
@@ -764,7 +765,7 @@ The GameCube DSP is connected to the memory bus through a DMA channel. DMA can b
 
 The accelerator is used to transfer data from accelerator memory (ARAM) to DSP memory. The accelerator area can be marked with \Register{ACSA} (start) and \Register{ACEA} (end) addresses.
 Current address for the accelerator can be set or read from the \Register{ACCA} register. Accessing accelerator memory is done by reading or writing the \Register{ACDRAW} register for raw data, or reading the \Register{ACDSAMP} register for processed sample data.
-This register contains raw or processed sample data from ARAM pointed to by the \Register{ACCA} register.
+These registers contain raw or processed sample data from ARAM pointed to by the \Register{ACCA} register.
 After reading the data, \Register{ACCA} is incremented by one.
 After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and an exception is generated. Raw reads generate exception \Exception{ACRROV}, raw writes generate exception \Exception{ACRWOV}, and sample reads generate exception \Exception{ACSOV}.
 
@@ -778,9 +779,9 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \end{tabular}}
 \RegisterBitDescription{3--2}{d}{R/W}{\begin{tabular}[c]{@{}l@{}}
 \Value{0} - ADPCM decoding from ARAM \\
-\Value{1} - PCM decoding from ACIN, ACCA doesn't increment \\
+\Value{1} - PCM decoding from \Register{ACIN}, \Register{ACCA} doesn't increment \\
 \Value{2} - PCM decoding from ARAM \\
-\Value{3} - PCM decoding from ACIN, ACCA increments \\
+\Value{3} - PCM decoding from \Register{ACIN}, \Register{ACCA} increments \\
 \end{tabular}}
 \RegisterBitDescription{1--0}{s}{R/W}{\begin{tabular}[c]{@{}l@{}}
 \Value{0} - 4-bit \\
@@ -798,7 +799,7 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitOverview{0xFFD3}{ACDRAW}{Raw ARAM Access}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to raw data pointed to by current accelerator address, and then increments the current address.  Reads respect the FORMAT size, writes are always 16-bit and treat the addresses as such.  Writes require that the upper bit of the current address is set.  Reads that overflow the end address throw exception 3.  Writes that overflow throw exception 4.}
+\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to raw data pointed to by current accelerator address, and then increments the current address.  Reads respect the FORMAT size. Writes are always 16-bit and treat the addresses as such.  Writes require that the uppermost bit of the current address is set.  Reads that overflow the end address throw exception \Exception{ACRROV}.  Writes that overflow throw exception \Exception{ACRWOV}.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFD4}{ACSAH}{Accelerator Start Address H}{dddd dddd dddd dddd}
@@ -854,7 +855,7 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDSAMP} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A1 coefficient selected by SCALE and scaled per FORMAT.}
 \end{RegisterBitDescriptions}
 
-\RegisterBitOverview{0xFFDC}{YN1}{ADPCM YN2}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFDC}{YN2}{ADPCM YN2}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
 \RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A2 coefficient selected by SCALE and scaled per FORMAT.  Writing this value starts the accelerator.}

From 6c870860ea5959c281d04412e9275879da26081f Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Apr 2025 15:35:32 +0100
Subject: [PATCH 15/20] DSP tests: clean up unused code

---
 Source/DSPSpy/tests/mul_test.ds | 80 ++++-----------------------------
 1 file changed, 9 insertions(+), 71 deletions(-)

diff --git a/Source/DSPSpy/tests/mul_test.ds b/Source/DSPSpy/tests/mul_test.ds
index 706488e35f4..908dc8fe891 100644
--- a/Source/DSPSpy/tests/mul_test.ds
+++ b/Source/DSPSpy/tests/mul_test.ds
@@ -18,7 +18,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX1.H, #0x100
 MULXMVZ	$AX0.L, $AX1.H, $ACC0     ; UNSIGNED
@@ -37,7 +37,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX1.H, #0x100
 MULXMV	$AX0.L, $AX1.H, $ACC0     ; UNSIGNED
@@ -56,7 +56,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX1.H, #0x100
 MULXAC	$AX0.L, $AX1.H, $ACC0     ; UNSIGNED
@@ -75,7 +75,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX1.H, #0x100
 MULX	$AX0.L, $AX1.H     ; UNSIGNED
@@ -95,7 +95,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX1.L, #0x100
 MADDX	$AX0.L, $AX1.L     ; SIGNED (!)
@@ -115,7 +115,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AC0.M, #0xFFFF
 LRI		$AX0.H, #0x100
 MULC	$AC0.M, $AX0.H     ; SIGNED (!)
@@ -135,7 +135,7 @@ call send_back
 
 CLR     $ACC0
 CLRP
-SET15	
+SET15
 LRI     $AC0.M, #0xFFFF
 LRI		$AX0.H, #0x100
 MULCAC	$AC0.M, $AX0.H, $ACC0     ; SIGNED (!)
@@ -154,7 +154,7 @@ MOVP    $ACC0
 call send_back
 
 CLR     $ACC0
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX0.H, #0x100
 MUL		$AX0.L, $AX0.H	; SIGNED (!)
@@ -173,7 +173,7 @@ MOVP    $ACC0
 call send_back
 
 CLR     $ACC0
-SET15	
+SET15
 LRI     $AX0.L, #0xFFFF
 LRI		$AX0.H, #0x100
 MULAC	$AX0.L, $AX0.H, $ACC0     ; SIGNED (!)
@@ -187,65 +187,3 @@ CLR15
 
 ; We're done, DO NOT DELETE THIS LINE
 jmp end_of_test
-
-; test accelerator
-
-; TODO: DSPSpy puts a 16-bit ramp at 0x10000000
-LRIS $AC1.M, #0x0a    ; 16-bit PCM audio
-;SRS	@SampleFormat, $AC1.M
-; Start accelerator position
-LRI $AC1.M, #0x0100
-SRS @ACCAH, $AC1.M
-LRI $AC1.M, #0x1000
-SRS @ACCAH, $AC1.M
-; Current accelerator position
-LRI $AC1.M, #0x0100
-SRS @ACCAH, $AC1.M
-LRI $AC1.M, #0x1000
-SRS @ACCAH, $AC1.M
-; End accelerator position
-LRI $AC1.M, #0x0100
-SRS @ACCAH, $AC1.M
-LRI $AC1.M, #0x2000
-SRS @ACCAH, $AC1.M
-
-; Now to the interesting parameter - gain.
-LRI $AC1.M, #0xFFFF
-SRS @GAIN, $AC1.M
-
-; Let's now load a sample through the accelerator.
-LRS $AC1.M, @ACDSAMP
-call send_back
-
-jmp end_of_test
-
-; test addpaxz
-call send_back
-
-clrp
-lri $AX0.L, #0x1111
-lri $AX0.H, #0x2222
-call send_back
-
-clrp
-addpaxz $ACC0, $AX0.H
-
-call send_back
-
-clrp
-set40
-addpaxz $ACC0, $AX0.H
-set16
-
-call send_back
-
-clrp
-set15
-addpaxz $ACC0, $AX0.H
-clr15
-
-call send_back
-
-
-jmp end_of_test
-

From 7c7c179b44852c1ecdf71e52d95c11859d35d8c9 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Apr 2025 13:45:55 +0100
Subject: [PATCH 16/20] DSP tests: update DSP MMIO labels

---
 Source/Core/Core/DSP/DSPTables.cpp           |  8 ++++----
 Source/DSPSpy/tests/accelerator_loop_test.ds | 18 +++++++++---------
 Source/DSPSpy/tests/accelerator_pcm_test.ds  | 18 +++++++++---------
 Source/DSPSpy/tests/accelerator_raw_test.ds  | 14 +++++++-------
 Source/DSPSpy/tests/accelerator_test.ds      |  2 +-
 Source/DSPSpy/tests/dsp_base_noirq.inc       |  6 +++---
 Source/DSPSpy/tests/rti_test.ds              | 14 +++++++-------
 7 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPTables.cpp b/Source/Core/Core/DSP/DSPTables.cpp
index 12e5d319189..19198355f6c 100644
--- a/Source/Core/Core/DSP/DSPTables.cpp
+++ b/Source/Core/Core/DSP/DSPTables.cpp
@@ -400,7 +400,7 @@ const std::array<pdlabel_t, 96> pdlabels =
   {0xffcf, "DSMAL", "DSP DMA Mem Address L",},
 
   {0xffd0, "0xffd0",nullptr,},
-  {0xffd1, "SampleFormat", "SampleFormat",},
+  {0xffd1, "FORMAT", "Accelerator sample format",},
   {0xffd2, "0xffd2",nullptr,},
   {0xffd3, "ACDRAW", "Accelerator raw read/write from ARAM",},
   {0xffd4, "ACSAH", "Accelerator start address H",},
@@ -409,9 +409,9 @@ const std::array<pdlabel_t, 96> pdlabels =
   {0xffd7, "ACEAL", "Accelerator end address L",},
   {0xffd8, "ACCAH", "Accelerator current address H",},
   {0xffd9, "ACCAL", "Accelerator current address L",},
-  {0xffda, "pred_scale", "pred_scale",},
-  {0xffdb, "yn1", "yn1",},
-  {0xffdc, "yn2", "yn2",},
+  {0xffda, "PRED_SCALE", "ADPCM predictor and scale",},
+  {0xffdb, "YN1", "ADPCM output history Y[N - 1]",},
+  {0xffdc, "YN2", "ADPCM output history Y[N - 2]",},
   {0xffdd, "ACDSAMP", "Accelerator processed sample read from ARAM or ACIN",},
   {0xffde, "GAIN", "Gain",},
   {0xffdf, "ACIN", "Accelerator MMIO PCM input value",},
diff --git a/Source/DSPSpy/tests/accelerator_loop_test.ds b/Source/DSPSpy/tests/accelerator_loop_test.ds
index d3c37e92d16..5bb7d08a86d 100644
--- a/Source/DSPSpy/tests/accelerator_loop_test.ds
+++ b/Source/DSPSpy/tests/accelerator_loop_test.ds
@@ -11,7 +11,7 @@ lri $AC1.L, #0x0011 ; end
 
 ; Set the sample format
 lri $AC0.H, #0x0
-sr @0xffd1, $AC0.H
+sr @FORMAT, $AC0.H
 ; Set the starting and current address
 srs @ACSAH, $AC0.M
 srs @ACCAH, $AC0.M
@@ -23,9 +23,9 @@ srs @ACEAL, $AC1.L
 
 ; Reset some registers (these must be reset after setting FORMAT)
 lri $AC0.H, #0xffff
-sr @0xffda, $AC0.H ; pred scale
-sr @0xffdb, $AC0.H ; yn1
-sr @0xffdc, $AC0.H ; yn2
+sr @PRED_SCALE, $AC0.H
+sr @YN1, $AC0.H
+sr @YN2, $AC0.H
 
 call load_hw_reg_to_regs
 call send_back ; check the accelerator regs before a read
@@ -41,12 +41,12 @@ end_of_loop:
 jmp end_of_test
 
 load_hw_reg_to_regs:
-  lr $AR0, @0xffd1 ; format
+  lr $AR0, @FORMAT
   lr $AR1, @0xffd2 ; unknown
-  lr $AR2, @0xffda ; pred scale
-  lr $AR3, @0xffdb ; yn1
-  lr $IX0, @0xffdc ; yn2
-  lr $IX1, @0xffdf ; unknown accelerator register
+  lr $AR2, @PRED_SCALE
+  lr $AR3, @YN1
+  lr $IX0, @YN2
+  lr $IX1, @ACIN
 
   lri $AC0.H, #0
   lrs $AC0.M, @ACSAH
diff --git a/Source/DSPSpy/tests/accelerator_pcm_test.ds b/Source/DSPSpy/tests/accelerator_pcm_test.ds
index 24b4eae8739..e04a4c688ad 100644
--- a/Source/DSPSpy/tests/accelerator_pcm_test.ds
+++ b/Source/DSPSpy/tests/accelerator_pcm_test.ds
@@ -11,7 +11,7 @@ lri $AC1.L, #0x0011 ; end
 
 ; Set the sample format
 lri $AC0.H, #0x08 ; 4-bit PCM, gain scaling = x / 2048
-sr @0xffd1, $AC0.H
+sr @FORMAT, $AC0.H
 ; Set the starting and current address
 srs @ACSAH, $AC0.M
 srs @ACCAH, $AC0.M
@@ -28,9 +28,9 @@ si @COEF_A2_0, #0x0200 ; 512 / 2048 = 0.25
 
 ; Reset some registers (these must be reset after setting FORMAT)
 lri $AC0.H, #0x0000
-sr @0xffda, $AC0.H ; pred scale, use 0th coefficients
-sr @0xffdb, $AC0.H ; yn1
-sr @0xffdc, $AC0.H ; yn2
+sr @PRED_SCALE, $AC0.H ; use 0th coefficients
+sr @YN1, $AC0.H
+sr @YN2, $AC0.H
 
 call load_hw_reg_to_regs
 call send_back ; check the accelerator regs before a read
@@ -53,12 +53,12 @@ end_of_loop:
 jmp end_of_test
 
 load_hw_reg_to_regs:
-  lr $AR0, @0xffd1 ; format
+  lr $AR0, @FORMAT
   lr $AR1, @0xffd2 ; unknown
-  lr $AR2, @0xffda ; pred scale
-  lr $AR3, @0xffdb ; yn1
-  lr $IX0, @0xffdc ; yn2
-  lr $IX1, @0xffdf ; unknown accelerator register
+  lr $AR2, @PRED_SCALE
+  lr $AR3, @YN1
+  lr $IX0, @YN2
+  lr $IX1, @ACIN
 
   lri $AC0.H, #0
   lrs $AC0.M, @ACSAH
diff --git a/Source/DSPSpy/tests/accelerator_raw_test.ds b/Source/DSPSpy/tests/accelerator_raw_test.ds
index 3eeedaae67a..07acbcb38b8 100644
--- a/Source/DSPSpy/tests/accelerator_raw_test.ds
+++ b/Source/DSPSpy/tests/accelerator_raw_test.ds
@@ -6,7 +6,7 @@ include "dsp_base.inc"
 ; and verify things look correct
 loop_read_test:
   ; Set the sample format
-  sr @0xffd1, $AC0.H
+  sr @FORMAT, $AC0.H
 
   ; Test parameters
   lri $AC0.M, #0x0000 ; start
@@ -39,7 +39,7 @@ end_of_read_loop:
 
 loop_write_test:
   ; Set the sample format
-  sr @0xffd1, $AC0.H
+  sr @FORMAT, $AC0.H
 
   ; Test parameters
   lri $AC0.M, #0x0000 ; start
@@ -86,12 +86,12 @@ call loop_write_test
 jmp end_of_test
 
 load_hw_reg_to_regs:
-  lr $AR0, @0xffd1 ; format
+  lr $AR0, @FORMAT
   lr $AR1, @0xffd2 ; unknown
-  lr $AR2, @0xffda ; pred scale
-  lr $AR3, @0xffdb ; yn1
-  lr $IX0, @0xffdc ; yn2
-  lr $IX1, @0xffdf ; unknown accelerator register
+  lr $AR2, @PRED_SCALE
+  lr $AR3, @YN1
+  lr $IX0, @YN2
+  lr $IX1, @ACIN
 
   lri $AC0.H, #0
   lrs $AC0.M, @ACSAH
diff --git a/Source/DSPSpy/tests/accelerator_test.ds b/Source/DSPSpy/tests/accelerator_test.ds
index 86049a600ed..f431a3108d1 100644
--- a/Source/DSPSpy/tests/accelerator_test.ds
+++ b/Source/DSPSpy/tests/accelerator_test.ds
@@ -12,7 +12,7 @@ include "dsp_base.inc"
 ;   AC1.M/L: end address
 test_accelerator_addrs_ex:
   ; Set the sample format
-  sr @0xffd1, $AC0.H
+  sr @FORMAT, $AC0.H
 
   ; Set the accelerator start and current address.
   srs @ACSAH, $AC0.M
diff --git a/Source/DSPSpy/tests/dsp_base_noirq.inc b/Source/DSPSpy/tests/dsp_base_noirq.inc
index 1b59b090e07..048926a0213 100644
--- a/Source/DSPSpy/tests/dsp_base_noirq.inc
+++ b/Source/DSPSpy/tests/dsp_base_noirq.inc
@@ -142,10 +142,10 @@ irq5:
 	si		@DMBL, #0x0000
 	si		@DIRQ, #0x0001
 	lri		$ac0.m, #0xbbbb
-	sr		@0xffda, $ac0.m ; pred scale
-	sr		@0xffdb, $ac0.m ; yn1
+	sr		@PRED_SCALE, $ac0.m
+	sr		@YN1, $ac0.m
 	lr		$ix2, @ACDSAMP
-	sr		@0xffdc, $ac0.m ; yn2
+	sr		@YN2, $ac0.m
 	rti
 irq6:
 	lri		$ac0.m, #0x0006
diff --git a/Source/DSPSpy/tests/rti_test.ds b/Source/DSPSpy/tests/rti_test.ds
index 3dff950a79d..388a8fbbc85 100644
--- a/Source/DSPSpy/tests/rti_test.ds
+++ b/Source/DSPSpy/tests/rti_test.ds
@@ -14,10 +14,10 @@ include "dsp_base_noirq.inc"
 test_main:
 	; Use the accelerator to generate an IRQ by setting the start and end address to 0
 	; This will result in an interrupt on every read
-	SI @0xffda, #0 ; pred_scale
-	SI @0xffdb, #0 ; yn1
-	SI @0xffdc, #0 ; yn2
-	SI @0xffd1, #0 ; SampleFormat
+	SI @PRED_SCALE, #0
+	SI @YN1, #0
+	SI @YN2, #0
+	SI @FORMAT, #0
 	SI @ACSAH, #0
 	SI @ACCAH, #0
 	SI @ACSAL, #0
@@ -42,9 +42,9 @@ test_main:
 
 accov_irq:
 	; Restore registers, otherwise no new interrupt will be generated
-	SI @0xffda, #0 ; pred_scale
-	SI @0xffdb, #0 ; yn1
-	SI @0xffdc, #0 ; yn2
+	SI @PRED_SCALE, #0
+	SI @YN1, #0
+	SI @YN2, #0
 
 	TSTAXH $AX1.H
 	LRI $AX1.L, #0x1111

From f0bacb826aa25b084e33f8005b425769316b0635 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Apr 2025 13:50:31 +0100
Subject: [PATCH 17/20] DSPAccelerator: rename MMIOPCMHalt to MMIOPCMNoInc

---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 6 +++---
 Source/Core/Core/DSP/DSPAccelerator.h   | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index 7e57499d546..2ecf3121ce2 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -126,7 +126,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
   u16 val = 0;
   u8 step_size = 0;
   s16 raw_sample;
-  if (m_sample_format.decode == FormatDecode::MMIOPCMHalt ||
+  if (m_sample_format.decode == FormatDecode::MMIOPCMNoInc ||
       m_sample_format.decode == FormatDecode::MMIOPCMInc)
   {
     // The addresses can be complete nonsense in either of these modes
@@ -184,7 +184,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
     }
     break;
   }
-  case FormatDecode::MMIOPCMHalt:
+  case FormatDecode::MMIOPCMNoInc:
   case FormatDecode::PCM:  // 16-bit PCM audio
   case FormatDecode::MMIOPCMInc:
   {
@@ -211,7 +211,7 @@ u16 Accelerator::ReadSample(const s16* coefs)
     m_yn2 = m_yn1;
     m_yn1 = val;
     step_size = 2;
-    if (m_sample_format.decode != FormatDecode::MMIOPCMHalt)
+    if (m_sample_format.decode != FormatDecode::MMIOPCMNoInc)
     {
       m_current_address += 1;
     }
diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h
index ac3ed641afc..8e7bbc8fb0c 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@@ -64,10 +64,10 @@ protected:
 
   enum class FormatDecode : u16
   {
-    ADPCM = 0,        // ADPCM reads from ARAM, ACCA increments
-    MMIOPCMHalt = 1,  // PCM Reads from ACIN, ACCA doesn't increment
-    PCM = 2,          // PCM reads from ARAM, ACCA increments
-    MMIOPCMInc = 3    // PCM reads from ACIN, ACCA increments
+    ADPCM = 0,         // ADPCM reads from ARAM, ACCA increments
+    MMIOPCMNoInc = 1,  // PCM reads from ACIN, ACCA doesn't increment
+    PCM = 2,           // PCM reads from ARAM, ACCA increments
+    MMIOPCMInc = 3     // PCM reads from ACIN, ACCA increments
   };
 
   // When reading samples (at least in PCM mode), they are multiplied by the gain, then divided by

From 8412a7c3363fdc7ad4eafa45e3e7c4eaa5b1e488 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Apr 2025 13:58:40 +0100
Subject: [PATCH 18/20] DSPAccelerator: update comment (NFC)

---
 Source/Core/Core/DSP/DSPAccelerator.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp
index 2ecf3121ce2..8e0fbf5a9fb 100644
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@@ -220,9 +220,8 @@ u16 Accelerator::ReadSample(const s16* coefs)
   }
 
   // Check for loop.
-  // Somehow, YN1 and YN2 must be initialized with their "loop" values,
-  // so yeah, it seems likely that we should raise an exception to let
-  // the DSP program do that, at least if DSP_FORMAT == 0x0A.
+  // YN1 and YN2 need to be initialized with their "loop" values,
+  // which is usually done upon this exception.
   if (m_current_address == (m_end_address + step_size - 1))
   {
     // Set address back to start address.

From 80adfc606d6a7346233f5dee663cf6418629564b Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Apr 2025 17:01:14 +0100
Subject: [PATCH 19/20] docs/DSP: Rename SCALE to PRED_SCALE

---
 .../GameCube_DSP_Users_Manual.tex                         | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index 52a600415ef..aec339c55e0 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -704,7 +704,7 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \Address{0xFFD7} & \Register{ACEAL}   & Accelerator end address L     \\ \hline
 \Address{0xFFD8} & \Register{ACCAH}   & Accelerator current address H \\ \hline
 \Address{0xFFD9} & \Register{ACCAL}   & Accelerator current address L \\ \hline
-\Address{0xFFDA} & \Register{SCALE}   & ADPCM predictor and scale     \\ \hline
+\Address{0xFFDA} & \Register{PRED\_SCALE} & ADPCM predictor and scale \\ \hline
 \Address{0xFFDB} & \Register{YN1}     & ADPCM output history Y[N - 1] \\ \hline
 \Address{0xFFDC} & \Register{YN2}     & ADPCM output history Y[N - 2] \\ \hline
 \Address{0xFFDD} & \Register{ACDSAMP} & Accelerator processed sample  \\ \hline
@@ -840,7 +840,7 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitDescription{15--0}{d}{R/W}{Bits 15--0 of the accelerator current address}
 \end{RegisterBitDescriptions}
 
-\RegisterBitOverview{0xFFDA}{SCALE}{ADPCM predictor and scale}{---- ---- -ppp ssss}
+\RegisterBitOverview{0xFFDA}{PRED\_SCALE}{ADPCM predictor and scale}{---- ---- -ppp ssss}
 
 \begin{RegisterBitDescriptions}
 \RegisterBitDescription{6--4}{d}{R/W}{Used to decide which pair of coefficients to use (\Register{COEF\_A1\_p} and \Register{COEF\_A2\_p}, at $\Address{0xFFA0} + 2p$ and $\Address{0xFFA0} + 2p + 1$)}
@@ -852,13 +852,13 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitOverview{0xFFDB}{YN1}{ADPCM YN1}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDSAMP} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A1 coefficient selected by SCALE and scaled per FORMAT.}
+\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDSAMP} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A1 coefficient selected by PRED\_SCALE and scaled per FORMAT.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFDC}{YN2}{ADPCM YN2}{dddd dddd dddd dddd}
 
 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A2 coefficient selected by SCALE and scaled per FORMAT.  Writing this value starts the accelerator.}
+\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A2 coefficient selected by PRED\_SCALE and scaled per FORMAT.  Writing this value starts the accelerator.}
 \end{RegisterBitDescriptions}
 
 \RegisterBitOverview{0xFFDD}{ACDSAMP}{Accelerator data}{dddd dddd dddd dddd}

From a6290caa2eaa7cdfffe43ca413d18665c40a4713 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Apr 2025 19:13:41 +0100
Subject: [PATCH 20/20] DSPHLE: set accelerator gain

---
 Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp | 5 ++---
 Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
index 0f8467f693e..a143fe450c7 100644
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
@@ -266,12 +266,11 @@ void AESndAccelerator::WriteMemory(u32 address, u8 value)
 
 static constexpr std::array<s16, 16> ACCELERATOR_COEFS = {};  // all zeros
 
-void AESndUCode::SetUpAccelerator(u16 format, [[maybe_unused]] u16 gain)
+void AESndUCode::SetUpAccelerator(u16 format, u16 gain)
 {
   // setup_accl
   m_accelerator.SetSampleFormat(format);
-  // not currently implemented, but it doesn't matter since the gain is configured to be a no-op
-  // m_accelerator.SetGain(gain);
+  m_accelerator.SetGain(gain);
   m_accelerator.SetStartAddress(m_parameter_block.buf_start);
   m_accelerator.SetEndAddress(m_parameter_block.buf_end);
   m_accelerator.SetCurrentAddress(m_parameter_block.buf_curr);
diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
index e5aefd0c770..4822af6c56d 100644
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
@@ -183,6 +183,7 @@ void AcceleratorSetup(HLEAccelerator* accelerator, PB_TYPE* pb)
   accelerator->SetSampleFormat(pb->audio_addr.sample_format);
   accelerator->SetYn1(pb->adpcm.yn1);
   accelerator->SetYn2(pb->adpcm.yn2);
+  accelerator->SetGain(pb->adpcm.gain);
   accelerator->SetPredScale(pb->adpcm.pred_scale);
 }