From 8f93ac1de8aed3937943f8e4753576315a3e57e9 Mon Sep 17 00:00:00 2001
From: Yarchik <spoko.dev@gmail.com>
Date: Tue, 23 Jun 2026 01:42:07 +0100
Subject: [PATCH] fix(utf32): reassemble split codepoint from overflow, not
 source index

When a 4-byte UTF-32 unit is split across two stream chunks, the decoder
filled `this.overflow` to four bytes and then read it back with the source
index `i` (`overflow[i]`...`overflow[i + 3]`) instead of `overflow[0]`...
`overflow[3]`. Since `overflow` only holds indices 0-3, the read landed
out of range whenever `i > 0`, so every codepoint straddling a chunk
boundary decoded to U+0000 (LE) or a byte-shifted character (BE).

This block was copied from the main loop (which correctly uses `src[i]`);
the index just was not adjusted for the overflow buffer. Whole-buffer
decode was unaffected, which is why existing tests passed.
---
 encodings/utf32.js |  4 ++--
 test/utf32-test.js | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/encodings/utf32.js b/encodings/utf32.js
index 72317893..aae73751 100644
--- a/encodings/utf32.js
+++ b/encodings/utf32.js
@@ -113,9 +113,9 @@ Utf32Decoder.prototype.write = function (src) {
       // NOTE: codepoint is a signed int32 and can be negative.
       // NOTE: We copied this block from below to help V8 optimize it (it works with array, not buffer).
       if (isLE) {
-        codepoint = overflow[i] | (overflow[i + 1] << 8) | (overflow[i + 2] << 16) | (overflow[i + 3] << 24)
+        codepoint = overflow[0] | (overflow[1] << 8) | (overflow[2] << 16) | (overflow[3] << 24)
       } else {
-        codepoint = overflow[i + 3] | (overflow[i + 2] << 8) | (overflow[i + 1] << 16) | (overflow[i] << 24)
+        codepoint = overflow[3] | (overflow[2] << 8) | (overflow[1] << 16) | (overflow[0] << 24)
       }
       overflow.length = 0
 
diff --git a/test/utf32-test.js b/test/utf32-test.js
index f43156c5..0635ecdd 100644
--- a/test/utf32-test.js
+++ b/test/utf32-test.js
@@ -63,6 +63,14 @@ describe("UTF-32LE codec", function () {
     assert.equal(iconv.decode(Buffer.from([0x61, 0, 0, 0, 0]), "UTF32-LE"), "a")
   })
 
+  it("decodes correctly when codepoints are split across stream chunks", function () {
+    for (var at = 1; at < utf32leBuf.length; at++) {
+      var decoder = iconv.getDecoder("utf-32le")
+      var res = decoder.write(utf32leBuf.slice(0, at)) + decoder.write(utf32leBuf.slice(at)) + (decoder.end() || "")
+      assert.equal(res, testStr, "split at byte " + at)
+    }
+  })
+
   it("handles invalid surrogates gracefully", function () {
     var encoded = iconv.encode(testStr2, "UTF32-LE")
     assert.equal(escape(iconv.decode(encoded, "UTF32-LE")), escape(testStr2))
@@ -114,6 +122,14 @@ describe("UTF-32BE codec", function () {
     assert.equal(iconv.decode(Buffer.from([0, 0, 0, 0x61, 0]), "UTF32-BE"), "a")
   })
 
+  it("decodes correctly when codepoints are split across stream chunks", function () {
+    for (var at = 1; at < utf32beBuf.length; at++) {
+      var decoder = iconv.getDecoder("utf-32be")
+      var res = decoder.write(utf32beBuf.slice(0, at)) + decoder.write(utf32beBuf.slice(at)) + (decoder.end() || "")
+      assert.equal(res, testStr, "split at byte " + at)
+    }
+  })
+
   it("handles invalid surrogates gracefully", function () {
     var encoded = iconv.encode(testStr2, "UTF32-BE")
     assert.equal(escape(iconv.decode(encoded, "UTF32-BE")), escape(testStr2))