From 41a3c16b7f341262b3f4637a57feaa31485cca3b Mon Sep 17 00:00:00 2001 From: Meghan Denny Date: Wed, 22 Jan 2025 22:55:01 -0800 Subject: [PATCH 1/6] node: fix buffer includes+indexof --- src/bun.js/bindings/JSBuffer.cpp | 198 +++--- .../test/parallel/test-buffer-includes.js | 302 +++++++++ .../node/test/parallel/test-buffer-indexof.js | 629 ++++++++++++++++++ 3 files changed, 1051 insertions(+), 78 deletions(-) create mode 100644 test/js/node/test/parallel/test-buffer-includes.js create mode 100644 test/js/node/test/parallel/test-buffer-indexof.js diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index b8346a3ec386ff..37c33cd047b5ce 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1326,6 +1326,37 @@ extern "C" void* zig_memmem(const void* haystack, size_t haystack_len, const voi #define MEMMEM_IMPL memmem #endif +static ssize_t indexOfOffset(size_t length, ssize_t offset_i64, ssize_t needle_length, bool is_forward) +{ + ssize_t length_i64 = static_cast(length); + if (offset_i64 < 0) { + if (offset_i64 + length_i64 >= 0) { + // Negative offsets count backwards from the end of the buffer. + return length_i64 + offset_i64; + } else if (is_forward || needle_length == 0) { + // indexOf from before the start of the buffer: search the whole buffer. + return 0; + } else { + // lastIndexOf from before the start of the buffer: no match. + return -1; + } + } else { + if (offset_i64 + needle_length <= length_i64) { + // Valid positive offset. + return offset_i64; + } else if (needle_length == 0) { + // Out of buffer bounds, but empty needle: point to end of buffer. + return length_i64; + } else if (is_forward) { + // indexOf from past the end of the buffer: no match. + return -1; + } else { + // lastIndexOf from past the end of the buffer: search the whole buffer. + return length_i64 - 1; + } + } +} + static int64_t indexOf(const uint8_t* thisPtr, int64_t thisLength, const uint8_t* valuePtr, int64_t valueLength, int64_t byteOffset) { if (thisLength < valueLength + byteOffset) @@ -1339,6 +1370,28 @@ static int64_t indexOf(const uint8_t* thisPtr, int64_t thisLength, const uint8_t return -1; } +static int64_t indexOf16(const uint8_t* thisPtr, int64_t thisLength, const uint8_t* valuePtr, int64_t valueLength, int64_t byteOffset) +{ + size_t finalresult = 0; + if (thisLength == 1) return -1; + thisLength = thisLength / 2 * 2; + if (valueLength == 1) return -1; + valueLength = valueLength / 2 * 2; + byteOffset = byteOffset / 2 * 2; + while (true) { + auto res = indexOf(thisPtr, thisLength, valuePtr, valueLength, byteOffset); + if (res == -1) return -1; + if (res % 2 == 1) { + thisPtr += res + 1; + thisLength -= res + 1; + finalresult += res + 1; + continue; + } + finalresult += res; + return finalresult; + } +} + static int64_t lastIndexOf(const uint8_t* thisPtr, int64_t thisLength, const uint8_t* valuePtr, int64_t valueLength, int64_t byteOffset) { auto start = thisPtr; @@ -1350,108 +1403,97 @@ static int64_t lastIndexOf(const uint8_t* thisPtr, int64_t thisLength, const uin return -1; } -static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation::ClassParameter castedThis, bool last) +static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation::ClassParameter buffer, bool last) { - auto& vm = JSC::getVM(lexicalGlobalObject); + auto& vm = lexicalGlobalObject->vm(); auto scope = DECLARE_THROW_SCOPE(vm); - if (callFrame->argumentCount() < 1) { - throwVMError(lexicalGlobalObject, scope, createNotEnoughArgumentsError(lexicalGlobalObject)); - return -1; - } + bool dir = !last; + const uint8_t* typedVector = buffer->typedVector(); + size_t byteLength = buffer->byteLength(); + std::optional encoding = std::nullopt; - auto value = callFrame->uncheckedArgument(0); - WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8; + if (byteLength == 0) return -1; - int64_t length = static_cast(castedThis->byteLength()); - const uint8_t* typedVector = castedThis->typedVector(); + auto valueValue = callFrame->argument(0); + auto byteOffsetValue = callFrame->argument(1); + auto encodingValue = callFrame->argument(2); - int64_t byteOffset = last ? length - 1 : 0; + if (byteOffsetValue.isString()) { + encodingValue = byteOffsetValue; + byteOffsetValue = jsUndefined(); + } else { + double byteOffset = byteOffsetValue.toNumber(lexicalGlobalObject); + RETURN_IF_EXCEPTION(scope, -1); + if (byteOffset > 0x7fffffffp0f) byteOffsetValue = jsDoubleNumber(0x7fffffffp0f); + if (byteOffset < -0x80000000p0f) byteOffsetValue = jsDoubleNumber(-0x80000000p0f); + } - if (callFrame->argumentCount() > 1) { - EnsureStillAliveScope arg1 = callFrame->uncheckedArgument(1); - if (arg1.value().isString()) { - encoding = parseEncoding(lexicalGlobalObject, scope, arg1.value()); - RETURN_IF_EXCEPTION(scope, -1); - } else { - auto byteOffset_ = arg1.value().toNumber(lexicalGlobalObject); - RETURN_IF_EXCEPTION(scope, -1); - - if (std::isnan(byteOffset_) || std::isinf(byteOffset_)) { - byteOffset = last ? length - 1 : 0; - } else if (byteOffset_ < 0) { - byteOffset = length + static_cast(byteOffset_); - } else { - byteOffset = static_cast(byteOffset_); - } + byteOffsetValue = jsDoubleNumber(byteOffsetValue.toNumber(lexicalGlobalObject)); + RETURN_IF_EXCEPTION(scope, -1); + if (std::isnan(byteOffsetValue.asNumber())) byteOffsetValue = jsNumber(dir ? 0 : byteLength); - if (last) { - if (byteOffset < 0) { - return -1; - } else if (byteOffset > length - 1) { - byteOffset = length - 1; - } - } else { - if (byteOffset <= 0) { - byteOffset = 0; - } else if (byteOffset > length - 1) { - return -1; - } - } - - if (callFrame->argumentCount() > 2) { - EnsureStillAliveScope encodingValue = callFrame->uncheckedArgument(2); - if (!encodingValue.value().isUndefined()) { - encoding = parseEncoding(lexicalGlobalObject, scope, encodingValue.value()); - RETURN_IF_EXCEPTION(scope, -1); - } + if (valueValue.isNumber()) { + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), 1, dir); + if (byteOffset == -1) return -1; + uint8_t byteValue = (valueValue.toInt32(lexicalGlobalObject)) % 256; + RETURN_IF_EXCEPTION(scope, -1); + if (last) { + for (int64_t i = byteOffset; i >= 0; --i) { + if (byteValue == typedVector[i]) return i; } + } else { + const void* offset = memchr(reinterpret_cast(typedVector + byteOffset), byteValue, byteLength - byteOffset); + if (offset != NULL) return static_cast(offset) - typedVector; } + return -1; } - if (value.isString()) { - auto* str = value.toStringOrNull(lexicalGlobalObject); - RETURN_IF_EXCEPTION(scope, -1); + WTF::String encodingString; + if (!encodingValue.isUndefined()) { + encodingString = encodingValue.toWTFString(lexicalGlobalObject); + RETURN_IF_EXCEPTION(scope, {}); + encoding = parseEnumeration2(*lexicalGlobalObject, encodingString); + } else { + encoding = BufferEncodingType::utf8; + } - JSC::EncodedJSValue encodedBuffer = constructFromEncoding(lexicalGlobalObject, str, encoding); + if (valueValue.isString()) { + if (!encoding.has_value()) { + return Bun::ERR::UNKNOWN_ENCODING(scope, lexicalGlobalObject, encodingString); + } + auto* str = valueValue.toStringOrNull(lexicalGlobalObject); + RETURN_IF_EXCEPTION(scope, -1); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), str->length(), dir); + if (byteOffset == -1) return -1; + if (str->length() == 0) return byteOffset; + JSC::EncodedJSValue encodedBuffer = constructFromEncoding(lexicalGlobalObject, str, encoding.value()); auto* arrayValue = JSC::jsDynamicCast(JSC::JSValue::decode(encodedBuffer)); int64_t lengthValue = static_cast(arrayValue->byteLength()); const uint8_t* typedVectorValue = arrayValue->typedVector(); if (last) { - return lastIndexOf(typedVector, length, typedVectorValue, lengthValue, byteOffset); + return lastIndexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); } else { - return indexOf(typedVector, length, typedVectorValue, lengthValue, byteOffset); - } - } else if (value.isNumber()) { - uint8_t byteValue = static_cast((value.toInt32(lexicalGlobalObject)) % 256); - RETURN_IF_EXCEPTION(scope, -1); - - if (last) { - for (int64_t i = byteOffset; i >= 0; --i) { - if (byteValue == typedVector[i]) { - return i; - } - } - } else { - const void* offset = memchr(reinterpret_cast(typedVector + byteOffset), byteValue, length - byteOffset); - if (offset != NULL) { - return static_cast(static_cast(offset) - typedVector); - } + if (encoding.value() == BufferEncodingType::ucs2) return indexOf16(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + return indexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); } + } - return -1; - } else if (auto* arrayValue = JSC::jsDynamicCast(value)) { - size_t lengthValue = arrayValue->byteLength(); - const uint8_t* typedVectorValue = arrayValue->typedVector(); + if (auto* array = JSC::jsDynamicCast(valueValue)) { + if (!encoding.has_value()) encoding = BufferEncodingType::utf8; + size_t lengthValue = array->byteLength(); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), lengthValue, dir); + if (byteOffset == -1) return -1; + if (lengthValue == 0) return byteOffset; + const uint8_t* typedVectorValue = array->typedVector(); if (last) { - return lastIndexOf(typedVector, length, typedVectorValue, lengthValue, byteOffset); + return lastIndexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); } else { - return indexOf(typedVector, length, typedVectorValue, lengthValue, byteOffset); + if (encoding.value() == BufferEncodingType::ucs2) return indexOf16(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + return indexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); } - } else { - throwTypeError(lexicalGlobalObject, scope, "Invalid value type"_s); - return -1; } + Bun::ERR::INVALID_ARG_TYPE(scope, lexicalGlobalObject, "value"_s, "number, string, Buffer, or Uint8Array"_s, valueValue); return -1; } diff --git a/test/js/node/test/parallel/test-buffer-includes.js b/test/js/node/test/parallel/test-buffer-includes.js new file mode 100644 index 00000000000000..e5fb6b076a7456 --- /dev/null +++ b/test/js/node/test/parallel/test-buffer-includes.js @@ -0,0 +1,302 @@ +'use strict'; +const common = require('../common'); +const assert = require('assert'); + +const b = Buffer.from('abcdef'); +const buf_a = Buffer.from('a'); +const buf_bc = Buffer.from('bc'); +const buf_f = Buffer.from('f'); +const buf_z = Buffer.from('z'); +const buf_empty = Buffer.from(''); + +assert(b.includes('a')); +assert(!b.includes('a', 1)); +assert(!b.includes('a', -1)); +assert(!b.includes('a', -4)); +assert(b.includes('a', -b.length)); +assert(b.includes('a', NaN)); +assert(b.includes('a', -Infinity)); +assert(!b.includes('a', Infinity)); +assert(b.includes('bc')); +assert(!b.includes('bc', 2)); +assert(!b.includes('bc', -1)); +assert(!b.includes('bc', -3)); +assert(b.includes('bc', -5)); +assert(b.includes('bc', NaN)); +assert(b.includes('bc', -Infinity)); +assert(!b.includes('bc', Infinity)); +assert(b.includes('f'), b.length - 1); +assert(!b.includes('z')); +assert(b.includes('')); +assert(b.includes('', 1)); +assert(b.includes('', b.length + 1)); +assert(b.includes('', Infinity)); +assert(b.includes(buf_a)); +assert(!b.includes(buf_a, 1)); +assert(!b.includes(buf_a, -1)); +assert(!b.includes(buf_a, -4)); +assert(b.includes(buf_a, -b.length)); +assert(b.includes(buf_a, NaN)); +assert(b.includes(buf_a, -Infinity)); +assert(!b.includes(buf_a, Infinity)); +assert(b.includes(buf_bc)); +assert(!b.includes(buf_bc, 2)); +assert(!b.includes(buf_bc, -1)); +assert(!b.includes(buf_bc, -3)); +assert(b.includes(buf_bc, -5)); +assert(b.includes(buf_bc, NaN)); +assert(b.includes(buf_bc, -Infinity)); +assert(!b.includes(buf_bc, Infinity)); +assert(b.includes(buf_f), b.length - 1); +assert(!b.includes(buf_z)); +assert(b.includes(buf_empty)); +assert(b.includes(buf_empty, 1)); +assert(b.includes(buf_empty, b.length + 1)); +assert(b.includes(buf_empty, Infinity)); +assert(b.includes(0x61)); +assert(!b.includes(0x61, 1)); +assert(!b.includes(0x61, -1)); +assert(!b.includes(0x61, -4)); +assert(b.includes(0x61, -b.length)); +assert(b.includes(0x61, NaN)); +assert(b.includes(0x61, -Infinity)); +assert(!b.includes(0x61, Infinity)); +assert(!b.includes(0x0)); + +// test offsets +assert(b.includes('d', 2)); +assert(b.includes('f', 5)); +assert(b.includes('f', -1)); +assert(!b.includes('f', 6)); + +assert(b.includes(Buffer.from('d'), 2)); +assert(b.includes(Buffer.from('f'), 5)); +assert(b.includes(Buffer.from('f'), -1)); +assert(!b.includes(Buffer.from('f'), 6)); + +assert(!Buffer.from('ff').includes(Buffer.from('f'), 1, 'ucs2')); + +// test hex encoding +assert.strictEqual( + Buffer.from(b.toString('hex'), 'hex') + .includes('64', 0, 'hex'), + true +); +assert.strictEqual( + Buffer.from(b.toString('hex'), 'hex') + .includes(Buffer.from('64', 'hex'), 0, 'hex'), + true +); + +// Test base64 encoding +assert.strictEqual( + Buffer.from(b.toString('base64'), 'base64') + .includes('ZA==', 0, 'base64'), + true +); +assert.strictEqual( + Buffer.from(b.toString('base64'), 'base64') + .includes(Buffer.from('ZA==', 'base64'), 0, 'base64'), + true +); + +// test ascii encoding +assert.strictEqual( + Buffer.from(b.toString('ascii'), 'ascii') + .includes('d', 0, 'ascii'), + true +); +assert.strictEqual( + Buffer.from(b.toString('ascii'), 'ascii') + .includes(Buffer.from('d', 'ascii'), 0, 'ascii'), + true +); + +// Test latin1 encoding +assert.strictEqual( + Buffer.from(b.toString('latin1'), 'latin1') + .includes('d', 0, 'latin1'), + true +); +assert.strictEqual( + Buffer.from(b.toString('latin1'), 'latin1') + .includes(Buffer.from('d', 'latin1'), 0, 'latin1'), + true +); + +// Test binary encoding +assert.strictEqual( + Buffer.from(b.toString('binary'), 'binary') + .includes('d', 0, 'binary'), + true +); +assert.strictEqual( + Buffer.from(b.toString('binary'), 'binary') + .includes(Buffer.from('d', 'binary'), 0, 'binary'), + true +); + + +// test ucs2 encoding +let twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); + +assert(twoByteString.includes('\u0395', 4, 'ucs2')); +assert(twoByteString.includes('\u03a3', -4, 'ucs2')); +assert(twoByteString.includes('\u03a3', -6, 'ucs2')); +assert(twoByteString.includes( + Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2')); +assert(!twoByteString.includes('\u03a3', -2, 'ucs2')); + +const mixedByteStringUcs2 = + Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2'); +assert(mixedByteStringUcs2.includes('bc', 0, 'ucs2')); +assert(mixedByteStringUcs2.includes('\u03a3', 0, 'ucs2')); +assert(!mixedByteStringUcs2.includes('\u0396', 0, 'ucs2')); + +assert.ok( + mixedByteStringUcs2.includes(Buffer.from('bc', 'ucs2'), 0, 'ucs2')); +assert.ok( + mixedByteStringUcs2.includes(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2')); +assert.ok( + !mixedByteStringUcs2.includes(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')); + +twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); + +// Test single char pattern +assert(twoByteString.includes('\u039a', 0, 'ucs2')); +assert(twoByteString.includes('\u0391', 0, 'ucs2'), 'Alpha'); +assert(twoByteString.includes('\u03a3', 0, 'ucs2'), 'First Sigma'); +assert(twoByteString.includes('\u03a3', 6, 'ucs2'), 'Second Sigma'); +assert(twoByteString.includes('\u0395', 0, 'ucs2'), 'Epsilon'); +assert(!twoByteString.includes('\u0392', 0, 'ucs2'), 'Not beta'); + +// Test multi-char pattern +assert(twoByteString.includes('\u039a\u0391', 0, 'ucs2'), 'Lambda Alpha'); +assert(twoByteString.includes('\u0391\u03a3', 0, 'ucs2'), 'Alpha Sigma'); +assert(twoByteString.includes('\u03a3\u03a3', 0, 'ucs2'), 'Sigma Sigma'); +assert(twoByteString.includes('\u03a3\u0395', 0, 'ucs2'), 'Sigma Epsilon'); + +const mixedByteStringUtf8 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395'); +assert(mixedByteStringUtf8.includes('bc')); +assert(mixedByteStringUtf8.includes('bc', 5)); +assert(mixedByteStringUtf8.includes('bc', -8)); +assert(mixedByteStringUtf8.includes('\u03a3')); +assert(!mixedByteStringUtf8.includes('\u0396')); + + +// Test complex string includes algorithms. Only trigger for long strings. +// Long string that isn't a simple repeat of a shorter string. +let longString = 'A'; +for (let i = 66; i < 76; i++) { // from 'B' to 'K' + longString = longString + String.fromCharCode(i) + longString; +} + +const longBufferString = Buffer.from(longString); + +// Pattern of 15 chars, repeated every 16 chars in long +let pattern = 'ABACABADABACABA'; +for (let i = 0; i < longBufferString.length - pattern.length; i += 7) { + const includes = longBufferString.includes(pattern, i); + assert(includes, `Long ABACABA...-string at index ${i}`); +} +assert(longBufferString.includes('AJABACA'), 'Long AJABACA, First J'); +assert(longBufferString.includes('AJABACA', 511), 'Long AJABACA, Second J'); + +pattern = 'JABACABADABACABA'; +assert(longBufferString.includes(pattern), 'Long JABACABA..., First J'); +assert(longBufferString.includes(pattern, 512), 'Long JABACABA..., Second J'); + +// Search for a non-ASCII string in a pure ASCII string. +const asciiString = Buffer.from( + 'arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf'); +assert(!asciiString.includes('\x2061')); +assert(asciiString.includes('leb', 0)); + +// Search in string containing many non-ASCII chars. +const allCharsString = Array.from({ length: 65536 }, (_, i) => String.fromCharCode(i)).join(''); +const allCharsBufferUtf8 = Buffer.from(allCharsString); +const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2'); + +// Search for string long enough to trigger complex search with ASCII pattern +// and UC16 subject. +assert(!allCharsBufferUtf8.includes('notfound')); +assert(!allCharsBufferUcs2.includes('notfound')); + +// Find substrings in Utf8. +let lengths = [1, 3, 15]; // Single char, simple and complex. +let indices = [0x5, 0x60, 0x400, 0x680, 0x7ee, 0xFF02, 0x16610, 0x2f77b]; +for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { + for (let i = 0; i < indices.length; i++) { + const index = indices[i]; + let length = lengths[lengthIndex]; + + if (index + length > 0x7F) { + length = 2 * length; + } + + if (index + length > 0x7FF) { + length = 3 * length; + } + + if (index + length > 0xFFFF) { + length = 4 * length; + } + + const patternBufferUtf8 = allCharsBufferUtf8.slice(index, index + length); + assert(index, allCharsBufferUtf8.includes(patternBufferUtf8)); + + const patternStringUtf8 = patternBufferUtf8.toString(); + assert(index, allCharsBufferUtf8.includes(patternStringUtf8)); + } +} + +// Find substrings in Usc2. +lengths = [2, 4, 16]; // Single char, simple and complex. +indices = [0x5, 0x65, 0x105, 0x205, 0x285, 0x2005, 0x2085, 0xfff0]; +for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { + for (let i = 0; i < indices.length; i++) { + const index = indices[i] * 2; + const length = lengths[lengthIndex]; + + const patternBufferUcs2 = + allCharsBufferUcs2.slice(index, index + length); + assert.ok( + allCharsBufferUcs2.includes(patternBufferUcs2, 0, 'ucs2')); + + const patternStringUcs2 = patternBufferUcs2.toString('ucs2'); + assert.ok( + allCharsBufferUcs2.includes(patternStringUcs2, 0, 'ucs2')); + } +} + +[ + () => { }, + {}, + [], +].forEach((val) => { + assert.throws( + () => b.includes(val), + { + code: 'ERR_INVALID_ARG_TYPE', + name: 'TypeError', + message: 'The "value" argument must be of type number, string, Buffer, or Uint8Array.' + + common.invalidArgTypeHelper(val) + } + ); +}); + +// Test truncation of Number arguments to uint8 +{ + const buf = Buffer.from('this is a test'); + assert.ok(buf.includes(0x6973)); + assert.ok(buf.includes(0x697320)); + assert.ok(buf.includes(0x69732069)); + assert.ok(buf.includes(0x697374657374)); + assert.ok(buf.includes(0x69737374)); + assert.ok(buf.includes(0x69737465)); + assert.ok(buf.includes(0x69737465)); + assert.ok(buf.includes(-140)); + assert.ok(buf.includes(-152)); + assert.ok(!buf.includes(0xff)); + assert.ok(!buf.includes(0xffff)); +} diff --git a/test/js/node/test/parallel/test-buffer-indexof.js b/test/js/node/test/parallel/test-buffer-indexof.js new file mode 100644 index 00000000000000..16b4dd4009b61a --- /dev/null +++ b/test/js/node/test/parallel/test-buffer-indexof.js @@ -0,0 +1,629 @@ +'use strict'; +const common = require('../common'); +const assert = require('assert'); + +const b = Buffer.from('abcdef'); +const buf_a = Buffer.from('a'); +const buf_bc = Buffer.from('bc'); +const buf_f = Buffer.from('f'); +const buf_z = Buffer.from('z'); +const buf_empty = Buffer.from(''); + +const s = 'abcdef'; + +assert.strictEqual(b.indexOf('a'), 0); +assert.strictEqual(b.indexOf('a', 1), -1); +assert.strictEqual(b.indexOf('a', -1), -1); +assert.strictEqual(b.indexOf('a', -4), -1); +assert.strictEqual(b.indexOf('a', -b.length), 0); +assert.strictEqual(b.indexOf('a', NaN), 0); +assert.strictEqual(b.indexOf('a', -Infinity), 0); +assert.strictEqual(b.indexOf('a', Infinity), -1); +assert.strictEqual(b.indexOf('bc'), 1); +assert.strictEqual(b.indexOf('bc', 2), -1); +assert.strictEqual(b.indexOf('bc', -1), -1); +assert.strictEqual(b.indexOf('bc', -3), -1); +assert.strictEqual(b.indexOf('bc', -5), 1); +assert.strictEqual(b.indexOf('bc', NaN), 1); +assert.strictEqual(b.indexOf('bc', -Infinity), 1); +assert.strictEqual(b.indexOf('bc', Infinity), -1); +assert.strictEqual(b.indexOf('f'), b.length - 1); +assert.strictEqual(b.indexOf('z'), -1); +assert.strictEqual(b.indexOf(''), 0); +assert.strictEqual(b.indexOf('', 1), 1); +assert.strictEqual(b.indexOf('', b.length + 1), b.length); +assert.strictEqual(b.indexOf('', Infinity), b.length); +assert.strictEqual(b.indexOf(buf_a), 0); +assert.strictEqual(b.indexOf(buf_a, 1), -1); +assert.strictEqual(b.indexOf(buf_a, -1), -1); +assert.strictEqual(b.indexOf(buf_a, -4), -1); +assert.strictEqual(b.indexOf(buf_a, -b.length), 0); +assert.strictEqual(b.indexOf(buf_a, NaN), 0); +assert.strictEqual(b.indexOf(buf_a, -Infinity), 0); +assert.strictEqual(b.indexOf(buf_a, Infinity), -1); +assert.strictEqual(b.indexOf(buf_bc), 1); +assert.strictEqual(b.indexOf(buf_bc, 2), -1); +assert.strictEqual(b.indexOf(buf_bc, -1), -1); +assert.strictEqual(b.indexOf(buf_bc, -3), -1); +assert.strictEqual(b.indexOf(buf_bc, -5), 1); +assert.strictEqual(b.indexOf(buf_bc, NaN), 1); +assert.strictEqual(b.indexOf(buf_bc, -Infinity), 1); +assert.strictEqual(b.indexOf(buf_bc, Infinity), -1); +assert.strictEqual(b.indexOf(buf_f), b.length - 1); +assert.strictEqual(b.indexOf(buf_z), -1); +assert.strictEqual(b.indexOf(buf_empty), 0); +assert.strictEqual(b.indexOf(buf_empty, 1), 1); +assert.strictEqual(b.indexOf(buf_empty, b.length + 1), b.length); +assert.strictEqual(b.indexOf(buf_empty, Infinity), b.length); +assert.strictEqual(b.indexOf(0x61), 0); +assert.strictEqual(b.indexOf(0x61, 1), -1); +assert.strictEqual(b.indexOf(0x61, -1), -1); +assert.strictEqual(b.indexOf(0x61, -4), -1); +assert.strictEqual(b.indexOf(0x61, -b.length), 0); +assert.strictEqual(b.indexOf(0x61, NaN), 0); +assert.strictEqual(b.indexOf(0x61, -Infinity), 0); +assert.strictEqual(b.indexOf(0x61, Infinity), -1); +assert.strictEqual(b.indexOf(0x0), -1); + +// test offsets +assert.strictEqual(b.indexOf('d', 2), 3); +assert.strictEqual(b.indexOf('f', 5), 5); +assert.strictEqual(b.indexOf('f', -1), 5); +assert.strictEqual(b.indexOf('f', 6), -1); + +assert.strictEqual(b.indexOf(Buffer.from('d'), 2), 3); +assert.strictEqual(b.indexOf(Buffer.from('f'), 5), 5); +assert.strictEqual(b.indexOf(Buffer.from('f'), -1), 5); +assert.strictEqual(b.indexOf(Buffer.from('f'), 6), -1); + +assert.strictEqual(Buffer.from('ff').indexOf(Buffer.from('f'), 1, 'ucs2'), -1); + +// Test invalid and uppercase encoding +assert.strictEqual(b.indexOf('b', 'utf8'), 1); +assert.strictEqual(b.indexOf('b', 'UTF8'), 1); +assert.strictEqual(b.indexOf('62', 'HEX'), 1); +assert.throws(() => b.indexOf('bad', 'enc'), /Unknown encoding: enc/); + +// test hex encoding +assert.strictEqual( + Buffer.from(b.toString('hex'), 'hex') + .indexOf('64', 0, 'hex'), + 3 +); +assert.strictEqual( + Buffer.from(b.toString('hex'), 'hex') + .indexOf(Buffer.from('64', 'hex'), 0, 'hex'), + 3 +); + +// Test base64 encoding +assert.strictEqual( + Buffer.from(b.toString('base64'), 'base64') + .indexOf('ZA==', 0, 'base64'), + 3 +); +assert.strictEqual( + Buffer.from(b.toString('base64'), 'base64') + .indexOf(Buffer.from('ZA==', 'base64'), 0, 'base64'), + 3 +); + +// Test base64url encoding +assert.strictEqual( + Buffer.from(b.toString('base64url'), 'base64url') + .indexOf('ZA==', 0, 'base64url'), + 3 +); + +// test ascii encoding +assert.strictEqual( + Buffer.from(b.toString('ascii'), 'ascii') + .indexOf('d', 0, 'ascii'), + 3 +); +assert.strictEqual( + Buffer.from(b.toString('ascii'), 'ascii') + .indexOf(Buffer.from('d', 'ascii'), 0, 'ascii'), + 3 +); + +// Test latin1 encoding +assert.strictEqual( + Buffer.from(b.toString('latin1'), 'latin1') + .indexOf('d', 0, 'latin1'), + 3 +); +assert.strictEqual( + Buffer.from(b.toString('latin1'), 'latin1') + .indexOf(Buffer.from('d', 'latin1'), 0, 'latin1'), + 3 +); +assert.strictEqual( + Buffer.from('aa\u00e8aa', 'latin1') + .indexOf('\u00e8', 'latin1'), + 2 +); +assert.strictEqual( + Buffer.from('\u00e8', 'latin1') + .indexOf('\u00e8', 'latin1'), + 0 +); +assert.strictEqual( + Buffer.from('\u00e8', 'latin1') + .indexOf(Buffer.from('\u00e8', 'latin1'), 'latin1'), + 0 +); + +// Test binary encoding +assert.strictEqual( + Buffer.from(b.toString('binary'), 'binary') + .indexOf('d', 0, 'binary'), + 3 +); +assert.strictEqual( + Buffer.from(b.toString('binary'), 'binary') + .indexOf(Buffer.from('d', 'binary'), 0, 'binary'), + 3 +); +assert.strictEqual( + Buffer.from('aa\u00e8aa', 'binary') + .indexOf('\u00e8', 'binary'), + 2 +); +assert.strictEqual( + Buffer.from('\u00e8', 'binary') + .indexOf('\u00e8', 'binary'), + 0 +); +assert.strictEqual( + Buffer.from('\u00e8', 'binary') + .indexOf(Buffer.from('\u00e8', 'binary'), 'binary'), + 0 +); + + +// Test optional offset with passed encoding +assert.strictEqual(Buffer.from('aaaa0').indexOf('30', 'hex'), 4); +assert.strictEqual(Buffer.from('aaaa00a').indexOf('3030', 'hex'), 4); + +{ + // Test usc2 and utf16le encoding + ['ucs2', 'utf16le'].forEach((encoding) => { + const twoByteString = Buffer.from( + '\u039a\u0391\u03a3\u03a3\u0395', encoding); + + assert.strictEqual(twoByteString.indexOf('\u0395', 4, encoding), 8); + assert.strictEqual(twoByteString.indexOf('\u03a3', -4, encoding), 6); + assert.strictEqual(twoByteString.indexOf('\u03a3', -6, encoding), 4); + assert.strictEqual(twoByteString.indexOf( + Buffer.from('\u03a3', encoding), -6, encoding), 4); + assert.strictEqual(-1, twoByteString.indexOf('\u03a3', -2, encoding)); + }); +} + +const mixedByteStringUcs2 = + Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2'); +assert.strictEqual(mixedByteStringUcs2.indexOf('bc', 0, 'ucs2'), 6); +assert.strictEqual(mixedByteStringUcs2.indexOf('\u03a3', 0, 'ucs2'), 10); +assert.strictEqual(-1, mixedByteStringUcs2.indexOf('\u0396', 0, 'ucs2')); + +assert.strictEqual( + mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2'), 6); +assert.strictEqual( + mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2'), 10); +assert.strictEqual( + -1, mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')); + +{ + const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); + + // Test single char pattern + assert.strictEqual(twoByteString.indexOf('\u039a', 0, 'ucs2'), 0); + let index = twoByteString.indexOf('\u0391', 0, 'ucs2'); + assert.strictEqual(index, 2, `Alpha - at index ${index}`); + index = twoByteString.indexOf('\u03a3', 0, 'ucs2'); + assert.strictEqual(index, 4, `First Sigma - at index ${index}`); + index = twoByteString.indexOf('\u03a3', 6, 'ucs2'); + assert.strictEqual(index, 6, `Second Sigma - at index ${index}`); + index = twoByteString.indexOf('\u0395', 0, 'ucs2'); + assert.strictEqual(index, 8, `Epsilon - at index ${index}`); + index = twoByteString.indexOf('\u0392', 0, 'ucs2'); + assert.strictEqual(-1, index, `Not beta - at index ${index}`); + + // Test multi-char pattern + index = twoByteString.indexOf('\u039a\u0391', 0, 'ucs2'); + assert.strictEqual(index, 0, `Lambda Alpha - at index ${index}`); + index = twoByteString.indexOf('\u0391\u03a3', 0, 'ucs2'); + assert.strictEqual(index, 2, `Alpha Sigma - at index ${index}`); + index = twoByteString.indexOf('\u03a3\u03a3', 0, 'ucs2'); + assert.strictEqual(index, 4, `Sigma Sigma - at index ${index}`); + index = twoByteString.indexOf('\u03a3\u0395', 0, 'ucs2'); + assert.strictEqual(index, 6, `Sigma Epsilon - at index ${index}`); +} + +const mixedByteStringUtf8 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395'); +assert.strictEqual(mixedByteStringUtf8.indexOf('bc'), 5); +assert.strictEqual(mixedByteStringUtf8.indexOf('bc', 5), 5); +assert.strictEqual(mixedByteStringUtf8.indexOf('bc', -8), 5); +assert.strictEqual(mixedByteStringUtf8.indexOf('\u03a3'), 7); +assert.strictEqual(mixedByteStringUtf8.indexOf('\u0396'), -1); + + +// Test complex string indexOf algorithms. Only trigger for long strings. +// Long string that isn't a simple repeat of a shorter string. +let longString = 'A'; +for (let i = 66; i < 76; i++) { // from 'B' to 'K' + longString = longString + String.fromCharCode(i) + longString; +} + +const longBufferString = Buffer.from(longString); + +// Pattern of 15 chars, repeated every 16 chars in long +let pattern = 'ABACABADABACABA'; +for (let i = 0; i < longBufferString.length - pattern.length; i += 7) { + const index = longBufferString.indexOf(pattern, i); + assert.strictEqual((i + 15) & ~0xf, index, + `Long ABACABA...-string at index ${i}`); +} + +let index = longBufferString.indexOf('AJABACA'); +assert.strictEqual(index, 510, `Long AJABACA, First J - at index ${index}`); +index = longBufferString.indexOf('AJABACA', 511); +assert.strictEqual(index, 1534, `Long AJABACA, Second J - at index ${index}`); + +pattern = 'JABACABADABACABA'; +index = longBufferString.indexOf(pattern); +assert.strictEqual(index, 511, `Long JABACABA..., First J - at index ${index}`); +index = longBufferString.indexOf(pattern, 512); +assert.strictEqual( + index, 1535, `Long JABACABA..., Second J - at index ${index}`); + +// Search for a non-ASCII string in a pure ASCII string. +const asciiString = Buffer.from( + 'arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf'); +assert.strictEqual(-1, asciiString.indexOf('\x2061')); +assert.strictEqual(asciiString.indexOf('leb', 0), 3); + +// Search in string containing many non-ASCII chars. +const allCharsString = Array.from({ length: 65536 }, (_, i) => String.fromCharCode(i)).join(''); +const allCharsBufferUtf8 = Buffer.from(allCharsString); +const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2'); + +// Search for string long enough to trigger complex search with ASCII pattern +// and UC16 subject. +assert.strictEqual(-1, allCharsBufferUtf8.indexOf('notfound')); +assert.strictEqual(-1, allCharsBufferUcs2.indexOf('notfound')); + +// Needle is longer than haystack, but only because it's encoded as UTF-16 +assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1); + +assert.strictEqual(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0); +assert.strictEqual(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1); + +// Haystack has odd length, but the needle is UCS2. +assert.strictEqual(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1); + +{ + // Find substrings in Utf8. + const lengths = [1, 3, 15]; // Single char, simple and complex. + const indices = [0x5, 0x60, 0x400, 0x680, 0x7ee, 0xFF02, 0x16610, 0x2f77b]; + for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { + for (let i = 0; i < indices.length; i++) { + const index = indices[i]; + let length = lengths[lengthIndex]; + + if (index + length > 0x7F) { + length = 2 * length; + } + + if (index + length > 0x7FF) { + length = 3 * length; + } + + if (index + length > 0xFFFF) { + length = 4 * length; + } + + const patternBufferUtf8 = allCharsBufferUtf8.slice(index, index + length); + assert.strictEqual(index, allCharsBufferUtf8.indexOf(patternBufferUtf8)); + + const patternStringUtf8 = patternBufferUtf8.toString(); + assert.strictEqual(index, allCharsBufferUtf8.indexOf(patternStringUtf8)); + } + } +} + +{ + // Find substrings in Usc2. + const lengths = [2, 4, 16]; // Single char, simple and complex. + const indices = [0x5, 0x65, 0x105, 0x205, 0x285, 0x2005, 0x2085, 0xfff0]; + for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { + for (let i = 0; i < indices.length; i++) { + const index = indices[i] * 2; + const length = lengths[lengthIndex]; + + const patternBufferUcs2 = + allCharsBufferUcs2.slice(index, index + length); + assert.strictEqual( + index, allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2')); + + const patternStringUcs2 = patternBufferUcs2.toString('ucs2'); + assert.strictEqual( + index, allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2')); + } + } +} + +[ + () => {}, + {}, + [], +].forEach((val) => { + assert.throws( + () => b.indexOf(val), + { + code: 'ERR_INVALID_ARG_TYPE', + name: 'TypeError', + message: 'The "value" argument must be of type number, string, Buffer, or Uint8Array.' + + common.invalidArgTypeHelper(val) + } + ); +}); + +// Test weird offset arguments. +// The following offsets coerce to NaN or 0, searching the whole Buffer +assert.strictEqual(b.indexOf('b', undefined), 1); +assert.strictEqual(b.indexOf('b', {}), 1); +assert.strictEqual(b.indexOf('b', 0), 1); +assert.strictEqual(b.indexOf('b', null), 1); +assert.strictEqual(b.indexOf('b', []), 1); + +// The following offset coerces to 2, in other words +[2] === 2 +assert.strictEqual(b.indexOf('b', [2]), -1); + +// Behavior should match String.indexOf() +assert.strictEqual( + b.indexOf('b', undefined), + s.indexOf('b', undefined)); +assert.strictEqual( + b.indexOf('b', {}), + s.indexOf('b', {})); +assert.strictEqual( + b.indexOf('b', 0), + s.indexOf('b', 0)); +assert.strictEqual( + b.indexOf('b', null), + s.indexOf('b', null)); +assert.strictEqual( + b.indexOf('b', []), + s.indexOf('b', [])); +assert.strictEqual( + b.indexOf('b', [2]), + s.indexOf('b', [2])); + +// All code for handling encodings is shared between Buffer.indexOf and +// Buffer.lastIndexOf, so only testing the separate lastIndexOf semantics. + +// Test lastIndexOf basic functionality; Buffer b contains 'abcdef'. +// lastIndexOf string: +assert.strictEqual(b.lastIndexOf('a'), 0); +assert.strictEqual(b.lastIndexOf('a', 1), 0); +assert.strictEqual(b.lastIndexOf('b', 1), 1); +assert.strictEqual(b.lastIndexOf('c', 1), -1); +assert.strictEqual(b.lastIndexOf('a', -1), 0); +assert.strictEqual(b.lastIndexOf('a', -4), 0); +assert.strictEqual(b.lastIndexOf('a', -b.length), 0); +assert.strictEqual(b.lastIndexOf('a', -b.length - 1), -1); +assert.strictEqual(b.lastIndexOf('a', NaN), 0); +assert.strictEqual(b.lastIndexOf('a', -Infinity), -1); +assert.strictEqual(b.lastIndexOf('a', Infinity), 0); +// lastIndexOf Buffer: +assert.strictEqual(b.lastIndexOf(buf_a), 0); +assert.strictEqual(b.lastIndexOf(buf_a, 1), 0); +assert.strictEqual(b.lastIndexOf(buf_a, -1), 0); +assert.strictEqual(b.lastIndexOf(buf_a, -4), 0); +assert.strictEqual(b.lastIndexOf(buf_a, -b.length), 0); +assert.strictEqual(b.lastIndexOf(buf_a, -b.length - 1), -1); +assert.strictEqual(b.lastIndexOf(buf_a, NaN), 0); +assert.strictEqual(b.lastIndexOf(buf_a, -Infinity), -1); +assert.strictEqual(b.lastIndexOf(buf_a, Infinity), 0); +assert.strictEqual(b.lastIndexOf(buf_bc), 1); +assert.strictEqual(b.lastIndexOf(buf_bc, 2), 1); +assert.strictEqual(b.lastIndexOf(buf_bc, -1), 1); +assert.strictEqual(b.lastIndexOf(buf_bc, -3), 1); +assert.strictEqual(b.lastIndexOf(buf_bc, -5), 1); +assert.strictEqual(b.lastIndexOf(buf_bc, -6), -1); +assert.strictEqual(b.lastIndexOf(buf_bc, NaN), 1); +assert.strictEqual(b.lastIndexOf(buf_bc, -Infinity), -1); +assert.strictEqual(b.lastIndexOf(buf_bc, Infinity), 1); +assert.strictEqual(b.lastIndexOf(buf_f), b.length - 1); +assert.strictEqual(b.lastIndexOf(buf_z), -1); +assert.strictEqual(b.lastIndexOf(buf_empty), b.length); +assert.strictEqual(b.lastIndexOf(buf_empty, 1), 1); +assert.strictEqual(b.lastIndexOf(buf_empty, b.length + 1), b.length); +assert.strictEqual(b.lastIndexOf(buf_empty, Infinity), b.length); +// lastIndexOf number: +assert.strictEqual(b.lastIndexOf(0x61), 0); +assert.strictEqual(b.lastIndexOf(0x61, 1), 0); +assert.strictEqual(b.lastIndexOf(0x61, -1), 0); +assert.strictEqual(b.lastIndexOf(0x61, -4), 0); +assert.strictEqual(b.lastIndexOf(0x61, -b.length), 0); +assert.strictEqual(b.lastIndexOf(0x61, -b.length - 1), -1); +assert.strictEqual(b.lastIndexOf(0x61, NaN), 0); +assert.strictEqual(b.lastIndexOf(0x61, -Infinity), -1); +assert.strictEqual(b.lastIndexOf(0x61, Infinity), 0); +assert.strictEqual(b.lastIndexOf(0x0), -1); + +// Test weird offset arguments. +// The following offsets coerce to NaN, searching the whole Buffer +assert.strictEqual(b.lastIndexOf('b', undefined), 1); +assert.strictEqual(b.lastIndexOf('b', {}), 1); + +// The following offsets coerce to 0 +assert.strictEqual(b.lastIndexOf('b', 0), -1); +assert.strictEqual(b.lastIndexOf('b', null), -1); +assert.strictEqual(b.lastIndexOf('b', []), -1); + +// The following offset coerces to 2, in other words +[2] === 2 +assert.strictEqual(b.lastIndexOf('b', [2]), 1); + +// Behavior should match String.lastIndexOf() +assert.strictEqual( + b.lastIndexOf('b', undefined), + s.lastIndexOf('b', undefined)); +assert.strictEqual( + b.lastIndexOf('b', {}), + s.lastIndexOf('b', {})); +assert.strictEqual( + b.lastIndexOf('b', 0), + s.lastIndexOf('b', 0)); +assert.strictEqual( + b.lastIndexOf('b', null), + s.lastIndexOf('b', null)); +assert.strictEqual( + b.lastIndexOf('b', []), + s.lastIndexOf('b', [])); +assert.strictEqual( + b.lastIndexOf('b', [2]), + s.lastIndexOf('b', [2])); + +// Test needles longer than the haystack. +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'ucs2'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'utf8'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'latin1'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'binary'), -1); +assert.strictEqual(b.lastIndexOf(Buffer.from('aaaaaaaaaaaaaaa')), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 2, 'ucs2'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 3, 'utf8'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 5, 'latin1'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 5, 'binary'), -1); +assert.strictEqual(b.lastIndexOf(Buffer.from('aaaaaaaaaaaaaaa'), 7), -1); + +// 你好 expands to a total of 6 bytes using UTF-8 and 4 bytes using UTF-16 +assert.strictEqual(buf_bc.lastIndexOf('你好', 'ucs2'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 'utf8'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 'latin1'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 'binary'), -1); +assert.strictEqual(buf_bc.lastIndexOf(Buffer.from('你好')), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 2, 'ucs2'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 3, 'utf8'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 5, 'latin1'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 5, 'binary'), -1); +assert.strictEqual(buf_bc.lastIndexOf(Buffer.from('你好'), 7), -1); + +// Test lastIndexOf on a longer buffer: +const bufferString = Buffer.from('a man a plan a canal panama'); +assert.strictEqual(bufferString.lastIndexOf('canal'), 15); +assert.strictEqual(bufferString.lastIndexOf('panama'), 21); +assert.strictEqual(bufferString.lastIndexOf('a man a plan a canal panama'), 0); +assert.strictEqual(-1, bufferString.lastIndexOf('a man a plan a canal mexico')); +assert.strictEqual(-1, bufferString + .lastIndexOf('a man a plan a canal mexico city')); +assert.strictEqual(-1, bufferString.lastIndexOf(Buffer.from('a'.repeat(1000)))); +assert.strictEqual(bufferString.lastIndexOf('a man a plan', 4), 0); +assert.strictEqual(bufferString.lastIndexOf('a '), 13); +assert.strictEqual(bufferString.lastIndexOf('a ', 13), 13); +assert.strictEqual(bufferString.lastIndexOf('a ', 12), 6); +assert.strictEqual(bufferString.lastIndexOf('a ', 5), 0); +assert.strictEqual(bufferString.lastIndexOf('a ', -1), 13); +assert.strictEqual(bufferString.lastIndexOf('a ', -27), 0); +assert.strictEqual(-1, bufferString.lastIndexOf('a ', -28)); + +// Test lastIndexOf for the case that the first character can be found, +// but in a part of the buffer that does not make search to search +// due do length constraints. +const abInUCS2 = Buffer.from('ab', 'ucs2'); +assert.strictEqual(-1, Buffer.from('µaaaa¶bbbb', 'latin1').lastIndexOf('µ')); +assert.strictEqual(-1, Buffer.from('µaaaa¶bbbb', 'binary').lastIndexOf('µ')); +assert.strictEqual(-1, Buffer.from('bc').lastIndexOf('ab')); +assert.strictEqual(-1, Buffer.from('abc').lastIndexOf('qa')); +assert.strictEqual(-1, Buffer.from('abcdef').lastIndexOf('qabc')); +assert.strictEqual(-1, Buffer.from('bc').lastIndexOf(Buffer.from('ab'))); +assert.strictEqual(-1, Buffer.from('bc', 'ucs2').lastIndexOf('ab', 'ucs2')); +assert.strictEqual(-1, Buffer.from('bc', 'ucs2').lastIndexOf(abInUCS2)); + +assert.strictEqual(Buffer.from('abc').lastIndexOf('ab'), 0); +assert.strictEqual(Buffer.from('abc').lastIndexOf('ab', 1), 0); +assert.strictEqual(Buffer.from('abc').lastIndexOf('ab', 2), 0); +assert.strictEqual(Buffer.from('abc').lastIndexOf('ab', 3), 0); + +// The above tests test the LINEAR and SINGLE-CHAR strategies. +// Now, we test the BOYER-MOORE-HORSPOOL strategy. +// Test lastIndexOf on a long buffer w multiple matches: +pattern = 'JABACABADABACABA'; +assert.strictEqual(longBufferString.lastIndexOf(pattern), 1535); +assert.strictEqual(longBufferString.lastIndexOf(pattern, 1535), 1535); +assert.strictEqual(longBufferString.lastIndexOf(pattern, 1534), 511); + +// Finally, give it a really long input to trigger fallback from BMH to +// regular BOYER-MOORE (which has better worst-case complexity). + +// Generate a really long Thue-Morse sequence of 'yolo' and 'swag', +// "yolo swag swag yolo swag yolo yolo swag" ..., goes on for about 5MB. +// This is hard to search because it all looks similar, but never repeats. + +// countBits returns the number of bits in the binary representation of n. +function countBits(n) { + let count; + for (count = 0; n > 0; count++) { + n = n & (n - 1); // remove top bit + } + return count; +} +const parts = []; +for (let i = 0; i < 1000000; i++) { + parts.push((countBits(i) % 2 === 0) ? 'yolo' : 'swag'); +} +const reallyLong = Buffer.from(parts.join(' ')); +assert.strictEqual(reallyLong.slice(0, 19).toString(), 'yolo swag swag yolo'); + +// Expensive reverse searches. Stress test lastIndexOf: +pattern = reallyLong.slice(0, 100000); // First 1/50th of the pattern. +assert.strictEqual(reallyLong.lastIndexOf(pattern), 4751360); +assert.strictEqual(reallyLong.lastIndexOf(pattern, 4000000), 3932160); +assert.strictEqual(reallyLong.lastIndexOf(pattern, 3000000), 2949120); +pattern = reallyLong.slice(100000, 200000); // Second 1/50th. +assert.strictEqual(reallyLong.lastIndexOf(pattern), 4728480); +pattern = reallyLong.slice(0, 1000000); // First 1/5th. +assert.strictEqual(reallyLong.lastIndexOf(pattern), 3932160); +pattern = reallyLong.slice(0, 2000000); // first 2/5ths. +assert.strictEqual(reallyLong.lastIndexOf(pattern), 0); + +// Test truncation of Number arguments to uint8 +{ + const buf = Buffer.from('this is a test'); + assert.strictEqual(buf.indexOf(0x6973), 3); + assert.strictEqual(buf.indexOf(0x697320), 4); + assert.strictEqual(buf.indexOf(0x69732069), 2); + assert.strictEqual(buf.indexOf(0x697374657374), 0); + assert.strictEqual(buf.indexOf(0x69737374), 0); + assert.strictEqual(buf.indexOf(0x69737465), 11); + assert.strictEqual(buf.indexOf(0x69737465), 11); + assert.strictEqual(buf.indexOf(-140), 0); + assert.strictEqual(buf.indexOf(-152), 1); + assert.strictEqual(buf.indexOf(0xff), -1); + assert.strictEqual(buf.indexOf(0xffff), -1); +} + +// Test that Uint8Array arguments are okay. +{ + const needle = new Uint8Array([ 0x66, 0x6f, 0x6f ]); + const haystack = Buffer.from('a foo b foo'); + assert.strictEqual(haystack.indexOf(needle), 2); + assert.strictEqual(haystack.lastIndexOf(needle), haystack.length - 3); +} + +// Avoid abort because of invalid usage +// see https://github.com/nodejs/node/issues/32753 +{ + assert.throws(() => { + const buffer = require('buffer'); + new buffer.Buffer.prototype.lastIndexOf(1, 'str'); + }, { + name: 'TypeError', + // code: 'ERR_INVALID_ARG_TYPE', + // message: 'The "buffer" argument must be an instance of Buffer, TypedArray, or DataView.' + + // ' Received an instance of lastIndexOf' + message: `function is not a constructor (evaluating 'new (require("buffer")).Buffer.prototype.lastIndexOf(1, "str")')` + }); +} From ac268ea1ed1703eccc57e20f08045a4298e550dc Mon Sep 17 00:00:00 2001 From: Meghan Denny Date: Thu, 23 Jan 2025 21:02:40 -0800 Subject: [PATCH 2/6] avoid the JSValue roundtrip --- src/bun.js/bindings/JSBuffer.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index 37c33cd047b5ce..b978923552bc63 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1417,23 +1417,23 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* auto valueValue = callFrame->argument(0); auto byteOffsetValue = callFrame->argument(1); auto encodingValue = callFrame->argument(2); + auto byteOffsetD = 0.0; if (byteOffsetValue.isString()) { encodingValue = byteOffsetValue; byteOffsetValue = jsUndefined(); + byteOffsetD = 0; } else { - double byteOffset = byteOffsetValue.toNumber(lexicalGlobalObject); + byteOffsetD = byteOffsetValue.toNumber(lexicalGlobalObject); RETURN_IF_EXCEPTION(scope, -1); - if (byteOffset > 0x7fffffffp0f) byteOffsetValue = jsDoubleNumber(0x7fffffffp0f); - if (byteOffset < -0x80000000p0f) byteOffsetValue = jsDoubleNumber(-0x80000000p0f); + if (byteOffsetD > 0x7fffffffp0f) byteOffsetD = 0x7fffffffp0f; + if (byteOffsetD < -0x80000000p0f) byteOffsetD = -0x80000000p0f; } - byteOffsetValue = jsDoubleNumber(byteOffsetValue.toNumber(lexicalGlobalObject)); - RETURN_IF_EXCEPTION(scope, -1); - if (std::isnan(byteOffsetValue.asNumber())) byteOffsetValue = jsNumber(dir ? 0 : byteLength); + if (std::isnan(byteOffsetD)) byteOffsetD = dir ? 0 : byteLength; if (valueValue.isNumber()) { - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), 1, dir); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, 1, dir); if (byteOffset == -1) return -1; uint8_t byteValue = (valueValue.toInt32(lexicalGlobalObject)) % 256; RETURN_IF_EXCEPTION(scope, -1); @@ -1463,7 +1463,7 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* } auto* str = valueValue.toStringOrNull(lexicalGlobalObject); RETURN_IF_EXCEPTION(scope, -1); - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), str->length(), dir); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, str->length(), dir); if (byteOffset == -1) return -1; if (str->length() == 0) return byteOffset; JSC::EncodedJSValue encodedBuffer = constructFromEncoding(lexicalGlobalObject, str, encoding.value()); @@ -1481,7 +1481,7 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* if (auto* array = JSC::jsDynamicCast(valueValue)) { if (!encoding.has_value()) encoding = BufferEncodingType::utf8; size_t lengthValue = array->byteLength(); - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), lengthValue, dir); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, lengthValue, dir); if (byteOffset == -1) return -1; if (lengthValue == 0) return byteOffset; const uint8_t* typedVectorValue = array->typedVector(); From 90ad9833fb26958636cca88b56ed6aee3ee9a41e Mon Sep 17 00:00:00 2001 From: Meghan Denny Date: Fri, 24 Jan 2025 19:58:34 -0800 Subject: [PATCH 3/6] tidy --- .../test/parallel/test-buffer-includes.js | 24 ++--- .../node/test/parallel/test-buffer-indexof.js | 87 ++++++------------- 2 files changed, 33 insertions(+), 78 deletions(-) diff --git a/test/js/node/test/parallel/test-buffer-includes.js b/test/js/node/test/parallel/test-buffer-includes.js index e5fb6b076a7456..fb426d09defee9 100644 --- a/test/js/node/test/parallel/test-buffer-includes.js +++ b/test/js/node/test/parallel/test-buffer-includes.js @@ -143,22 +143,17 @@ let twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); assert(twoByteString.includes('\u0395', 4, 'ucs2')); assert(twoByteString.includes('\u03a3', -4, 'ucs2')); assert(twoByteString.includes('\u03a3', -6, 'ucs2')); -assert(twoByteString.includes( - Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2')); +assert(twoByteString.includes(Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2')); assert(!twoByteString.includes('\u03a3', -2, 'ucs2')); -const mixedByteStringUcs2 = - Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2'); +const mixedByteStringUcs2 =Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2'); assert(mixedByteStringUcs2.includes('bc', 0, 'ucs2')); assert(mixedByteStringUcs2.includes('\u03a3', 0, 'ucs2')); assert(!mixedByteStringUcs2.includes('\u0396', 0, 'ucs2')); -assert.ok( - mixedByteStringUcs2.includes(Buffer.from('bc', 'ucs2'), 0, 'ucs2')); -assert.ok( - mixedByteStringUcs2.includes(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2')); -assert.ok( - !mixedByteStringUcs2.includes(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')); +assert.ok(mixedByteStringUcs2.includes(Buffer.from('bc', 'ucs2'), 0, 'ucs2')); +assert.ok(mixedByteStringUcs2.includes(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2')); +assert.ok(!mixedByteStringUcs2.includes(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')); twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); @@ -207,8 +202,7 @@ assert(longBufferString.includes(pattern), 'Long JABACABA..., First J'); assert(longBufferString.includes(pattern, 512), 'Long JABACABA..., Second J'); // Search for a non-ASCII string in a pure ASCII string. -const asciiString = Buffer.from( - 'arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf'); +const asciiString = Buffer.from('arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf'); assert(!asciiString.includes('\x2061')); assert(asciiString.includes('leb', 0)); @@ -260,12 +254,10 @@ for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) { const patternBufferUcs2 = allCharsBufferUcs2.slice(index, index + length); - assert.ok( - allCharsBufferUcs2.includes(patternBufferUcs2, 0, 'ucs2')); + assert.ok(allCharsBufferUcs2.includes(patternBufferUcs2, 0, 'ucs2')); const patternStringUcs2 = patternBufferUcs2.toString('ucs2'); - assert.ok( - allCharsBufferUcs2.includes(patternStringUcs2, 0, 'ucs2')); + assert.ok(allCharsBufferUcs2.includes(patternStringUcs2, 0, 'ucs2')); } } diff --git a/test/js/node/test/parallel/test-buffer-indexof.js b/test/js/node/test/parallel/test-buffer-indexof.js index 16b4dd4009b61a..27aadc3d79bc5e 100644 --- a/test/js/node/test/parallel/test-buffer-indexof.js +++ b/test/js/node/test/parallel/test-buffer-indexof.js @@ -189,30 +189,24 @@ assert.strictEqual(Buffer.from('aaaa00a').indexOf('3030', 'hex'), 4); { // Test usc2 and utf16le encoding ['ucs2', 'utf16le'].forEach((encoding) => { - const twoByteString = Buffer.from( - '\u039a\u0391\u03a3\u03a3\u0395', encoding); + const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', encoding); assert.strictEqual(twoByteString.indexOf('\u0395', 4, encoding), 8); assert.strictEqual(twoByteString.indexOf('\u03a3', -4, encoding), 6); assert.strictEqual(twoByteString.indexOf('\u03a3', -6, encoding), 4); - assert.strictEqual(twoByteString.indexOf( - Buffer.from('\u03a3', encoding), -6, encoding), 4); + assert.strictEqual(twoByteString.indexOf(Buffer.from('\u03a3', encoding), -6, encoding), 4); assert.strictEqual(-1, twoByteString.indexOf('\u03a3', -2, encoding)); }); } -const mixedByteStringUcs2 = - Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2'); +const mixedByteStringUcs2 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2'); assert.strictEqual(mixedByteStringUcs2.indexOf('bc', 0, 'ucs2'), 6); assert.strictEqual(mixedByteStringUcs2.indexOf('\u03a3', 0, 'ucs2'), 10); assert.strictEqual(-1, mixedByteStringUcs2.indexOf('\u0396', 0, 'ucs2')); -assert.strictEqual( - mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2'), 6); -assert.strictEqual( - mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2'), 10); -assert.strictEqual( - -1, mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')); +assert.strictEqual(mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2'), 6); +assert.strictEqual(mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2'), 10); +assert.strictEqual(-1, mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2')); { const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2'); @@ -262,8 +256,7 @@ const longBufferString = Buffer.from(longString); let pattern = 'ABACABADABACABA'; for (let i = 0; i < longBufferString.length - pattern.length; i += 7) { const index = longBufferString.indexOf(pattern, i); - assert.strictEqual((i + 15) & ~0xf, index, - `Long ABACABA...-string at index ${i}`); + assert.strictEqual((i + 15) & ~0xf, index, `Long ABACABA...-string at index ${i}`); } let index = longBufferString.indexOf('AJABACA'); @@ -275,12 +268,10 @@ pattern = 'JABACABADABACABA'; index = longBufferString.indexOf(pattern); assert.strictEqual(index, 511, `Long JABACABA..., First J - at index ${index}`); index = longBufferString.indexOf(pattern, 512); -assert.strictEqual( - index, 1535, `Long JABACABA..., Second J - at index ${index}`); +assert.strictEqual(index, 1535, `Long JABACABA..., Second J - at index ${index}`); // Search for a non-ASCII string in a pure ASCII string. -const asciiString = Buffer.from( - 'arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf'); +const asciiString = Buffer.from('arglebargleglopglyfarglebargleglopglyfarglebargleglopglyf'); assert.strictEqual(-1, asciiString.indexOf('\x2061')); assert.strictEqual(asciiString.indexOf('leb', 0), 3); @@ -342,14 +333,11 @@ assert.strictEqual(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1); const index = indices[i] * 2; const length = lengths[lengthIndex]; - const patternBufferUcs2 = - allCharsBufferUcs2.slice(index, index + length); - assert.strictEqual( - index, allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2')); + const patternBufferUcs2 = allCharsBufferUcs2.slice(index, index + length); + assert.strictEqual(index, allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2')); const patternStringUcs2 = patternBufferUcs2.toString('ucs2'); - assert.strictEqual( - index, allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2')); + assert.strictEqual(index, allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2')); } } } @@ -382,24 +370,12 @@ assert.strictEqual(b.indexOf('b', []), 1); assert.strictEqual(b.indexOf('b', [2]), -1); // Behavior should match String.indexOf() -assert.strictEqual( - b.indexOf('b', undefined), - s.indexOf('b', undefined)); -assert.strictEqual( - b.indexOf('b', {}), - s.indexOf('b', {})); -assert.strictEqual( - b.indexOf('b', 0), - s.indexOf('b', 0)); -assert.strictEqual( - b.indexOf('b', null), - s.indexOf('b', null)); -assert.strictEqual( - b.indexOf('b', []), - s.indexOf('b', [])); -assert.strictEqual( - b.indexOf('b', [2]), - s.indexOf('b', [2])); +assert.strictEqual(b.indexOf('b', undefined), s.indexOf('b', undefined)); +assert.strictEqual(b.indexOf('b', {}), s.indexOf('b', {})); +assert.strictEqual(b.indexOf('b', 0), s.indexOf('b', 0)); +assert.strictEqual(b.indexOf('b', null), s.indexOf('b', null)); +assert.strictEqual(b.indexOf('b', []), s.indexOf('b', [])); +assert.strictEqual(b.indexOf('b', [2]), s.indexOf('b', [2])); // All code for handling encodings is shared between Buffer.indexOf and // Buffer.lastIndexOf, so only testing the separate lastIndexOf semantics. @@ -468,24 +444,12 @@ assert.strictEqual(b.lastIndexOf('b', []), -1); assert.strictEqual(b.lastIndexOf('b', [2]), 1); // Behavior should match String.lastIndexOf() -assert.strictEqual( - b.lastIndexOf('b', undefined), - s.lastIndexOf('b', undefined)); -assert.strictEqual( - b.lastIndexOf('b', {}), - s.lastIndexOf('b', {})); -assert.strictEqual( - b.lastIndexOf('b', 0), - s.lastIndexOf('b', 0)); -assert.strictEqual( - b.lastIndexOf('b', null), - s.lastIndexOf('b', null)); -assert.strictEqual( - b.lastIndexOf('b', []), - s.lastIndexOf('b', [])); -assert.strictEqual( - b.lastIndexOf('b', [2]), - s.lastIndexOf('b', [2])); +assert.strictEqual(b.lastIndexOf('b', undefined), s.lastIndexOf('b', undefined)); +assert.strictEqual(b.lastIndexOf('b', {}), s.lastIndexOf('b', {})); +assert.strictEqual(b.lastIndexOf('b', 0), s.lastIndexOf('b', 0)); +assert.strictEqual(b.lastIndexOf('b', null), s.lastIndexOf('b', null)); +assert.strictEqual(b.lastIndexOf('b', []), s.lastIndexOf('b', [])); +assert.strictEqual(b.lastIndexOf('b', [2]), s.lastIndexOf('b', [2])); // Test needles longer than the haystack. assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'ucs2'), -1); @@ -517,8 +481,7 @@ assert.strictEqual(bufferString.lastIndexOf('canal'), 15); assert.strictEqual(bufferString.lastIndexOf('panama'), 21); assert.strictEqual(bufferString.lastIndexOf('a man a plan a canal panama'), 0); assert.strictEqual(-1, bufferString.lastIndexOf('a man a plan a canal mexico')); -assert.strictEqual(-1, bufferString - .lastIndexOf('a man a plan a canal mexico city')); +assert.strictEqual(-1, bufferString.lastIndexOf('a man a plan a canal mexico city')); assert.strictEqual(-1, bufferString.lastIndexOf(Buffer.from('a'.repeat(1000)))); assert.strictEqual(bufferString.lastIndexOf('a man a plan', 4), 0); assert.strictEqual(bufferString.lastIndexOf('a '), 13); From ad75d42c1cfcec43389c76b6355d915259a4d1a4 Mon Sep 17 00:00:00 2001 From: Meghan Denny Date: Fri, 24 Jan 2025 19:58:53 -0800 Subject: [PATCH 4/6] eliminate the byteOffset JSValue roundtrip --- src/bun.js/bindings/JSBuffer.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index 37c33cd047b5ce..48c3994f7eae79 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1411,6 +1411,7 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* const uint8_t* typedVector = buffer->typedVector(); size_t byteLength = buffer->byteLength(); std::optional encoding = std::nullopt; + double byteOffsetD = 0; if (byteLength == 0) return -1; @@ -1421,19 +1422,18 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* if (byteOffsetValue.isString()) { encodingValue = byteOffsetValue; byteOffsetValue = jsUndefined(); + byteOffsetD = 0; } else { - double byteOffset = byteOffsetValue.toNumber(lexicalGlobalObject); + byteOffsetD = byteOffsetValue.toNumber(lexicalGlobalObject); RETURN_IF_EXCEPTION(scope, -1); - if (byteOffset > 0x7fffffffp0f) byteOffsetValue = jsDoubleNumber(0x7fffffffp0f); - if (byteOffset < -0x80000000p0f) byteOffsetValue = jsDoubleNumber(-0x80000000p0f); + if (byteOffsetD > 0x7fffffffp0f) byteOffsetD = 0x7fffffffp0f; + if (byteOffsetD < -0x80000000p0f) byteOffsetD = -0x80000000p0f; } - byteOffsetValue = jsDoubleNumber(byteOffsetValue.toNumber(lexicalGlobalObject)); - RETURN_IF_EXCEPTION(scope, -1); - if (std::isnan(byteOffsetValue.asNumber())) byteOffsetValue = jsNumber(dir ? 0 : byteLength); + if (std::isnan(byteOffsetD)) byteOffsetD = dir ? 0 : byteLength; if (valueValue.isNumber()) { - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), 1, dir); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, 1, dir); if (byteOffset == -1) return -1; uint8_t byteValue = (valueValue.toInt32(lexicalGlobalObject)) % 256; RETURN_IF_EXCEPTION(scope, -1); @@ -1463,11 +1463,11 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* } auto* str = valueValue.toStringOrNull(lexicalGlobalObject); RETURN_IF_EXCEPTION(scope, -1); - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), str->length(), dir); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, str->length(), dir); if (byteOffset == -1) return -1; if (str->length() == 0) return byteOffset; JSC::EncodedJSValue encodedBuffer = constructFromEncoding(lexicalGlobalObject, str, encoding.value()); - auto* arrayValue = JSC::jsDynamicCast(JSC::JSValue::decode(encodedBuffer)); + auto* arrayValue = JSC::jsCast(JSC::JSValue::decode(encodedBuffer)); int64_t lengthValue = static_cast(arrayValue->byteLength()); const uint8_t* typedVectorValue = arrayValue->typedVector(); if (last) { @@ -1481,7 +1481,7 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* if (auto* array = JSC::jsDynamicCast(valueValue)) { if (!encoding.has_value()) encoding = BufferEncodingType::utf8; size_t lengthValue = array->byteLength(); - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetValue.asNumber(), lengthValue, dir); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, lengthValue, dir); if (byteOffset == -1) return -1; if (lengthValue == 0) return byteOffset; const uint8_t* typedVectorValue = array->typedVector(); From 3232057f884a421e796a538506c6926d14571d03 Mon Sep 17 00:00:00 2001 From: Meghan Denny Date: Fri, 24 Jan 2025 20:30:59 -0800 Subject: [PATCH 5/6] silo the 3 impls into specific functions that only do exactly their thing --- src/bun.js/bindings/JSBuffer.cpp | 85 +++++++++++++++++++------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index 48c3994f7eae79..d67ae29ea03d90 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1403,6 +1403,53 @@ static int64_t lastIndexOf(const uint8_t* thisPtr, int64_t thisLength, const uin return -1; } +static int64_t indexOfNumber(JSC::JSGlobalObject* lexicalGlobalObject, bool last, const uint8_t* typedVector, size_t byteLength, double byteOffsetD, uint8_t byteValue) +{ + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, 1, !last); + if (byteOffset == -1) return -1; + if (last) { + for (int64_t i = byteOffset; i >= 0; --i) { + if (byteValue == typedVector[i]) return i; + } + } else { + const void* offset = memchr(reinterpret_cast(typedVector + byteOffset), byteValue, byteLength - byteOffset); + if (offset != NULL) return static_cast(offset) - typedVector; + } + return -1; +} + +static int64_t indexOfString(JSC::JSGlobalObject* lexicalGlobalObject, bool last, const uint8_t* typedVector, size_t byteLength, double byteOffsetD, JSString* str, BufferEncodingType encoding) +{ + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, str->length(), !last); + if (byteOffset == -1) return -1; + if (str->length() == 0) return byteOffset; + JSC::EncodedJSValue encodedBuffer = constructFromEncoding(lexicalGlobalObject, str, encoding); + auto* arrayValue = JSC::jsCast(JSC::JSValue::decode(encodedBuffer)); + int64_t lengthValue = static_cast(arrayValue->byteLength()); + const uint8_t* typedVectorValue = arrayValue->typedVector(); + if (last) { + return lastIndexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + } else { + if (encoding == BufferEncodingType::ucs2) return indexOf16(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + return indexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + } +} + +static int64_t indexOfBuffer(JSC::JSGlobalObject* lexicalGlobalObject, bool last, const uint8_t* typedVector, size_t byteLength, double byteOffsetD, JSC::JSGenericTypedArrayView* array, BufferEncodingType encoding) +{ + size_t lengthValue = array->byteLength(); + ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, lengthValue, !last); + if (byteOffset == -1) return -1; + if (lengthValue == 0) return byteOffset; + const uint8_t* typedVectorValue = array->typedVector(); + if (last) { + return lastIndexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + } else { + if (encoding == BufferEncodingType::ucs2) return indexOf16(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + return indexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); + } +} + static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation::ClassParameter buffer, bool last) { auto& vm = lexicalGlobalObject->vm(); @@ -1433,19 +1480,9 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* if (std::isnan(byteOffsetD)) byteOffsetD = dir ? 0 : byteLength; if (valueValue.isNumber()) { - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, 1, dir); - if (byteOffset == -1) return -1; uint8_t byteValue = (valueValue.toInt32(lexicalGlobalObject)) % 256; RETURN_IF_EXCEPTION(scope, -1); - if (last) { - for (int64_t i = byteOffset; i >= 0; --i) { - if (byteValue == typedVector[i]) return i; - } - } else { - const void* offset = memchr(reinterpret_cast(typedVector + byteOffset), byteValue, byteLength - byteOffset); - if (offset != NULL) return static_cast(offset) - typedVector; - } - return -1; + return indexOfNumber(lexicalGlobalObject, last, typedVector, byteLength, byteOffsetD, byteValue); } WTF::String encodingString; @@ -1463,34 +1500,12 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* } auto* str = valueValue.toStringOrNull(lexicalGlobalObject); RETURN_IF_EXCEPTION(scope, -1); - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, str->length(), dir); - if (byteOffset == -1) return -1; - if (str->length() == 0) return byteOffset; - JSC::EncodedJSValue encodedBuffer = constructFromEncoding(lexicalGlobalObject, str, encoding.value()); - auto* arrayValue = JSC::jsCast(JSC::JSValue::decode(encodedBuffer)); - int64_t lengthValue = static_cast(arrayValue->byteLength()); - const uint8_t* typedVectorValue = arrayValue->typedVector(); - if (last) { - return lastIndexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); - } else { - if (encoding.value() == BufferEncodingType::ucs2) return indexOf16(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); - return indexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); - } + return indexOfString(lexicalGlobalObject, last, typedVector, byteLength, byteOffsetD, str, encoding.value()); } if (auto* array = JSC::jsDynamicCast(valueValue)) { if (!encoding.has_value()) encoding = BufferEncodingType::utf8; - size_t lengthValue = array->byteLength(); - ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, lengthValue, dir); - if (byteOffset == -1) return -1; - if (lengthValue == 0) return byteOffset; - const uint8_t* typedVectorValue = array->typedVector(); - if (last) { - return lastIndexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); - } else { - if (encoding.value() == BufferEncodingType::ucs2) return indexOf16(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); - return indexOf(typedVector, byteLength, typedVectorValue, lengthValue, byteOffset); - } + return indexOfBuffer(lexicalGlobalObject, last, typedVector, byteLength, byteOffsetD, array, encoding.value()); } Bun::ERR::INVALID_ARG_TYPE(scope, lexicalGlobalObject, "value"_s, "number, string, Buffer, or Uint8Array"_s, valueValue); From e2e6d33eead8820102d0b096bcc936cf6a2adcef Mon Sep 17 00:00:00 2001 From: Meghan Denny Date: Wed, 5 Feb 2025 17:43:31 -0800 Subject: [PATCH 6/6] address review --- src/bun.js/bindings/JSBuffer.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index 16b8d1384569f8..44b570be30c374 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1364,7 +1364,6 @@ static int64_t indexOf(const uint8_t* thisPtr, int64_t thisLength, const uint8_t if (thisLength < valueLength + byteOffset) return -1; auto start = thisPtr + byteOffset; - auto it = static_cast(MEMMEM_IMPL(start, static_cast(thisLength - byteOffset), valuePtr, static_cast(valueLength))); if (it != NULL) { return it - thisPtr; @@ -1374,7 +1373,7 @@ static int64_t indexOf(const uint8_t* thisPtr, int64_t thisLength, const uint8_t static int64_t indexOf16(const uint8_t* thisPtr, int64_t thisLength, const uint8_t* valuePtr, int64_t valueLength, int64_t byteOffset) { - size_t finalresult = 0; + int64_t finalresult = 0; if (thisLength == 1) return -1; thisLength = thisLength / 2 * 2; if (valueLength == 1) return -1; @@ -1410,6 +1409,13 @@ static int64_t indexOfNumber(JSC::JSGlobalObject* lexicalGlobalObject, bool last ssize_t byteOffset = indexOfOffset(byteLength, byteOffsetD, 1, !last); if (byteOffset == -1) return -1; if (last) { +#if OS(LINUX) +#ifdef __GNU_LIBRARY__ + const void* offset = memrchr(reinterpret_cast(typedVector + byteOffset), byteValue, byteLength - byteOffset); + if (offset != NULL) return static_cast(offset) - typedVector; + return -1; +#endif +#endif for (int64_t i = byteOffset; i >= 0; --i) { if (byteValue == typedVector[i]) return i; } @@ -1482,7 +1488,7 @@ static int64_t indexOf(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* if (std::isnan(byteOffsetD)) byteOffsetD = dir ? 0 : byteLength; if (valueValue.isNumber()) { - uint8_t byteValue = (valueValue.toInt32(lexicalGlobalObject)) % 256; + uint8_t byteValue = valueValue.asAnyInt() % 256; RETURN_IF_EXCEPTION(scope, -1); return indexOfNumber(lexicalGlobalObject, last, typedVector, byteLength, byteOffsetD, byteValue); }