-
-
Notifications
You must be signed in to change notification settings - Fork 34.9k
buffer: improve performance of multiple Buffer operations #61871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d6c9fec
8b3b9ae
cf9242a
5aae124
2abe277
48abfc5
a049a90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| 'use strict'; | ||
|
|
||
| const common = require('../common.js'); | ||
|
|
||
| const bench = common.createBenchmark(main, { | ||
| type: ['Uint8Array', 'Uint16Array', 'Uint32Array', 'Float64Array'], | ||
| len: [64, 256, 2048], | ||
| partial: ['none', 'offset', 'offset-length'], | ||
| n: [6e5], | ||
| }); | ||
|
|
||
| function main({ n, len, type, partial }) { | ||
| const TypedArrayCtor = globalThis[type]; | ||
| const src = new TypedArrayCtor(len); | ||
| for (let i = 0; i < len; i++) src[i] = i; | ||
|
|
||
| let offset; | ||
| let length; | ||
| if (partial === 'offset') { | ||
| offset = len >>> 2; | ||
| } else if (partial === 'offset-length') { | ||
| offset = len >>> 2; | ||
| length = len >>> 1; | ||
| } | ||
|
|
||
| bench.start(); | ||
| for (let i = 0; i < n; i++) { | ||
| Buffer.copyBytesFrom(src, offset, length); | ||
| } | ||
| bench.end(n); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -50,7 +50,6 @@ const { | |
| TypedArrayPrototypeGetByteOffset, | ||
| TypedArrayPrototypeGetLength, | ||
| TypedArrayPrototypeSet, | ||
| TypedArrayPrototypeSlice, | ||
| TypedArrayPrototypeSubarray, | ||
| Uint8Array, | ||
| } = primordials; | ||
|
|
@@ -383,28 +382,42 @@ Buffer.copyBytesFrom = function copyBytesFrom(view, offset, length) { | |
| return new FastBuffer(); | ||
| } | ||
|
|
||
| const byteLength = TypedArrayPrototypeGetByteLength(view); | ||
|
|
||
| if (offset !== undefined || length !== undefined) { | ||
| if (offset !== undefined) { | ||
| validateInteger(offset, 'offset', 0); | ||
| if (offset >= viewLength) return new FastBuffer(); | ||
| } else { | ||
| offset = 0; | ||
| } | ||
|
|
||
| let end; | ||
| if (length !== undefined) { | ||
| validateInteger(length, 'length', 0); | ||
| end = offset + length; | ||
| // The old code used TypedArrayPrototypeSlice which clamps internally. | ||
| end = MathMin(offset + length, viewLength); | ||
| } else { | ||
| end = viewLength; | ||
| } | ||
|
|
||
| view = TypedArrayPrototypeSlice(view, offset, end); | ||
| if (end <= offset) return new FastBuffer(); | ||
|
|
||
| const elementSize = byteLength / viewLength; | ||
| const srcByteOffset = TypedArrayPrototypeGetByteOffset(view) + | ||
| offset * elementSize; | ||
| const srcByteLength = (end - offset) * elementSize; | ||
|
|
||
| return fromArrayLike(new Uint8Array( | ||
| TypedArrayPrototypeGetBuffer(view), | ||
| srcByteOffset, | ||
| srcByteLength)); | ||
| } | ||
|
|
||
| return fromArrayLike(new Uint8Array( | ||
| TypedArrayPrototypeGetBuffer(view), | ||
| TypedArrayPrototypeGetByteOffset(view), | ||
| TypedArrayPrototypeGetByteLength(view))); | ||
| byteLength)); | ||
| }; | ||
|
|
||
| // Identical to the built-in %TypedArray%.of(), but avoids using the deprecated | ||
|
|
@@ -551,14 +564,15 @@ function fromArrayBuffer(obj, byteOffset, length) { | |
| } | ||
|
|
||
| function fromArrayLike(obj) { | ||
| if (obj.length <= 0) | ||
| const len = obj.length; | ||
| if (len <= 0) | ||
| return new FastBuffer(); | ||
| if (obj.length < (Buffer.poolSize >>> 1)) { | ||
| if (obj.length > (poolSize - poolOffset)) | ||
| if (len < (Buffer.poolSize >>> 1)) { | ||
| if (len > (poolSize - poolOffset)) | ||
| createPool(); | ||
| const b = new FastBuffer(allocPool, poolOffset, obj.length); | ||
| const b = new FastBuffer(allocPool, poolOffset, len); | ||
| TypedArrayPrototypeSet(b, obj, 0); | ||
| poolOffset += obj.length; | ||
| poolOffset += len; | ||
| alignPool(); | ||
| return b; | ||
| } | ||
|
|
@@ -732,11 +746,7 @@ const encodingOps = { | |
| write: asciiWrite, | ||
| slice: asciiSlice, | ||
| indexOf: (buf, val, byteOffset, dir) => | ||
| indexOfBuffer(buf, | ||
| fromStringFast(val, encodingOps.ascii), | ||
| byteOffset, | ||
| encodingsMap.ascii, | ||
| dir), | ||
| indexOfString(buf, val, byteOffset, encodingsMap.ascii, dir), | ||
| }, | ||
| base64: { | ||
| encoding: 'base64', | ||
|
|
@@ -1118,7 +1128,9 @@ function _fill(buf, value, offset, end, encoding) { | |
| value = 0; | ||
| } else if (value.length === 1) { | ||
| // Fast path: If `value` fits into a single byte, use that numeric value. | ||
| if (normalizedEncoding === 'utf8') { | ||
| // ASCII shares this branch with utf8 since code < 128 covers the full | ||
| // ASCII range; anything outside falls through to C++ bindingFill. | ||
| if (normalizedEncoding === 'utf8' || normalizedEncoding === 'ascii') { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, this is safe by design I am just extending the existing single byte numeric optimization to cover ASCII, since the guard already constrains it to the valid ASCII range.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please update comment to why we can add ASCII here. |
||
| const code = StringPrototypeCharCodeAt(value, 0); | ||
| if (code < 128) { | ||
| value = code; | ||
|
|
@@ -1168,29 +1180,30 @@ function _fill(buf, value, offset, end, encoding) { | |
| } | ||
|
|
||
| Buffer.prototype.write = function write(string, offset, length, encoding) { | ||
| const len = this.length; | ||
| // Buffer#write(string); | ||
| if (offset === undefined) { | ||
| return utf8Write(this, string, 0, this.length); | ||
| return utf8Write(this, string, 0, len); | ||
| } | ||
| // Buffer#write(string, encoding) | ||
| if (length === undefined && typeof offset === 'string') { | ||
| encoding = offset; | ||
| length = this.length; | ||
| length = len; | ||
| offset = 0; | ||
|
|
||
| // Buffer#write(string, offset[, length][, encoding]) | ||
| } else { | ||
| validateOffset(offset, 'offset', 0, this.length); | ||
| validateOffset(offset, 'offset', 0, len); | ||
|
|
||
| const remaining = this.length - offset; | ||
| const remaining = len - offset; | ||
|
|
||
| if (length === undefined) { | ||
| length = remaining; | ||
| } else if (typeof length === 'string') { | ||
| encoding = length; | ||
| length = remaining; | ||
| } else { | ||
| validateOffset(length, 'length', 0, this.length); | ||
| validateOffset(length, 'length', 0, len); | ||
| if (length > remaining) | ||
| length = remaining; | ||
| } | ||
|
|
@@ -1208,9 +1221,10 @@ Buffer.prototype.write = function write(string, offset, length, encoding) { | |
| }; | ||
|
|
||
| Buffer.prototype.toJSON = function toJSON() { | ||
| if (this.length > 0) { | ||
| const data = new Array(this.length); | ||
| for (let i = 0; i < this.length; ++i) | ||
| const len = this.length; | ||
| if (len > 0) { | ||
| const data = new Array(len); | ||
| for (let i = 0; i < len; ++i) | ||
| data[i] = this[i]; | ||
| return { type: 'Buffer', data }; | ||
| } | ||
|
|
@@ -1253,45 +1267,53 @@ function swap(b, n, m) { | |
| } | ||
|
|
||
| Buffer.prototype.swap16 = function swap16() { | ||
| // For Buffer.length < 128, it's generally faster to | ||
| // For Buffer.length <= 32, it's generally faster to | ||
| // do the swap in javascript. For larger buffers, | ||
| // dropping down to the native code is faster. | ||
| if (!isUint8Array(this)) | ||
| throw new ERR_INVALID_ARG_TYPE('this', ['Buffer', 'Uint8Array'], this); | ||
| const len = this.length; | ||
| if (len % 2 !== 0) | ||
| throw new ERR_INVALID_BUFFER_SIZE('16-bits'); | ||
| if (len < 128) { | ||
| if (len <= 32) { | ||
| for (let i = 0; i < len; i += 2) | ||
| swap(this, i, i + 1); | ||
| return this; | ||
| } | ||
| return _swap16(this); | ||
| _swap16(this); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what would happen if it's called on not a TypeArrayView now?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i.e.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, I will add a JS side guard
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would that be good approach @ChALkeR ?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It changes the method to not being callable on a typedarray that is not an uint8array, but the previous behavior was inconsistent in that case anyway as the js part swapped elements and the src part swapped bytes So I think explicitly blocking non-uint8arr should be fine and not a semver-major? |
||
| return this; | ||
anonrig marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| Buffer.prototype.swap32 = function swap32() { | ||
| // For Buffer.length < 192, it's generally faster to | ||
| // For Buffer.length <= 32, it's generally faster to | ||
| // do the swap in javascript. For larger buffers, | ||
| // dropping down to the native code is faster. | ||
| if (!isUint8Array(this)) | ||
| throw new ERR_INVALID_ARG_TYPE('this', ['Buffer', 'Uint8Array'], this); | ||
| const len = this.length; | ||
| if (len % 4 !== 0) | ||
| throw new ERR_INVALID_BUFFER_SIZE('32-bits'); | ||
| if (len < 192) { | ||
| if (len <= 32) { | ||
| for (let i = 0; i < len; i += 4) { | ||
| swap(this, i, i + 3); | ||
| swap(this, i + 1, i + 2); | ||
| } | ||
| return this; | ||
| } | ||
| return _swap32(this); | ||
| _swap32(this); | ||
| return this; | ||
anonrig marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| Buffer.prototype.swap64 = function swap64() { | ||
| // For Buffer.length < 192, it's generally faster to | ||
| // For Buffer.length < 48, it's generally faster to | ||
| // do the swap in javascript. For larger buffers, | ||
| // dropping down to the native code is faster. | ||
| if (!isUint8Array(this)) | ||
| throw new ERR_INVALID_ARG_TYPE('this', ['Buffer', 'Uint8Array'], this); | ||
| const len = this.length; | ||
| if (len % 8 !== 0) | ||
| throw new ERR_INVALID_BUFFER_SIZE('64-bits'); | ||
| if (len < 192) { | ||
| if (len < 48) { | ||
| for (let i = 0; i < len; i += 8) { | ||
| swap(this, i, i + 7); | ||
| swap(this, i + 1, i + 6); | ||
|
|
@@ -1300,7 +1322,8 @@ Buffer.prototype.swap64 = function swap64() { | |
| } | ||
| return this; | ||
| } | ||
| return _swap64(this); | ||
| _swap64(this); | ||
| return this; | ||
anonrig marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| Buffer.prototype.toLocaleString = Buffer.prototype.toString; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1050,7 +1050,7 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { | |
| needle_length, | ||
| offset, | ||
| is_forward); | ||
| } else if (enc == LATIN1) { | ||
| } else if (enc == ASCII || enc == LATIN1) { | ||
| uint8_t* needle_data = node::UncheckedMalloc<uint8_t>(needle_length); | ||
| if (needle_data == nullptr) { | ||
| return args.GetReturnValue().Set(-1); | ||
|
|
@@ -1200,31 +1200,59 @@ int32_t FastIndexOfNumber(Local<Value>, | |
| static CFunction fast_index_of_number(CFunction::Make(FastIndexOfNumber)); | ||
|
|
||
| void Swap16(const FunctionCallbackInfo<Value>& args) { | ||
| Environment* env = Environment::GetCurrent(args); | ||
| THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | ||
thisalihassan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| DCHECK(args[0]->IsArrayBufferView()); | ||
| SPREAD_BUFFER_ARG(args[0], ts_obj); | ||
| CHECK(nbytes::SwapBytes16(ts_obj_data, ts_obj_length)); | ||
| args.GetReturnValue().Set(args[0]); | ||
| } | ||
|
|
||
| void FastSwap16(Local<Value> receiver, | ||
| Local<Value> buffer_obj, | ||
| // NOLINTNEXTLINE(runtime/references) | ||
| FastApiCallbackOptions& options) { | ||
| TRACK_V8_FAST_API_CALL("buffer.swap16"); | ||
| HandleScope scope(options.isolate); | ||
| SPREAD_BUFFER_ARG(buffer_obj, ts_obj); | ||
| CHECK(nbytes::SwapBytes16(ts_obj_data, ts_obj_length)); | ||
| } | ||
|
Comment on lines
+1208
to
+1216
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These fast callbacks are non-identical to the conventional callbacks they shadow.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was removing the validation instead of moving it to js intentional? Also fast paths can keep the validation and fall back to slow i think, so we can still validate on the src side?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is outdated, the fast API doesn't use fallback any more (since Node.js v23.x). However, any validation in a JS wrapper should be shadowed by a CHECK or DCHECK in the C++ binding.
Comment on lines
+1208
to
+1216
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fast callbacks should include debug tracking and call tests.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for sharing these I will update the code |
||
|
|
||
| static CFunction fast_swap16(CFunction::Make(FastSwap16)); | ||
|
|
||
| void Swap32(const FunctionCallbackInfo<Value>& args) { | ||
| Environment* env = Environment::GetCurrent(args); | ||
| THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | ||
ChALkeR marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| DCHECK(args[0]->IsArrayBufferView()); | ||
| SPREAD_BUFFER_ARG(args[0], ts_obj); | ||
| CHECK(nbytes::SwapBytes32(ts_obj_data, ts_obj_length)); | ||
| args.GetReturnValue().Set(args[0]); | ||
| } | ||
|
|
||
| void FastSwap32(Local<Value> receiver, | ||
| Local<Value> buffer_obj, | ||
| // NOLINTNEXTLINE(runtime/references) | ||
| FastApiCallbackOptions& options) { | ||
| TRACK_V8_FAST_API_CALL("buffer.swap32"); | ||
| HandleScope scope(options.isolate); | ||
| SPREAD_BUFFER_ARG(buffer_obj, ts_obj); | ||
| CHECK(nbytes::SwapBytes32(ts_obj_data, ts_obj_length)); | ||
| } | ||
|
|
||
| static CFunction fast_swap32(CFunction::Make(FastSwap32)); | ||
|
|
||
| void Swap64(const FunctionCallbackInfo<Value>& args) { | ||
| Environment* env = Environment::GetCurrent(args); | ||
| THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | ||
| DCHECK(args[0]->IsArrayBufferView()); | ||
| SPREAD_BUFFER_ARG(args[0], ts_obj); | ||
| CHECK(nbytes::SwapBytes64(ts_obj_data, ts_obj_length)); | ||
| args.GetReturnValue().Set(args[0]); | ||
| } | ||
|
|
||
| void FastSwap64(Local<Value> receiver, | ||
| Local<Value> buffer_obj, | ||
| // NOLINTNEXTLINE(runtime/references) | ||
| FastApiCallbackOptions& options) { | ||
| TRACK_V8_FAST_API_CALL("buffer.swap64"); | ||
| HandleScope scope(options.isolate); | ||
| SPREAD_BUFFER_ARG(buffer_obj, ts_obj); | ||
| CHECK(nbytes::SwapBytes64(ts_obj_data, ts_obj_length)); | ||
| } | ||
|
|
||
| static CFunction fast_swap64(CFunction::Make(FastSwap64)); | ||
|
|
||
| static void IsUtf8(const FunctionCallbackInfo<Value>& args) { | ||
| Environment* env = Environment::GetCurrent(args); | ||
| CHECK_EQ(args.Length(), 1); | ||
|
|
@@ -1622,9 +1650,9 @@ void Initialize(Local<Object> target, | |
| SetMethodNoSideEffect( | ||
| context, target, "createUnsafeArrayBuffer", CreateUnsafeArrayBuffer); | ||
|
|
||
| SetMethod(context, target, "swap16", Swap16); | ||
| SetMethod(context, target, "swap32", Swap32); | ||
| SetMethod(context, target, "swap64", Swap64); | ||
| SetFastMethod(context, target, "swap16", Swap16, &fast_swap16); | ||
| SetFastMethod(context, target, "swap32", Swap32, &fast_swap32); | ||
| SetFastMethod(context, target, "swap64", Swap64, &fast_swap64); | ||
|
|
||
| SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8); | ||
| SetMethodNoSideEffect(context, target, "isAscii", IsAscii); | ||
|
|
@@ -1693,8 +1721,11 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { | |
| registry->Register(IndexOfString); | ||
|
|
||
| registry->Register(Swap16); | ||
| registry->Register(fast_swap16); | ||
| registry->Register(Swap32); | ||
| registry->Register(fast_swap32); | ||
| registry->Register(Swap64); | ||
| registry->Register(fast_swap64); | ||
|
|
||
| registry->Register(IsUtf8); | ||
| registry->Register(IsAscii); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment to why we have this change now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, the clamping is required because we switched from TypedArrayPrototypeSlice (which clamped silently)