buffer: improve buff.toString(encoding, start, end)#10575
buffer: improve buff.toString(encoding, start, end)#10575JacksonTian wants to merge 0 commit intonodejs:masterfrom
Conversation
|
Following is benchmark results: $ ./node benchmark/compare.js --new ./node --old ~/.tnvm/versions/node/v7.3.0/bin/node --filter tostring --runs 1 -- buffers
"binary", "filename", "configuration", "rate", "time"
"old", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""true""", 53075107.789838344, 0.188412241
"old", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""true""", 6970222.753242602, 1.434674379
"old", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""true""", 5725362.379855517, 1.746614334
"old", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""true""", 3047914.7889390728, 3.280931618
"old", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""false""", 18727663.41050718, 0.533969443
"old", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""false""", 8294553.510086126, 1.205610403
"old", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""false""", 6364356.123134775, 1.571250855
"old", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""false""", 3221132.404577592, 3.104498277
"new", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""true""", 74184534.29612593, 0.134798986
"new", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""true""", 7833625.1081898045, 1.276548196
"new", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""true""", 6383079.577419681, 1.566641913
"new", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""true""", 3235248.73062808, 3.090952453
"new", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""false""", 76114013.64713609, 0.131381851
"new", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""false""", 7597410.922092946, 1.316237874
"new", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""false""", 6336344.836457308, 1.578196935
"new", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""false""", 3238942.7802110813, 3.087427188The new binary is better than old binary(v7.3.0, close to master). When buffer length is zero, the improve is more clearly. |
Do you mean to say, the checks are not skipped, even though the length is zero? |
|
If no parameters passed into toString, the C++ binding method was called, the start/end checks are skipped. When buffer is zero length, call C++ binding method is unnecessary. |
We don't have to do the start and end checks, right? Because we know that the But, if your only concern is about the zero length Buffers, you might want to try special casing that, like this if (this.length === 0) {
return '';
} else if (arguments.length === 0) {
result = this.utf8Slice(0, this.length);
} else {
result = slowToString.apply(this, arguments);
} |
|
The slowToString imply the zero check logic. And, the another thing is |
|
@nodejs/buffer @mscdex PTAL |
|
Ok so I ran the benchmarks with the changes this PR currently makes and I see this: However, if we both move and re-arrange (to avoid unnecessary conditionals) the I'm not sure what's up with the I also changed |
|
I thought V8 stopped looking at the length of a function to determine inlinability. Maybe it's in a later version... |
|
@targos AFAIK it definitely still takes it into account with at least V8 5.4 from my experience. It's certainly not the only factor, but it is one of them. |
|
@mscdex Can you share the second patch you tried? |
Here's my patch on top of this PR:
diff --git a/lib/buffer.js b/lib/buffer.js
index 4a69db6..cfd976b 100644
--- a/lib/buffer.js
+++ b/lib/buffer.js
@@ -432,63 +432,27 @@ Object.defineProperty(Buffer.prototype, 'offset', {
});
-function slowToString(encoding, start, end) {
+function slowToString(buf, encoding, start, end) {
var loweredCase = false;
-
- // No need to verify that "this.length <= MAX_UINT32" since it's a read-only
- // property of a typed array.
-
- // This behaves neither like String nor Uint8Array in that we set start/end
- // to their upper/lower bounds if the value passed is out of range.
- // undefined is handled specially as per ECMA-262 6th Edition,
- // Section 13.3.3.7 Runtime Semantics: KeyedBindingInitialization.
- if (start === undefined || start < 0)
- start = 0;
- // Return early if start > this.length. Done here to prevent potential uint32
- // coercion fail below.
- if (start > this.length)
- return '';
-
- if (end === undefined || end > this.length)
- end = this.length;
-
- if (end <= 0)
- return '';
-
- // Force coersion to uint32. This will also coerce falsey/NaN values to 0.
- end >>>= 0;
- start >>>= 0;
-
- if (end <= start)
- return '';
-
- if (!encoding) encoding = 'utf8';
-
while (true) {
switch (encoding) {
- case 'hex':
- return this.hexSlice(start, end);
-
case 'utf8':
case 'utf-8':
- return this.utf8Slice(start, end);
-
+ return buf.utf8Slice(start, end);
+ case 'hex':
+ return buf.hexSlice(start, end);
case 'ascii':
- return this.asciiSlice(start, end);
-
+ return buf.asciiSlice(start, end);
case 'latin1':
case 'binary':
- return this.latin1Slice(start, end);
-
+ return buf.latin1Slice(start, end);
case 'base64':
- return this.base64Slice(start, end);
-
+ return buf.base64Slice(start, end);
case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
- return this.ucs2Slice(start, end);
-
+ return buf.ucs2Slice(start, end);
default:
if (loweredCase)
throw new TypeError('Unknown encoding: ' + encoding);
@@ -498,19 +462,47 @@ function slowToString(encoding, start, end) {
}
}
-Buffer.prototype.copy = function(target, targetStart, sourceStart, sourceEnd) {
- return binding.copy(this, target, targetStart, sourceStart, sourceEnd);
-};
-
-
+// When comparing `start` and `end` with `buffer.length`, there is no need to
+// verify that "this.length <= MAX_UINT32" since it's a read-only property of a
+// typed array.
+// The bounds checking performed by `toString()` behaves neither like String nor
+// Uint8Array in that we set start/end to their upper/lower bounds if the value
+// passed is out of range.
+// `undefined` is handled specially as per ECMA-262 6th Edition,
+// Section 13.3.3.7 Runtime Semantics: KeyedBindingInitialization.
+// Additionally, we return early if `start > this.length`. This is done to
+// prevent potential uint32 coercion failures.
Buffer.prototype.toString = function(encoding, start, end) {
- const result = slowToString.call(this, encoding, start, end);
+ if (start === undefined || start < 0)
+ start = 0;
+ else if (start > this.length)
+ return '';
+ else
+ start >>>= 0;
+ if (end === undefined || end > this.length)
+ end = this.length;
+ else if (end <= 0)
+ return '';
+ else
+ end >>>= 0;
+ if (end <= start)
+ return '';
+ var result;
+ if (!encoding)
+ result = this.utf8Slice(start, end);
+ else
+ result = slowToString(this, encoding, start, end);
if (result === undefined)
throw new Error('"toString()" failed');
return result;
};
+Buffer.prototype.copy = function(target, targetStart, sourceStart, sourceEnd) {
+ return binding.copy(this, target, targetStart, sourceStart, sourceEnd);
+};
+
+
Buffer.prototype.equals = function equals(b) {
if (!isUint8Array(b))
throw new TypeError('Argument must be a Buffer or Uint8Array'); |
|
@mscdex Found the commit: v8/v8@1b33028 |
trevnorris
left a comment
There was a problem hiding this comment.
So do we still want to merge this?
|
@trevnorris not as-is IMHO. I think adding the changes I've proposed will help to mitigate the performance regressions. |
|
ping updates on this one? |
When buff.toString() without parameter will call
buff.utf8Slice(0, this.length) directly. It skips the start/end
check, even thougth the buffer length is zero.
The utf8Slice is a C++ binding method, when the buffer length is
zero, the case is unexpected.
And, the slowToString.apply(this, arguments) is slowly also,
change to slowToString.call(this, encoding, start, end).
Checklist
make -j4 test(UNIX), orvcbuild test(Windows) passesAffected core subsystem(s)