Skip to content

Commit 2906fa8

Browse files
mertcanaltin authored and aduh95 committed
src: dispatch ToV8Value(string_view) via StringBytes::Encode
Signed-off-by: Mert Can Altin <[email protected]>
PR-URL: #63370
Reviewed-By: Anna Henningsen <[email protected]>
Reviewed-By: Daniel Lemire <[email protected]>
Reviewed-By: Gürgün Dayıoğlu <[email protected]>
1 parent 01bfcdf commit 2906fa8

5 files changed

Lines changed: 179 additions & 19 deletions

File tree

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
const fs = require('fs');
5+
const path = require('path');
6+
const tmpdir = require('../../test/common/tmpdir');
7+
8+
// Parameter matrix handed to common.createBenchmark together with `main`.
const bench = common.createBenchmark(main, {
9+
// Length in bytes of the file that `main` writes and then reads back.
size: [64, 1024, 16384, 262144, 4194304],
10+
// Byte pattern of the file; each value selects one branch of buildContent.
content: ['ascii', 'latin1', 'utf8_mixed'],
11+
// Whether fs.readFileSync receives the file path or an already-open fd.
source: ['path', 'fd'],
12+
// Number of fs.readFileSync calls made inside the timed loop.
n: [3e3],
13+
});
14+
15+
/**
 * Builds the payload that the benchmark writes to disk.
 *
 * - 'ascii':      every byte is 0x61 ('a').
 * - 'latin1':     U+00E9 'é' as UTF-8 (0xC3 0xA9), repeated.
 * - 'utf8_mixed': 'a' followed by U+4E2D '中' as UTF-8 (0xE4 0xB8 0xAD),
 *                 repeated.
 *
 * For the two multi-byte kinds only whole repetitions are written; when `size`
 * is not a multiple of the pattern width, the remaining tail bytes stay 0x00.
 *
 * @param {string} kind - One of 'ascii', 'latin1', 'utf8_mixed'.
 * @param {number} size - Length of the returned buffer in bytes.
 * @returns {Buffer} A buffer of exactly `size` bytes.
 * @throws {Error} When `kind` is not one of the known values.
 */
function buildContent(kind, size) {
  // Copies `unit` into a zero-filled buffer as many whole times as fit.
  const tile = (unit) => {
    const out = Buffer.alloc(size);
    for (let off = 0; off + unit.length <= size; off += unit.length) {
      unit.copy(out, off);
    }
    return out;
  };

  switch (kind) {
    case 'ascii':
      return Buffer.alloc(size, 0x61); // 'a'
    case 'latin1':
      // 'é' in UTF-8 is 0xC3 0xA9 (2 bytes per char)
      return tile(Buffer.from([0xC3, 0xA9]));
    case 'utf8_mixed':
      // mixed ASCII + 3-byte CJK (U+4E2D 中 = E4 B8 AD)
      return tile(Buffer.from([0x61, 0xE4, 0xB8, 0xAD]));
    default:
      throw new Error('unknown content: ' + kind);
  }
}
40+
41+
/**
 * Benchmark body. Writes a `size`-byte file of the requested `content` kind
 * into the refreshed temp directory, then times `n` calls to
 * fs.readFileSync(..., 'utf8') against either the file path or an open file
 * descriptor.
 *
 * @param {object} conf
 * @param {number} conf.n - Number of timed readFileSync calls.
 * @param {number} conf.size - File size in bytes.
 * @param {string} conf.content - Content kind understood by buildContent.
 * @param {string} conf.source - 'fd' to read through a descriptor; any other
 *   value reads by path.
 */
function main({ n, size, content, source }) {
  tmpdir.refresh();
  const file = path.join(tmpdir.path, `bench-${content}-${size}.bin`);
  fs.writeFileSync(file, buildContent(content, size));

  const useFd = source === 'fd';
  const arg = useFd ? fs.openSync(file, 'r') : file;

  try {
    // NOTE(review): the Node.js fs docs state that reading through a file
    // descriptor begins at the descriptor's current position, so with
    // source === 'fd' iterations after the first may see an already-consumed
    // file. Confirm this is the intended measurement.
    bench.start();
    for (let i = 0; i < n; i++) {
      fs.readFileSync(arg, 'utf8');
    }
    bench.end(n);
  } finally {
    // Release the descriptor even if a read or the harness throws.
    if (useFd) fs.closeSync(arg);
  }
}

src/util-inl.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -341,22 +341,6 @@ v8::Maybe<void> FromV8Array(v8::Local<v8::Context> context,
341341
return js_array->Iterate(context, PushItemToVector, &data);
342342
}
343343

344-
// Deletion side of the src/util-inl.h hunk: the std::string_view overload of
// ToV8Value as it was before this commit. It falls back to the current isolate
// when none is passed, calls ThrowErrStringTooLong and returns an empty
// MaybeLocal when str.size() >= v8::String::kMaxLength, and otherwise builds
// the string with v8::String::NewFromUtf8.
v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
345-
std::string_view str,
346-
v8::Isolate* isolate) {
347-
if (isolate == nullptr) isolate = v8::Isolate::GetCurrent();
348-
if (str.size() >= static_cast<size_t>(v8::String::kMaxLength)) [[unlikely]] {
349-
// V8 only has a TODO comment about adding an exception when the maximum
350-
// string size is exceeded.
351-
ThrowErrStringTooLong(isolate);
352-
return v8::MaybeLocal<v8::Value>();
353-
}
354-
355-
return v8::String::NewFromUtf8(
356-
isolate, str.data(), v8::NewStringType::kNormal, str.size())
357-
.FromMaybe(v8::Local<v8::String>());
358-
}
359-
360344
v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
361345
std::u16string_view str,
362346
v8::Isolate* isolate) {

src/util.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,4 +812,15 @@ v8::Maybe<int> GetValidFileMode(Environment* env,
812812
return v8::Just(mode);
813813
}
814814

815+
// Converts the bytes of `str` to a V8 value by delegating to
// StringBytes::Encode with the UTF8 encoding. When `isolate` is null the
// current isolate is used. If the byte length reaches v8::String::kMaxLength,
// ThrowErrStringTooLong is called and an empty MaybeLocal is returned.
v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
                                    std::string_view str,
                                    v8::Isolate* isolate) {
  v8::Isolate* const target =
      isolate != nullptr ? isolate : v8::Isolate::GetCurrent();
  const size_t length = str.size();
  if (length >= static_cast<size_t>(v8::String::kMaxLength)) [[unlikely]] {
    ThrowErrStringTooLong(target);
    return v8::MaybeLocal<v8::Value>();
  }
  return StringBytes::Encode(target, str.data(), length, UTF8);
}
825+
815826
} // namespace node

src/util.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -701,9 +701,9 @@ inline v8::Maybe<void> FromV8Array(v8::Local<v8::Context> context,
701701
v8::Local<v8::Array> js_array,
702702
std::vector<v8::Global<v8::Value>>* out);
703703

704-
inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
705-
std::string_view str,
706-
v8::Isolate* isolate = nullptr);
704+
v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
705+
std::string_view str,
706+
v8::Isolate* isolate = nullptr);
707707
inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
708708
std::u16string_view str,
709709
v8::Isolate* isolate = nullptr);
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
'use strict';
2+
3+
require('../common');
4+
const fs = require('node:fs');
5+
const path = require('node:path');
6+
const assert = require('node:assert');
7+
const { describe, it } = require('node:test');
8+
const tmpdir = require('../common/tmpdir');
9+
10+
tmpdir.refresh();
11+
12+
/**
 * Writes `buf` to a file called `name` inside the test temp directory.
 *
 * @param {string} name - File name relative to tmpdir.path.
 * @param {Buffer} buf - Bytes to write.
 * @returns {string} Full path of the file that was written.
 */
function writeFile(name, buf) {
  const target = path.join(tmpdir.path, name);
  fs.writeFileSync(target, buf);
  return target;
}
17+
18+
/**
 * Asserts that reading `filePath` as UTF-8 through fs.readFileSync yields the
 * same string as decoding `rawBuf` with Buffer#toString('utf8').
 *
 * @param {string} filePath - File to read.
 * @param {Buffer} rawBuf - Bytes the file is expected to contain.
 */
function expectMatches(filePath, rawBuf) {
  const actual = fs.readFileSync(filePath, 'utf8');
  const expected = rawBuf.toString('utf8');
  assert.strictEqual(actual, expected);
}
24+
25+
describe('fs.readFileSync utf8 simdutf dispatch', () => {
  // Returns a test body that builds a buffer, writes it to `fileName`, and
  // checks that readFileSync(..., 'utf8') agrees with Buffer#toString('utf8').
  // The buffer is created lazily, inside the test itself.
  const roundTrip = (fileName, makeBuf) => () => {
    const payload = makeBuf();
    expectMatches(writeFile(fileName, payload), payload);
  };

  it('empty file', () => {
    const emptyPath = writeFile('empty.txt', Buffer.alloc(0));
    assert.strictEqual(fs.readFileSync(emptyPath, 'utf8'), '');
  });

  it('ascii small', roundTrip('tiny-ascii.txt', () => Buffer.from('hello')));

  it('ascii 20KB',
     roundTrip('medium-ascii.txt', () => Buffer.alloc(20 * 1024, 0x41)));

  it('ascii 1MB',
     roundTrip('large-ascii.txt', () => Buffer.alloc(1024 * 1024, 0x61)));

  it('fd input', () => {
    const payload = Buffer.alloc(50 * 1024, 0x62);
    const fd = fs.openSync(writeFile('fd-ascii.txt', payload), 'r');
    try {
      const viaFd = fs.readFileSync(fd, 'utf8');
      assert.strictEqual(viaFd, payload.toString('utf8'));
    } finally {
      fs.closeSync(fd);
    }
  });

  it('multibyte UTF-8',
     roundTrip('multibyte.txt',
               () => Buffer.from('中文测试 — café — 🚀'.repeat(500), 'utf8')));

  it('latin1-fits utf8',
     roundTrip('latin1-fits.txt',
               () => Buffer.from('naïve café résumé — niño Köln '.repeat(500),
                                 'utf8')));

  // Malformed sequences: the expectation is still whatever Buffer#toString
  // produces for the same bytes.
  it('invalid: lone continuation byte',
     roundTrip('invalid-cont.txt',
               () => Buffer.from([0x68, 0x69, 0x80, 0x21])));

  it('invalid: overlong',
     roundTrip('invalid-overlong.txt',
               () => Buffer.from([0x41, 0xC0, 0xAF, 0x42])));

  it('invalid: surrogate',
     roundTrip('invalid-surrogate.txt',
               () => Buffer.from([0x41, 0xED, 0xA0, 0x80, 0x42])));

  it('latin1 boundary U+00FF',
     roundTrip('latin1-boundary.txt',
               () => Buffer.from('ÿ'.repeat(2048), 'utf8')));

  it('above latin1 U+0100',
     roundTrip('above-latin1.txt',
               () => Buffer.from('ĀāĂ'.repeat(1024), 'utf8')));

  it('single codepoint each UTF-8 length', () => {
    // One code point for each encoded width: 1, 2, 3 and 4 bytes.
    [0x41, 0x00E9, 0x4E2D, 0x1F600].forEach((cp) => {
      const payload = Buffer.from(String.fromCodePoint(cp), 'utf8');
      const fileName = `single-cp-${cp.toString(16)}.txt`;
      expectMatches(writeFile(fileName, payload), payload);
    });
  });

  it('truncated multibyte at EOF',
     roundTrip('truncated-multibyte.txt',
               () => Buffer.from([0x41, 0xE4, 0xB8])));
});

0 commit comments

Comments
 (0)