Skip to content

Commit 3380cd5

Browse files
bnoordhuis authored and italoacasas committed
src: support UTF-8 in compiled-in JS source files
Detect when source files in lib/ are not ASCII. Decode them as UTF-8 and store them as UTF-16 in the binary so they can be used as external string resources without non-ASCII characters getting mangled.

Fixes: #10673
PR-URL: #11129
Reviewed-By: Anna Henningsen <[email protected]>
Reviewed-By: James M Snell <[email protected]>
1 parent 5350f04 commit 3380cd5

File tree

3 files changed

+81
-95
lines changed

3 files changed

+81
-95
lines changed

node.gyp

+4 -4
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@
145145
'src',
146146
'tools/msvs/genfiles',
147147
'deps/uv/src/ares',
148-
'<(SHARED_INTERMEDIATE_DIR)', # for node_natives.h
148+
'<(SHARED_INTERMEDIATE_DIR)',
149149
],
150150

151151
'sources': [
@@ -166,7 +166,6 @@
166166
'src/node_debug_options.cc',
167167
'src/node_file.cc',
168168
'src/node_http_parser.cc',
169-
'src/node_javascript.cc',
170169
'src/node_main.cc',
171170
'src/node_os.cc',
172171
'src/node_revert.cc',
@@ -234,11 +233,11 @@
234233
'deps/http_parser/http_parser.h',
235234
'deps/v8/include/v8.h',
236235
'deps/v8/include/v8-debug.h',
237-
'<(SHARED_INTERMEDIATE_DIR)/node_natives.h',
238236
# javascript files to make for an even more pleasant IDE experience
239237
'<@(library_files)',
240238
# node.gyp is added to the project by default.
241239
'common.gypi',
240+
'<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
242241
],
243242

244243
'defines': [
@@ -711,12 +710,13 @@
711710
'actions': [
712711
{
713712
'action_name': 'node_js2c',
713+
'process_outputs_as_sources': 1,
714714
'inputs': [
715715
'<@(library_files)',
716716
'./config.gypi',
717717
],
718718
'outputs': [
719-
'<(SHARED_INTERMEDIATE_DIR)/node_natives.h',
719+
'<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
720720
],
721721
'conditions': [
722722
[ 'node_use_dtrace=="false" and node_use_etw=="false"', {

src/node_javascript.cc

-51
This file was deleted.

tools/js2c.py

+77 -40
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,16 @@
3737
import string
3838

3939

40-
def ToCString(contents):
41-
step = 20
42-
slices = (contents[i:i+step] for i in xrange(0, len(contents), step))
43-
slices = map(lambda s: ','.join(str(ord(c)) for c in s), slices)
40+
def ToCArray(elements, step=10):
41+
slices = (elements[i:i+step] for i in xrange(0, len(elements), step))
42+
slices = map(lambda s: ','.join(str(x) for x in s), slices)
4443
return ',\n'.join(slices)
4544

4645

46+
def ToCString(contents):
47+
return ToCArray(map(ord, contents), step=20)
48+
49+
4750
def ReadFile(filename):
4851
file = open(filename, "rt")
4952
try:
@@ -161,34 +164,72 @@ def ReadMacros(lines):
161164
return (constants, macros)
162165

163166

164-
HEADER_TEMPLATE = """\
165-
#ifndef NODE_NATIVES_H_
166-
#define NODE_NATIVES_H_
167+
TEMPLATE = """
168+
#include "node.h"
169+
#include "node_javascript.h"
170+
#include "v8.h"
171+
#include "env.h"
172+
#include "env-inl.h"
167173
168-
#include <stdint.h>
174+
namespace node {{
169175
170-
#define NODE_NATIVES_MAP(V) \\
171-
{node_natives_map}
176+
{definitions}
177+
178+
v8::Local<v8::String> MainSource(Environment* env) {{
179+
return internal_bootstrap_node_value.ToStringChecked(env->isolate());
180+
}}
181+
182+
void DefineJavaScript(Environment* env, v8::Local<v8::Object> target) {{
183+
{initializers}
184+
}}
172185
173-
namespace node {{
174-
{sources}
175186
}} // namespace node
187+
"""
176188

177-
#endif // NODE_NATIVES_H_
189+
ONE_BYTE_STRING = """
190+
static const uint8_t raw_{var}[] = {{ {data} }};
191+
static struct : public v8::String::ExternalOneByteStringResource {{
192+
const char* data() const override {{
193+
return reinterpret_cast<const char*>(raw_{var});
194+
}}
195+
size_t length() const override {{ return arraysize(raw_{var}); }}
196+
void Dispose() override {{ /* Default calls `delete this`. */ }}
197+
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {{
198+
return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
199+
}}
200+
}} {var};
178201
"""
179202

203+
TWO_BYTE_STRING = """
204+
static const uint16_t raw_{var}[] = {{ {data} }};
205+
static struct : public v8::String::ExternalStringResource {{
206+
const uint16_t* data() const override {{ return raw_{var}; }}
207+
size_t length() const override {{ return arraysize(raw_{var}); }}
208+
void Dispose() override {{ /* Default calls `delete this`. */ }}
209+
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {{
210+
return v8::String::NewExternalTwoByte(isolate, this).ToLocalChecked();
211+
}}
212+
}} {var};
213+
"""
180214

181-
NODE_NATIVES_MAP = """\
182-
V({escaped_id}) \\
215+
INITIALIZER = """\
216+
CHECK(target->Set(env->context(),
217+
{key}.ToStringChecked(env->isolate()),
218+
{value}.ToStringChecked(env->isolate())).FromJust());
183219
"""
184220

185221

186-
SOURCES = """\
187-
static const uint8_t {escaped_id}_name[] = {{
188-
{name}}};
189-
static const uint8_t {escaped_id}_data[] = {{
190-
{data}}};
191-
"""
222+
def Render(var, data):
223+
# Treat non-ASCII as UTF-8 and convert it to UTF-16.
224+
if any(ord(c) > 127 for c in data):
225+
template = TWO_BYTE_STRING
226+
data = map(ord, data.decode('utf-8').encode('utf-16be'))
227+
data = [data[i] * 256 + data[i+1] for i in xrange(0, len(data), 2)]
228+
data = ToCArray(data)
229+
else:
230+
template = ONE_BYTE_STRING
231+
data = ToCString(data)
232+
return template.format(var=var, data=data)
192233

193234

194235
def JS2C(source, target):
@@ -207,36 +248,32 @@ def JS2C(source, target):
207248
(consts, macros) = ReadMacros(macro_lines)
208249

209250
# Build source code lines
210-
node_natives_map = []
211-
sources = []
251+
definitions = []
252+
initializers = []
212253

213-
for s in modules:
214-
lines = ReadFile(str(s))
254+
for name in modules:
255+
lines = ReadFile(str(name))
215256
lines = ExpandConstants(lines, consts)
216257
lines = ExpandMacros(lines, macros)
217-
data = ToCString(lines)
218258

219259
# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
220260
# so don't assume there is always a slash in the file path.
221-
if '/' in s or '\\' in s:
222-
id = '/'.join(re.split('/|\\\\', s)[1:])
223-
else:
224-
id = s
225-
226-
if '.' in id:
227-
id = id.split('.', 1)[0]
261+
if '/' in name or '\\' in name:
262+
name = '/'.join(re.split('/|\\\\', name)[1:])
228263

229-
name = ToCString(id)
230-
escaped_id = id.replace('-', '_').replace('/', '_')
231-
node_natives_map.append(NODE_NATIVES_MAP.format(**locals()))
232-
sources.append(SOURCES.format(**locals()))
264+
name = name.split('.', 1)[0]
265+
var = name.replace('-', '_').replace('/', '_')
266+
key = '%s_key' % var
267+
value = '%s_value' % var
233268

234-
node_natives_map = ''.join(node_natives_map)
235-
sources = ''.join(sources)
269+
definitions.append(Render(key, name))
270+
definitions.append(Render(value, lines))
271+
initializers.append(INITIALIZER.format(key=key, value=value))
236272

237273
# Emit result
238274
output = open(str(target[0]), "w")
239-
output.write(HEADER_TEMPLATE.format(**locals()))
275+
output.write(TEMPLATE.format(definitions=''.join(definitions),
276+
initializers=''.join(initializers)))
240277
output.close()
241278

242279
def main():

0 commit comments

Comments
 (0)