Skip to content

Commit 96a314b

Browse files
bnoordhuis authored and isaacs committed
buffer: strip high bits when converting to ascii
Consider the following example: console.log(Buffer('ú').toString('ascii')); Before this commit, the contents of the buffer were used as-is and hence it printed 'ú'. Now, it prints 'C:'. Perhaps not much of an improvement, but it conforms to what the documentation says it does: strip off the high bits. Fixes #4371.
1 parent 632b7d8 commit 96a314b

File tree

3 files changed

+57
-5
lines changed

3 files changed

+57
-5
lines changed

doc/api/buffer.markdown

+4-3
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ encoding method. Here are the different string encodings.
1919

2020
* `'ascii'` - for 7 bit ASCII data only. This encoding method is very fast, and
2121
will strip the high bit if set.
22-
Note that this encoding converts a null character (`'\0'` or `'\u0000'`) into
23-
`0x20` (character code of a space). If you want to convert a null character
24-
into `0x00`, you should use `'utf8'`.
22+
23+
Note that when converting from string to buffer, this encoding converts a null
24+
character (`'\0'` or `'\u0000'`) into `0x20` (character code of a space). If
25+
you want to convert a null character into `0x00`, you should use `'utf8'`.
2526

2627
* `'utf8'` - Multibyte encoded Unicode characters. Many web pages and other
2728
document formats use UTF-8.

src/node_buffer.cc

+26-2
Original file line numberDiff line numberDiff line change
@@ -247,15 +247,38 @@ Handle<Value> Buffer::BinarySlice(const Arguments &args) {
247247
}
248248

249249

250+
// Reports whether any of the `len` bytes starting at `buf` has its high bit
// (0x80) set, i.e. falls outside the 7-bit ASCII range.
// Returns true at the first such byte; returns false if none is found
// (including when `len` is 0, since the loop body never runs).
static bool contains_non_ascii(const char* buf, size_t len) {
251+
for (size_t i = 0; i < len; ++i) {
252+
if (buf[i] & 0x80) return true;
253+
}
254+
return false;
255+
}
256+
257+
258+
// Copies `len` bytes from `src` to `dst`, clearing the high bit of each byte
// (byte & 0x7f) so every output byte is in the 7-bit range.
// `src` is only read; `dst` must have room for at least `len` bytes.
// No terminator is appended to `dst`.
static void force_ascii(const char* src, char* dst, size_t len) {
259+
for (size_t i = 0; i < len; ++i) {
260+
dst[i] = src[i] & 0x7f;
261+
}
262+
}
263+
264+
250265
// Buffer::AsciiSlice: builds a string from the bytes of this buffer's data_
// in the range [start, end).
//
// After this change: if any byte in the range has its high bit set, the bytes
// are first copied into a temporary array with the high bit cleared
// (force_ascii) and the string is created from that copy, so the buffer's own
// memory is not modified. Otherwise the string is created directly from the
// buffer memory.
//
// Diff-view note: the lines under the `256-` and `258-` gutters are the
// pre-commit code removed by this change; lines under `N+` gutters are added.
//
// NOTE(review): `start` and `end` are not declared in the visible lines;
// presumably SLICE_ARGS defines them from args[0] and args[1] — confirm at
// the macro definition.
// NOTE(review): `out` is freed immediately after String::New(out, len), so
// String::New is presumably expected to copy the bytes — confirm against the
// V8 API.
Handle<Value> Buffer::AsciiSlice(const Arguments &args) {
251266
HandleScope scope;
252267
Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());
253268
SLICE_ARGS(args[0], args[1])
254269

255270
char* data = parent->data_ + start;
256-
Local<String> string = String::New(data, end - start);
271+
size_t len = end - start;
272+
273+
if (contains_non_ascii(data, len)) {
274+
char* out = new char[len];
275+
force_ascii(data, out, len);
276+
Local<String> rc = String::New(out, len);
277+
delete[] out;
278+
return scope.Close(rc);
279+
}
257280

258-
return scope.Close(string);
281+
return scope.Close(String::New(data, len));
259282
}
260283

261284

@@ -268,6 +291,7 @@ Handle<Value> Buffer::Utf8Slice(const Arguments &args) {
268291
return scope.Close(string);
269292
}
270293

294+
271295
Handle<Value> Buffer::Ucs2Slice(const Arguments &args) {
272296
HandleScope scope;
273297
Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());

test/simple/test-buffer-ascii.js

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright Joyent, Inc. and other Node contributors.
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a
4+
// copy of this software and associated documentation files (the
5+
// "Software"), to deal in the Software without restriction, including
6+
// without limitation the rights to use, copy, modify, merge, publish,
7+
// distribute, sublicense, and/or sell copies of the Software, and to permit
8+
// persons to whom the Software is furnished to do so, subject to the
9+
// following conditions:
10+
//
11+
// The above copyright notice and this permission notice shall be included
12+
// in all copies or substantial portions of the Software.
13+
//
14+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15+
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17+
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18+
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19+
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20+
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21+
22+
var common = require('../common');
23+
var assert = require('assert');
24+
25+
// ASCII conversion in node.js simply masks off the high bits,
26+
// it doesn't do transliteration.
// Worked example (assumes the string is stored in the buffer as UTF-8 —
// confirm): 'é' is the byte pair 0xC3 0xA9; masking each byte with 0x7f
// gives 0x43 0x29, i.e. the two characters 'C)'. Hence each 'é' in the
// input shows up as 'C)' in the expected output below.
27+
assert.equal(Buffer('hérité').toString('ascii'), 'hC)ritC)');

0 commit comments

Comments
 (0)