Skip to content

Commit 0939edd

Browse files
jasnell and evanlucas
authored and committed
buffer: add buffer.transcode
Add buffer.transcode(source, from, to) method. Primarily uses ICU to transcode a buffer's content from one of Node.js' supported encodings to another. Originally part of a proposal to add a new unicode module. Decided to refactor the approach towards individual PRs without a new module. Refs: #8075 PR-URL: #9038 Reviewed-By: Anna Henningsen <[email protected]>
1 parent a5c62cb commit 0939edd

File tree

9 files changed

+437
-39
lines changed

9 files changed

+437
-39
lines changed

doc/api/buffer.md

+27
Original file line numberDiff line numberDiff line change
@@ -2302,6 +2302,33 @@ added: v3.0.0
23022302
On 32-bit architectures, this value is `(2^30)-1` (~1GB).
23032303
On 64-bit architectures, this value is `(2^31)-1` (~2GB).
23042304

2305+
## buffer.transcode(source, fromEnc, toEnc)
2306+
<!-- YAML
2307+
added: REPLACEME
2308+
-->
2309+
2310+
* `source` {Buffer} A `Buffer` instance
2311+
* `fromEnc` {String} The current encoding
2312+
* `toEnc` {String} The target encoding
2313+
2314+
Re-encodes the given `Buffer` instance from one character encoding to another.
2315+
Returns a new `Buffer` instance.
2316+
2317+
Throws if the `fromEnc` or `toEnc` specify invalid character encodings or if
2318+
conversion from `fromEnc` to `toEnc` is not permitted.
2319+
2320+
The transcoding process will use substitution characters if a given byte
2321+
sequence cannot be adequately represented in the target encoding. For instance:
2322+
2323+
```js
2324+
const newBuf = buffer.transcode(Buffer.from('€'), 'utf8', 'ascii');
2325+
console.log(newBuf.toString('ascii'));
2326+
// prints '?'
2327+
```
2328+
2329+
Because the Euro (`€`) sign is not representable in US-ASCII, it is replaced
2330+
with `?` in the transcoded `Buffer`.
2331+
23052332
## Class: SlowBuffer
23062333
<!-- YAML
23072334
deprecated: v6.0.0

lib/buffer.js

+4
Original file line numberDiff line numberDiff line change
@@ -1360,3 +1360,7 @@ Buffer.prototype.swap64 = function swap64() {
13601360
};
13611361

13621362
Buffer.prototype.toLocaleString = Buffer.prototype.toString;
1363+
1364+
// Put this at the end because internal/buffer has a circular
1365+
// dependency on Buffer.
1366+
exports.transcode = require('internal/buffer').transcode;

lib/internal/buffer.js

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
'use strict';
2+
3+
if (!process.binding('config').hasIntl) {
4+
return;
5+
}
6+
7+
const normalizeEncoding = require('internal/util').normalizeEncoding;
8+
const Buffer = require('buffer').Buffer;
9+
10+
const icu = process.binding('icu');
11+
12+
// Transcodes the Buffer from one encoding to another, returning a new
13+
// Buffer instance.
14+
exports.transcode = function transcode(source, fromEncoding, toEncoding) {
15+
if (!Buffer.isBuffer(source))
16+
throw new TypeError('"source" argument must be a Buffer');
17+
if (source.length === 0) return Buffer.alloc(0);
18+
19+
fromEncoding = normalizeEncoding(fromEncoding) || fromEncoding;
20+
toEncoding = normalizeEncoding(toEncoding) || toEncoding;
21+
const result = icu.transcode(source, fromEncoding, toEncoding);
22+
if (Buffer.isBuffer(result))
23+
return result;
24+
25+
const code = icu.icuErrName(result);
26+
const err = new Error(`Unable to transcode Buffer [${code}]`);
27+
err.code = code;
28+
err.errno = result;
29+
throw err;
30+
};

node.gyp

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
'lib/v8.js',
7575
'lib/vm.js',
7676
'lib/zlib.js',
77+
'lib/internal/buffer.js',
7778
'lib/internal/child_process.js',
7879
'lib/internal/cluster.js',
7980
'lib/internal/freelist.js',

src/node_buffer.cc

+19-36
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,6 @@
2222
if (!(r)) return env->ThrowRangeError("out of range index"); \
2323
} while (0)
2424

25-
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
26-
do { \
27-
if (!HasInstance(obj)) \
28-
return env->ThrowTypeError("argument should be a Buffer"); \
29-
} while (0)
30-
31-
#define SPREAD_ARG(val, name) \
32-
CHECK((val)->IsUint8Array()); \
33-
Local<Uint8Array> name = (val).As<Uint8Array>(); \
34-
ArrayBuffer::Contents name##_c = name->Buffer()->GetContents(); \
35-
const size_t name##_offset = name->ByteOffset(); \
36-
const size_t name##_length = name->ByteLength(); \
37-
char* const name##_data = \
38-
static_cast<char*>(name##_c.Data()) + name##_offset; \
39-
if (name##_length > 0) \
40-
CHECK_NE(name##_data, nullptr);
41-
4225
#define SLICE_START_END(start_arg, end_arg, end_max) \
4326
size_t start; \
4427
size_t end; \
@@ -448,7 +431,7 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
448431
Isolate* isolate = env->isolate();
449432

450433
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
451-
SPREAD_ARG(args.This(), ts_obj);
434+
SPREAD_BUFFER_ARG(args.This(), ts_obj);
452435

453436
if (ts_obj_length == 0)
454437
return args.GetReturnValue().SetEmptyString();
@@ -465,7 +448,7 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
465448
Environment* env = Environment::GetCurrent(args);
466449

467450
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
468-
SPREAD_ARG(args.This(), ts_obj);
451+
SPREAD_BUFFER_ARG(args.This(), ts_obj);
469452

470453
if (ts_obj_length == 0)
471454
return args.GetReturnValue().SetEmptyString();
@@ -543,8 +526,8 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
543526
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
544527
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
545528
Local<Object> target_obj = args[0].As<Object>();
546-
SPREAD_ARG(args.This(), ts_obj);
547-
SPREAD_ARG(target_obj, target);
529+
SPREAD_BUFFER_ARG(args.This(), ts_obj);
530+
SPREAD_BUFFER_ARG(target_obj, target);
548531

549532
size_t target_start;
550533
size_t source_start;
@@ -577,7 +560,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
577560
Environment* env = Environment::GetCurrent(args);
578561

579562
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
580-
SPREAD_ARG(args[0], ts_obj);
563+
SPREAD_BUFFER_ARG(args[0], ts_obj);
581564

582565
size_t start = args[2]->Uint32Value();
583566
size_t end = args[3]->Uint32Value();
@@ -590,7 +573,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
590573

591574
// First check if Buffer has been passed.
592575
if (Buffer::HasInstance(args[1])) {
593-
SPREAD_ARG(args[1], fill_obj);
576+
SPREAD_BUFFER_ARG(args[1], fill_obj);
594577
str_length = fill_obj_length;
595578
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
596579
goto start_fill;
@@ -669,7 +652,7 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
669652
Environment* env = Environment::GetCurrent(args);
670653

671654
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
672-
SPREAD_ARG(args.This(), ts_obj);
655+
SPREAD_BUFFER_ARG(args.This(), ts_obj);
673656

674657
if (!args[0]->IsString())
675658
return env->ThrowTypeError("Argument must be a string");
@@ -747,7 +730,7 @@ static inline void Swizzle(char* start, unsigned int len) {
747730
template <typename T, enum Endianness endianness>
748731
void ReadFloatGeneric(const FunctionCallbackInfo<Value>& args) {
749732
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
750-
SPREAD_ARG(args[0], ts_obj);
733+
SPREAD_BUFFER_ARG(args[0], ts_obj);
751734

752735
uint32_t offset = args[1]->Uint32Value();
753736
CHECK_LE(offset + sizeof(T), ts_obj_length);
@@ -881,8 +864,8 @@ void CompareOffset(const FunctionCallbackInfo<Value> &args) {
881864

882865
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
883866
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
884-
SPREAD_ARG(args[0], ts_obj);
885-
SPREAD_ARG(args[1], target);
867+
SPREAD_BUFFER_ARG(args[0], ts_obj);
868+
SPREAD_BUFFER_ARG(args[1], target);
886869

887870
size_t target_start;
888871
size_t source_start;
@@ -921,8 +904,8 @@ void Compare(const FunctionCallbackInfo<Value> &args) {
921904

922905
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
923906
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
924-
SPREAD_ARG(args[0], obj_a);
925-
SPREAD_ARG(args[1], obj_b);
907+
SPREAD_BUFFER_ARG(args[0], obj_a);
908+
SPREAD_BUFFER_ARG(args[1], obj_b);
926909

927910
size_t cmp_length = MIN(obj_a_length, obj_b_length);
928911

@@ -977,7 +960,7 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
977960
UTF8);
978961

979962
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
980-
SPREAD_ARG(args[0], ts_obj);
963+
SPREAD_BUFFER_ARG(args[0], ts_obj);
981964

982965
Local<String> needle = args[1].As<String>();
983966
int64_t offset_i64 = args[2]->IntegerValue();
@@ -1084,8 +1067,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
10841067

10851068
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
10861069
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
1087-
SPREAD_ARG(args[0], ts_obj);
1088-
SPREAD_ARG(args[1], buf);
1070+
SPREAD_BUFFER_ARG(args[0], ts_obj);
1071+
SPREAD_BUFFER_ARG(args[1], buf);
10891072
int64_t offset_i64 = args[2]->IntegerValue();
10901073
bool is_forward = args[4]->IsTrue();
10911074

@@ -1143,7 +1126,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
11431126
ASSERT(args[3]->IsBoolean());
11441127

11451128
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
1146-
SPREAD_ARG(args[0], ts_obj);
1129+
SPREAD_BUFFER_ARG(args[0], ts_obj);
11471130

11481131
uint32_t needle = args[1]->Uint32Value();
11491132
int64_t offset_i64 = args[2]->IntegerValue();
@@ -1171,7 +1154,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
11711154
void Swap16(const FunctionCallbackInfo<Value>& args) {
11721155
Environment* env = Environment::GetCurrent(args);
11731156
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
1174-
SPREAD_ARG(args[0], ts_obj);
1157+
SPREAD_BUFFER_ARG(args[0], ts_obj);
11751158
SwapBytes16(ts_obj_data, ts_obj_length);
11761159
args.GetReturnValue().Set(args[0]);
11771160
}
@@ -1180,7 +1163,7 @@ void Swap16(const FunctionCallbackInfo<Value>& args) {
11801163
void Swap32(const FunctionCallbackInfo<Value>& args) {
11811164
Environment* env = Environment::GetCurrent(args);
11821165
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
1183-
SPREAD_ARG(args[0], ts_obj);
1166+
SPREAD_BUFFER_ARG(args[0], ts_obj);
11841167
SwapBytes32(ts_obj_data, ts_obj_length);
11851168
args.GetReturnValue().Set(args[0]);
11861169
}
@@ -1189,7 +1172,7 @@ void Swap32(const FunctionCallbackInfo<Value>& args) {
11891172
void Swap64(const FunctionCallbackInfo<Value>& args) {
11901173
Environment* env = Environment::GetCurrent(args);
11911174
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
1192-
SPREAD_ARG(args[0], ts_obj);
1175+
SPREAD_BUFFER_ARG(args[0], ts_obj);
11931176
SwapBytes64(ts_obj_data, ts_obj_length);
11941177
args.GetReturnValue().Set(args[0]);
11951178
}

0 commit comments

Comments
 (0)