amis-rpc-design/node_modules/hermes-parser/dist/HermesParserDecodeUTF8String.js
2023-10-07 19:42:30 +08:00

68 lines
1.9 KiB
JavaScript

/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*
* @format
*/
'use strict';
/**
* Decode a UTF-8 encoded string from Hermes with a known length.
* Based on Emscripten's UTF8ToString with the following differences:
* - Always reads all bytes up to the given length, including null bytes. This
* means that we can decode strings that contain null bytes in the middle.
* - Allow UTF-8 encoded code points that are part of a surrogate pair, even though
* this is technically invalid UTF-8 that UTF8ToString would convert to 0xfffd.
*/
// Set `__esModule: true` on the CommonJS `exports` object (the marker
// conventionally emitted when ES-module syntax is compiled to CommonJS).
Object.defineProperty(exports, "__esModule", {
value: true
});
// Expose the decoder as the default export. The function is declared further
// down; function declarations are hoisted, so the reference is already valid.
exports.default = HermesParserDecodeUTF8String;
/**
 * Decode `length` bytes of UTF-8 starting at `heap[ptrIn]` into a string.
 *
 * Every byte in the range is decoded, so embedded null bytes are preserved,
 * and three-byte sequences that encode a lone surrogate are passed through
 * unchanged. The lead byte alone decides how long a sequence is; continuation
 * bytes are masked with 0x3f but not otherwise validated.
 *
 * A multi-byte sequence that is cut off by the end of the range is decoded as
 * a single U+FFFD rather than by reading continuation bytes from beyond
 * `ptrIn + length`.
 *
 * @param {number} ptrIn Index into `heap` of the first byte to decode.
 * @param {number} length Number of bytes to decode.
 * @param {ArrayLike<number>} heap Byte storage indexed by integer offset
 *   (presumably a Uint8Array view of the WASM heap; confirm against callers).
 * @returns {string} The decoded string.
 */
function HermesParserDecodeUTF8String(ptrIn, length, heap) {
  let ptr = ptrIn;
  const endPtr = ptr + length;
  let str = '';
  while (ptr < endPtr) {
    // ASCII characters fit in single byte code point
    let u0 = heap[ptr++];
    if (!(u0 & 0x80)) {
      str += String.fromCharCode(u0);
      continue;
    }

    // Number of continuation bytes implied by the lead byte. Anything that is
    // not a two- or three-byte lead is handled as a four-byte sequence.
    let continuationCount = 3;
    if ((u0 & 0xe0) === 0xc0) {
      continuationCount = 1;
    } else if ((u0 & 0xf0) === 0xe0) {
      continuationCount = 2;
    }

    // Never read past the requested range: a truncated trailing sequence
    // becomes one replacement character and decoding stops.
    if (ptr + continuationCount > endPtr) {
      str += '\ufffd';
      break;
    }

    const u1 = heap[ptr++] & 0x3f;
    if (continuationCount === 1) {
      // Two byte code point
      str += String.fromCharCode(((u0 & 0x1f) << 6) | u1);
      continue;
    }

    const u2 = heap[ptr++] & 0x3f;
    if (continuationCount === 2) {
      // Three byte code point
      u0 = ((u0 & 0x0f) << 12) | (u1 << 6) | u2;
    } else {
      // Four byte code point
      const u3 = heap[ptr++] & 0x3f;
      u0 = ((u0 & 0x07) << 18) | (u1 << 12) | (u2 << 6) | u3;
    }

    if (u0 < 0x10000) {
      // Code point fits into a single UTF-16 code unit
      str += String.fromCharCode(u0);
    } else {
      // Code point does not fit into single UTF-16 code unit so convert to surrogate pair
      u0 -= 0x10000;
      str += String.fromCharCode(0xd800 | (u0 >> 10), 0xdc00 | (u0 & 0x3ff));
    }
  }
  return str;
}