amis-rpc-design/node_modules/monaco-editor/esm/vs/base/common/strings.js

817 lines
79 KiB
JavaScript
Raw Normal View History

2023-10-07 19:42:30 +08:00
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
var _a;
import { LRUCachedFunction } from './cache.js';
import { Lazy } from './lazy.js';
/**
 * Tells whether `str` is falsy, is not a primitive string, or contains
 * nothing but whitespace (as determined by `String.prototype.trim`).
 * @param str value to inspect
 * @returns `true` when there is no visible content, `false` otherwise
 */
export function isFalsyOrWhitespace(str) {
    const isNonEmptyString = typeof str === 'string' && str.length > 0;
    return !isNonEmptyString || str.trim().length === 0;
}
const _formatRegexp = /{(\d+)}/g;
/**
 * Substitutes `{n}` placeholders in `value` with the n-th entry of `args`
 * (zero-based). Placeholders whose index is outside `args` are left as-is,
 * and `value` is returned untouched when no arguments are supplied.
 * @param value string to which formatting is applied
 * @param args replacements for {n}-entries
 */
export function format(value, ...args) {
    if (args.length === 0) {
        return value;
    }
    return value.replace(_formatRegexp, (placeholder, digits) => {
        const index = parseInt(digits, 10);
        const isOutOfRange = isNaN(index) || index < 0 || index >= args.length;
        if (isOutOfRange) {
            // Keep the literal `{n}` text when there is nothing to insert.
            return placeholder;
        }
        return args[index];
    });
}
/**
 * Replaces the characters `<`, `>` and `&` with their HTML entities so the
 * string can be used e.g. in HTMLElement.innerHTML. No other characters
 * (quotes included) are touched.
 */
export function escape(html) {
    const entities = { '<': '&lt;', '>': '&gt;', '&': '&amp;' };
    // The pattern only ever matches keys of `entities`.
    return html.replace(/[<>&]/g, (ch) => entities[ch]);
}
/**
 * Prefixes every regular-expression metacharacter in `value` with a
 * backslash so the result matches the input literally.
 */
export function escapeRegExpCharacters(value) {
    const metaCharacters = /[\\\{\}\*\+\?\|\^\$\.\[\]\(\)]/g;
    return value.replace(metaCharacters, '\\$&');
}
/**
 * Strips every leading and trailing occurrence of `needle` from `haystack`
 * by applying `ltrim` and then `rtrim`.
 * @param haystack string to trim
 * @param needle the thing to trim (default is a blank)
 */
export function trim(haystack, needle = ' ') {
    return rtrim(ltrim(haystack, needle), needle);
}
/**
 * Strips every consecutive occurrence of `needle` from the start of
 * `haystack`. A falsy/empty haystack or needle returns `haystack` unchanged.
 * @param haystack string to trim
 * @param needle the thing to trim
 */
export function ltrim(haystack, needle) {
    if (!haystack || !needle) {
        return haystack;
    }
    const step = needle.length;
    if (step === 0 || haystack.length === 0) {
        return haystack;
    }
    // Advance past each copy of the needle that sits exactly at the cursor.
    let start = 0;
    for (;;) {
        if (haystack.indexOf(needle, start) !== start) {
            break;
        }
        start += step;
    }
    return haystack.substring(start);
}
/**
 * Strips every consecutive occurrence of `needle` from the end of
 * `haystack`. A falsy/empty haystack or needle returns `haystack` unchanged.
 * @param haystack string to trim
 * @param needle the thing to trim
 */
export function rtrim(haystack, needle) {
    if (!haystack || !needle) {
        return haystack;
    }
    const step = needle.length;
    const total = haystack.length;
    if (step === 0 || total === 0) {
        return haystack;
    }
    // `end` is the exclusive end of the part that is kept so far.
    let end = total;
    for (;;) {
        const found = haystack.lastIndexOf(needle, end - 1);
        const endsWithNeedle = found !== -1 && found + step === end;
        if (!endsWithNeedle) {
            return haystack.substring(0, end);
        }
        if (found === 0) {
            // The whole string consisted of needles.
            return '';
        }
        end = found;
    }
}
/**
 * Turns a simple wildcard pattern into regular-expression source text:
 * special characters (including whitespace, `,`, `#` and `-`) are escaped
 * first, then each `*` becomes `.*`.
 */
export function convertSimple2RegExpPattern(pattern) {
    const escaped = pattern.replace(/[\-\\\{\}\+\?\|\^\$\.\,\[\]\(\)\#\s]/g, '\\$&');
    const withWildcards = escaped.replace(/[\*]/g, '.*');
    return withWildcards;
}
/**
 * Returns `pattern` with every `*` character removed.
 */
export function stripWildcards(pattern) {
    const wildcard = /\*/g;
    const withoutWildcards = pattern.replace(wildcard, '');
    return withoutWildcards;
}
/**
 * Builds a `RegExp` from a search string.
 * @param searchString text or pattern to search for; must be non-empty
 * @param isRegex when falsy, `searchString` is escaped and matched literally
 * @param options `wholeWord` adds `\b` anchors where the pattern starts/ends
 * with a word character; `global`, `multiline` and `unicode` add the `g`,
 * `m` and `u` flags; the `i` flag is added unless `matchCase` is truthy
 * @throws Error when `searchString` is empty
 */
export function createRegExp(searchString, isRegex, options = {}) {
    if (!searchString) {
        throw new Error('Cannot create regex from empty string');
    }
    let source = isRegex ? searchString : escapeRegExpCharacters(searchString);
    if (options.wholeWord) {
        // `\B` fails on a lone word character, so a failed test means the
        // pattern edge is a word character and needs a boundary anchor.
        const startsWithWordChar = !/\B/.test(source.charAt(0));
        if (startsWithWordChar) {
            source = '\\b' + source;
        }
        const endsWithWordChar = !/\B/.test(source.charAt(source.length - 1));
        if (endsWithWordChar) {
            source = source + '\\b';
        }
    }
    const flags = [
        options.global ? 'g' : '',
        options.matchCase ? '' : 'i',
        options.multiline ? 'm' : '',
        options.unicode ? 'u' : '',
    ].join('');
    return new RegExp(source, flags);
}
/**
 * Reports whether `regexp` matches the empty string without advancing
 * `lastIndex`. A handful of patterns that are meant to match an empty
 * string are exempt and always yield `false`.
 */
export function regExpLeadsToEndlessLoop(regexp) {
    const allowedEmptyMatchers = ['^', '^$', '$', '^\\s*$'];
    if (allowedEmptyMatchers.includes(regexp.source)) {
        return false;
    }
    // Probe with an empty input: a match that leaves `lastIndex` at 0 means
    // the expression does not advance.
    const probe = regexp.exec('');
    if (!probe) {
        return false;
    }
    return regexp.lastIndex === 0;
}
/**
 * Splits `str` into lines at `\r\n`, `\r` or `\n`.
 */
export function splitLines(str) {
    const lineBreak = /\r\n|\r|\n/;
    return str.split(lineBreak);
}
/**
 * Finds the index of the first character that is neither a space nor a tab.
 * Returns -1 when the string is empty or holds only spaces/tabs.
 */
export function firstNonWhitespaceIndex(str) {
    const length = str.length;
    let index = 0;
    while (index < length) {
        const code = str.charCodeAt(index);
        const isBlank = code === 32 /* CharCode.Space */ || code === 9 /* CharCode.Tab */;
        if (!isBlank) {
            return index;
        }
        index++;
    }
    return -1;
}
/**
 * Returns the run of spaces/tabs that begins at `start` and stops at the
 * first other character or at `end`. When only spaces/tabs are found,
 * `str.substring(start, end)` is returned.
 */
export function getLeadingWhitespace(str, start = 0, end = str.length) {
    let cursor = start;
    while (cursor < end) {
        const code = str.charCodeAt(cursor);
        const isBlank = code === 32 /* CharCode.Space */ || code === 9 /* CharCode.Tab */;
        if (!isBlank) {
            return str.substring(start, cursor);
        }
        cursor++;
    }
    return str.substring(start, end);
}
/**
 * Scans backwards from `startIndex` (default: last index) and returns the
 * index of the first character found that is neither a space nor a tab.
 * Returns -1 when no such character exists.
 */
export function lastNonWhitespaceIndex(str, startIndex = str.length - 1) {
    let index = startIndex;
    while (index >= 0) {
        const code = str.charCodeAt(index);
        const isBlank = code === 32 /* CharCode.Space */ || code === 9 /* CharCode.Tab */;
        if (!isBlank) {
            return index;
        }
        index--;
    }
    return -1;
}
/**
 * Three-way comparison using the `<` and `>` operators.
 * @returns -1 when `a < b`, 1 when `a > b`, otherwise 0
 */
export function compare(a, b) {
    if (a < b) {
        return -1;
    }
    return a > b ? 1 : 0;
}
/**
 * Compares `a[aStart, aEnd)` with `b[bStart, bEnd)` by UTF-16 code unit.
 * When one range is a prefix of the other, the shorter range sorts first.
 * @returns -1, 0 or 1
 */
export function compareSubstring(a, b, aStart = 0, aEnd = a.length, bStart = 0, bEnd = b.length) {
    let aPos = aStart;
    let bPos = bStart;
    while (aPos < aEnd && bPos < bEnd) {
        const unitA = a.charCodeAt(aPos);
        const unitB = b.charCodeAt(bPos);
        if (unitA < unitB) {
            return -1;
        }
        if (unitA > unitB) {
            return 1;
        }
        aPos++;
        bPos++;
    }
    // All compared units were equal: decide by what is left of each range.
    const aRemaining = aEnd - aPos;
    const bRemaining = bEnd - bPos;
    if (aRemaining < bRemaining) {
        return -1;
    }
    return aRemaining > bRemaining ? 1 : 0;
}
/**
 * Compares two whole strings via `compareSubstringIgnoreCase`.
 */
export function compareIgnoreCase(a, b) {
    const aEnd = a.length;
    const bEnd = b.length;
    return compareSubstringIgnoreCase(a, b, 0, aEnd, 0, bEnd);
}
/**
 * Compares `a[aStart, aEnd)` with `b[bStart, bEnd)`, treating ASCII letters
 * case-insensitively. As soon as a differing pair involves a code unit
 * >= 128, the rest of the comparison is delegated to `compareSubstring` on
 * the lower-cased strings.
 * @returns a negative number, 0, or a positive number
 */
export function compareSubstringIgnoreCase(a, b, aStart = 0, aEnd = a.length, bStart = 0, bEnd = b.length) {
    let aPos = aStart;
    let bPos = bStart;
    for (; aPos < aEnd && bPos < bEnd; aPos++, bPos++) {
        const unitA = a.charCodeAt(aPos);
        const unitB = b.charCodeAt(bPos);
        if (unitA === unitB) {
            continue;
        }
        if (unitA >= 128 || unitB >= 128) {
            // Not ASCII -> fall back to lower-casing both strings.
            // NOTE(review): the current offsets are reused on the lower-cased
            // strings; confirm that is acceptable if lower-casing can change
            // a string's length.
            return compareSubstring(a.toLowerCase(), b.toLowerCase(), aPos, aEnd, bPos, bEnd);
        }
        // Map lower-case ASCII letters onto their upper-case variants:
        // [97-122] (lower ascii) --> [65-90] (upper ascii)
        const foldedA = isLowerAsciiLetter(unitA) ? unitA - 32 : unitA;
        const foldedB = isLowerAsciiLetter(unitB) ? unitB - 32 : unitB;
        const delta = foldedA - foldedB;
        if (delta !== 0) {
            return delta;
        }
    }
    // All compared units were equal: decide by what is left of each range.
    const aRemaining = aEnd - aPos;
    const bRemaining = bEnd - bPos;
    if (aRemaining < bRemaining) {
        return -1;
    }
    return aRemaining > bRemaining ? 1 : 0;
}
/**
 * Tells whether `code` is the char code of an ASCII digit `0`-`9`.
 */
export function isAsciiDigit(code) {
    const atLeastZero = code >= 48 /* CharCode.Digit0 */;
    return atLeastZero && code <= 57 /* CharCode.Digit9 */;
}
/**
 * Tells whether `code` is the char code of a lower-case ASCII letter `a`-`z`.
 */
export function isLowerAsciiLetter(code) {
    const atLeastA = code >= 97 /* CharCode.a */;
    return atLeastA && code <= 122 /* CharCode.z */;
}
/**
 * Tells whether `code` is the char code of an upper-case ASCII letter `A`-`Z`.
 */
export function isUpperAsciiLetter(code) {
    const atLeastA = code >= 65 /* CharCode.A */;
    return atLeastA && code <= 90 /* CharCode.Z */;
}
/**
 * Tells whether `a` and `b` have the same length and compare as equal
 * under `compareSubstringIgnoreCase`.
 */
export function equalsIgnoreCase(a, b) {
    if (a.length !== b.length) {
        return false;
    }
    return compareSubstringIgnoreCase(a, b) === 0;
}
/**
 * Tells whether `str` begins with `candidate`, comparing the first
 * `candidate.length` characters via `compareSubstringIgnoreCase`.
 */
export function startsWithIgnoreCase(str, candidate) {
    const prefixLength = candidate.length;
    if (prefixLength > str.length) {
        // A longer candidate can never be a prefix.
        return false;
    }
    const comparison = compareSubstringIgnoreCase(str, candidate, 0, prefixLength);
    return comparison === 0;
}
/**
 * @returns the number of leading UTF-16 code units that `a` and `b` share.
 */
export function commonPrefixLength(a, b) {
    const limit = Math.min(a.length, b.length);
    let shared = 0;
    while (shared < limit && a.charCodeAt(shared) === b.charCodeAt(shared)) {
        shared++;
    }
    return shared;
}
/**
 * @returns the number of trailing UTF-16 code units that `a` and `b` share.
 */
export function commonSuffixLength(a, b) {
    const limit = Math.min(a.length, b.length);
    const aLast = a.length - 1;
    const bLast = b.length - 1;
    let shared = 0;
    while (shared < limit && a.charCodeAt(aLast - shared) === b.charCodeAt(bLast - shared)) {
        shared++;
    }
    return shared;
}
/**
 * Tells whether `charCode` lies in the high-surrogate range 0xD800-0xDBFF.
 * See http://en.wikipedia.org/wiki/Surrogate_pair
 */
export function isHighSurrogate(charCode) {
    const aboveLowerBound = 0xD800 <= charCode;
    return aboveLowerBound && charCode <= 0xDBFF;
}
/**
 * Tells whether `charCode` lies in the low-surrogate range 0xDC00-0xDFFF.
 * See http://en.wikipedia.org/wiki/Surrogate_pair
 */
export function isLowSurrogate(charCode) {
    const aboveLowerBound = 0xDC00 <= charCode;
    return aboveLowerBound && charCode <= 0xDFFF;
}
/**
 * Combines a high and a low surrogate code unit into the code point they
 * encode.
 * See http://en.wikipedia.org/wiki/Surrogate_pair
 */
export function computeCodePoint(highSurrogate, lowSurrogate) {
    const highBits = (highSurrogate - 0xD800) << 10;
    const lowBits = lowSurrogate - 0xDC00;
    return highBits + lowBits + 0x10000;
}
/**
 * Reads the code point that begins at offset `offset`. A high surrogate
 * directly followed (before `len`) by a low surrogate is combined into one
 * code point; otherwise the single code unit is returned.
 */
export function getNextCodePoint(str, len, offset) {
    const lead = str.charCodeAt(offset);
    if (!isHighSurrogate(lead) || !(offset + 1 < len)) {
        return lead;
    }
    const trail = str.charCodeAt(offset + 1);
    return isLowSurrogate(trail) ? computeCodePoint(lead, trail) : lead;
}
/**
 * Reads the code point that ends right before offset `offset`. A low
 * surrogate directly preceded by a high surrogate is combined into one
 * code point; otherwise the single code unit is returned.
 */
function getPrevCodePoint(str, offset) {
    const trail = str.charCodeAt(offset - 1);
    if (!isLowSurrogate(trail) || !(offset > 1)) {
        return trail;
    }
    const lead = str.charCodeAt(offset - 2);
    return isHighSurrogate(lead) ? computeCodePoint(lead, trail) : trail;
}
/**
 * Cursor over a string that moves one code point at a time, in either
 * direction, while tracking its position as a UTF-16 offset.
 */
export class CodePointIterator {
    /**
     * @param str string to iterate over
     * @param offset initial UTF-16 offset of the cursor
     */
    constructor(str, offset = 0) {
        this._str = str;
        this._len = str.length;
        this._offset = offset;
    }
    /** Current UTF-16 offset of the cursor. */
    get offset() {
        return this._offset;
    }
    /** Moves the cursor to `offset`. */
    setOffset(offset) {
        this._offset = offset;
    }
    /** Returns the code point before the cursor and steps back over it. */
    prevCodePoint() {
        const codePoint = getPrevCodePoint(this._str, this._offset);
        // Code points from the supplementary planes occupy two code units.
        const width = codePoint >= 65536 /* Constants.UNICODE_SUPPLEMENTARY_PLANE_BEGIN */ ? 2 : 1;
        this._offset -= width;
        return codePoint;
    }
    /** Returns the code point at the cursor and steps forward over it. */
    nextCodePoint() {
        const codePoint = getNextCodePoint(this._str, this._len, this._offset);
        // Code points from the supplementary planes occupy two code units.
        const width = codePoint >= 65536 /* Constants.UNICODE_SUPPLEMENTARY_PLANE_BEGIN */ ? 2 : 1;
        this._offset += width;
        return codePoint;
    }
    /** Tells whether the cursor is at or beyond the end of the string. */
    eol() {
        return this._offset >= this._len;
    }
}
/**
 * Cursor over a string that moves in steps decided by
 * `breakBetweenGraphemeBreakType`, built on top of a `CodePointIterator`.
 */
export class GraphemeIterator {
    /**
     * @param str string to iterate over
     * @param offset initial UTF-16 offset of the cursor
     */
    constructor(str, offset = 0) {
        this._iterator = new CodePointIterator(str, offset);
    }
    /** Current UTF-16 offset of the underlying code point iterator. */
    get offset() {
        return this._iterator.offset;
    }
    /**
     * Advances past the grapheme starting at the cursor.
     * @returns the number of UTF-16 code units that were consumed
     */
    nextGraphemeLength() {
        const breakTree = GraphemeBreakTree.getInstance();
        const codePoints = this._iterator;
        const startOffset = codePoints.offset;
        let currentType = breakTree.getGraphemeBreakType(codePoints.nextCodePoint());
        while (!codePoints.eol()) {
            const offsetBeforeNext = codePoints.offset;
            const followingType = breakTree.getGraphemeBreakType(codePoints.nextCodePoint());
            if (breakBetweenGraphemeBreakType(currentType, followingType)) {
                // The code point just read starts a new grapheme: un-read it.
                codePoints.setOffset(offsetBeforeNext);
                break;
            }
            currentType = followingType;
        }
        return codePoints.offset - startOffset;
    }
    /**
     * Moves back over the grapheme ending at the cursor.
     * @returns the number of UTF-16 code units that were consumed
     */
    prevGraphemeLength() {
        const breakTree = GraphemeBreakTree.getInstance();
        const codePoints = this._iterator;
        const startOffset = codePoints.offset;
        let currentType = breakTree.getGraphemeBreakType(codePoints.prevCodePoint());
        while (codePoints.offset > 0) {
            const offsetBeforePrev = codePoints.offset;
            const precedingType = breakTree.getGraphemeBreakType(codePoints.prevCodePoint());
            if (breakBetweenGraphemeBreakType(precedingType, currentType)) {
                // The code point just read ends the previous grapheme: un-read it.
                codePoints.setOffset(offsetBeforePrev);
                break;
            }
            currentType = precedingType;
        }
        return startOffset - codePoints.offset;
    }
    /** Tells whether the cursor is at or beyond the end of the string. */
    eol() {
        return this._iterator.eol();
    }
}
/**
 * Returns the length, in UTF-16 code units, of the grapheme that starts at
 * `initialOffset` in `str`.
 */
export function nextCharLength(str, initialOffset) {
    return new GraphemeIterator(str, initialOffset).nextGraphemeLength();
}
/**
 * Returns the length, in UTF-16 code units, of the grapheme that ends at
 * `initialOffset` in `str`.
 */
export function prevCharLength(str, initialOffset) {
    return new GraphemeIterator(str, initialOffset).prevGraphemeLength();
}
/**
 * Computes the `[startOffset, endOffset]` pair of the grapheme around
 * `offset`. An offset that points at a low surrogate is first moved back
 * by one code unit.
 */
export function getCharContainingOffset(str, offset) {
    const pointsAtLowSurrogate = offset > 0 && isLowSurrogate(str.charCodeAt(offset));
    const anchor = pointsAtLowSurrogate ? offset - 1 : offset;
    const endOffset = anchor + nextCharLength(str, anchor);
    const startOffset = endOffset - prevCharLength(str, endOffset);
    return [startOffset, endOffset];
}
// Cached regular expression, created on the first call to `containsRTL`.
let CONTAINS_RTL = undefined;
/**
 * Creates the regular expression used by `containsRTL`.
 */
function makeContainsRtl() {
    // Generated using https://github.com/alexdima/unicode-utils/blob/main/rtl-test.js
    return /(?:[\u05BE\u05C0\u05C3\u05C6\u05D0-\u05F4\u0608\u060B\u060D\u061B-\u064A\u066D-\u066F\u0671-\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u0710\u0712-\u072F\u074D-\u07A5\u07B1-\u07EA\u07F4\u07F5\u07FA\u07FE-\u0815\u081A\u0824\u0828\u0830-\u0858\u085E-\u088E\u08A0-\u08C9\u200F\uFB1D\uFB1F-\uFB28\uFB2A-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFC\uFE70-\uFEFC]|\uD802[\uDC00-\uDD1B\uDD20-\uDE00\uDE10-\uDE35\uDE40-\uDEE4\uDEEB-\uDF35\uDF40-\uDFFF]|\uD803[\uDC00-\uDD23\uDE80-\uDEA9\uDEAD-\uDF45\uDF51-\uDF81\uDF86-\uDFF6]|\uD83A[\uDC00-\uDCCF\uDD00-\uDD43\uDD4B-\uDFFF]|\uD83B[\uDC00-\uDEBB])/;
}
/**
 * Returns true if `str` contains any Unicode character that is classified as "R" or "AL".
 */
export function containsRTL(str) {
    if (CONTAINS_RTL) {
        return CONTAINS_RTL.test(str);
    }
    // First use: build the expression once and keep it for later calls.
    CONTAINS_RTL = makeContainsRtl();
    return CONTAINS_RTL.test(str);
}
const IS_BASIC_ASCII = /^[\t\n\r\x20-\x7E]*$/;
/**
 * Returns true if every character of `str` is either in the range 32 - 126
 * (both inclusive) or one of \n, \r, \t. The empty string qualifies.
 */
export function isBasicASCII(str) {
    const onlyBasicCharacters = IS_BASIC_ASCII.test(str);
    return onlyBasicCharacters;
}
// Matches LINE SEPARATOR (LS, U+2028) or PARAGRAPH SEPARATOR (PS, U+2029)
export const UNUSUAL_LINE_TERMINATORS = /[\u2028\u2029]/;
/**
 * Returns true if `str` contains an LS or PS character anywhere.
 */
export function containsUnusualLineTerminators(str) {
    const hasUnusualTerminator = UNUSUAL_LINE_TERMINATORS.test(str);
    return hasUnusualTerminator;
}
/**
 * Cheap heuristic for wide characters, meant to improve wrapping by treating
 * them as 2 columns. Returns true for char codes in 0x2E80-0xD7AF,
 * 0xF900-0xFAFF and 0xFF01-0xFF5E.
 */
export function isFullWidthCharacter(charCode) {
    // Block list taken from http://jrgraphix.net/research/unicode_blocks.php
    //          2E80 - 2EFF   CJK Radicals Supplement
    //          2F00 - 2FDF   Kangxi Radicals
    //          2FF0 - 2FFF   Ideographic Description Characters
    //          3000 - 303F   CJK Symbols and Punctuation
    //          3040 - 309F   Hiragana
    //          30A0 - 30FF   Katakana
    //          3100 - 312F   Bopomofo
    //          3130 - 318F   Hangul Compatibility Jamo
    //          3190 - 319F   Kanbun
    //          31A0 - 31BF   Bopomofo Extended
    //          31F0 - 31FF   Katakana Phonetic Extensions
    //          3200 - 32FF   Enclosed CJK Letters and Months
    //          3300 - 33FF   CJK Compatibility
    //          3400 - 4DBF   CJK Unified Ideographs Extension A
    //          4DC0 - 4DFF   Yijing Hexagram Symbols
    //          4E00 - 9FFF   CJK Unified Ideographs
    //          A000 - A48F   Yi Syllables
    //          A490 - A4CF   Yi Radicals
    //          AC00 - D7AF   Hangul Syllables
    // [IGNORE] D800 - DB7F   High Surrogates
    // [IGNORE] DB80 - DBFF   High Private Use Surrogates
    // [IGNORE] DC00 - DFFF   Low Surrogates
    // [IGNORE] E000 - F8FF   Private Use Area
    //          F900 - FAFF   CJK Compatibility Ideographs
    // [IGNORE] FB00 - FB4F   Alphabetic Presentation Forms
    // [IGNORE] FB50 - FDFF   Arabic Presentation Forms-A
    // [IGNORE] FE00 - FE0F   Variation Selectors
    // [IGNORE] FE20 - FE2F   Combining Half Marks
    // [IGNORE] FE30 - FE4F   CJK Compatibility Forms
    // [IGNORE] FE50 - FE6F   Small Form Variants
    // [IGNORE] FE70 - FEFF   Arabic Presentation Forms-B
    //          FF00 - FFEF   Halfwidth and Fullwidth Forms
    //               [https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms]
    //               of which FF01 - FF5E fullwidth ASCII of 21 to 7E
    // [IGNORE]    and FF65 - FFDC halfwidth of Katakana and Hangul
    // [IGNORE] FFF0 - FFFF   Specials
    if (charCode >= 0x2E80 && charCode <= 0xD7AF) {
        return true;
    }
    if (charCode >= 0xF900 && charCode <= 0xFAFF) {
        return true;
    }
    return charCode >= 0xFF01 && charCode <= 0xFF5E;
}
/**
 * A fast (therefore imprecise) check for whether code point `x` is an emoji,
 * based on a fixed set of code point ranges and single values.
 * Generated using https://github.com/alexdima/unicode-utils/blob/main/emoji-test.js
 */
export function isEmojiImprecise(x) {
    if (x >= 0x1F1E6 && x <= 0x1F1FF) {
        return true;
    }
    if (x === 8986 || x === 8987 || x === 9200 || x === 9203) {
        return true;
    }
    if (x >= 9728 && x <= 10175) {
        return true;
    }
    if (x === 11088 || x === 11093) {
        return true;
    }
    return ((x >= 127744 && x <= 128591)
        || (x >= 128640 && x <= 128764)
        || (x >= 128992 && x <= 129008)
        || (x >= 129280 && x <= 129535)
        || (x >= 129648 && x <= 129782));
}
// -- UTF-8 BOM
/** The byte order mark character (char code 65279). */
export const UTF8_BOM_CHARACTER = String.fromCharCode(65279 /* CharCode.UTF8_BOM */);
/**
 * Tells whether `str` is a non-empty string whose first char code is the
 * byte order mark.
 */
export function startsWithUTF8BOM(str) {
    if (!str || !(str.length > 0)) {
        return false;
    }
    return str.charCodeAt(0) === 65279 /* CharCode.UTF8_BOM */;
}
/**
 * Tells whether `target` changes when lower-cased, i.e. whether it holds at
 * least one upper-case character.
 * @param target string to inspect; falsy values yield `false`
 * @param ignoreEscapedChars when true, every backslash plus the character
 * following it is removed before the check
 */
export function containsUppercaseCharacter(target, ignoreEscapedChars = false) {
    if (!target) {
        return false;
    }
    const candidate = ignoreEscapedChars ? target.replace(/\\./g, '') : target;
    return candidate !== candidate.toLowerCase();
}
/**
 * Maps an integer onto a single ASCII letter, cycling with a period of 52:
 * produces 'a'-'z', followed by 'A'-'Z'... followed by 'a'-'z', etc.
 * Negative values continue the same cycle backwards (-1 -> 'Z').
 * @param n integer to map
 * @returns a one-character string in 'a'-'z' or 'A'-'Z'
 */
export function singleLetterHash(n) {
    const LETTERS_CNT = (90 /* CharCode.Z */ - 65 /* CharCode.A */ + 1);
    const cycle = 2 * LETTERS_CNT;
    // `%` keeps the sign of the dividend, so normalise into [0, cycle) to
    // avoid producing non-letter characters for negative input.
    const index = ((n % cycle) + cycle) % cycle;
    if (index < LETTERS_CNT) {
        return String.fromCharCode(97 /* CharCode.a */ + index);
    }
    return String.fromCharCode(65 /* CharCode.A */ + index - LETTERS_CNT);
}
/**
 * Decides whether a grapheme cluster boundary lies between two adjacent code
 * points, given only their grapheme break types.
 * The decision is stateless, so GB11, GB12 and GB13 are approximated.
 * @param breakTypeA break type of the code point on the left
 * @param breakTypeB break type of the code point on the right
 * @returns `true` to break between the two, `false` to keep them together
 */
function breakBetweenGraphemeBreakType(breakTypeA, breakTypeB) {
    // http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
    // !!! Let's make the common case a bit faster
    if (breakTypeA === 0 /* GraphemeBreakType.Other */) {
        // see https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakTest-13.0.0d10.html#table
        // Other is only kept together with a following Extend or ZWJ (GB9)
        // or SpacingMark (GB9a); the shortcut must agree with the rules below.
        return (breakTypeB !== 5 /* GraphemeBreakType.Extend */
            && breakTypeB !== 7 /* GraphemeBreakType.SpacingMark */
            && breakTypeB !== 13 /* GraphemeBreakType.ZWJ */);
    }
    // Do not break between a CR and LF. Otherwise, break before and after controls.
    // GB3                                        CR × LF
    // GB4                       (Control | CR | LF) ÷
    // GB5                                           ÷ (Control | CR | LF)
    if (breakTypeA === 2 /* GraphemeBreakType.CR */) {
        if (breakTypeB === 3 /* GraphemeBreakType.LF */) {
            return false; // GB3
        }
    }
    if (breakTypeA === 4 /* GraphemeBreakType.Control */ || breakTypeA === 2 /* GraphemeBreakType.CR */ || breakTypeA === 3 /* GraphemeBreakType.LF */) {
        return true; // GB4
    }
    if (breakTypeB === 4 /* GraphemeBreakType.Control */ || breakTypeB === 2 /* GraphemeBreakType.CR */ || breakTypeB === 3 /* GraphemeBreakType.LF */) {
        return true; // GB5
    }
    // Do not break Hangul syllable sequences.
    // GB6                                         L × (L | V | LV | LVT)
    // GB7                                  (LV | V) × (V | T)
    // GB8                                 (LVT | T) × T
    if (breakTypeA === 8 /* GraphemeBreakType.L */) {
        if (breakTypeB === 8 /* GraphemeBreakType.L */ || breakTypeB === 9 /* GraphemeBreakType.V */ || breakTypeB === 11 /* GraphemeBreakType.LV */ || breakTypeB === 12 /* GraphemeBreakType.LVT */) {
            return false; // GB6
        }
    }
    if (breakTypeA === 11 /* GraphemeBreakType.LV */ || breakTypeA === 9 /* GraphemeBreakType.V */) {
        if (breakTypeB === 9 /* GraphemeBreakType.V */ || breakTypeB === 10 /* GraphemeBreakType.T */) {
            return false; // GB7
        }
    }
    if (breakTypeA === 12 /* GraphemeBreakType.LVT */ || breakTypeA === 10 /* GraphemeBreakType.T */) {
        if (breakTypeB === 10 /* GraphemeBreakType.T */) {
            return false; // GB8
        }
    }
    // Do not break before extending characters or ZWJ.
    // GB9                                           × (Extend | ZWJ)
    if (breakTypeB === 5 /* GraphemeBreakType.Extend */ || breakTypeB === 13 /* GraphemeBreakType.ZWJ */) {
        return false; // GB9
    }
    // The GB9a and GB9b rules only apply to extended grapheme clusters:
    // Do not break before SpacingMarks, or after Prepend characters.
    // GB9a                                          × SpacingMark
    // GB9b                                  Prepend ×
    if (breakTypeB === 7 /* GraphemeBreakType.SpacingMark */) {
        return false; // GB9a
    }
    if (breakTypeA === 1 /* GraphemeBreakType.Prepend */) {
        return false; // GB9b
    }
    // Do not break within emoji modifier sequences or emoji zwj sequences.
    // GB11    \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
    if (breakTypeA === 13 /* GraphemeBreakType.ZWJ */ && breakTypeB === 14 /* GraphemeBreakType.Extended_Pictographic */) {
        // Note: we are not implementing the rule entirely here to avoid introducing states
        return false; // GB11
    }
    // GB12                          sot (RI RI)* RI × RI
    // GB13                        [^RI] (RI RI)* RI × RI
    if (breakTypeA === 6 /* GraphemeBreakType.Regional_Indicator */ && breakTypeB === 6 /* GraphemeBreakType.Regional_Indicator */) {
        // Note: we are not implementing the rule entirely here to avoid introducing states
        return false; // GB12 & GB13
    }
    // GB999                                     Any ÷ Any
    return true;
}
/**
 * Lazily created singleton that maps a code point to its grapheme break
 * type. The lookup data is a flat array of `[rangeStart, rangeEnd, type]`
 * triples; node `i` occupies indices `3 * i .. 3 * i + 2`, its children are
 * nodes `2 * i` and `2 * i + 1`, and the root is node 1.
 */
class GraphemeBreakTree {
    /** Returns the shared instance, creating it on first use. */
    static getInstance() {
        if (!GraphemeBreakTree._INSTANCE) {
            GraphemeBreakTree._INSTANCE = new GraphemeBreakTree();
        }
        return GraphemeBreakTree._INSTANCE;
    }
    constructor() {
        this._data = getGraphemeBreakRawData();
    }
    /**
     * Looks up the grapheme break type of `codePoint`.
     * @returns the numeric break type; 0 (Other) when no range contains it
     */
    getGraphemeBreakType(codePoint) {
        // !!! Let's make 7bit ASCII a bit faster: 0..31
        if (codePoint < 32) {
            if (codePoint === 10 /* CharCode.LineFeed */) {
                return 3 /* GraphemeBreakType.LF */;
            }
            if (codePoint === 13 /* CharCode.CarriageReturn */) {
                return 2 /* GraphemeBreakType.CR */;
            }
            return 4 /* GraphemeBreakType.Control */;
        }
        // !!! Let's make 7bit ASCII a bit faster: 32..126
        if (codePoint < 127) {
            return 0 /* GraphemeBreakType.Other */;
        }
        const data = this._data;
        // Triples stored in the array; the last addressable node index is
        // `tripleCount - 1`, so the walk must stop before `tripleCount`.
        const tripleCount = data.length / 3;
        let nodeIndex = 1;
        while (nodeIndex < tripleCount) {
            if (codePoint < data[3 * nodeIndex]) {
                // go left
                nodeIndex = 2 * nodeIndex;
            }
            else if (codePoint > data[3 * nodeIndex + 1]) {
                // go right
                nodeIndex = 2 * nodeIndex + 1;
            }
            else {
                // hit
                return data[3 * nodeIndex + 2];
            }
        }
        return 0 /* GraphemeBreakType.Other */;
    }
}
GraphemeBreakTree._INSTANCE = null;
function getGraphemeBreakRawData() {
// generated using https://github.com/alexdima/unicode-utils/blob/main/grapheme-break.js
return JSON.parse('[0,0,0,51229,51255,12,44061,44087,12,127462,127487,6,7083,7085,5,47645,47671,12,54813,54839,12,128678,128678,14,3270,3270,5,9919,9923,14,45853,45879,12,49437,49463,12,53021,53047,12,71216,71218,7,128398,128399,14,129360,129374,14,2519,2519,5,4448,4519,9,9742,9742,14,12336,12336,14,44957,44983,12,46749,46775,12,48541,48567,12,50333,50359,12,52125,52151,12,53917,53943,12,69888,69890,5,73018,73018,5,127990,127990,14,128558,128559,14,128759,128760,14,129653,129655,14,2027,2035,5,2891,2892,7,3761,3761,5,6683,6683,5,8293,8293,4,9825,9826,14,9999,9999,14,43452,43453,5,44509,44535,12,45405,45431,12,46301,46327,12,47197,47223,12,48093,48119,12,48989,49015,12,49885,49911,12,50781,50807,12,51677,51703,12,52573,52599,12,53469,53495,12,54365,54391,12,65279,65279,4,70471,70472,7,72145,72147,7,119173,119179,5,127799,127818,14,128240,128244,14,128512,128512,14,128652,128652,14,128721,128722,14,129292,129292,14,129445,129450,14,129734,129743,14,1476,1477,5,2366,2368,7,2750,2752,7,3076,3076,5,3415,3415,5,4141,4144,5,6109,6109,5,6964,6964,5,7394,7400,5,9197,9198,14,9770,9770,14,9877,9877,14,9968,9969,14,10084,10084,14,43052,43052,5,43713,43713,5,44285,44311,12,44733,44759,12,45181,45207,12,45629,45655,12,46077,46103,12,46525,46551,12,46973,46999,12,47421,47447,12,47869,47895,12,48317,48343,12,48765,48791,12,49213,49239,12,49661,49687,12,50109,50135,12,50557,50583,12,51005,51031,12,51453,51479,12,51901,51927,12,52349,52375,12,52797,52823,12,53245,53271,12,53693,53719,12,54141,54167,12,54589,54615,12,55037,55063,12,69506,69509,5,70191,70193,5,70841,70841,7,71463,71467,5,72330,72342,5,94031,94031,5,123628,123631,5,127763,127765,14,127941,127941,14,128043,128062,14,128302,128317,14,128465,128467,14,128539,128539,14,128640,128640,14,128662,128662,14,128703,128703,14,128745,128745,14,129004,129007,14,129329,129330,14,129402,129402,14,129483,129483,14,129686,129704,14,130048,131069,14,173,173,4,1757,1757,1,2200,2207,5,2434,2435,7,2631,2632,5,2817,2817,5,3008,3008,5,3201,32
01,5,3387,3388,5,3542,3542,5,3902,3903,7,4190,4192,5,6002,6003,5,6439,6440,5,6765,6770,7,7019,7027,5,7154,7155,7,8205,8205,13,8505,8505,14,9654,9654,14,9757,9757,14,9792,9792,14,9852,9853,14,9890,9894,14,9937,9937,14,9981,9981,14,10035,10036,14,11035,11036,14,42654,42655,5,43346,43347,7,43587,43587,5,44006,44007,7,44173,44199,12,44397,44423,12,44621,44647,12,44845,44871,12,45069,45095,12,45293,45319,12,45517,45543,12,45741,45767,12,45965,45991,12,46189,46215,12,46413,46439,12,46637,46663,12,46861,46887,12,47085,47111,12,47309,47335,12,47533,47559,12,47757,47783,12,47981,48007,12,48205,48231,12,48429,48455,12,48653,48679,12,48877,48903,12,49101,49127,12,49325,49351,12,49549,49575,12,49773,49799,12,49997,50023,12,50221,50247,12,50445,50471,12,50669,50695,12,50893,50919,12,51117,51143,12,51341,51367,12,51565,51591,12,51789,51815,12,52013,52039,12,52237,52263,12,52461,52487,12,52685,52711,12,52909,52935,12,53133,53159,12,53357,53383,12,53581,53607,12,53805,53831,12,54029,54055,12,54253,54279,12,54477,54503,12,54701,54727,12,54925,54951,12,55149,55175,12,68101,68102,5,69762,69762,7,70067,70069,7,70371,70378,5,70720,70721,7,71087,71087,5,71341,71341,5,71995,71996,5,72249,72249,7,72850,72871,5,73109,73109,5,118576,118598,5,121505,121519,5,127245,127247,14,127568,127569,14,127777,127777,14,127872,127891,14,127956,127967,14,128015,128016,14,128110,128172,14,128259,128259,14,128367,128368,14,128424,128424,14,128488,128488,14,128530,128532,14,128550,128551,14,128566,128566,14,128647,128647,14,128656,128656,14,128667,128673,14,128691,128693,14,128715,128715,14,128728,128732,14,128752,128752,14,128765,128767,14,129096,129103,14,129311,129311,14,129344,129349,14,129394,129394,14,129413,129425,14,129466,129471,14,129511,129535,14,129664,129666,14,129719,129722,14,129760,129767,14,917536,917631,5,13,13,2,1160,1161,5,1564,1564,4,1807,1807,1,2085,2087,5,2307,2307,7,2382,2383,7,2497,2500,5,2563,2563,7,2677,2677,5,2763,2764,7,2879,2879,5,2914,2915,5,3021,3021,5,3142,3144,5,3263,3263,5,
3285,3286,5,3398,3400,7,3530,3530,5,3633,3633,5,3864,3865,5,3974,3975,5,4155,4156,7,4229,42
}
//#endregion
/**
 * Computes the offset after performing a left delete on the given string,
 * while considering unicode grapheme/emoji rules: an emoji component is
 * removed when one ends at `offset`, otherwise a single code point.
 */
export function getLeftDeleteOffset(offset, str) {
    if (offset === 0) {
        return 0;
    }
    // Try to delete emoji part.
    const emojiOffset = getOffsetBeforeLastEmojiComponent(offset, str);
    if (emojiOffset === undefined) {
        // Otherwise, just skip a single code point.
        const cursor = new CodePointIterator(str, offset);
        cursor.prevCodePoint();
        return cursor.offset;
    }
    return emojiOffset;
}
/**
 * Walks backwards from `initialOffset` over one emoji component and returns
 * the offset in front of it (including a directly preceding ZWJ), or
 * `undefined` when the text before `initialOffset` does not end in an emoji.
 */
function getOffsetBeforeLastEmojiComponent(initialOffset, str) {
    // See https://www.unicode.org/reports/tr51/tr51-14.html#EBNF_and_Regex for the
    // structure of emojis.
    const cursor = new CodePointIterator(str, initialOffset);
    const isTrailingComponent = (cp) => (isEmojiModifier(cp)
        || cp === 65039 /* CodePoint.emojiVariantSelector */
        || cp === 8419 /* CodePoint.enclosingKeyCap */);
    // Skip modifiers
    let current = cursor.prevCodePoint();
    for (; isTrailingComponent(current); current = cursor.prevCodePoint()) {
        if (cursor.offset === 0) {
            // Cannot skip modifier, no preceding emoji base.
            return undefined;
        }
    }
    // Expect base emoji
    if (!isEmojiImprecise(current)) {
        // Unexpected code point, not a valid emoji.
        return undefined;
    }
    const baseOffset = cursor.offset;
    // Skip optional ZWJ code points that combine multiple emojis.
    // In theory, we should check if that ZWJ actually combines multiple emojis
    // to prevent deleting ZWJs in situations we didn't account for.
    if (baseOffset > 0 && cursor.prevCodePoint() === 8205 /* CodePoint.zwj */) {
        return cursor.offset;
    }
    return baseOffset;
}
/**
 * Tells whether `codePoint` lies in the range 0x1F3FB-0x1F3FF.
 */
function isEmojiModifier(codePoint) {
    const atLeastFirst = codePoint >= 0x1F3FB;
    return atLeastFirst && codePoint <= 0x1F3FF;
}
/** The no-break space character (U+00A0). */
export const noBreakWhitespace = '\xa0';
/**
 * Lookup of code points that can be confused with other code points, backed
 * by a dictionary that is handed to the constructor.
 * NOTE(review): the static members `cache` and `_locales` used below are
 * assigned outside this class body — confirm their shape at the definition.
 */
export class AmbiguousCharacters {
/**
 * Returns the instance that the static cache yields for the given locales.
 * @param locales iterable of locales; it is copied into an array for the lookup
 */
static getInstance(locales) {
return _a.cache.get(Array.from(locales));
}
/**
 * Returns the value of the static `_locales` holder.
 */
static getLocales() {
return _a._locales.value;
}
/**
 * @param confusableDictionary object queried via `has`, `get` and `keys`
 * (presumably a Map from code point to confusable code point — TODO confirm)
 */
constructor(confusableDictionary) {
this.confusableDictionary = confusableDictionary;
}
/**
 * Tells whether the dictionary has an entry for `codePoint`.
 */
isAmbiguous(codePoint) {
return this.confusableDictionary.has(codePoint);
}
/**
 * Returns the code point that the given code point can be confused with,
 * or undefined if such a code point does not exist.
 */
getPrimaryConfusable(codePoint) {
return this.confusableDictionary.get(codePoint);
}
/**
 * Returns a new Set holding every key of the dictionary.
 */
getConfusableCodePoints() {
return new Set(this.confusableDictionary.keys());
}
}
// Alias through which the static members above are reached.
_a = AmbiguousCharacters;
AmbiguousCharacters.ambiguousCharacterData = new Lazy(() => {
// Generated using https://github.com/hediet/vscode-unicode-data
// Stored as key1, value1, key2, value2, ...
return JSON.parse('{\"_common\":[8232,32,8233,32,5760,32,8192,32,8193,32,8194,32,8195,32,8196,32,8197,32,8198,32,8200,32,8201,32,8202,32,8287,32,8199,32,8239,32,2042,95,65101,95,65102,95,65103,95,8208,45,8209,45,8210,45,65112,45,1748,45,8259,45,727,45,8722,45,10134,45,11450,45,1549,44,1643,44,8218,44,184,44,42233,44,894,59,2307,58,2691,58,1417,58,1795,58,1796,58,5868,58,65072,58,6147,58,6153,58,8282,58,1475,58,760,58,42889,58,8758,58,720,58,42237,58,451,33,11601,33,660,63,577,63,2429,63,5038,63,42731,63,119149,46,8228,46,1793,46,1794,46,42510,46,68176,46,1632,46,1776,46,42232,46,1373,96,65287,96,8219,96,8242,96,1370,96,1523,96,8175,96,65344,96,900,96,8189,96,8125,96,8127,96,8190,96,697,96,884,96,712,96,714,96,715,96,756,96,699,96,701,96,700,96,702,96,42892,96,1497,96,2036,96,2037,96,5194,96,5836,96,94033,96,94034,96,65339,91,10088,40,10098,40,12308,40,64830,40,65341,93,10089,41,10099,41,12309,41,64831,41,10100,123,119060,123,10101,125,65342,94,8270,42,1645,42,8727,42,66335,42,5941,47,8257,47,8725,47,8260,47,9585,47,10187,47,10744,47,119354,47,12755,47,12339,47,11462,47,20031,47,12035,47,65340,92,65128,92,8726,92,10189,92,10741,92,10745,92,119311,92,119355,92,12756,92,20022,92,12034,92,42872,38,708,94,710,94,5869,43,10133,43,66203,43,8249,60,10094,60,706,60,119350,60,5176,60,5810,60,5120,61,11840,61,12448,61,42239,61,8250,62,10095,62,707,62,119351,62,5171,62,94015,62,8275,126,732,126,8128,126,8764,126,65372,124,65293,45,120784,50,120794,50,120804,50,120814,50,120824,50,130034,50,42842,50,423,50,1000,50,42564,50,5311,50,42735,50,119302,51,120785,51,120795,51,120805,51,120815,51,120825,51,130035,51,42923,51,540,51,439,51,42858,51,11468,51,1248,51,94011,51,71882,51,120786,52,120796,52,120806,52,120816,52,120826,52,130036,52,5070,52,71855,52,120787,53,120797,53,120807,53,120817,53,120827,53,130037,53,444,53,71867,53,120788,54,120798,54,120808,54,120818,54,120828,54,130038,54,11474,54,5102,54,71893,54,119314,55,120789,55,120799,55,120809,55,120819,55,120829,55,130039,55,6
6770,55,71878,55,2819,56,2538,56,2666,56,125131,56,120790,56,120800,56,120810,56,120820,56,120830,56,130040,56,547,56,546,56,66330,56,2663,57,2920,57,2541,57,3437,57,120791,57,120801,57,120811,57,120821,57,120831,57,130041,57,42862,57,11466,57,71884,57,71852,57,71894,57,9082,97,65345,97,119834,97,119886,97,119938,97,119990,97,120042,97,120094,97,120146,97,120198,97,120250,97,120302,97,120354,97,120406,97,120458,97,593,97,945,97,120514,97,120572,97,120630,97,120688,97,120746,97,65313,65,119808,65,119860,65,119912,65,119964,65,120016,65,120068,65,120120,65,120172,65,120224,65,120276,65,120328,65,120380,65,120432,65,913,65,120488,65,120546,65,120604,65,120662,65,120720,65,5034,65,5573,65,42222,65,94016,65,66208,65,119835,98,119887,98,119939,98,119991,98,120043,98,120095,98,120147,98,120199,98,120251,98,120303,98,120355,98,120407,98,120459,98,388,98,5071,98,5234,98,5551,98,65314,66,8492,66,119809,66,119861,66,119913,66,120017,66,120069,66,120121,66,120173,66,120225,66,120277,66,120329,66,120381,66,120433,66,42932,66,914,66,120489,66,120547,66,120605,66,120663,66,120721,66,5108,66,5623,66,42192,66,66178,66,66209,66,66305,66,65347,99,8573,99,119836,99,119888,99,119940,99,119992,99,120044,99,120096,99,120148,99,120200,99,120252,99,120304,99,120356,99,120408,99,120460,99,7428,99,1010,99,11429,99,43951,99,66621,99,128844,67,71922,67,71913,67,65315,67,8557,67,8450,67,8493,67,119810,67,119862,67,119914,67,119966,67,120018,67,120174,67,120226,67,120278,67,120330,67,120382,67,120434,67,1017,67,11428,67,5087,67,42202,67,66210,67,66306,67,66581,67,66844,67,8574,100,8518,100,119837,100,119889,100,119941,100,119993,100,120045,100,120097,100,120149,100,120201,100,120253,100,120305,100,120357,100,120409,100,120461,100,1281,100,5095,100,5231,100,42194,100,8558,68,8517,68,119811,68,119863,68,119915,68,119967,68,120019,68,120071,68,120123,68,120175,68,120227,68,120279,68,120331,68,120383,68,120435,68,5024,68,5598,68,5610,68,42195,68,8494,101,65349,101,8495,101,8519,101,119838,101,119890,
101,119942,101,120046,101,120098,101,120150,101,120202,101,120254,101,120306,101,120358,101
});
/**
 * Cached factory that builds an AmbiguousCharacters instance for a list of locales.
 *
 * The resulting dictionary is the `_common` table overlaid with the
 * language-specific table. The language-specific table is the intersection
 * (by key) of the tables of all requested locales that have data; if none of
 * the requested locales has data, the `_default` table is used instead.
 * Locale names starting with `_` are reserved for internal tables and ignored.
 */
AmbiguousCharacters.cache = new LRUCachedFunction((locales) => {
    // Decodes the flat [key1, value1, key2, value2, ...] encoding into a Map.
    function arrayToMap(arr) {
        const result = new Map();
        for (let i = 0; i < arr.length; i += 2) {
            result.set(arr[i], arr[i + 1]);
        }
        return result;
    }
    // Copy of map1 with all entries of map2 written on top of it.
    function mergeMaps(map1, map2) {
        const result = new Map(map1);
        for (const [key, value] of map2) {
            result.set(key, value);
        }
        return result;
    }
    // Entries of map1 whose key also exists in map2; a missing map1 yields map2.
    function intersectMaps(map1, map2) {
        if (!map1) {
            return map2;
        }
        const result = new Map();
        for (const [key, value] of map1) {
            if (map2.has(key)) {
                result.set(key, value);
            }
        }
        return result;
    }
    const data = _a.ambiguousCharacterData.value;
    // Use an own-property check: `in` would also accept names inherited from
    // Object.prototype (e.g. 'toString', 'constructor'), whose values are not
    // pair arrays and would silently wipe the language-specific table.
    const hasLocaleData = (locale) => Object.prototype.hasOwnProperty.call(data, locale);
    let filteredLocales = locales.filter((l) => !l.startsWith('_') && hasLocaleData(l));
    if (filteredLocales.length === 0) {
        filteredLocales = ['_default'];
    }
    let languageSpecificMap = undefined;
    for (const locale of filteredLocales) {
        const map = arrayToMap(data[locale]);
        languageSpecificMap = intersectMaps(languageSpecificMap, map);
    }
    const commonMap = arrayToMap(data['_common']);
    const map = mergeMaps(commonMap, languageSpecificMap);
    return new _a(map);
});
/**
 * Lazily computed list of locale names found in the ambiguous character data.
 * Keys starting with `_` (such as `_common` and `_default`) are internal
 * tables and are left out.
 */
AmbiguousCharacters._locales = new Lazy(() => {
    const tableNames = Object.keys(_a.ambiguousCharacterData.value);
    return tableNames.filter((tableName) => !tableName.startsWith('_'));
});
/**
 * Registry of code points that are listed as invisible characters.
 * The list is parsed once on first use and kept as a Set.
 */
export class InvisibleCharacters {
    /**
     * Parses and returns the raw list of invisible code points.
     * @returns a new array of code point numbers on every call
     */
    static getRawData() {
        // Generated using https://github.com/hediet/vscode-unicode-data
        // The literal must stay on a single line: a line break inside a quoted string is a syntax error.
        return JSON.parse('[9,10,11,12,13,32,127,160,173,847,1564,4447,4448,6068,6069,6155,6156,6157,6158,7355,7356,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8234,8235,8236,8237,8238,8239,8287,8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,10240,12288,12644,65024,65025,65026,65027,65028,65029,65030,65031,65032,65033,65034,65035,65036,65037,65038,65039,65279,65440,65520,65521,65522,65523,65524,65525,65526,65527,65528,65532,78844,119155,119156,119157,119158,119159,119160,119161,119162,917504,917505,917506,917507,917508,917509,917510,917511,917512,917513,917514,917515,917516,917517,917518,917519,917520,917521,917522,917523,917524,917525,917526,917527,917528,917529,917530,917531,917532,917533,917534,917535,917536,917537,917538,917539,917540,917541,917542,917543,917544,917545,917546,917547,917548,917549,917550,917551,917552,917553,917554,917555,917556,917557,917558,917559,917560,917561,917562,917563,917564,917565,917566,917567,917568,917569,917570,917571,917572,917573,917574,917575,917576,917577,917578,917579,917580,917581,917582,917583,917584,917585,917586,917587,917588,917589,917590,917591,917592,917593,917594,917595,917596,917597,917598,917599,917600,917601,917602,917603,917604,917605,917606,917607,917608,917609,917610,917611,917612,917613,917614,917615,917616,917617,917618,917619,917620,917621,917622,917623,917624,917625,917626,917627,917628,917629,917630,917631,917760,917761,917762,917763,917764,917765,917766,917767,917768,917769,917770,917771,917772,917773,917774,917775,917776,917777,917778,917779,917780,917781,917782,917783,917784,917785,917786,917787,917788,917789,917790,917791,917792,917793,917794,917795,917796,917797,917798,917799,917800,917801,917802,917803,917804,917805,917806,917807,917808,917809,917810,917811,917812,917813,917814,917815,917816,917817,917818,917819,917820,917821,917822,917823,917824,917825,917826,917827,917828,917829,917830,917831,917832,917833,917834,917835,917836,917837,917838,917839,917840,917841,917842,917843,917844,917845,917846,917847,917848,917849,917850,917851,917852,917853,917854,917855,917856,917857,917858,917859,917860,917861,917862,917863,917864,917865,917866,917867,917868,917869,917870,917871,917872,917873,917874,917875,917876,917877,917878,917879,917880,917881,917882,917883,917884,917885,917886,917887,917888,917889,917890,917891,917892,917893,917894,917895,917896,917897,917898,917899,917900,917901,917902,917903,917904,917905,917906,917907,917908,917909,917910,917911,917912,917913,917914,917915,917916,917917,917918,917919,917920,917921,917922,917923,917924,917925,917926,917927,917928,917929,917930,917931,917932,917933,917934,917935,917936,917937,917938,917939,917940,917941,917942,917943,917944,917945,917946,917947,917948,917949,917950,917951,917952,917953,917954,917955,917956,917957,917958,917959,917960,917961,917962,917963,917964,917965,917966,917967,917968,917969,917970,917971,917972,917973,917974,917975,917976,917977,917978,917979,917980,917981,917982,917983,917984,917985,917986,917987,917988,917989,917990,917991,917992,917993,917994,917995,917996,917997,917998,917999]');
    }
    /**
     * Returns the Set of invisible code points, building it on first access.
     * The class is referenced by name (not via `this`) so the method also
     * works when it is called detached from the class.
     * @returns the shared Set instance; callers should treat it as read-only
     */
    static getData() {
        if (!InvisibleCharacters._data) {
            InvisibleCharacters._data = new Set(InvisibleCharacters.getRawData());
        }
        return InvisibleCharacters._data;
    }
    /**
     * @param codePoint the code point to test
     * @returns `true` if `codePoint` is in the invisible character list
     */
    static isInvisibleCharacter(codePoint) {
        return InvisibleCharacters.getData().has(codePoint);
    }
    /**
     * The Set of all invisible code points (same instance as `getData()`).
     */
    static get codePoints() {
        return InvisibleCharacters.getData();
    }
}
// Cache for the parsed Set; filled by getData() on first use.
InvisibleCharacters._data = undefined;