mirror of
https://github.com/mermaid-js/mermaid.git
synced 2025-01-21 06:53:17 +08:00
feat: split unicode properly
This commit is contained in:
parent
c41df420d7
commit
0a437f5800
@ -1,4 +1,4 @@
|
|||||||
import { splitTextToChars, splitLineToFitWidthLoop, type CheckFitFunction } from './splitText.js';
|
import { splitTextToChars, splitLineToFitWidth, type CheckFitFunction } from './splitText.js';
|
||||||
import { describe, it, expect } from 'vitest';
|
import { describe, it, expect } from 'vitest';
|
||||||
|
|
||||||
describe('splitText', () => {
|
describe('splitText', () => {
|
||||||
@ -6,6 +6,7 @@ describe('splitText', () => {
|
|||||||
{ str: '', split: [] },
|
{ str: '', split: [] },
|
||||||
{ str: '🏳️⚧️🏳️🌈👩🏾❤️👨🏻', split: ['🏳️⚧️', '🏳️🌈', '👩🏾❤️👨🏻'] },
|
{ str: '🏳️⚧️🏳️🌈👩🏾❤️👨🏻', split: ['🏳️⚧️', '🏳️🌈', '👩🏾❤️👨🏻'] },
|
||||||
{ str: 'ok', split: ['o', 'k'] },
|
{ str: 'ok', split: ['o', 'k'] },
|
||||||
|
{ str: 'abc', split: ['a', 'b', 'c'] },
|
||||||
])('should split $str into graphemes', ({ str, split }: { str: string; split: string[] }) => {
|
])('should split $str into graphemes', ({ str, split }: { str: string; split: string[] }) => {
|
||||||
expect(splitTextToChars(str)).toEqual(split);
|
expect(splitTextToChars(str)).toEqual(split);
|
||||||
});
|
});
|
||||||
@ -31,7 +32,7 @@ describe('split lines', () => {
|
|||||||
const checkFn: CheckFitFunction = (text: string) => {
|
const checkFn: CheckFitFunction = (text: string) => {
|
||||||
return splitTextToChars(text).length <= width;
|
return splitTextToChars(text).length <= width;
|
||||||
};
|
};
|
||||||
expect(splitLineToFitWidthLoop(str.split(' '), checkFn)).toEqual(split);
|
expect(splitLineToFitWidth(str, checkFn)).toEqual(split);
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
@ -10,126 +10,83 @@ export function splitTextToChars(text: string): string[] {
|
|||||||
return [...text];
|
return [...text];
|
||||||
}
|
}
|
||||||
|
|
||||||
export function splitWordToFitWidth(checkFit: CheckFitFunction, word: string): string[] {
|
/**
|
||||||
console.error('splitWordToFitWidth', word);
|
* Splits a string into words.
|
||||||
const characters = splitTextToChars(word);
|
*/
|
||||||
if (characters.length === 0) {
|
function splitLineToWords(text: string): string[] {
|
||||||
return [];
|
if (Intl.Segmenter) {
|
||||||
|
return [...new Intl.Segmenter(undefined, { granularity: 'word' }).segment(text)]
|
||||||
|
.map((s) => s.segment)
|
||||||
|
.filter((word) => word !== ' ');
|
||||||
}
|
}
|
||||||
const newWord = [];
|
return text.split(' ');
|
||||||
let lastCheckedCharacter = '';
|
|
||||||
while (characters.length > 0) {
|
|
||||||
lastCheckedCharacter = characters.shift() ?? ' ';
|
|
||||||
if (checkFit([...newWord, lastCheckedCharacter].join(''))) {
|
|
||||||
newWord.push(lastCheckedCharacter);
|
|
||||||
} else if (newWord.length === 0) {
|
|
||||||
// Even the first character was too long, we cannot split it, so return it as is.
|
|
||||||
// This is an edge case that can happen when the first character is a long grapheme.
|
|
||||||
return [lastCheckedCharacter, characters.join('')];
|
|
||||||
} else {
|
|
||||||
// The last character was too long, so we need to put it back and return the rest.
|
|
||||||
characters.unshift(lastCheckedCharacter);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (characters.length === 0) {
|
|
||||||
return [newWord.join('')];
|
|
||||||
}
|
|
||||||
console.error({ newWord, characters });
|
|
||||||
return [newWord.join(''), ...splitWordToFitWidth(checkFit, characters.join(''))];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function splitWordToFitWidth2(checkFit: CheckFitFunction, word: string): [string, string] {
|
/**
|
||||||
console.error('splitWordToFitWidth2', word);
|
* Splits a word into two parts, the first part fits the width and the remaining part.
|
||||||
|
* @param checkFit - Function to check if word fits
|
||||||
|
* @param word - Word to split
|
||||||
|
* @returns [first part of word that fits, rest of word]
|
||||||
|
*/
|
||||||
|
export function splitWordToFitWidth(checkFit: CheckFitFunction, word: string): [string, string] {
|
||||||
const characters = splitTextToChars(word);
|
const characters = splitTextToChars(word);
|
||||||
if (characters.length === 0) {
|
if (characters.length === 0) {
|
||||||
return ['', ''];
|
return ['', ''];
|
||||||
}
|
}
|
||||||
const newWord = [];
|
return splitWordToFitWidthRecursion(checkFit, [], characters);
|
||||||
let lastCheckedCharacter = '';
|
|
||||||
while (characters.length > 0) {
|
|
||||||
lastCheckedCharacter = characters.shift() ?? ' ';
|
|
||||||
if (checkFit([...newWord, lastCheckedCharacter].join(''))) {
|
|
||||||
newWord.push(lastCheckedCharacter);
|
|
||||||
} else if (newWord.length === 0) {
|
|
||||||
// Even the first character was too long, we cannot split it, so return it as is.
|
|
||||||
// This is an edge case that can happen when the first character is a long grapheme.
|
|
||||||
return [lastCheckedCharacter, characters.join('')];
|
|
||||||
} else {
|
|
||||||
// The last character was too long, so we need to put it back and return the rest.
|
|
||||||
characters.unshift(lastCheckedCharacter);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
console.error({ newWord, characters });
|
|
||||||
return [newWord.join(''), characters.join('')];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function splitLineToFitWidth(
|
function splitWordToFitWidthRecursion(
|
||||||
|
checkFit: CheckFitFunction,
|
||||||
|
usedChars: string[],
|
||||||
|
remainingChars: string[]
|
||||||
|
): [string, string] {
|
||||||
|
if (remainingChars.length === 0) {
|
||||||
|
return [usedChars.join(''), ''];
|
||||||
|
}
|
||||||
|
const [nextChar, ...rest] = remainingChars;
|
||||||
|
const newWord = [...usedChars, nextChar];
|
||||||
|
if (checkFit(newWord.join(''))) {
|
||||||
|
return splitWordToFitWidthRecursion(checkFit, newWord, rest);
|
||||||
|
}
|
||||||
|
return [usedChars.join(''), remainingChars.join('')];
|
||||||
|
}
|
||||||
|
|
||||||
|
export function splitLineToFitWidth(line: string, checkFit: CheckFitFunction): string[] {
|
||||||
|
return splitLineToFitWidthRecursion(splitLineToWords(line), checkFit);
|
||||||
|
}
|
||||||
|
|
||||||
|
function splitLineToFitWidthRecursion(
|
||||||
words: string[],
|
words: string[],
|
||||||
checkFit: CheckFitFunction,
|
checkFit: CheckFitFunction,
|
||||||
lines: string[] = [],
|
lines: string[] = [],
|
||||||
popped: string[] = []
|
newLine = ''
|
||||||
): string[] {
|
): string[] {
|
||||||
console.error('splitLineToFitWidth', { words, lines, popped });
|
|
||||||
// Return if there is nothing left to split
|
// Return if there is nothing left to split
|
||||||
if (words.length === 0 && popped.length === 0) {
|
|
||||||
return lines;
|
|
||||||
}
|
|
||||||
const remainingText = words.join(' ');
|
|
||||||
if (checkFit(remainingText)) {
|
|
||||||
lines.push(remainingText);
|
|
||||||
words = [...popped];
|
|
||||||
}
|
|
||||||
if (words.length > 1) {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
||||||
popped.unshift(words.pop()!);
|
|
||||||
return splitLineToFitWidth(words, checkFit, lines, popped);
|
|
||||||
} else if (words.length === 1) {
|
|
||||||
const [word, rest] = splitWordToFitWidth(checkFit, words[0]);
|
|
||||||
lines.push(word);
|
|
||||||
console.error({ word, rest });
|
|
||||||
if (rest) {
|
|
||||||
return splitLineToFitWidth([rest], checkFit, lines, []);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return lines;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function splitLineToFitWidthLoop(words: string[], checkFit: CheckFitFunction): string[] {
|
|
||||||
console.error('splitLineToFitWidthLoop', { words });
|
|
||||||
if (words.length === 0) {
|
if (words.length === 0) {
|
||||||
return [];
|
// If there is a new line, add it to the lines
|
||||||
|
if (newLine.length > 0) {
|
||||||
|
lines.push(newLine);
|
||||||
|
}
|
||||||
|
return lines.length > 0 ? lines : [''];
|
||||||
|
}
|
||||||
|
const nextWord = words.shift() ?? ' ';
|
||||||
|
const lineWithNextWord = newLine ? `${newLine} ${nextWord}` : nextWord;
|
||||||
|
if (checkFit(lineWithNextWord)) {
|
||||||
|
// nextWord fits, so we can add it to the new line and continue
|
||||||
|
return splitLineToFitWidthRecursion(words, checkFit, lines, lineWithNextWord);
|
||||||
}
|
}
|
||||||
|
|
||||||
const lines: string[] = [];
|
// nextWord doesn't fit, so we need to split it
|
||||||
let newLine: string[] = [];
|
if (newLine.length > 0) {
|
||||||
let lastCheckedWord = '';
|
// There was text in newLine, so add it to lines and push nextWord back into words.
|
||||||
while (words.length > 0) {
|
lines.push(newLine);
|
||||||
lastCheckedWord = words.shift() ?? ' ';
|
words.unshift(nextWord);
|
||||||
console.error({ lastCheckedWord, words });
|
|
||||||
if (checkFit([...newLine, lastCheckedWord].join(' '))) {
|
|
||||||
newLine.push(lastCheckedWord);
|
|
||||||
} else {
|
} else {
|
||||||
console.error({ newLine });
|
// There was no text in newLine, so we need to split nextWord
|
||||||
if (newLine.length === 0) {
|
const [line, rest] = splitWordToFitWidth(checkFit, nextWord);
|
||||||
const [word, rest] = splitWordToFitWidth2(checkFit, lastCheckedWord);
|
lines.push(line);
|
||||||
console.error({ word, rest });
|
|
||||||
lines.push(word);
|
|
||||||
if (rest) {
|
|
||||||
words.unshift(rest);
|
words.unshift(rest);
|
||||||
}
|
}
|
||||||
} else {
|
return splitLineToFitWidthRecursion(words, checkFit, lines);
|
||||||
words.unshift(lastCheckedWord);
|
|
||||||
lines.push(newLine.join(' '));
|
|
||||||
newLine = [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
console.error({ newLine, lastCheckedWord, words, lines });
|
|
||||||
}
|
|
||||||
if (newLine.length > 0) {
|
|
||||||
lines.push(newLine.join(' '));
|
|
||||||
}
|
|
||||||
console.error({ newLine, lastCheckedWord, words, lines });
|
|
||||||
return lines;
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user