Use joiner to split unicode

2025-01-21 06:53:17 +08:00 · 2023-06-13 11:42:39 +05:30 · 2023-06-13 11:42:39 +05:30 · b36a0177db
commit b36a0177db
parent dd4e14690d
2 changed files with 46 additions and 12 deletions
--- a/packages/mermaid/src/rendering-util/splitText.spec.ts
+++ b/packages/mermaid/src/rendering-util/splitText.spec.ts
@ -13,26 +13,43 @@ describe('splitText', () => {
 });

 describe('split lines', () => {
  // Builds a CheckFitFunction that accepts a text when splitTextToChars(text) yields
  // at most `width` entries, so "width" in the cases below is counted in characters.
+  const createCheckFn = (width: number): CheckFitFunction => {
+    return (text: string) => {
+      return splitTextToChars(text).length <= width;
+    };
+  };
+
  it.each([
    // empty string
-    { str: '', width: 1, split: [''] },
-    // Width >= Individual words
-    { str: 'hello world', width: 5, split: ['hello', 'world'] },
    // width >= each individual word, but < full line
    { str: 'hello world', width: 7, split: ['hello', 'world'] },
    // width > full line
    { str: 'hello world', width: 20, split: ['hello world'] },
    // width < individual word
    { str: 'hello world', width: 3, split: ['hel', 'lo', 'wor', 'ld'] },
    { str: 'hello 12 world', width: 4, split: ['hell', 'o 12', 'worl', 'd'] },
    // consecutive spaces: the expected lines keep the run of spaces intact
+    { str: 'hello  1 2 world', width: 4, split: ['hell', 'o  1', '2', 'worl', 'd'] },
+    { str: 'hello  1 2 world', width: 6, split: ['hello', '  1 2', 'world'] },
    // multi-codepoint emoji sequences: each sequence is expected to count as one character
    { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 1, split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
+    { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 2, split: ['🏳️‍⚧️🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
+    { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 3, split: ['🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻'] },
    // text with no spaces between characters
+    { str: '中文中', width: 1, split: ['中', '文', '中'] },
+    { str: '中文中', width: 2, split: ['中文', '中'] },
+    { str: '中文中', width: 3, split: ['中文中'] },
    // mix of words and emoji sequences
    { str: 'Flag 🏳️‍⚧️ this 🏳️‍🌈', width: 6, split: ['Flag 🏳️‍⚧️', 'this 🏳️‍🌈'] },
  ])(
    'should split $str into lines of $width characters',
    ({ str, split, width }: { str: string; width: number; split: string[] }) => {
-      const checkFn: CheckFitFunction = (text: string) => {
-        return splitTextToChars(text).length <= width;
-      };
+      const checkFn = createCheckFn(width);
      expect(splitLineToFitWidth(str, checkFn)).toEqual(split);
    }
  );
+
  // Input containing a newline is expected to be rejected with an error, not split.
+  it('should handle strings with newlines', () => {
+    const checkFn: CheckFitFunction = createCheckFn(6);
+    const str = `Flag
+  🏳️‍⚧️ this 🏳️‍🌈`;
+    expect(() => splitLineToFitWidth(str, checkFn)).toThrowErrorMatchingInlineSnapshot(
+      '"splitLineToFitWidth does not support newlines in the line"'
+    );
+  });
 });
--- a/packages/mermaid/src/rendering-util/splitText.ts
+++ b/packages/mermaid/src/rendering-util/splitText.ts
@ -15,11 +15,17 @@ export function splitTextToChars(text: string): string[] {
 */
 function splitLineToWords(text: string): string[] {
  // Use Intl.Segmenter when the runtime provides it. The segments are returned unfiltered:
  // the earlier `word !== ' '` filter is removed, so ' ' segments stay in the result.
  if (Intl.Segmenter) {
-    return [...new Intl.Segmenter(undefined, { granularity: 'word' }).segment(text)]
-      .map((s) => s.segment)
-      .filter((word) => word !== ' ');
+    return [...new Intl.Segmenter(undefined, { granularity: 'word' }).segment(text)].map(
+      (s) => s.segment
+    );
  }
-  return text.split(' ');
+  // Fallback without Intl.Segmenter: split(' ') drops the separating spaces from the result.
+  const words = text.split(' ');
+  // Re-insert a ' ' entry after every piece so the spaces become separate entries again.
+  const wordsWithSpaces = words.flatMap((s) => [s, ' ']);
+  // flatMap also appended a ' ' after the final piece; drop that trailing entry.
+  wordsWithSpaces.pop();
+  return wordsWithSpaces;
 }

 /**
@ -55,7 +61,11 @@ function splitWordToFitWidthRecursion(
 }

 /**
  * Splits a single line of text into multiple lines, using `checkFit` to decide what fits.
  *
  * The line is first tokenised with `splitLineToWords`; the resulting list is then handed to
  * `splitLineToFitWidthRecursion`, which assembles the output lines.
  *
  * @param line - The text to split. It must not contain a newline character.
  * @param checkFit - Callback that reports whether a candidate line fits.
  * @returns The lines produced by `splitLineToFitWidthRecursion`.
  * @throws Error when `line` contains a newline character.
  */
 export function splitLineToFitWidth(line: string, checkFit: CheckFitFunction): string[] {
-  return splitLineToFitWidthRecursion(splitLineToWords(line), checkFit);
  // Only single lines are supported; reject newlines up front instead of splitting on them.
+  if (line.includes('\n')) {
+    throw new Error('splitLineToFitWidth does not support newlines in the line');
+  }
+  const words = splitLineToWords(line);
+  return splitLineToFitWidthRecursion(words, checkFit);
 }

 function splitLineToFitWidthRecursion(
@ -74,8 +84,15 @@ function splitLineToFitWidthRecursion(
    }
    return lines.length > 0 ? lines : [''];
  }
+  let joiner = '';
+  if (words[0] === ' ') {
+    joiner = ' ';
+    words.shift();
+  }
  const nextWord = words.shift() ?? ' ';
-  const lineWithNextWord = newLine ? `${newLine} ${nextWord}` : nextWord;
+
+  const nextWordWithJoiner = joiner + nextWord;
+  const lineWithNextWord = newLine ? `${newLine}${nextWordWithJoiner}` : nextWordWithJoiner;
  if (checkFit(lineWithNextWord)) {
    // nextWord fits, so we can add it to the new line and continue
    return splitLineToFitWidthRecursion(words, checkFit, lines, lineWithNextWord);