Use joiner to split unicode

This commit is contained in:
Sidharth Vinod 2023-06-13 11:42:39 +05:30
parent dd4e14690d
commit b36a0177db
No known key found for this signature in database
GPG Key ID: FB5CCD378D3907CD
2 changed files with 46 additions and 12 deletions

View File

@@ -13,26 +13,43 @@ describe('splitText', () => {
}); });
describe('split lines', () => { describe('split lines', () => {
const createCheckFn = (width: number): CheckFitFunction => {
return (text: string) => {
return splitTextToChars(text).length <= width;
};
};
it.each([ it.each([
// empty string // empty string
{ str: '', width: 1, split: [''] },
// Width >= Individual words
{ str: 'hello world', width: 5, split: ['hello', 'world'] },
{ str: 'hello world', width: 7, split: ['hello', 'world'] }, { str: 'hello world', width: 7, split: ['hello', 'world'] },
// width > full line // width > full line
{ str: 'hello world', width: 20, split: ['hello world'] }, { str: 'hello world', width: 20, split: ['hello world'] },
// width < individual word // width < individual word
{ str: 'hello world', width: 3, split: ['hel', 'lo', 'wor', 'ld'] }, { str: 'hello world', width: 3, split: ['hel', 'lo', 'wor', 'ld'] },
{ str: 'hello 12 world', width: 4, split: ['hell', 'o 12', 'worl', 'd'] }, { str: 'hello 12 world', width: 4, split: ['hell', 'o 12', 'worl', 'd'] },
{ str: 'hello 1 2 world', width: 4, split: ['hell', 'o 1', '2', 'worl', 'd'] },
{ str: 'hello 1 2 world', width: 6, split: ['hello', ' 1 2', 'world'] },
{ str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 1, split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] }, { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 1, split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
{ str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 2, split: ['🏳️‍⚧️🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
{ str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 3, split: ['🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻'] },
{ str: '中文中', width: 1, split: ['中', '文', '中'] },
{ str: '中文中', width: 2, split: ['中文', '中'] },
{ str: '中文中', width: 3, split: ['中文中'] },
{ str: 'Flag 🏳️‍⚧️ this 🏳️‍🌈', width: 6, split: ['Flag 🏳️‍⚧️', 'this 🏳️‍🌈'] }, { str: 'Flag 🏳️‍⚧️ this 🏳️‍🌈', width: 6, split: ['Flag 🏳️‍⚧️', 'this 🏳️‍🌈'] },
])( ])(
'should split $str into lines of $width characters', 'should split $str into lines of $width characters',
({ str, split, width }: { str: string; width: number; split: string[] }) => { ({ str, split, width }: { str: string; width: number; split: string[] }) => {
const checkFn: CheckFitFunction = (text: string) => { const checkFn = createCheckFn(width);
return splitTextToChars(text).length <= width;
};
expect(splitLineToFitWidth(str, checkFn)).toEqual(split); expect(splitLineToFitWidth(str, checkFn)).toEqual(split);
} }
); );
it('should handle strings with newlines', () => {
const checkFn: CheckFitFunction = createCheckFn(6);
const str = `Flag
🏳 this 🏳🌈`;
expect(() => splitLineToFitWidth(str, checkFn)).toThrowErrorMatchingInlineSnapshot(
'"splitLineToFitWidth does not support newlines in the line"'
);
});
}); });

View File

@@ -15,11 +15,17 @@ export function splitTextToChars(text: string): string[] {
*/ */
function splitLineToWords(text: string): string[] { function splitLineToWords(text: string): string[] {
if (Intl.Segmenter) { if (Intl.Segmenter) {
return [...new Intl.Segmenter(undefined, { granularity: 'word' }).segment(text)] return [...new Intl.Segmenter(undefined, { granularity: 'word' }).segment(text)].map(
.map((s) => s.segment) (s) => s.segment
.filter((word) => word !== ' '); );
} }
return text.split(' '); // Split by ' ' removes the ' 's from the result.
const words = text.split(' ');
// Add the ' 's back to the result.
const wordsWithSpaces = words.flatMap((s) => [s, ' ']);
// Remove last space.
wordsWithSpaces.pop();
return wordsWithSpaces;
} }
/** /**
@@ -55,7 +61,11 @@ function splitWordToFitWidthRecursion(
} }
export function splitLineToFitWidth(line: string, checkFit: CheckFitFunction): string[] { export function splitLineToFitWidth(line: string, checkFit: CheckFitFunction): string[] {
return splitLineToFitWidthRecursion(splitLineToWords(line), checkFit); if (line.includes('\n')) {
throw new Error('splitLineToFitWidth does not support newlines in the line');
}
const words = splitLineToWords(line);
return splitLineToFitWidthRecursion(words, checkFit);
} }
function splitLineToFitWidthRecursion( function splitLineToFitWidthRecursion(
@@ -74,8 +84,15 @@ function splitLineToFitWidthRecursion(
} }
return lines.length > 0 ? lines : ['']; return lines.length > 0 ? lines : [''];
} }
let joiner = '';
if (words[0] === ' ') {
joiner = ' ';
words.shift();
}
const nextWord = words.shift() ?? ' '; const nextWord = words.shift() ?? ' ';
const lineWithNextWord = newLine ? `${newLine} ${nextWord}` : nextWord;
const nextWordWithJoiner = joiner + nextWord;
const lineWithNextWord = newLine ? `${newLine}${nextWordWithJoiner}` : nextWordWithJoiner;
if (checkFit(lineWithNextWord)) { if (checkFit(lineWithNextWord)) {
// nextWord fits, so we can add it to the new line and continue // nextWord fits, so we can add it to the new line and continue
return splitLineToFitWidthRecursion(words, checkFit, lines, lineWithNextWord); return splitLineToFitWidthRecursion(words, checkFit, lines, lineWithNextWord);