From 84600a78db83bd02b781e8cc2d5c108720f98cd6 Mon Sep 17 00:00:00 2001 From: alsaleem00 Date: Thu, 11 Feb 2021 20:53:00 +0300 Subject: [PATCH] fix(text) improve Arabic contextual analysis (#2062) Adds hyphen processing and proper handling of lam-alef sequence --- CHANGELOG.md | 1 + src/lv_misc/lv_txt_ap.c | 53 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0510485bb..5c1148fda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Bugfixes - fix(indev) clear the indev's `act_obj` in `lv_indev_reset` - fix(text) fix out of bounds read in `_lv_txt_get_width` +- fix(text) improve Arabic contextual analysis by adding hyphen processing and proper handling of lam-alef sequence - fix(delete) delete animation after the children are deleted ## v7.10.0 diff --git a/src/lv_misc/lv_txt_ap.c b/src/lv_misc/lv_txt_ap.c index 6bd7124ac..168edebd4 100644 --- a/src/lv_misc/lv_txt_ap.c +++ b/src/lv_misc/lv_txt_ap.c @@ -25,6 +25,7 @@ **********************/ #if LV_USE_ARABIC_PERSIAN_CHARS == 1 static uint32_t lv_ap_get_char_index(uint16_t c); +static uint32_t lv_txt_lam_alef(uint32_t ch_curr, uint32_t ch_next); /********************** * STATIC VARIABLES @@ -58,6 +59,7 @@ const ap_chars_map_t ap_chars_map[] = { {22, 0xFEC6, 1, 2, -1, {1, 1}}, // ظ {23, 0xFECA, 1, 2, -1, {1, 1}}, // ع {24, 0xFECE, 1, 2, -1, {1, 1}}, // غ + {30, 0x0640, 0, 0, 0, {1, 1}}, // - (mad, hyphen) {31, 0xFED2, 1, 2, -1, {1, 1}}, // ف {32, 0xFED6, 1, 2, -1, {1, 1}}, // ق {135, 0xFB8F, 1, 2, -1, {1, 1}}, // ک @@ -130,11 +132,13 @@ void _lv_txt_ap_proc(const char * txt, char * txt_out) uint32_t txt_length = 0; uint32_t index_current, idx_next, idx_previous, i, j; uint32_t * ch_enc; + uint32_t * ch_fin; char * txt_out_temp; txt_length = _lv_txt_get_encoded_length(txt); ch_enc = (uint32_t *)lv_mem_alloc(sizeof(uint32_t) * (txt_length + 1)); + ch_fin = (uint32_t *)lv_mem_alloc(sizeof(uint32_t) * (txt_length + 1)); i = 0; j = 0; @@ -144,12 +148,15 @@ void _lv_txt_ap_proc(const char * txt, char * txt_out) ch_enc[j] = 0; i = 0; + j = 0; idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS; while(i < txt_length) { index_current = lv_ap_get_char_index(ch_enc[i]); idx_next = lv_ap_get_char_index(ch_enc[i + 1]); if(index_current == LV_UNDEF_ARABIC_PERSIAN_CHARS) { + ch_fin[j] = ch_enc[i]; + j++; i++; idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS; continue; @@ -160,18 +167,37 @@ void _lv_txt_ap_proc(const char * txt, char * txt_out) uint8_t conjunction_to_next = ((i == txt_length - 1) || idx_next == LV_UNDEF_ARABIC_PERSIAN_CHARS) ? 0 : ap_chars_map[idx_next].ap_chars_conjunction.conj_to_previous; + uint32_t lam_alef = lv_txt_lam_alef(index_current, idx_next); + if ( lam_alef ) { + if (conjunction_to_previuse) { + lam_alef ++; + } + ch_fin[j] = lam_alef; + idx_previous = LV_UNDEF_ARABIC_PERSIAN_CHARS; + i += 2; + j++; + continue; + } + if(conjunction_to_previuse && conjunction_to_next) - ch_enc[i] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_middle_form_offset; + ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_middle_form_offset; else if(!conjunction_to_previuse && conjunction_to_next) - ch_enc[i] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_begining_form_offset; + ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_begining_form_offset; else if(conjunction_to_previuse && !conjunction_to_next) - ch_enc[i] = ap_chars_map[index_current].char_end_form; + ch_fin[j] = ap_chars_map[index_current].char_end_form; else - ch_enc[i] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_isolated_form_offset; + ch_fin[j] = ap_chars_map[index_current].char_end_form + ap_chars_map[index_current].char_isolated_form_offset; idx_previous = index_current; i++; + j++; } - + ch_fin[j] = 0; + for (i=0; i