1
0
mirror of https://github.com/lvgl/lvgl.git synced 2025-01-21 06:53:01 +08:00
lvgl/src/lv_misc/lv_txt.c

838 lines
28 KiB
C
Raw Normal View History

2017-11-23 20:42:14 +01:00
/**
2017-11-24 17:48:47 +01:00
* @file lv_text.c
2018-06-19 09:49:58 +02:00
*
2017-11-23 20:42:14 +01:00
*/
/*********************
* INCLUDES
*********************/
2017-11-26 11:38:28 +01:00
#include "lv_txt.h"
2017-11-23 20:42:14 +01:00
#include "lv_math.h"
2017-11-23 20:42:14 +01:00
/*********************
* DEFINES
*********************/
2019-04-04 07:15:40 +02:00
#define NO_BREAK_FOUND UINT32_MAX
2017-11-23 20:42:14 +01:00
/**********************
* TYPEDEFS
**********************/
/**********************
* STATIC PROTOTYPES
**********************/
2019-06-14 05:47:40 +02:00
static inline bool is_break_char(uint32_t letter);
2017-11-23 20:42:14 +01:00
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
static uint8_t lv_txt_utf8_size(const char * str);
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni);
static uint32_t lv_txt_utf8_conv_wc(uint32_t c);
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i);
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i_start);
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id);
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id);
static uint32_t lv_txt_utf8_get_length(const char * txt);
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
static uint8_t lv_txt_iso8859_1_size(const char * str);
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni);
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c);
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i);
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i_start);
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id);
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id);
static uint32_t lv_txt_iso8859_1_get_length(const char * txt);
#endif
2017-11-23 20:42:14 +01:00
/**********************
* STATIC VARIABLES
**********************/
/**********************
* GLOBAL VARIABLES
**********************/
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
2019-04-04 07:15:40 +02:00
uint8_t (*lv_txt_encoded_size)(const char *) = lv_txt_utf8_size;
uint32_t (*lv_txt_unicode_to_encoded)(uint32_t) = lv_txt_unicode_to_utf8;
uint32_t (*lv_txt_encoded_conv_wc)(uint32_t) = lv_txt_utf8_conv_wc;
uint32_t (*lv_txt_encoded_next)(const char *, uint32_t *) = lv_txt_utf8_next;
uint32_t (*lv_txt_encoded_prev)(const char *, uint32_t *) = lv_txt_utf8_prev;
2018-10-05 17:22:49 +02:00
uint32_t (*lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_utf8_get_byte_id;
uint32_t (*lv_txt_encoded_get_char_id)(const char *, uint32_t) = lv_txt_utf8_get_char_id;
2019-04-04 07:15:40 +02:00
uint32_t (*lv_txt_get_encoded_length)(const char *) = lv_txt_utf8_get_length;
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
uint8_t (*lv_txt_encoded_size)(const char *) = lv_txt_iso8859_1_size;
uint32_t (*lv_txt_unicode_to_encoded)(uint32_t) = lv_txt_unicode_to_iso8859_1;
uint32_t (*lv_txt_encoded_conv_wc)(uint32_t) = lv_txt_iso8859_1_conv_wc;
uint32_t (*lv_txt_encoded_next)(const char *, uint32_t *) = lv_txt_iso8859_1_next;
uint32_t (*lv_txt_encoded_prev)(const char *, uint32_t *) = lv_txt_iso8859_1_prev;
uint32_t (*lv_txt_encoded_get_byte_id)(const char *, uint32_t) = lv_txt_iso8859_1_get_byte_id;
uint32_t (*lv_txt_encoded_get_char_id)(const char *, uint32_t) = lv_txt_iso8859_1_get_char_id;
uint32_t (*lv_txt_get_encoded_length)(const char *) = lv_txt_iso8859_1_get_length;
#endif
2017-11-23 20:42:14 +01:00
/**********************
* MACROS
**********************/
/**********************
* GLOBAL FUNCTIONS
**********************/
/**
* Get size of a text
* @param size_res pointer to a 'point_t' variable to store the result
* @param text pointer to a text
* @param font pinter to font of the text
* @param letter_space letter space of the text
* @param txt.line_space line space of the text
* @param flags settings for the text from 'txt_flag_t' enum
2019-04-04 07:15:40 +02:00
* @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid
* line breaks
2017-11-23 20:42:14 +01:00
*/
2019-06-06 06:05:40 +02:00
void lv_txt_get_size(lv_point_t * size_res, const char * text, const lv_font_t * font, lv_coord_t letter_space,
lv_coord_t line_space, lv_coord_t max_width, lv_txt_flag_t flag)
2017-11-23 20:42:14 +01:00
{
size_res->x = 0;
size_res->y = 0;
if(text == NULL) return;
if(font == NULL) return;
2017-11-24 17:48:47 +01:00
if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
2017-11-23 20:42:14 +01:00
2019-04-04 07:15:40 +02:00
uint32_t line_start = 0;
2017-11-23 20:42:14 +01:00
uint32_t new_line_start = 0;
2017-11-23 21:28:36 +01:00
lv_coord_t act_line_length;
2019-04-23 15:56:59 +02:00
uint8_t letter_height = lv_font_get_line_height(font);
2017-11-23 20:42:14 +01:00
/*Calc. the height and longest line*/
2018-06-19 09:49:58 +02:00
while(text[line_start] != '\0') {
2019-06-06 06:05:40 +02:00
new_line_start += lv_txt_get_next_line(&text[line_start], font, letter_space, max_width, flag);
2019-04-04 07:15:40 +02:00
size_res->y += letter_height;
2017-11-23 20:42:14 +01:00
size_res->y += line_space;
/*Calculate the the longest line*/
2019-06-06 06:05:40 +02:00
act_line_length = lv_txt_get_width(&text[line_start], new_line_start - line_start, font, letter_space, flag);
2017-11-23 20:42:14 +01:00
2017-11-24 17:48:47 +01:00
size_res->x = LV_MATH_MAX(act_line_length, size_res->x);
2019-04-04 07:15:40 +02:00
line_start = new_line_start;
2017-11-23 20:42:14 +01:00
}
/*Ma ke the text one line taller if the last character is '\n' or '\r'*/
if((line_start != 0) && (text[line_start - 1] == '\n' || text[line_start - 1] == '\r')) {
2017-11-23 20:42:14 +01:00
size_res->y += letter_height + line_space;
}
/*Correction with the last line space or set the height manually if the text is empty*/
2019-04-04 07:15:40 +02:00
if(size_res->y == 0)
size_res->y = letter_height;
else
size_res->y -= line_space;
2017-11-23 20:42:14 +01:00
}
/**
* Get the next word of text. A word is delimited by break characters.
*
* If the word cannot fit in the max_width space, obey LV_TXT_LINE_BREAK_LONG_* rules.
*
* If the next word cannot fit anything, return 0.
*
* If the first character is a break character, returns the next index.
*
* Example calls from lv_txt_get_next_line() assuming sufficent max_width and
* txt = "Test text\n"
* 0123456789
*
* Calls would be as follows:
* 1. Return i=4, pointing at breakchar ' ', for the string "Test"
* 2. Return i=5, since i=4 was a breakchar.
* 3. Return i=9, pointing at breakchar '\n'
* 4. Parenting lv_txt_get_next_line() would detect subsequent '\0'
*
* TODO: Returned word_w_ptr may overestimate the returned word's width when
* max_width is reached. In current usage, this has no impact.
*
2017-11-23 20:42:14 +01:00
* @param txt a '\0' terminated string
* @param font pointer to a font
* @param letter_space letter space
2019-06-14 05:47:40 +02:00
* @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid line breaks
2017-11-23 20:42:14 +01:00
* @param flags settings for the text from 'txt_flag_type' enum
* @param[out] word_w_ptr width (in pixels) of the parsed word. May be NULL.
* @param force Force return the fraction of the word that can fit in the provided space.
* @return the index of the first char of the next word (in byte index not letter index. With UTF-8 they are different)
2017-11-23 20:42:14 +01:00
*/
static uint16_t lv_txt_get_next_word(const char * txt, const lv_font_t * font,
lv_coord_t letter_space, lv_coord_t max_width,
lv_txt_flag_t flag, uint32_t *word_w_ptr, lv_txt_cmd_state_t * cmd_state, bool force)
2017-11-23 20:42:14 +01:00
{
if(txt == NULL || txt[0] == '\0') return 0;
2017-11-23 20:42:14 +01:00
if(font == NULL) return 0;
2017-11-24 17:48:47 +01:00
if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
2017-11-23 20:42:14 +01:00
uint32_t i = 0, i_next = 0, i_next_next = 0; /* Iterating index into txt */
uint32_t letter = 0; /* Letter at i */
uint32_t letter_next = 0; /* Letter at i_next */
lv_coord_t letter_w;
lv_coord_t cur_w = 0; /* Pixel Width of transversed string */
uint32_t word_len = 0; /* Number of characters in the transversed word */
uint32_t break_index = NO_BREAK_FOUND; /* only used for "long" words */
uint32_t break_letter_count = 0; /* Number of characters up to the long word break point */
2019-06-14 05:47:40 +02:00
letter = lv_txt_encoded_next(txt, &i_next);
i_next_next = i_next;
/* Obtain the full word, regardless if it fits or not in max_width */
2017-11-23 20:42:14 +01:00
while(txt[i] != '\0') {
letter_next = lv_txt_encoded_next(txt, &i_next_next);
word_len++;
2017-11-23 20:42:14 +01:00
/*Handle the recolor command*/
2017-11-24 17:48:47 +01:00
if((flag & LV_TXT_FLAG_RECOLOR) != 0) {
if(lv_txt_is_cmd(cmd_state, letter) != false) {
2019-10-30 06:49:21 +01:00
i = i_next;
i_next = i_next_next;
letter = letter_next;
continue; /*Skip the letter is it is part of a command*/
2017-11-23 20:42:14 +01:00
}
}
letter_w = lv_font_get_glyph_width(font, letter, letter_next);
cur_w += letter_w;
/* Test if this character fits within max_width */
if(break_index == NO_BREAK_FOUND && cur_w > max_width) {
break_index = i;
break_letter_count = word_len - 1;
/* break_index is now pointing at the character that doesn't fit */
}
/*Check for new line chars and breakchars*/
if(letter == '\n' || letter == '\r' || is_break_char(letter)) {
/* Update the output width on the first character if it fits.
* Must do this here incase first letter is a break character. */
if(i == 0 && break_index == NO_BREAK_FOUND && word_w_ptr != NULL) *word_w_ptr = cur_w;
word_len--;
break;
2017-11-23 20:42:14 +01:00
}
2018-06-19 09:49:58 +02:00
/* Update the output width */
if( word_w_ptr != NULL && break_index == NO_BREAK_FOUND ) *word_w_ptr = cur_w;
2019-06-14 05:47:40 +02:00
if(letter_w > 0) {
cur_w += letter_space;
}
i = i_next;
i_next = i_next_next;
letter = letter_next;
}
/* Entire Word fits in the provided space */
if( break_index == NO_BREAK_FOUND ) {
if( word_len == 0 || (letter == '\r' && letter_next == '\n') ) i = i_next;
return i;
}
#if LV_TXT_LINE_BREAK_LONG_LEN > 0
/* Word doesn't fit in provided space, but isn't "long" */
if(word_len < LV_TXT_LINE_BREAK_LONG_LEN) {
if( force ) return break_index;
if(word_w_ptr != NULL) *word_w_ptr = 0; /* Return no word */
return 0;
}
/* Word is "long," but insufficient amounts can fit in provided space */
if(break_letter_count < LV_TXT_LINE_BREAK_LONG_PRE_MIN_LEN) {
if( force ) return break_index;
if(word_w_ptr != NULL) *word_w_ptr = 0;
return 0;
}
/* Word is a "long", but letters may need to be better distributed */
{
i = break_index;
int32_t n_move = LV_TXT_LINE_BREAK_LONG_POST_MIN_LEN - (word_len - break_letter_count);
/* Move pointer "i" backwards */
for(;n_move>0; n_move--){
lv_txt_encoded_prev(txt, &i);
// TODO: it would be appropriate to update the returned word width here
// However, in current usage, this doesn't impact anything.
}
}
return i;
#else
if( force ) return break_index;
if(word_w_ptr != NULL) *word_w_ptr = 0; /* Return no word */
(void) break_letter_count;
return 0;
#endif
}
/**
* Get the next line of text. Check line length and break chars too.
*
* A line of txt includes the \n character.
*
* @param txt a '\0' terminated string
* @param font pointer to a font
* @param letter_space letter space
* @param max_width max with of the text (break the lines to fit this size) Set CORD_MAX to avoid line breaks
* @param flags settings for the text from 'txt_flag_type' enum
* @return the index of the first char of the new line (in byte index not letter index. With UTF-8 they are different)
*/
uint16_t lv_txt_get_next_line(const char * txt, const lv_font_t * font,
lv_coord_t letter_space, lv_coord_t max_width, lv_txt_flag_t flag)
{
if(txt == NULL) return 0;
if(font == NULL) return 0;
if(flag & LV_TXT_FLAG_EXPAND) max_width = LV_COORD_MAX;
lv_txt_cmd_state_t cmd_state = LV_TXT_CMD_STATE_WAIT;
uint32_t i = 0; /* Iterating index into txt */
while(txt[i] != '\0' && max_width > 0) {
uint32_t word_w = 0;
uint32_t advance = lv_txt_get_next_word(&txt[i], font, letter_space, max_width, flag, &word_w, &cmd_state, i==0);
max_width -= word_w;
if( advance == 0 ){
if(i == 0) lv_txt_encoded_next(txt, &i); // prevent inf loops
break;
}
i += advance;
if(txt[0] == '\n') break;
if(txt[i] == '\n'){
i++; /* Include the following newline in the current line */
break;
}
2019-06-20 08:20:45 -07:00
}
2019-06-23 16:04:17 -07:00
/* Always step at least one to avoid infinite loops */
if(i == 0) {
lv_txt_encoded_next(txt, &i);
}
2017-11-23 20:42:14 +01:00
return i;
}
/**
* Give the length of a text with a given font
* @param txt a '\0' terminate string
2019-04-04 07:15:40 +02:00
* @param length length of 'txt' in byte count and not characters (Á is 1 character but 2 bytes in
* UTF-8)
2017-11-23 20:42:14 +01:00
* @param font pointer to a font
* @param letter_space letter space
* @param flags settings for the text from 'txt_flag_t' enum
* @return length of a char_num long text
*/
2019-06-06 06:05:40 +02:00
lv_coord_t lv_txt_get_width(const char * txt, uint16_t length, const lv_font_t * font, lv_coord_t letter_space,
lv_txt_flag_t flag)
2017-11-23 20:42:14 +01:00
{
if(txt == NULL) return 0;
if(font == NULL) return 0;
2019-04-04 07:15:40 +02:00
uint32_t i = 0;
lv_coord_t width = 0;
2017-11-24 17:48:47 +01:00
lv_txt_cmd_state_t cmd_state = LV_TXT_CMD_STATE_WAIT;
2017-11-23 20:42:14 +01:00
uint32_t letter;
2019-05-01 16:43:32 +02:00
uint32_t letter_next;
2018-06-19 09:49:58 +02:00
2017-11-23 20:42:14 +01:00
if(length != 0) {
2019-04-04 07:15:40 +02:00
while(i < length) {
2019-06-06 06:05:40 +02:00
letter = lv_txt_encoded_next(txt, &i);
2019-05-01 16:43:32 +02:00
letter_next = lv_txt_encoded_next(&txt[i], NULL);
2017-11-24 17:48:47 +01:00
if((flag & LV_TXT_FLAG_RECOLOR) != 0) {
if(lv_txt_is_cmd(&cmd_state, letter) != false) {
2017-11-23 20:42:14 +01:00
continue;
}
}
2019-05-01 16:43:32 +02:00
lv_coord_t char_width = lv_font_get_glyph_width(font, letter, letter_next);
2019-04-04 07:15:40 +02:00
if(char_width > 0) {
width += char_width;
width += letter_space;
}
2017-11-23 20:42:14 +01:00
}
if(width > 0) {
2019-04-04 07:15:40 +02:00
width -= letter_space; /*Trim the last letter space. Important if the text is center
aligned */
}
2017-11-23 20:42:14 +01:00
}
2018-06-19 09:49:58 +02:00
2017-11-23 20:42:14 +01:00
return width;
}
/**
* Check next character in a string and decide if the character is part of the command or not
2019-04-04 07:15:40 +02:00
* @param state pointer to a txt_cmd_state_t variable which stores the current state of command
* processing (Initied. to TXT_CMD_STATE_WAIT )
2017-11-23 20:42:14 +01:00
* @param c the current character
* @return true: the character is part of a command and should not be written,
* false: the character should be written
*/
2017-11-24 17:48:47 +01:00
bool lv_txt_is_cmd(lv_txt_cmd_state_t * state, uint32_t c)
2017-11-23 20:42:14 +01:00
{
bool ret = false;
2017-12-02 20:43:50 +01:00
if(c == (uint32_t)LV_TXT_COLOR_CMD[0]) {
2018-06-19 09:49:58 +02:00
if(*state == LV_TXT_CMD_STATE_WAIT) { /*Start char*/
*state = LV_TXT_CMD_STATE_PAR;
2019-04-04 07:15:40 +02:00
ret = true;
2019-04-04 16:44:16 +02:00
}
/*Other start char in parameter is escaped cmd. char */
else if(*state == LV_TXT_CMD_STATE_PAR) {
2018-06-19 09:49:58 +02:00
*state = LV_TXT_CMD_STATE_WAIT;
2019-04-04 16:44:16 +02:00
}
/*Command end */
else if(*state == LV_TXT_CMD_STATE_IN) {
2018-06-19 09:49:58 +02:00
*state = LV_TXT_CMD_STATE_WAIT;
2019-04-04 07:15:40 +02:00
ret = true;
2018-06-19 09:49:58 +02:00
}
}
/*Skip the color parameter and wait the space after it*/
if(*state == LV_TXT_CMD_STATE_PAR) {
if(c == ' ') {
*state = LV_TXT_CMD_STATE_IN; /*After the parameter the text is in the command*/
}
ret = true;
}
return ret;
2017-11-23 20:42:14 +01:00
}
/**
* Insert a string into an other
* @param txt_buf the original text (must be big enough for the result text)
2019-04-04 07:15:40 +02:00
* @param pos position to insert. Expressed in character index and not byte index (Different in
* UTF-8) 0: before the original text, 1: after the first char etc.
2017-11-23 20:42:14 +01:00
* @param ins_txt text to insert
*/
2017-11-24 17:48:47 +01:00
void lv_txt_ins(char * txt_buf, uint32_t pos, const char * ins_txt)
2017-11-23 20:42:14 +01:00
{
uint32_t old_len = strlen(txt_buf);
uint32_t ins_len = strlen(ins_txt);
uint32_t new_len = ins_len + old_len;
2019-06-06 06:05:40 +02:00
pos = lv_txt_encoded_get_byte_id(txt_buf, pos); /*Convert to byte index instead of letter index*/
2017-11-23 20:42:14 +01:00
/*Copy the second part into the end to make place to text to insert*/
2017-12-02 20:43:50 +01:00
uint32_t i;
2018-06-19 09:49:58 +02:00
for(i = new_len; i >= pos + ins_len; i--) {
2017-11-23 20:42:14 +01:00
txt_buf[i] = txt_buf[i - ins_len];
}
/* Copy the text into the new space*/
memcpy(txt_buf + pos, ins_txt, ins_len);
}
/**
* Delete a part of a string
* @param txt string to modify
2019-04-04 07:15:40 +02:00
* @param pos position where to start the deleting (0: before the first char, 1: after the first
* char etc.)
2017-11-23 20:42:14 +01:00
* @param len number of characters to delete
*/
2017-11-24 17:48:47 +01:00
void lv_txt_cut(char * txt, uint32_t pos, uint32_t len)
2017-11-23 20:42:14 +01:00
{
uint32_t old_len = strlen(txt);
2019-04-04 07:15:40 +02:00
pos = lv_txt_encoded_get_byte_id(txt, pos); /*Convert to byte index instead of letter index*/
2018-09-25 16:21:31 +02:00
len = lv_txt_encoded_get_byte_id(&txt[pos], len);
2017-11-23 20:42:14 +01:00
/*Copy the second part into the end to make place to text to insert*/
uint32_t i;
2018-06-19 09:49:58 +02:00
for(i = pos; i <= old_len - len; i++) {
txt[i] = txt[i + len];
2017-11-23 20:42:14 +01:00
}
}
#if LV_TXT_ENC == LV_TXT_ENC_UTF8
/*******************************
* UTF-8 ENCODER/DECOER
******************************/
2017-11-23 20:42:14 +01:00
/**
* Give the size of an UTF-8 coded character
* @param str pointer to a character in a string
2017-11-23 20:42:14 +01:00
* @return length of the UTF-8 character (1,2,3 or 4). O on invalid code
*/
static uint8_t lv_txt_utf8_size(const char * str)
2017-11-23 20:42:14 +01:00
{
2019-04-04 07:15:40 +02:00
if((str[0] & 0x80) == 0)
return 1;
else if((str[0] & 0xE0) == 0xC0)
return 2;
else if((str[0] & 0xF0) == 0xE0)
return 3;
else if((str[0] & 0xF8) == 0xF0)
return 4;
2019-09-27 06:03:54 +02:00
return 0; /*If the char was invalid tell it's 1 byte long*/
2017-11-23 20:42:14 +01:00
}
/**
* Convert an Unicode letter to UTF-8.
* @param letter_uni an Unicode letter
* @return UTF-8 coded character in Little Endian to be compatible with C chars (e.g. 'Á', 'Ű')
*/
static uint32_t lv_txt_unicode_to_utf8(uint32_t letter_uni)
2017-11-23 20:42:14 +01:00
{
if(letter_uni < 128) return letter_uni;
uint8_t bytes[4];
2018-06-19 09:49:58 +02:00
if(letter_uni < 0x0800) {
2019-04-04 07:15:40 +02:00
bytes[0] = ((letter_uni >> 6) & 0x1F) | 0xC0;
bytes[1] = ((letter_uni >> 0) & 0x3F) | 0x80;
2017-11-23 20:42:14 +01:00
bytes[2] = 0;
bytes[3] = 0;
2018-06-19 09:49:58 +02:00
} else if(letter_uni < 0x010000) {
bytes[0] = ((letter_uni >> 12) & 0x0F) | 0xE0;
2019-04-04 07:15:40 +02:00
bytes[1] = ((letter_uni >> 6) & 0x3F) | 0x80;
bytes[2] = ((letter_uni >> 0) & 0x3F) | 0x80;
2017-11-23 20:42:14 +01:00
bytes[3] = 0;
2018-06-19 09:49:58 +02:00
} else if(letter_uni < 0x110000) {
bytes[0] = ((letter_uni >> 18) & 0x07) | 0xF0;
bytes[1] = ((letter_uni >> 12) & 0x3F) | 0x80;
2019-04-04 07:15:40 +02:00
bytes[2] = ((letter_uni >> 6) & 0x3F) | 0x80;
bytes[3] = ((letter_uni >> 0) & 0x3F) | 0x80;
2017-11-23 20:42:14 +01:00
}
2018-06-19 09:49:58 +02:00
uint32_t * res_p = (uint32_t *)bytes;
2017-11-26 23:57:39 +01:00
return *res_p;
2017-11-23 20:42:14 +01:00
}
/**
* Convert a wide character, e.g. 'Á' little endian to be UTF-8 compatible
* @param c a wide character or a Little endian number
* @return `c` in big endian
*/
static uint32_t lv_txt_utf8_conv_wc(uint32_t c)
{
2018-10-05 17:22:49 +02:00
/*Swap the bytes (UTF-8 is big endian, but the MCUs are little endian)*/
if((c & 0x80) != 0) {
uint32_t swapped;
uint8_t c8[4];
memcpy(c8, &c, 4);
swapped = (c8[0] << 24) + (c8[1] << 16) + (c8[2] << 8) + (c8[3]);
uint8_t i;
for(i = 0; i < 4; i++) {
2019-04-04 07:15:40 +02:00
if((swapped & 0xFF) == 0)
swapped = (swapped >> 8); /*Ignore leading zeros (they were in the end originally)*/
2018-10-05 17:22:49 +02:00
}
c = swapped;
}
return c;
}
2017-11-23 20:42:14 +01:00
/**
* Decode an UTF-8 character from a string.
* @param txt pointer to '\0' terminated string
2017-11-24 17:48:47 +01:00
* @param i start byte index in 'txt' where to start.
* After call it will point to the next UTF-8 char in 'txt'.
* NULL to use txt[0] as index
2017-11-23 20:42:14 +01:00
* @return the decoded Unicode character or 0 on invalid UTF-8 code
*/
static uint32_t lv_txt_utf8_next(const char * txt, uint32_t * i)
2017-11-23 20:42:14 +01:00
{
/* Unicode to UTF-8
* 00000000 00000000 00000000 0xxxxxxx -> 0xxxxxxx
* 00000000 00000000 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
* 00000000 00000000 zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
* 00000000 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
* */
uint32_t result = 0;
/*Dummy 'i' pointer is required*/
uint32_t i_tmp = 0;
if(i == NULL) i = &i_tmp;
/*Normal ASCII*/
if((txt[*i] & 0x80) == 0) {
result = txt[*i];
(*i)++;
}
/*Real UTF-8 decode*/
else {
/*2 bytes UTF-8 code*/
if((txt[*i] & 0xE0) == 0xC0) {
result = (uint32_t)(txt[*i] & 0x1F) << 6;
2017-11-23 20:42:14 +01:00
(*i)++;
2019-04-04 07:15:40 +02:00
if((txt[*i] & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
result += (txt[*i] & 0x3F);
2017-11-23 20:42:14 +01:00
(*i)++;
}
/*3 bytes UTF-8 code*/
else if((txt[*i] & 0xF0) == 0xE0) {
result = (uint32_t)(txt[*i] & 0x0F) << 12;
2017-11-23 20:42:14 +01:00
(*i)++;
2019-04-04 07:15:40 +02:00
if((txt[*i] & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
result += (uint32_t)(txt[*i] & 0x3F) << 6;
2017-11-23 20:42:14 +01:00
(*i)++;
2019-04-04 07:15:40 +02:00
if((txt[*i] & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
result += (txt[*i] & 0x3F);
2017-11-23 20:42:14 +01:00
(*i)++;
}
/*4 bytes UTF-8 code*/
else if((txt[*i] & 0xF8) == 0xF0) {
result = (uint32_t)(txt[*i] & 0x07) << 18;
2017-11-23 20:42:14 +01:00
(*i)++;
2019-04-04 07:15:40 +02:00
if((txt[*i] & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
result += (uint32_t)(txt[*i] & 0x3F) << 12;
2017-11-23 20:42:14 +01:00
(*i)++;
2019-04-04 07:15:40 +02:00
if((txt[*i] & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
result += (uint32_t)(txt[*i] & 0x3F) << 6;
2017-11-23 20:42:14 +01:00
(*i)++;
2019-04-04 07:15:40 +02:00
if((txt[*i] & 0xC0) != 0x80) return 0; /*Invalid UTF-8 code*/
result += txt[*i] & 0x3F;
2017-11-23 20:42:14 +01:00
(*i)++;
} else {
(*i)++; /*Not UTF-8 char. Go the next.*/
}
}
return result;
}
/**
* Get previous UTF-8 character form a string.
* @param txt pointer to '\0' terminated string
2019-04-04 07:15:40 +02:00
* @param i start byte index in 'txt' where to start. After the call it will point to the previous
* UTF-8 char in 'txt'.
2017-11-23 20:42:14 +01:00
* @return the decoded Unicode character or 0 on invalid UTF-8 code
*/
static uint32_t lv_txt_utf8_prev(const char * txt, uint32_t * i)
2017-11-23 20:42:14 +01:00
{
uint8_t c_size;
uint8_t cnt = 0;
/*Try to find a !0 long UTF-8 char by stepping one character back*/
2017-11-24 17:48:47 +01:00
(*i)--;
2017-11-23 20:42:14 +01:00
do {
2019-04-04 07:15:40 +02:00
if(cnt >= 4) return 0; /*No UTF-8 char found before the initial*/
2017-11-23 20:42:14 +01:00
c_size = lv_txt_encoded_size(&txt[*i]);
2017-11-23 20:42:14 +01:00
if(c_size == 0) {
2019-04-04 07:15:40 +02:00
if(*i != 0)
(*i)--;
else
return 0;
2017-11-23 20:42:14 +01:00
}
cnt++;
} while(c_size == 0);
2019-04-04 07:15:40 +02:00
uint32_t i_tmp = *i;
uint32_t letter = lv_txt_encoded_next(txt, &i_tmp); /*Character found, get it*/
2017-11-23 20:42:14 +01:00
return letter;
}
/**
* Convert a character index (in an UTF-8 text) to byte index.
2017-11-24 17:48:47 +01:00
* E.g. in "AÁRT" index of 'R' is 2th char but start at byte 3 because 'Á' is 2 bytes long
2017-11-23 20:42:14 +01:00
* @param txt a '\0' terminated UTF-8 string
2017-11-24 17:48:47 +01:00
* @param utf8_id character index
2017-11-23 20:42:14 +01:00
* @return byte index of the 'utf8_id'th letter
*/
static uint32_t lv_txt_utf8_get_byte_id(const char * txt, uint32_t utf8_id)
2017-11-23 20:42:14 +01:00
{
uint32_t i;
uint32_t byte_cnt = 0;
for(i = 0; i < utf8_id; i++) {
2019-09-27 06:03:54 +02:00
uint8_t c_size = lv_txt_encoded_size(&txt[byte_cnt]);
byte_cnt += c_size > 0 ? c_size : 1;
2017-11-23 20:42:14 +01:00
}
return byte_cnt;
}
/**
* Convert a byte index (in an UTF-8 text) to character index.
2017-11-24 17:48:47 +01:00
* E.g. in "AÁRT" index of 'R' is 2th char but start at byte 3 because 'Á' is 2 bytes long
2017-11-23 20:42:14 +01:00
* @param txt a '\0' terminated UTF-8 string
* @param byte_id byte index
* @return character index of the letter at 'byte_id'th position
*/
static uint32_t lv_txt_utf8_get_char_id(const char * txt, uint32_t byte_id)
2017-11-23 20:42:14 +01:00
{
2019-04-04 07:15:40 +02:00
uint32_t i = 0;
2017-11-23 20:42:14 +01:00
uint32_t char_cnt = 0;
while(i < byte_id) {
2018-10-05 17:22:49 +02:00
lv_txt_encoded_next(txt, &i); /*'i' points to the next letter so use the prev. value*/
2017-11-23 20:42:14 +01:00
char_cnt++;
}
return char_cnt;
}
/**
* Get the number of characters (and NOT bytes) in a string. Decode it with UTF-8 if enabled.
* E.g.: "ÁBC" is 3 characters (but 4 bytes)
* @param txt a '\0' terminated char string
* @return number of characters
*/
static uint32_t lv_txt_utf8_get_length(const char * txt)
2017-11-23 20:42:14 +01:00
{
uint32_t len = 0;
2019-04-04 07:15:40 +02:00
uint32_t i = 0;
2017-11-23 20:42:14 +01:00
while(txt[i] != '\0') {
2018-10-05 17:22:49 +02:00
lv_txt_encoded_next(txt, &i);
2017-11-23 20:42:14 +01:00
len++;
}
return len;
}
#elif LV_TXT_ENC == LV_TXT_ENC_ASCII
/*******************************
* ASCII ENCODER/DECOER
******************************/
/**
* Give the size of an ISO8859-1 coded character
* @param str pointer to a character in a string
* @return length of the UTF-8 character (1,2,3 or 4). O on invalid code
*/
static uint8_t lv_txt_iso8859_1_size(const char * str)
{
2019-06-06 06:05:40 +02:00
(void)str; /*Unused*/
return 1;
}
/**
* Convert an Unicode letter to ISO8859-1.
* @param letter_uni an Unicode letter
* @return ISO8859-1 coded character in Little Endian to be compatible with C chars (e.g. 'Á', 'Ű')
*/
static uint32_t lv_txt_unicode_to_iso8859_1(uint32_t letter_uni)
{
2019-06-06 06:05:40 +02:00
if(letter_uni < 128)
return letter_uni;
else
return ' ';
}
/**
* Convert wide characters to ASCII, however wide characters in ASCII range (e.g. 'A') are ASCII compatible by default.
* So this function does nothing just returns with `c`.
* @param c a character, e.g. 'A'
* @return same as `c`
*/
static uint32_t lv_txt_iso8859_1_conv_wc(uint32_t c)
{
return c;
}
/**
* Decode an ISO8859-1 character from a string.
* @param txt pointer to '\0' terminated string
* @param i start byte index in 'txt' where to start.
* After call it will point to the next UTF-8 char in 'txt'.
* NULL to use txt[0] as index
* @return the decoded Unicode character or 0 on invalid UTF-8 code
*/
static uint32_t lv_txt_iso8859_1_next(const char * txt, uint32_t * i)
{
2019-06-06 06:05:40 +02:00
if(i == NULL) return txt[1]; /*Get the next char */
2019-06-06 06:05:40 +02:00
uint8_t letter = txt[*i];
(*i)++;
return letter;
}
/**
* Get previous ISO8859-1 character form a string.
* @param txt pointer to '\0' terminated string
* @param i start byte index in 'txt' where to start. After the call it will point to the previous UTF-8 char in 'txt'.
* @return the decoded Unicode character or 0 on invalid UTF-8 code
*/
static uint32_t lv_txt_iso8859_1_prev(const char * txt, uint32_t * i)
{
2019-06-06 06:05:40 +02:00
if(i == NULL) return *(txt - 1); /*Get the prev. char */
(*i)--;
2019-06-06 06:05:40 +02:00
uint8_t letter = txt[*i];
return letter;
}
/**
* Convert a character index (in an ISO8859-1 text) to byte index.
* E.g. in "AÁRT" index of 'R' is 2th char but start at byte 3 because 'Á' is 2 bytes long
* @param txt a '\0' terminated UTF-8 string
* @param utf8_id character index
* @return byte index of the 'utf8_id'th letter
*/
static uint32_t lv_txt_iso8859_1_get_byte_id(const char * txt, uint32_t utf8_id)
{
2019-06-06 06:05:40 +02:00
(void)txt; /*Unused*/
return utf8_id; /*In Non encoded no difference*/
}
/**
* Convert a byte index (in an ISO8859-1 text) to character index.
* E.g. in "AÁRT" index of 'R' is 2th char but start at byte 3 because 'Á' is 2 bytes long
* @param txt a '\0' terminated UTF-8 string
* @param byte_id byte index
* @return character index of the letter at 'byte_id'th position
*/
static uint32_t lv_txt_iso8859_1_get_char_id(const char * txt, uint32_t byte_id)
{
2019-06-06 06:05:40 +02:00
(void)txt; /*Unused*/
return byte_id; /*In Non encoded no difference*/
}
/**
* Get the number of characters (and NOT bytes) in a string. Decode it with UTF-8 if enabled.
* E.g.: "ÁBC" is 3 characters (but 4 bytes)
* @param txt a '\0' terminated char string
* @return number of characters
*/
static uint32_t lv_txt_iso8859_1_get_length(const char * txt)
{
return strlen(txt);
}
#else
#error "Invalid character encoding. See `LV_TXT_ENC` in `lv_conf.h`"
#endif
2017-11-23 20:42:14 +01:00
/**********************
* STATIC FUNCTIONS
**********************/
/**
* Test if char is break char or not (a text can broken here or not)
* @param letter a letter
* @return false: 'letter' is not break char
*/
2019-06-27 07:16:15 +02:00
static inline bool is_break_char(uint32_t letter)
2017-11-23 20:42:14 +01:00
{
uint8_t i;
bool ret = false;
2018-06-19 09:49:58 +02:00
2017-11-23 20:42:14 +01:00
/*Compare the letter to TXT_BREAK_CHARS*/
2017-11-26 11:38:28 +01:00
for(i = 0; LV_TXT_BREAK_CHARS[i] != '\0'; i++) {
2017-12-02 20:43:50 +01:00
if(letter == (uint32_t)LV_TXT_BREAK_CHARS[i]) {
2017-11-23 20:42:14 +01:00
ret = true; /*If match then it is break char*/
break;
}
}
2018-06-19 09:49:58 +02:00
2017-11-23 20:42:14 +01:00
return ret;
}