de4b564754
The function used an assert if n was 0. Instead of using an assert, the function will now just not do anything. The documentation has also been updated to reflect this. The reasoning for this is that the strlcpy function this (sort of) implements for utf8 works the same way. Signed-off-by: Emil Gydesen <emil.gydesen@nordicsemi.no>
75 lines
1.7 KiB
C
75 lines
1.7 KiB
C
/*
|
|
* Copyright (c) 2021 Nordic Semiconductor ASA
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <zephyr/sys/__assert.h>
|
|
|
|
#define ASCII_CHAR 0x7F
|
|
#define SEQUENCE_FIRST_MASK 0xC0
|
|
#define SEQUENCE_LEN_2_BYTE 0xC0
|
|
#define SEQUENCE_LEN_3_BYTE 0xE0
|
|
#define SEQUENCE_LEN_4_BYTE 0xF0
|
|
|
|
char *utf8_trunc(char *utf8_str)
|
|
{
|
|
char *last_byte_p = utf8_str + strlen(utf8_str) - 1;
|
|
uint8_t bytes_truncated;
|
|
char seq_start_byte;
|
|
|
|
if ((*last_byte_p & ASCII_CHAR) == *last_byte_p) {
|
|
/* Not part of an UTF8 sequence, return */
|
|
return utf8_str;
|
|
}
|
|
|
|
/* Find the starting byte and NULL-terminate other bytes */
|
|
bytes_truncated = 0;
|
|
while ((*last_byte_p & SEQUENCE_FIRST_MASK) != SEQUENCE_FIRST_MASK &&
|
|
last_byte_p > utf8_str) {
|
|
last_byte_p--;
|
|
bytes_truncated++;
|
|
}
|
|
bytes_truncated++; /* include the starting byte */
|
|
|
|
/* Verify if the the last character actually need to be truncated
|
|
* Handles the case where the number of bytes in the last UTF8-char
|
|
* matches the number of bytes we searched for the starting byte
|
|
*/
|
|
seq_start_byte = *last_byte_p;
|
|
if ((seq_start_byte & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) {
|
|
if (bytes_truncated == 4) {
|
|
return utf8_str;
|
|
}
|
|
} else if ((seq_start_byte & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) {
|
|
if (bytes_truncated == 3) {
|
|
return utf8_str;
|
|
}
|
|
} else if ((seq_start_byte & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) {
|
|
if (bytes_truncated == 2) {
|
|
return utf8_str;
|
|
}
|
|
}
|
|
|
|
/* NULL-terminate the unterminated starting byte */
|
|
*last_byte_p = '\0';
|
|
|
|
return utf8_str;
|
|
}
|
|
|
|
char *utf8_lcpy(char *dst, const char *src, size_t n)
|
|
{
|
|
if (n > 0) {
|
|
strncpy(dst, src, n - 1);
|
|
dst[n - 1] = '\0';
|
|
|
|
if (n != 1) {
|
|
utf8_trunc(dst);
|
|
}
|
|
}
|
|
|
|
return dst;
|
|
}
|