zephyr/lib/os/utf8.c
Emil Gydesen de4b564754 zephyr: Fix n=0 for utf8_lcpy
The function used an assert if n was 0. Instead
of using an assert, the function will now just
not do anything. The documentation has also
been updated to reflect this.

The reasoning for this is that the strlcpy function
this (sort of) implements for utf8 works the same way.

Signed-off-by: Emil Gydesen <emil.gydesen@nordicsemi.no>
2022-07-04 15:49:22 +02:00

75 lines
1.7 KiB
C

/*
* Copyright (c) 2021 Nordic Semiconductor ASA
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include <string.h>
#include <zephyr/sys/__assert.h>
/* Mask of the 7 value bits of a single-byte character (top bit clear) */
#define ASCII_CHAR 0x7F
/* A byte with both top bits set (11xxxxxx) is the first byte of a sequence */
#define SEQUENCE_FIRST_MASK 0xC0
/* First-byte prefixes that announce a 2-, 3- or 4-byte sequence */
#define SEQUENCE_LEN_2_BYTE 0xC0
#define SEQUENCE_LEN_3_BYTE 0xE0
#define SEQUENCE_LEN_4_BYTE 0xF0

/**
 * Remove an incomplete trailing multi-byte UTF-8 character from a string.
 *
 * The string is modified in place: if its last character is a multi-byte
 * sequence whose byte count does not match the length announced by its
 * starting byte, the starting byte is overwritten with a NUL terminator.
 * Strings ending in a single-byte character, strings ending in a complete
 * sequence, and empty strings are left untouched.
 *
 * @param utf8_str NUL-terminated string to fix up; must not be NULL.
 *
 * @return utf8_str
 */
char *utf8_trunc(char *utf8_str)
{
	const size_t len = strlen(utf8_str);
	char *last_byte_p;
	/* size_t so that a long run of continuation bytes cannot wrap the
	 * counter back onto 2, 3 or 4 and be mistaken for a complete sequence
	 */
	size_t bytes_truncated = 0;
	char seq_start_byte;

	if (len == 0) {
		/* Nothing to inspect; utf8_str + len - 1 would point before
		 * the start of the string
		 */
		return utf8_str;
	}

	last_byte_p = utf8_str + len - 1;

	if ((*last_byte_p & ASCII_CHAR) == *last_byte_p) {
		/* Not part of a UTF8 sequence, return */
		return utf8_str;
	}

	/* Walk backwards to the starting byte, counting the bytes passed */
	while ((*last_byte_p & SEQUENCE_FIRST_MASK) != SEQUENCE_FIRST_MASK &&
	       last_byte_p > utf8_str) {
		last_byte_p--;
		bytes_truncated++;
	}
	bytes_truncated++; /* include the starting byte */

	/* Verify whether the last character actually needs to be truncated.
	 * Handles the case where the number of bytes in the last UTF8-char
	 * matches the number of bytes we searched for the starting byte.
	 * The longest prefix is tested first because 0xF0 also matches the
	 * 0xE0 and 0xC0 patterns.
	 */
	seq_start_byte = *last_byte_p;
	if ((seq_start_byte & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) {
		if (bytes_truncated == 4) {
			return utf8_str;
		}
	} else if ((seq_start_byte & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) {
		if (bytes_truncated == 3) {
			return utf8_str;
		}
	} else if ((seq_start_byte & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) {
		if (bytes_truncated == 2) {
			return utf8_str;
		}
	}

	/* Incomplete (or malformed) sequence: terminate at its starting byte */
	*last_byte_p = '\0';

	return utf8_str;
}
/**
 * Copy a UTF-8 string into a bounded buffer without splitting a character.
 *
 * At most n - 1 bytes of src are copied and dst is always NUL-terminated
 * when n is greater than 0. The copied string is then passed through
 * utf8_trunc() so that a multi-byte character cut off by the size limit is
 * dropped rather than left half-written. When n is 0 nothing is written.
 *
 * @param dst Destination buffer, at least n bytes large.
 * @param src NUL-terminated source string.
 * @param n   Size of dst in bytes, including room for the terminator.
 *
 * @return dst
 */
char *utf8_lcpy(char *dst, const char *src, size_t n)
{
	if (n == 0) {
		return dst;
	}

	strncpy(dst, src, n - 1);
	dst[n - 1] = '\0';

	/* Skip the truncation pass when the result is empty: there is no
	 * last byte to examine. This covers both n == 1 and an empty src.
	 */
	if (dst[0] != '\0') {
		utf8_trunc(dst);
	}

	return dst;
}