From 04f7f3485f8823773a7154dd3f5f71cd9fcbf154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ekl=C3=B6f?= Date: Thu, 17 Jan 2019 20:44:51 +0100 Subject: [PATCH] particle/string: don't cut in the middle of a UTF-8 multibyte When limiting a string (due to it exceeding its max length), make sure not to cut it in the middle of a UTF-8 multibyte, as this results in an invalid UTF-8 string. --- particles/string.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/particles/string.c b/particles/string.c index 8901d72..51ab520 100644 --- a/particles/string.c +++ b/particles/string.c @@ -108,13 +108,25 @@ instantiate(const struct particle *particle, const struct tag_set *tags) memset(&e->extents, 0, sizeof(e->extents)); if (p->max_len > 0) { - size_t len = strlen(e->text); + const size_t len = strlen(e->text); if (len > p->max_len) { - if (p->max_len >= 3) { - for (size_t i = 0; i < 3; i++) - e->text[p->max_len - 3 + i] = '.'; + + size_t end = p->max_len; + if (end >= 3) { + /* "allocate" room for three dots at the end */ + end -= 3; } - e->text[p->max_len] = '\0'; + + /* Mucho importante - don't cut in the middle of a utf8 multibyte */ + while (end > 0 && e->text[end - 1] >> 7) + end--; + + if (p->max_len > 3) { + for (size_t i = 0; i < 3; i++) + e->text[end + i] = '.'; + e->text[end + 3] = '\0'; + } else + e->text[end] = '\0'; } }