summary refs log tree commit diff stats
path: root/fs/udf/unicode.c
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2018-04-12 17:06:18 +0200
committer Jan Kara <jack@suse.cz> 2018-04-19 16:00:48 +0200
commit b8a41c44a4ed8bad89b91584a7c7e4610c4b8c88 (patch)
tree b8c5544efa9d2df87c5555b3716a5df197e45fee /fs/udf/unicode.c
parent b8333ea1adf08c5ccc3790403f0d1053e1b49f62 (diff)
download linux-b8a41c44a4ed8bad89b91584a7c7e4610c4b8c88.tar.bz2
udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
Instead of implementing our own functions converting to and from UTF-8, use the ones provided by NLS.

Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/udf/unicode.c')
-rw-r--r-- fs/udf/unicode.c 80
1 files changed, 17 insertions, 63 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 16a8ad21b77e..18df831afd3d 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,7 @@
#include "udf_sb.h"
+#define UNICODE_MAX 0x10ffff
#define SURROGATE_MASK 0xfffff800
#define SURROGATE_PAIR 0x0000d800
@@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
if (boundlen <= 0)
return -ENAMETOOLONG;
- if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
- return -EINVAL;
-
- if (uni < 0x80) {
- out[u_len++] = (unsigned char)uni;
- } else if (uni < 0x800) {
- if (boundlen < 2)
- return -ENAMETOOLONG;
- out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
- out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
- } else {
- if (boundlen < 3)
- return -ENAMETOOLONG;
- out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
- out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
- out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
+ u_len = utf32_to_utf8(uni, out, boundlen);
+ if (u_len < 0) {
+ if (uni > UNICODE_MAX ||
+ (uni & SURROGATE_MASK) == SURROGATE_PAIR)
+ return -EINVAL;
+ return -ENAMETOOLONG;
}
return u_len;
}
@@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
int boundlen,
wchar_t *uni)
{
- unsigned int utf_char;
- unsigned char c;
- int utf_cnt, u_len;
-
- utf_char = 0;
- utf_cnt = 0;
- for (u_len = 0; u_len < boundlen;) {
- c = in[u_len++];
-
- /* Complete a multi-byte UTF-8 character */
- if (utf_cnt) {
- utf_char = (utf_char << 6) | (c & 0x3f);
- if (--utf_cnt)
- continue;
- } else {
- /* Check for a multi-byte UTF-8 character */
- if (c & 0x80) {
- /* Start a multi-byte UTF-8 character */
- if ((c & 0xe0) == 0xc0) {
- utf_char = c & 0x1f;
- utf_cnt = 1;
- } else if ((c & 0xf0) == 0xe0) {
- utf_char = c & 0x0f;
- utf_cnt = 2;
- } else if ((c & 0xf8) == 0xf0) {
- utf_char = c & 0x07;
- utf_cnt = 3;
- } else if ((c & 0xfc) == 0xf8) {
- utf_char = c & 0x03;
- utf_cnt = 4;
- } else if ((c & 0xfe) == 0xfc) {
- utf_char = c & 0x01;
- utf_cnt = 5;
- } else {
- utf_cnt = -1;
- break;
- }
- continue;
- } else {
- /* Single byte UTF-8 character (most common) */
- utf_char = c;
- }
- }
- *uni = utf_char;
- break;
- }
- if (utf_cnt) {
+ int u_len;
+ unicode_t c;
+
+ u_len = utf8_to_utf32(in, boundlen, &c);
+ if (u_len < 0) {
*uni = '?';
return -EINVAL;
}
+
+ if (c > MAX_WCHAR_T)
+ *uni = '?';
+ else
+ *uni = c;
return u_len;
}