summaryrefslogtreecommitdiffstats
path: root/src/util.c
diff options
context:
space:
mode:
authorAki Niemi <aki.niemi@nokia.com>2009-09-07 14:50:03 +0300
committerAki Niemi <aki.niemi@nokia.com>2009-09-08 14:16:23 +0300
commit462b0b8249b519b02cf773e6a1bc2f6c1d38c111 (patch)
treef611f347f140ef89f6fa62542d7955c0b4ac4133 /src/util.c
parentc155a91313bc7499fddd857e894998a8bf38609c (diff)
downloadofono-462b0b8249b519b02cf773e6a1bc2f6c1d38c111.tar.bz2
Add support for national language variants
Add API for supporting character conversion using national language variants. Also, add conversion tables for Turkish, Spanish and Portuguese, and fix the default table. The lookup algorithms were tweaked to support multiple tables.
Diffstat (limited to 'src/util.c')
-rw-r--r--src/util.c608
1 files changed, 501 insertions, 107 deletions
diff --git a/src/util.c b/src/util.c
index 9136b64e..97362393 100644
--- a/src/util.c
+++ b/src/util.c
@@ -26,6 +26,7 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
+#include <stdlib.h>
#include <glib.h>
@@ -58,27 +59,237 @@
attached.
*/
+#define GUND 0xFFFF
+#define KNOWN_VARIANTS 4
+
+#define UTF8_LENGTH(c) \
+ ((c) < 0x80 ? 1 : \
+ ((c) < 0x800 ? 2 : 3))
+
+#define TABLE_SIZE(t) \
+ (sizeof((t)) / sizeof(struct codepoint))
+
+struct codepoint {
+ unsigned short from;
+ unsigned short to;
+};
+
+struct single_shift_table {
+ const struct codepoint *table;
+ unsigned int len;
+};
+
/* GSM to Unicode extension table, for GSM sequences starting with 0x1B */
-static unsigned short gsm_extension[] =
-{
- 0x0A, 0x000C, /* See NOTE 3 in 23.038 */
- 0x14, 0x005E,
- 0x1B, 0x0020, /* See NOTE 1 in 23.038 */
- 0x28, 0x007B,
- 0x29, 0x007D,
- 0x2F, 0x005C,
- 0x3C, 0x005B,
- 0x3D, 0x007E,
- 0x3E, 0x005D,
- 0x40, 0x007C,
- 0x65, 0x20AC
+static const struct codepoint default_ext_gsm[] =
+{
+ { 0x0A, 0x000C }, /* See NOTE 3 in 23.038 */
+ { 0x14, 0x005E },
+ { 0x1B, 0x0020 }, /* See NOTE 1 in 23.038 */
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x65, 0x20AC }
+};
+
+static const struct codepoint default_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x20AC, 0x1B65 }
+};
+
+/* Appendix A.2.1. in 3GPP TS23.038, V.8.2.0 */
+static const struct codepoint turkish_ext_gsm[] =
+{
+ { 0x0A, 0x000C }, /* See NOTE 3 */
+ { 0x14, 0x005E },
+ { 0x1B, 0x0020 }, /* See NOTE 1 */
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x47, 0x011E },
+ { 0x49, 0x0130 },
+ { 0x53, 0x015E },
+ { 0x63, 0x00E7 },
+ { 0x65, 0x20AC },
+ { 0x67, 0x011F },
+ { 0x69, 0x0131 },
+ { 0x73, 0x015F }
+};
+
+static const struct codepoint turkish_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x00E7, 0x1B63 },
+ { 0x011E, 0x1B47 },
+ { 0x011F, 0x1B67 },
+ { 0x0130, 0x1B49 },
+ { 0x0131, 0x1B69 },
+ { 0x015E, 0x1B53 },
+ { 0x015F, 0x1B73 },
+ { 0x20AC, 0x1B65 }
+};
+
+/* Appendix A.2.2. in 3GPP TS23.038 V.8.2.0 */
+static const struct codepoint spanish_ext_gsm[] =
+{
+ { 0x09, 0x00E7 },
+ { 0x0A, 0x000C }, /* See NOTE 3 */
+ { 0x14, 0x005E },
+ { 0x1B, 0x0020 }, /* See NOTE 1 */
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x41, 0x00C1 },
+ { 0x49, 0x00CD },
+ { 0x4F, 0x00D3 },
+ { 0x55, 0x00DA },
+ { 0x61, 0x00E1 },
+ { 0x65, 0x20AC },
+ { 0x69, 0x00ED },
+ { 0x6F, 0x00F3 },
+ { 0x75, 0x00FA }
+};
+
+static const struct codepoint spanish_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x00C1, 0x1B41 },
+ { 0x00CD, 0x1B49 },
+ { 0x00D3, 0x1B4F },
+ { 0x00DA, 0x1B55 },
+ { 0x00E1, 0x1B61 },
+ { 0x00E7, 0x1B09 },
+ { 0x00ED, 0x1B69 },
+ { 0x00F3, 0x1B6F },
+ { 0x00FA, 0x1B75 },
+ { 0x20AC, 0x1B65 }
+};
+
+/* Appendix A.2.3. in 3GPP TS23.038 V.8.2.0 */
+static const struct codepoint portuguese_ext_gsm[] =
+{
+ { 0x05, 0x00EA },
+ { 0x09, 0x00E7 },
+ { 0x0A, 0x000C }, /* See NOTE 3 */
+ { 0x0B, 0x00D4 },
+ { 0x0C, 0x00F4 },
+ { 0x0E, 0x00C1 },
+ { 0x0F, 0x00E1 },
+ { 0x12, 0x03A6 },
+ { 0x13, 0x0393 },
+ { 0x14, 0x005E },
+ { 0x15, 0x03A9 },
+ { 0x16, 0x03A0 },
+ { 0x17, 0x03A8 },
+ { 0x18, 0x03A3 },
+ { 0x19, 0x0398 },
+ { 0x1B, 0x0020 }, /* See NOTE 1 */
+ { 0x1F, 0x00CA },
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x41, 0x00C0 },
+ { 0x49, 0x00CD },
+ { 0x4F, 0x00D3 },
+ { 0x55, 0x00DA },
+ { 0x5B, 0x00C3 },
+ { 0x5C, 0x00D5 },
+ { 0x61, 0x00C2 },
+ { 0x65, 0x20AC },
+ { 0x69, 0x00ED },
+ { 0x6F, 0x00F3 },
+ { 0x75, 0x00FA },
+ { 0x7B, 0x00E3 },
+ { 0x7C, 0x00F5 },
+ { 0x7F, 0x00E2 }
+};
+
+static const struct codepoint portuguese_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x00C0, 0x1B41 },
+ { 0x00C1, 0x1B0E },
+ { 0x00C2, 0x1B61 },
+ { 0x00C3, 0x1B5B },
+ { 0x00CA, 0x1B1F },
+ { 0x00CD, 0x1B49 },
+ { 0x00D3, 0x1B4F },
+ { 0x00D4, 0x1B0B },
+ { 0x00D5, 0x1B5C },
+ { 0x00DA, 0x1B55 },
+ { 0x00E1, 0x1B0F },
+ { 0x00E2, 0x1B7F },
+ { 0x00E3, 0x1B7B },
+ { 0x00E7, 0x1B09 },
+ { 0x00EA, 0x1B05 },
+ { 0x00ED, 0x1B69 },
+ { 0x00F3, 0x1B6F },
+ { 0x00F4, 0x1B0C },
+ { 0x00F5, 0x1B7C },
+ { 0x00FA, 0x1B75 },
+ { 0x0393, 0x1B13 },
+ { 0x0398, 0x1B19 },
+ { 0x03A0, 0x1B16 },
+ { 0x03A3, 0x1B18 },
+ { 0x03A6, 0x1B12 },
+ { 0x03A8, 0x1B17 },
+ { 0x03A9, 0x1B15 },
+ { 0x20AC, 0x1B65 }
};
/* Used for conversion of GSM to Unicode */
-static unsigned short gsm_table[] =
+static const unsigned short default_gsm[] =
{
0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC, /* 0x07 */
- 0x00F2, 0x00E7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5, /* 0x0F */
+ 0x00F2, 0x00C7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5, /* 0x0F */
0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, /* 0x17 */
0x03A3, 0x0398, 0x039E, 0x00A0, 0x00C6, 0x00E6, 0x00DF, 0x00C9, /* 0x1F */
0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, /* 0x27 */
@@ -95,80 +306,252 @@ static unsigned short gsm_table[] =
0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0 /* 0x7F */
};
-#define GUND 0xFFFF
-
-/* 3GPP 27.005 Annex A */
-static unsigned short unicode_256_table[] =
-{
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x07 */
- GUND, GUND, 0x0A, GUND, 0x1B0A, 0x0D, GUND, GUND, /* 0x0F */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x17 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x1F */
- 0x20, 0x21, 0x22, 0x23, 0x02, 0x25, 0x26, 0x27, /* 0x27 */
- 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x2F */
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x37 */
- 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, /* 0x3F */
- 0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x47 */
- 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x4F */
- 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x57 */
- 0x58, 0x59, 0x5A, 0x1B3C, 0x1B2F, 0x1B3E, 0x1B14, 0x11, /* 0x5F */
- GUND, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x67 */
- 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x6F */
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x77 */
- 0x78, 0x79, 0x7A, 0x1B28, 0x1B40, 0x1B29, 0x1B3D, GUND, /* 0x7F */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x87 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x8F */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x97 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x9F */
- GUND, 0x40, GUND, 0x01, 0x24, 0x03, GUND, 0x5f, /* 0xA7 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0xAF */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0xB7 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, 0x60, /* 0xBF */
- 0x41, 0x41, 0x41, 0x41, 0x5B, 0x0E, 0x1C, 0x09, /* 0xC7 */
- 0x45, 0x1F, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49, /* 0xCF */
- GUND, 0x5D, 0x4F, 0x4F, 0x4F, 0x4F, 0x5C, GUND, /* 0xD7 */
- 0x0B, 0x55, 0x55, 0x55, 0x5E, 0x59, GUND, 0x1E, /* 0xDF */
- 0x7F, 0x61, 0x61, 0x61, 0x7B, 0x0F, 0x1D, 0x09, /* 0xE7 */
- 0x04, 0x05, 0x65, 0x65, 0x07, 0x69, 0x69, 0x69, /* 0xEF */
- GUND, 0x7D, 0x08, 0x6F, 0x6F, 0x6F, 0x7C, GUND, /* 0xF7 */
- 0x0C, 0x06, 0x75, 0x75, 0x7E, 0x79, GUND, 0x79 /* 0xFF */
+static const struct codepoint default_unicode[] =
+{
+ { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+ { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+ { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+ { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+ { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+ { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+ { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+ { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+ { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+ { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+ { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+ { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
+ { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
+ { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
+ { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
+ { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 },
+ { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 },
+ { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A },
+ { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E },
+ { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 },
+ { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 },
+ { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A },
+ { 0x00A0, 0x20 }, { 0x00A1, 0x40 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 },
+ { 0x00A5, 0x03 }, { 0x00A7, 0x5F }, { 0x00BF, 0x60 }, { 0x00C4, 0x5B },
+ { 0x00C5, 0x0E }, { 0x00C6, 0x1C }, { 0x00C7, 0x09 }, { 0x00C9, 0x1F },
+ { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00D8, 0x0B }, { 0x00DC, 0x5E },
+ { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
+ { 0x00E6, 0x1D }, { 0x00E8, 0x04 }, { 0x00E9, 0x05 }, { 0x00EC, 0x07 },
+ { 0x00F1, 0x7D }, { 0x00F2, 0x08 }, { 0x00F6, 0x7C }, { 0x00F8, 0x0C },
+ { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x0393, 0x13 }, { 0x0394, 0x10 },
+ { 0x0398, 0x19 }, { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 },
+ { 0x03A3, 0x18 }, { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }
};
-/* Starts at 0x0390 */
-static unsigned short greek_unicode_offset = 0x0390;
+/* Appendix A.3.1 in 3GPP TS23.038 */
+static const unsigned short turkish_gsm[] =
+{
+ 0x0040, 0x00A3, 0x0024, 0x00A5, 0x20AC, 0x00E9, 0x00F9, 0x0131, /* 0x07 */
+ 0x00F2, 0x00C7, 0x000A, 0x011E, 0x011F, 0x000D, 0x00C5, 0x00E5, /* 0x0F */
+ 0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, /* 0x17 */
+ 0x03A3, 0x0398, 0x039E, 0x00A0, 0x015E, 0x015F, 0x00DF, 0x00C9, /* 0x1F */
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, /* 0x27 */
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x2F */
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x37 */
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x3F */
+ 0x0130, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x47 */
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x4F */
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x57 */
+ 0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7, /* 0x5F */
+ 0x00E7, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x67 */
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x6F */
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x77 */
+ 0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0 /* 0x7F */
+};
-static unsigned short greek_unicode_table[] =
+static const struct codepoint turkish_unicode[] =
{
- GUND, GUND, GUND, 0x13, 0x10, GUND, GUND, GUND, /* 0x07 */
- 0x19, GUND, GUND, 0x14, GUND, GUND, 0x1A, GUND, /* 0x0F */
- 0x16, GUND, GUND, 0x18, GUND, GUND, 0x12, GUND, /* 0x17 */
- 0x17, 0x15, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x1F */
+ { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+ { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+ { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+ { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+ { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+ { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+ { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+ { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+ { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+ { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+ { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+ { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
+ { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
+ { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
+ { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
+ { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 },
+ { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 },
+ { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A },
+ { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E },
+ { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 },
+ { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 },
+ { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A },
+ { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 }, { 0x00A5, 0x03 },
+ { 0x00A7, 0x5F }, { 0x00C4, 0x5B }, { 0x00C5, 0x0E }, { 0x00C7, 0x09 },
+ { 0x00C9, 0x1F }, { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00DC, 0x5E },
+ { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
+ { 0x00E7, 0x60 }, { 0x00E9, 0x05 }, { 0x00F1, 0x7D }, { 0x00F2, 0x08 },
+ { 0x00F6, 0x7C }, { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x011E, 0x0B },
+ { 0x011F, 0x0C }, { 0x0130, 0x40 }, { 0x0131, 0x07 }, { 0x015E, 0x1C },
+ { 0x015F, 0x1D }, { 0x0393, 0x13 }, { 0x0394, 0x10 }, { 0x0398, 0x19 },
+ { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 }, { 0x03A3, 0x18 },
+ { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }, { 0x20AC, 0x04 }
};
-#define UTF8_LENGTH(c) \
- ((c) < 0x80 ? 1 : \
- ((c) < 0x800 ? 2 : 3))
+/*
+ * Portuguese national language locking shift table, GSM to Unicode,
+ * indexed by the 7-bit GSM value (Appendix A.3.2 in 3GPP TS23.038).
+ * portuguese_unicode holds the reverse mapping and must be kept in sync
+ * with this table.
+ */
+static const unsigned short portuguese_gsm[] =
+{
+ 0x0040, 0x00A3, 0x0024, 0x00A5, 0x00EA, 0x00E9, 0x00FA, 0x00ED, /* 0x07 */
+ 0x00F3, 0x00E7, 0x000A, 0x00D4, 0x00F4, 0x000D, 0x00C1, 0x00E1, /* 0x0F */
+ 0x0394, 0x005F, 0x00AA, 0x00C7, 0x00C0, 0x221E, 0x005E, 0x005C, /* 0x17 */
+ 0x20AC, 0x00D3, 0x007C, 0x00A0, 0x00C2, 0x00E2, 0x00CA, 0x00C9, /* 0x1F */
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x00BA, 0x0025, 0x0026, 0x0027, /* 0x27 */
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x2F */
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x37 */
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x3F */
+ 0x00CD, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x47 */
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x4F */
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x57 */
+ 0x0058, 0x0059, 0x005A, 0x00C3, 0x00D5, 0x00DA, 0x00DC, 0x00A7, /* 0x5F */
+ 0x007E, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x67 */
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x6F */
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x77 */
+ 0x0078, 0x0079, 0x007A, 0x00E3, 0x00F5, 0x0060, 0x00FC, 0x00E0 /* 0x7F */
+};
-static unsigned short gsm_extension_table_lookup(unsigned char k)
+static const struct codepoint portuguese_unicode[] =
{
- static unsigned int ext_table_len =
- (sizeof(gsm_extension) / sizeof(unsigned short)) >> 1;
- unsigned int i;
- unsigned short *t;
+ { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+ { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+ { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+ { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+ { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+ { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+ { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+ { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+ { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+ { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+ { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+ { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
+ { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
+ { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
+ { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
+ { 0x005A, 0x5A }, { 0x005C, 0x17 }, { 0x005E, 0x16 }, { 0x005F, 0x11 },
+ { 0x0060, 0x7D }, { 0x0061, 0x61 }, { 0x0062, 0x62 }, { 0x0063, 0x63 },
+ { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 }, { 0x0067, 0x67 },
+ { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A }, { 0x006B, 0x6B },
+ { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E }, { 0x006F, 0x6F },
+ { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 }, { 0x0073, 0x73 },
+ { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, { 0x0077, 0x77 },
+ { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, { 0x007C, 0x1A },
+ { 0x007E, 0x60 }, { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A5, 0x03 },
+ { 0x00A7, 0x5F }, { 0x00AA, 0x12 }, { 0x00BA, 0x24 }, { 0x00C0, 0x14 },
+ { 0x00C1, 0x0E }, { 0x00C2, 0x1C }, { 0x00C3, 0x5B }, { 0x00C7, 0x13 },
+ { 0x00C9, 0x1F }, { 0x00CA, 0x1E }, { 0x00CD, 0x40 }, { 0x00D3, 0x19 },
+ { 0x00D4, 0x0B }, { 0x00D5, 0x5C }, { 0x00DA, 0x5D }, { 0x00DC, 0x5E },
+ { 0x00E0, 0x7F }, { 0x00E1, 0x0F }, { 0x00E2, 0x1D }, { 0x00E3, 0x7B },
+ { 0x00E7, 0x09 }, { 0x00E9, 0x05 }, { 0x00EA, 0x04 }, { 0x00ED, 0x07 },
+ { 0x00F3, 0x08 }, { 0x00F4, 0x0C }, { 0x00F5, 0x7C }, { 0x00FA, 0x06 },
+ { 0x00FC, 0x7E }, { 0x0394, 0x10 }, { 0x20AC, 0x18 }, { 0x221E, 0x15 }
+};
- for (i = 0, t = gsm_extension; i < ext_table_len; i++) {
- if (t[0] == k)
- return t[1];
- t += 2;
- }
+static const struct single_shift_table gsm_single_shift[] =
+{
+ { default_ext_gsm, TABLE_SIZE(default_ext_gsm) },
+ { turkish_ext_gsm, TABLE_SIZE(turkish_ext_gsm) },
+ { spanish_ext_gsm, TABLE_SIZE(spanish_ext_gsm) },
+ { portuguese_ext_gsm, TABLE_SIZE(portuguese_ext_gsm) }
+};
+
+static const struct single_shift_table unicode_single_shift[] =
+{
+ { default_ext_unicode, TABLE_SIZE(default_ext_unicode) },
+ { turkish_ext_unicode, TABLE_SIZE(turkish_ext_unicode) },
+ { spanish_ext_unicode, TABLE_SIZE(spanish_ext_unicode) },
+ { portuguese_ext_unicode, TABLE_SIZE(portuguese_ext_unicode) }
+};
+
+static const unsigned short *gsm_locking_shift[] =
+{
+ default_gsm,
+ turkish_gsm,
+ default_gsm,
+ portuguese_gsm
+};
+
+static const struct codepoint *unicode_locking_shift[] =
+{
+ default_unicode,
+ turkish_unicode,
+ default_unicode,
+ portuguese_unicode
+};
+
+/*
+ * Comparator handed to bsearch(): orders two struct codepoint entries by
+ * their 'from' member. Returns -1, 0 or 1.
+ */
+static int compare_codepoints(const void *a, const void *b)
+{
+	const struct codepoint *lhs = a;
+	const struct codepoint *rhs = b;
+
+	if (lhs->from < rhs->from)
+		return -1;
+
+	if (lhs->from > rhs->from)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Binary-searches a codepoint table for the entry whose 'from' member
+ * equals key->from. The table must be sorted in ascending order of
+ * 'from', which is what bsearch() with compare_codepoints() requires.
+ *
+ * key:   entry carrying the value to look for in its 'from' member
+ * table: sorted table to search
+ * len:   number of entries in table
+ *
+ * Returns the 'to' member of the matching entry, or GUND if the table
+ * has no such entry.
+ */
+static unsigned short codepoint_lookup(const struct codepoint *key,
+					const struct codepoint *table,
+					unsigned int len)
+{
+	/* The tables are const, so the match must not be writable either */
+	const struct codepoint *result;
+
+	result = bsearch(key, table, len, sizeof(*table),
+				compare_codepoints);
+
+	return result ? result->to : GUND;
+}
+
+/*
+ * Converts GSM character k to Unicode using the locking shift table
+ * selected by lang.
+ *
+ * Returns the Unicode value, or GUND if k is not a 7-bit GSM value.
+ */
+static unsigned short gsm_locking_shift_lookup(unsigned char k,
+						unsigned char lang)
+{
+	/*
+	 * If language is not defined in 3GPP TS 23.038,
+	 * implementations are instructed to ignore it.
+	 */
+	unsigned char variant = lang < KNOWN_VARIANTS ? lang : 0;
+
+	/* The tables hold 128 entries; never index past their end */
+	if (k > 0x7F)
+		return GUND;
+
+	return gsm_locking_shift[variant][k];
+}
+
+/*
+ * Converts GSM character k, the byte that follows a 0x1B escape, to
+ * Unicode using the single shift table selected by lang. Language
+ * identifiers that are not below KNOWN_VARIANTS fall back to table 0.
+ *
+ * Returns the Unicode value, or GUND if the table has no entry for k.
+ */
+static unsigned short gsm_single_shift_lookup(unsigned char k,
+						unsigned char lang)
+{
+	const struct single_shift_table *shift;
+	struct codepoint key;
+
+	if (lang < KNOWN_VARIANTS)
+		shift = &gsm_single_shift[lang];
+	else
+		shift = &gsm_single_shift[0];
+
+	key.from = k;
+	key.to = 0;
+
+	return codepoint_lookup(&key, shift->table, shift->len);
+}
+
+/*
+ * Converts Unicode character k to GSM using the reverse locking shift
+ * table selected by lang. Language identifiers that are not below
+ * KNOWN_VARIANTS fall back to table 0.
+ *
+ * Returns the GSM value, or GUND if the table has no entry for k.
+ */
+static unsigned short unicode_locking_shift_lookup(unsigned short k,
+							unsigned char lang)
+{
+	const struct codepoint *table;
+	struct codepoint key;
+
+	table = unicode_locking_shift[lang < KNOWN_VARIANTS ? lang : 0];
+
+	key.from = k;
+	key.to = 0;
+
- return 0;
+	/* Each of the reverse locking shift tables holds 128 entries */
+	return codepoint_lookup(&key, table, 128);
+}
+
+/*
+ * Converts Unicode character k to GSM using the reverse single shift
+ * table selected by lang. Language identifiers that are not below
+ * KNOWN_VARIANTS fall back to table 0.
+ *
+ * Returns the table value, which carries the 0x1B escape in its high
+ * byte and the extension character in its low byte, or GUND if the
+ * table has no entry for k.
+ */
+static unsigned short unicode_single_shift_lookup(unsigned short k,
+							unsigned char lang)
+{
+	const struct single_shift_table *shift;
+	struct codepoint key;
+
+	if (lang < KNOWN_VARIANTS)
+		shift = &unicode_single_shift[lang];
+	else
+		shift = &unicode_single_shift[0];
+
+	key.from = k;
+	key.to = 0;
+
+	return codepoint_lookup(&key, shift->table, shift->len);
}
/*!
- * Converts text coded using GSM codec into UTF8 encoded text. If len
- * is less than 0, and terminator character is given, the length is
- * computed automatically.
+ * Converts text coded using GSM codec into UTF8 encoded text, using
+ * the given language identifiers for single shift and locking shift
+ * tables. If len is less than 0, and terminator character is given,
+ * the length is computed automatically.
*
* Returns newly-allocated UTF8 encoded string or NULL if the conversion
* could not be performed. Returns the number of bytes read from the
@@ -177,9 +560,11 @@ static unsigned short gsm_extension_table_lookup(unsigned char k)
* encoded string in items_written (if not NULL) not including the terminal
* '\0' character. The caller is responsible for freeing the returned value.
*/
-char *convert_gsm_to_utf8(const unsigned char *text, long len,
- long *items_read, long *items_written,
- unsigned char terminator)
+char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len,
+ long *items_read, long *items_written,
+ unsigned char terminator,
+ unsigned char locking_lang,
+ unsigned char single_lang)
{
char *res = NULL;
char *out;
@@ -209,12 +594,12 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len,
if (i >= len)
goto err_out;
- c = gsm_extension_table_lookup(text[i]);
+ c = gsm_single_shift_lookup(text[i], single_lang);
- if (c == 0)
+ if (c == GUND)
goto err_out;
} else {
- c = gsm_table[text[i]];
+ c = gsm_locking_shift_lookup(text[i], locking_lang);
}
res_length += UTF8_LENGTH(c);
@@ -232,9 +617,9 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len,
unsigned short c;
if (text[i] == 0x1b)
- c = gsm_extension_table_lookup(text[++i]);
+ c = gsm_single_shift_lookup(text[++i], single_lang);
else
- c = gsm_table[text[i]];
+ c = gsm_locking_shift_lookup(text[i], locking_lang);
out += g_unichar_to_utf8(c, out);
@@ -253,22 +638,13 @@ err_out:
return res;
}
-static unsigned short unicode_to_gsm(unsigned short c)
+char *convert_gsm_to_utf8(const unsigned char *text, long len,
+ long *items_read, long *items_written,
+ unsigned char terminator)
{
- static int greek_unicode_size = sizeof(greek_unicode_table) /
- sizeof(unsigned short);
- unsigned short converted = GUND;
-
- if (c == 0x20AC)
- converted = 0x1B65;
- else if (c < 256)
- converted = unicode_256_table[c];
- else if ((c >= greek_unicode_offset) &&
- (c < (greek_unicode_offset + greek_unicode_size))) {
- converted = greek_unicode_table[c-greek_unicode_offset];
- }
-
- return converted;
+ return convert_gsm_to_utf8_with_lang(text, len, items_read,
+ items_written,
+ terminator, 0, 0);
}
/*!
@@ -281,9 +657,11 @@ static unsigned short unicode_to_gsm(unsigned short c)
* the actual number of bytes read. If items_written is not NULL, contains
* the number of bytes written.
*/
-unsigned char *convert_utf8_to_gsm(const char *text, long len,
+unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len,
long *items_read, long *items_written,
- unsigned char terminator)
+ unsigned char terminator,
+ unsigned char locking_lang,
+ unsigned char single_lang)
{
long nchars = 0;
const char *in;
@@ -306,7 +684,10 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len,
if (c > 0xffff)
goto err_out;
- converted = unicode_to_gsm(c);
+ converted = unicode_locking_shift_lookup(c, locking_lang);
+
+ if (converted == GUND)
+ converted = unicode_single_shift_lookup(c, single_lang);
if (converted == GUND)
goto err_out;
@@ -332,7 +713,11 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len,
gunichar c = g_utf8_get_char(in);
- converted = unicode_to_gsm(c);
+ converted = unicode_locking_shift_lookup(c, locking_lang);
+
+ if (converted == GUND)
+ converted = unicode_single_shift_lookup(c, single_lang);
+
if (converted & 0x1b00) {
*out = 0x1b;
++out;
@@ -357,6 +742,15 @@ err_out:
return res;
}
+/*
+ * Converts UTF8 text to GSM using table 0, the default alphabet, for both
+ * the locking shift and the single shift lookups. The arguments and the
+ * return value are those of convert_utf8_to_gsm_with_lang().
+ */
+unsigned char *convert_utf8_to_gsm(const char *text, long len,
+					long *items_read, long *items_written,
+					unsigned char terminator)
+{
+	const unsigned char default_lang = 0;
+
+	return convert_utf8_to_gsm_with_lang(text, len, items_read,
+						items_written, terminator,
+						default_lang, default_lang);
+}
+
/*!
* Decodes the hex encoded data and converts to a byte array. If terminator
* is not 0, the terminator character is appended to the end of the result.
@@ -779,14 +1173,14 @@ char *sim_string_to_utf8(const unsigned char *buffer, int length)
if (i >= length)
return NULL;
- c = gsm_extension_table_lookup(buffer[i++]);
+ c = gsm_single_shift_lookup(buffer[i++], 0);
- if (c == 0)
+ /* gsm_single_shift_lookup() reports a missing entry as GUND, not 0 */
+ if (c == GUND)
return NULL;
j += 2;
} else {
- c = gsm_table[buffer[i++]];
+ c = gsm_locking_shift_lookup(buffer[i++], 0);
j += 1;
}
@@ -816,9 +1210,9 @@ char *sim_string_to_utf8(const unsigned char *buffer, int length)
c = (buffer[i++] & 0x7f) + ucs2_offset;
else if (buffer[i] == 0x1b) {
++i;
- c = gsm_extension_table_lookup(buffer[i++]);
+ c = gsm_single_shift_lookup(buffer[i++], 0);
} else
- c = gsm_table[buffer[i++]];
+ c = gsm_locking_shift_lookup(buffer[i++], 0);
out += g_unichar_to_utf8(c, out);
}