summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/util.c608
-rw-r--r--src/util.h11
2 files changed, 512 insertions, 107 deletions
diff --git a/src/util.c b/src/util.c
index 9136b64e..97362393 100644
--- a/src/util.c
+++ b/src/util.c
@@ -26,6 +26,7 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
+#include <stdlib.h>
#include <glib.h>
@@ -58,27 +59,237 @@
attached.
*/
+#define GUND 0xFFFF /* Sentinel returned by the lookup helpers when a character has no mapping */
+#define KNOWN_VARIANTS 4 /* Number of entries in each per-language table array; larger language ids fall back to index 0 */
+/*
+ * Number of bytes UTF-8 needs for code point c, for c below 0x10000:
+ * 1 byte below 0x80, 2 bytes below 0x800, 3 bytes otherwise.
+ */
+#define UTF8_LENGTH(c) \
+ ((c) < 0x80 ? 1 : \
+ ((c) < 0x800 ? 2 : 3))
+/*
+ * Element count of an array of struct codepoint. t must be the array
+ * itself, not a pointer to it, because the count is derived from sizeof.
+ */
+#define TABLE_SIZE(t) \
+ (sizeof((t)) / sizeof(struct codepoint))
+/* One entry of a translation table: maps the value "from" to the value "to". */
+struct codepoint {
+ unsigned short from; /* lookup key; compare_codepoints() orders entries by this field */
+ unsigned short to; /* translated value returned by codepoint_lookup() */
+};
+/* A single shift (extension) translation table together with its length. */
+struct single_shift_table {
+ const struct codepoint *table; /* first entry of the table */
+ unsigned int len; /* number of entries in table */
+};
+
/*
 * GSM to Unicode extension table, for GSM sequences starting with 0x1B.
 * Entries must stay sorted by their first field: the table is searched
 * with bsearch().
 */
-static unsigned short gsm_extension[] =
-{
- 0x0A, 0x000C, /* See NOTE 3 in 23.038 */
- 0x14, 0x005E,
- 0x1B, 0x0020, /* See NOTE 1 in 23.038 */
- 0x28, 0x007B,
- 0x29, 0x007D,
- 0x2F, 0x005C,
- 0x3C, 0x005B,
- 0x3D, 0x007E,
- 0x3E, 0x005D,
- 0x40, 0x007C,
- 0x65, 0x20AC
+static const struct codepoint default_ext_gsm[] =
+{
+ { 0x0A, 0x000C }, /* See NOTE 3 in 23.038 */
+ { 0x14, 0x005E },
+ { 0x1B, 0x0020 }, /* See NOTE 1 in 23.038 */
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x65, 0x20AC }
+};
+/*
+ * Unicode to GSM counterpart of the default extension table. Each result
+ * is 0x1B00 combined with the extension code, i.e. the escape octet 0x1B
+ * followed by the code. Entries must stay sorted by Unicode value: the
+ * table is searched with bsearch().
+ */
+static const struct codepoint default_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x20AC, 0x1B65 }
+};
+
+/*
+ * Turkish single shift (extension) table, GSM to Unicode.
+ * Appendix A.2.1. in 3GPP TS23.038, V.8.2.0
+ * Entries must stay sorted by GSM code: the table is searched with
+ * bsearch().
+ */
+static const struct codepoint turkish_ext_gsm[] =
+{
+ { 0x0A, 0x000C }, /* See NOTE 3 */
+ { 0x14, 0x005E },
+ { 0x1B, 0x0020 }, /* See NOTE 1 */
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x47, 0x011E },
+ { 0x49, 0x0130 },
+ { 0x53, 0x015E },
+ { 0x63, 0x00E7 },
+ { 0x65, 0x20AC },
+ { 0x67, 0x011F },
+ { 0x69, 0x0131 },
+ { 0x73, 0x015F }
+};
+/*
+ * Unicode to GSM counterpart of the Turkish single shift table. Results
+ * carry the 0x1B escape in the high byte. Entries must stay sorted by
+ * Unicode value: the table is searched with bsearch().
+ */
+static const struct codepoint turkish_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x00E7, 0x1B63 },
+ { 0x011E, 0x1B47 },
+ { 0x011F, 0x1B67 },
+ { 0x0130, 0x1B49 },
+ { 0x0131, 0x1B69 },
+ { 0x015E, 0x1B53 },
+ { 0x015F, 0x1B73 },
+ { 0x20AC, 0x1B65 }
+};
+
+/*
+ * Spanish single shift (extension) table, GSM to Unicode.
+ * Appendix A.2.2. in 3GPP TS23.038 V.8.2.0
+ * Entries must stay sorted by GSM code: the table is searched with
+ * bsearch().
+ */
+static const struct codepoint spanish_ext_gsm[] =
+{
+ { 0x09, 0x00E7 },
+ { 0x0A, 0x000C }, /* See NOTE 3 */
+ { 0x14, 0x005E },
+ { 0x1B, 0x0020 }, /* See NOTE 1 */
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x41, 0x00C1 },
+ { 0x49, 0x00CD },
+ { 0x4F, 0x00D3 },
+ { 0x55, 0x00DA },
+ { 0x61, 0x00E1 },
+ { 0x65, 0x20AC },
+ { 0x69, 0x00ED },
+ { 0x6F, 0x00F3 },
+ { 0x75, 0x00FA }
+};
+/*
+ * Unicode to GSM counterpart of the Spanish single shift table. Results
+ * carry the 0x1B escape in the high byte. Entries must stay sorted by
+ * Unicode value: the table is searched with bsearch().
+ */
+static const struct codepoint spanish_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x00C1, 0x1B41 },
+ { 0x00CD, 0x1B49 },
+ { 0x00D3, 0x1B4F },
+ { 0x00DA, 0x1B55 },
+ { 0x00E1, 0x1B61 },
+ { 0x00E7, 0x1B09 },
+ { 0x00ED, 0x1B69 },
+ { 0x00F3, 0x1B6F },
+ { 0x00FA, 0x1B75 },
+ { 0x20AC, 0x1B65 }
+};
+
+/*
+ * Portuguese single shift (extension) table, GSM to Unicode.
+ * Appendix A.2.3. in 3GPP TS23.038 V.8.2.0
+ * Entries must stay sorted by GSM code: the table is searched with
+ * bsearch().
+ */
+static const struct codepoint portuguese_ext_gsm[] =
+{
+ { 0x05, 0x00EA },
+ { 0x09, 0x00E7 },
+ { 0x0A, 0x000C }, /* See NOTE 3 */
+ { 0x0B, 0x00D4 },
+ { 0x0C, 0x00F4 },
+ { 0x0E, 0x00C1 },
+ { 0x0F, 0x00E1 },
+ { 0x12, 0x03A6 },
+ { 0x13, 0x0393 },
+ { 0x14, 0x005E },
+ { 0x15, 0x03A9 },
+ { 0x16, 0x03A0 },
+ { 0x17, 0x03A8 },
+ { 0x18, 0x03A3 },
+ { 0x19, 0x0398 },
+ { 0x1B, 0x0020 }, /* See NOTE 1 */
+ { 0x1F, 0x00CA },
+ { 0x28, 0x007B },
+ { 0x29, 0x007D },
+ { 0x2F, 0x005C },
+ { 0x3C, 0x005B },
+ { 0x3D, 0x007E },
+ { 0x3E, 0x005D },
+ { 0x40, 0x007C },
+ { 0x41, 0x00C0 },
+ { 0x49, 0x00CD },
+ { 0x4F, 0x00D3 },
+ { 0x55, 0x00DA },
+ { 0x5B, 0x00C3 },
+ { 0x5C, 0x00D5 },
+ { 0x61, 0x00C2 },
+ { 0x65, 0x20AC },
+ { 0x69, 0x00ED },
+ { 0x6F, 0x00F3 },
+ { 0x75, 0x00FA },
+ { 0x7B, 0x00E3 },
+ { 0x7C, 0x00F5 },
+ { 0x7F, 0x00E2 }
+};
+/*
+ * Unicode to GSM counterpart of the Portuguese single shift table. Results
+ * carry the 0x1B escape in the high byte. Entries must stay sorted by
+ * Unicode value: the table is searched with bsearch().
+ */
+static const struct codepoint portuguese_ext_unicode[] =
+{
+ { 0x000C, 0x1B0A },
+ { 0x005B, 0x1B3C },
+ { 0x005C, 0x1B2F },
+ { 0x005D, 0x1B3E },
+ { 0x005E, 0x1B14 },
+ { 0x007B, 0x1B28 },
+ { 0x007C, 0x1B40 },
+ { 0x007D, 0x1B29 },
+ { 0x007E, 0x1B3D },
+ { 0x00C0, 0x1B41 },
+ { 0x00C1, 0x1B0E },
+ { 0x00C2, 0x1B61 },
+ { 0x00C3, 0x1B5B },
+ { 0x00CA, 0x1B1F },
+ { 0x00CD, 0x1B49 },
+ { 0x00D3, 0x1B4F },
+ { 0x00D4, 0x1B0B },
+ { 0x00D5, 0x1B5C },
+ { 0x00DA, 0x1B55 },
+ { 0x00E1, 0x1B0F },
+ { 0x00E2, 0x1B7F },
+ { 0x00E3, 0x1B7B },
+ { 0x00E7, 0x1B09 },
+ { 0x00EA, 0x1B05 },
+ { 0x00ED, 0x1B69 },
+ { 0x00F3, 0x1B6F },
+ { 0x00F4, 0x1B0C },
+ { 0x00F5, 0x1B7C },
+ { 0x00FA, 0x1B75 },
+ { 0x0393, 0x1B13 },
+ { 0x0398, 0x1B19 },
+ { 0x03A0, 0x1B16 },
+ { 0x03A3, 0x1B18 },
+ { 0x03A6, 0x1B12 },
+ { 0x03A8, 0x1B17 },
+ { 0x03A9, 0x1B15 },
+ { 0x20AC, 0x1B65 }
};
/* Used for conversion of GSM to Unicode */
-static unsigned short gsm_table[] =
+static const unsigned short default_gsm[] =
{
0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC, /* 0x07 */
- 0x00F2, 0x00E7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5, /* 0x0F */
+ 0x00F2, 0x00C7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5, /* 0x0F */
0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, /* 0x17 */
0x03A3, 0x0398, 0x039E, 0x00A0, 0x00C6, 0x00E6, 0x00DF, 0x00C9, /* 0x1F */
0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, /* 0x27 */
@@ -95,80 +306,252 @@ static unsigned short gsm_table[] =
0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0 /* 0x7F */
};
-#define GUND 0xFFFF
-
-/* 3GPP 27.005 Annex A */
-static unsigned short unicode_256_table[] =
-{
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x07 */
- GUND, GUND, 0x0A, GUND, 0x1B0A, 0x0D, GUND, GUND, /* 0x0F */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x17 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x1F */
- 0x20, 0x21, 0x22, 0x23, 0x02, 0x25, 0x26, 0x27, /* 0x27 */
- 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x2F */
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x37 */
- 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, /* 0x3F */
- 0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x47 */
- 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x4F */
- 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x57 */
- 0x58, 0x59, 0x5A, 0x1B3C, 0x1B2F, 0x1B3E, 0x1B14, 0x11, /* 0x5F */
- GUND, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x67 */
- 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x6F */
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x77 */
- 0x78, 0x79, 0x7A, 0x1B28, 0x1B40, 0x1B29, 0x1B3D, GUND, /* 0x7F */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x87 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x8F */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x97 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x9F */
- GUND, 0x40, GUND, 0x01, 0x24, 0x03, GUND, 0x5f, /* 0xA7 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0xAF */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0xB7 */
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, 0x60, /* 0xBF */
- 0x41, 0x41, 0x41, 0x41, 0x5B, 0x0E, 0x1C, 0x09, /* 0xC7 */
- 0x45, 0x1F, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49, /* 0xCF */
- GUND, 0x5D, 0x4F, 0x4F, 0x4F, 0x4F, 0x5C, GUND, /* 0xD7 */
- 0x0B, 0x55, 0x55, 0x55, 0x5E, 0x59, GUND, 0x1E, /* 0xDF */
- 0x7F, 0x61, 0x61, 0x61, 0x7B, 0x0F, 0x1D, 0x09, /* 0xE7 */
- 0x04, 0x05, 0x65, 0x65, 0x07, 0x69, 0x69, 0x69, /* 0xEF */
- GUND, 0x7D, 0x08, 0x6F, 0x6F, 0x6F, 0x7C, GUND, /* 0xF7 */
- 0x0C, 0x06, 0x75, 0x75, 0x7E, 0x79, GUND, 0x79 /* 0xFF */
+static const struct codepoint default_unicode[] =
+{ /*
+   * Unicode to GSM table for the default alphabet (language index 0).
+   * Entries must stay sorted by Unicode value because the table is
+   * searched with bsearch(), and unicode_locking_shift_lookup() searches
+   * exactly 128 of them.
+   */
+ { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+ { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+ { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+ { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+ { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+ { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+ { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+ { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+ { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+ { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+ { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+ { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
+ { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
+ { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
+ { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
+ { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 },
+ { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 },
+ { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A },
+ { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E },
+ { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 },
+ { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 },
+ { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A },
+ { 0x00A0, 0x20 }, { 0x00A1, 0x40 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 },
+ { 0x00A5, 0x03 }, { 0x00A7, 0x5F }, { 0x00BF, 0x60 }, { 0x00C4, 0x5B },
+ { 0x00C5, 0x0E }, { 0x00C6, 0x1C }, { 0x00C7, 0x09 }, { 0x00C9, 0x1F },
+ { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00D8, 0x0B }, { 0x00DC, 0x5E },
+ { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
+ { 0x00E6, 0x1D }, { 0x00E8, 0x04 }, { 0x00E9, 0x05 }, { 0x00EC, 0x07 },
+ { 0x00F1, 0x7D }, { 0x00F2, 0x08 }, { 0x00F6, 0x7C }, { 0x00F8, 0x0C },
+ { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x0393, 0x13 }, { 0x0394, 0x10 },
+ { 0x0398, 0x19 }, { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 },
+ { 0x03A3, 0x18 }, { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }
};
-/* Starts at 0x0390 */
-static unsigned short greek_unicode_offset = 0x0390;
+/*
+ * Turkish locking shift table, GSM to Unicode, indexed directly by the
+ * GSM code (128 entries, 0x00 to 0x7F).
+ * Appendix A.3.1 in 3GPP TS23.038
+ */
+static const unsigned short turkish_gsm[] =
+{
+ 0x0040, 0x00A3, 0x0024, 0x00A5, 0x20AC, 0x00E9, 0x00F9, 0x0131, /* 0x07 */
+ 0x00F2, 0x00C7, 0x000A, 0x011E, 0x011F, 0x000D, 0x00C5, 0x00E5, /* 0x0F */
+ 0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, /* 0x17 */
+ 0x03A3, 0x0398, 0x039E, 0x00A0, 0x015E, 0x015F, 0x00DF, 0x00C9, /* 0x1F */
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, /* 0x27 */
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x2F */
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x37 */
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x3F */
+ 0x0130, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x47 */
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x4F */
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x57 */
+ 0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7, /* 0x5F */
+ 0x00E7, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x67 */
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x6F */
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x77 */
+ 0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0 /* 0x7F */
+};
-static unsigned short greek_unicode_table[] =
+static const struct codepoint turkish_unicode[] =
{ /* Unicode to GSM table for the Turkish locking shift alphabet. Must stay
   * sorted by Unicode value (bsearch) and hold 128 entries, the length
   * unicode_locking_shift_lookup() searches. */
- GUND, GUND, GUND, 0x13, 0x10, GUND, GUND, GUND, /* 0x07 */
- 0x19, GUND, GUND, 0x14, GUND, GUND, 0x1A, GUND, /* 0x0F */
- 0x16, GUND, GUND, 0x18, GUND, GUND, 0x12, GUND, /* 0x17 */
- 0x17, 0x15, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x1F */
+ { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+ { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+ { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+ { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+ { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+ { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+ { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+ { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+ { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+ { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+ { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+ { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
+ { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
+ { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
+ { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
+ { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 },
+ { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 },
+ { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A },
+ { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E },
+ { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 },
+ { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 },
+ { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A },
+ { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 }, { 0x00A5, 0x03 },
+ { 0x00A7, 0x5F }, { 0x00C4, 0x5B }, { 0x00C5, 0x0E }, { 0x00C7, 0x09 },
+ { 0x00C9, 0x1F }, { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00DC, 0x5E },
+ { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
+ { 0x00E7, 0x60 }, { 0x00E9, 0x05 }, { 0x00F1, 0x7D }, { 0x00F2, 0x08 },
+ { 0x00F6, 0x7C }, { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x011E, 0x0B },
+ { 0x011F, 0x0C }, { 0x0130, 0x40 }, { 0x0131, 0x07 }, { 0x015E, 0x1C },
+ { 0x015F, 0x1D }, { 0x0393, 0x13 }, { 0x0394, 0x10 }, { 0x0398, 0x19 },
+ { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 }, { 0x03A3, 0x18 },
+ { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }, { 0x20AC, 0x04 }
};
-#define UTF8_LENGTH(c) \
- ((c) < 0x80 ? 1 : \
- ((c) < 0x800 ? 2 : 3))
+/*
+ * Portuguese locking shift table, GSM to Unicode, indexed directly by the
+ * GSM code (128 entries, 0x00 to 0x7F).
+ * Appendix A.3.3 in 3GPP TS23.038
+ *
+ * Every entry except the escape position 0x1B must have its inverse in
+ * portuguese_unicode, otherwise text no longer round-trips. Code 0x40 is
+ * capital I with acute (0x00CD), matching the { 0x00CD, 0x40 } entry of
+ * portuguese_unicode.
+ */
+static const unsigned short portuguese_gsm[] =
+{
+	0x0040, 0x00A3, 0x0024, 0x00A5, 0x00EA, 0x00E9, 0x00FA, 0x00ED, /* 0x07 */
+	0x00F3, 0x00E7, 0x000A, 0x00D4, 0x00F4, 0x000D, 0x00C1, 0x00E1, /* 0x0F */
+	0x0394, 0x005F, 0x00AA, 0x00C7, 0x00C0, 0x221E, 0x005E, 0x005C, /* 0x17 */
+	0x20ac, 0x00D3, 0x007C, 0x00A0, 0x00C2, 0x00E2, 0x00CA, 0x00C9, /* 0x1F */
+	0x0020, 0x0021, 0x0022, 0x0023, 0x00BA, 0x0025, 0x0026, 0x0027, /* 0x27 */
+	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x2F */
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x37 */
+	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x3F */
+	0x00CD, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x47 */
+	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x4F */
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x57 */
+	0x0058, 0x0059, 0x005A, 0x00C3, 0x00D5, 0x00DA, 0x00DC, 0x00A7, /* 0x5F */
+	0x007E, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x67 */
+	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x6F */
+	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x77 */
+	0x0078, 0x0079, 0x007A, 0x00E3, 0x00F5, 0x0060, 0x00FC, 0x00E0 /* 0x7F */
+};
-static unsigned short gsm_extension_table_lookup(unsigned char k)
+static const struct codepoint portuguese_unicode[] =
{ /* Unicode to GSM table for the Portuguese locking shift alphabet. Must
   * stay sorted by Unicode value (bsearch) and hold 128 entries, the length
   * unicode_locking_shift_lookup() searches. */
- static unsigned int ext_table_len =
- (sizeof(gsm_extension) / sizeof(unsigned short)) >> 1;
- unsigned int i;
- unsigned short *t;
+ { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+ { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+ { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+ { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+ { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+ { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+ { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+ { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+ { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+ { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+ { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+ { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
+ { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
+ { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
+ { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
+ { 0x005A, 0x5A }, { 0x005C, 0x17 }, { 0x005E, 0x16 }, { 0x005F, 0x11 },
+ { 0x0060, 0x7D }, { 0x0061, 0x61 }, { 0x0062, 0x62 }, { 0x0063, 0x63 },
+ { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 }, { 0x0067, 0x67 },
+ { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A }, { 0x006B, 0x6B },
+ { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E }, { 0x006F, 0x6F },
+ { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 }, { 0x0073, 0x73 },
+ { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, { 0x0077, 0x77 },
+ { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, { 0x007C, 0x1A },
+ { 0x007E, 0x60 }, { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A5, 0x03 },
+ { 0x00A7, 0x5F }, { 0x00AA, 0x12 }, { 0x00BA, 0x24 }, { 0x00C0, 0x14 },
+ { 0x00C1, 0x0E }, { 0x00C2, 0x1C }, { 0x00C3, 0x5B }, { 0x00C7, 0x13 },
+ { 0x00C9, 0x1F }, { 0x00CA, 0x1E }, { 0x00CD, 0x40 }, { 0x00D3, 0x19 },
+ { 0x00D4, 0x0B }, { 0x00D5, 0x5C }, { 0x00DA, 0x5D }, { 0x00DC, 0x5E },
+ { 0x00E0, 0x7F }, { 0x00E1, 0x0F }, { 0x00E2, 0x1D }, { 0x00E3, 0x7B },
+ { 0x00E7, 0x09 }, { 0x00E9, 0x05 }, { 0x00EA, 0x04 }, { 0x00ED, 0x07 },
+ { 0x00F3, 0x08 }, { 0x00F4, 0x0C }, { 0x00F5, 0x7C }, { 0x00FA, 0x06 },
+ { 0x00FC, 0x7E }, { 0x0394, 0x10 }, { 0x20AC, 0x18 }, { 0x221E, 0x15 }
+};
- for (i = 0, t = gsm_extension; i < ext_table_len; i++) {
- if (t[0] == k)
- return t[1];
- t += 2;
- }
+static const struct single_shift_table gsm_single_shift[] =
+{ /* GSM to Unicode single shift tables, indexed by language identifier:
+   * 0 default, 1 Turkish, 2 Spanish, 3 Portuguese. */
+ { default_ext_gsm, TABLE_SIZE(default_ext_gsm) },
+ { turkish_ext_gsm, TABLE_SIZE(turkish_ext_gsm) },
+ { spanish_ext_gsm, TABLE_SIZE(spanish_ext_gsm) },
+ { portuguese_ext_gsm, TABLE_SIZE(portuguese_ext_gsm) }
+};
+/*
+ * Unicode to GSM single shift tables, indexed by language identifier in
+ * the same order as gsm_single_shift.
+ */
+static const struct single_shift_table unicode_single_shift[] =
+{
+ { default_ext_unicode, TABLE_SIZE(default_ext_unicode) },
+ { turkish_ext_unicode, TABLE_SIZE(turkish_ext_unicode) },
+ { spanish_ext_unicode, TABLE_SIZE(spanish_ext_unicode) },
+ { portuguese_ext_unicode, TABLE_SIZE(portuguese_ext_unicode) }
+};
+/*
+ * GSM to Unicode locking shift tables, indexed by language identifier.
+ * Index 2 (Spanish in the single shift arrays) reuses default_gsm; this
+ * file defines no separate Spanish locking shift table.
+ */
+static const unsigned short *gsm_locking_shift[] =
+{
+ default_gsm,
+ turkish_gsm,
+ default_gsm,
+ portuguese_gsm
+};
+/*
+ * Unicode to GSM locking shift tables, indexed by language identifier in
+ * the same order as gsm_locking_shift. Index 2 reuses default_unicode.
+ */
+static const struct codepoint *unicode_locking_shift[] =
+{
+ default_unicode,
+ turkish_unicode,
+ default_unicode,
+ portuguese_unicode
+};
+
+/*
+ * bsearch() comparison callback: orders two struct codepoint entries by
+ * their "from" field. Returns -1, 0 or 1 when a sorts before, equal to or
+ * after b.
+ */
+static int compare_codepoints(const void *a, const void *b)
+{
+	const struct codepoint *lhs = a;
+	const struct codepoint *rhs = b;
+
+	if (lhs->from < rhs->from)
+		return -1;
+
+	return lhs->from > rhs->from;
+}
+
+/*
+ * Binary-searches the first len entries of table for the entry whose
+ * "from" field equals key->from. The searched entries must be sorted by
+ * "from".
+ *
+ * Returns the "to" field of the matching entry, or GUND if there is none.
+ */
+static unsigned short codepoint_lookup(struct codepoint *key,
+					const struct codepoint *table,
+					unsigned int len)
+{
+	const struct codepoint *hit;
+
+	hit = bsearch(key, table, len, sizeof(*key), compare_codepoints);
+	if (hit == NULL)
+		return GUND;
+
+	return hit->to;
+}
+
+/*
+ * Translates GSM code k to its Unicode value using the locking shift
+ * table selected by lang.
+ *
+ * k:    GSM 7-bit code, valid range 0x00 to 0x7F.
+ * lang: language identifier; values of KNOWN_VARIANTS or above select
+ *       the default table (index 0).
+ *
+ * Returns the Unicode value, or GUND when k is above 0x7F and therefore
+ * outside the 128-entry tables.
+ */
+static unsigned short gsm_locking_shift_lookup(unsigned char k,
+						unsigned char lang)
+{
+	/* If language is not defined in 3GPP TS 23.038,
+	 * implementations are instructed to ignore it */
+	unsigned char variant = lang < KNOWN_VARIANTS ? lang : 0;
+
+	/* The tables hold 128 entries; never index past their end */
+	if (k > 0x7F)
+		return GUND;
+
+	return gsm_locking_shift[variant][k];
+}
+
+/*
+ * Translates extension code k (the octet following the 0x1B escape) to
+ * its Unicode value using the single shift table selected by lang.
+ * Language identifiers of KNOWN_VARIANTS or above select the default
+ * table.
+ *
+ * Returns the Unicode value, or GUND if the table has no entry for k.
+ */
+static unsigned short gsm_single_shift_lookup(unsigned char k,
+						unsigned char lang)
+{
+	const struct single_shift_table *ext;
+	struct codepoint needle;
+
+	needle.from = k;
+	needle.to = 0;
+
+	ext = &gsm_single_shift[lang < KNOWN_VARIANTS ? lang : 0];
+
+	return codepoint_lookup(&needle, ext->table, ext->len);
+}
+/*
+ * Translates Unicode value k to its GSM code using the locking shift
+ * table selected by lang; language identifiers of KNOWN_VARIANTS or above
+ * select the default table. Returns GUND if the table has no entry for k.
+ *
+ * The length passed to codepoint_lookup() is hard-coded, so exactly the
+ * first 128 entries of the selected table are searched.
+ */
+static unsigned short unicode_locking_shift_lookup(unsigned short k,
+ unsigned char lang)
+{
+ struct codepoint key = { k, 0 };
+ unsigned char variant = lang < KNOWN_VARIANTS ? lang : 0;
- return 0;
+ return codepoint_lookup(&key, unicode_locking_shift[variant], 128);
+}
+
+/*
+ * Translates Unicode value k to its escaped GSM extension code using the
+ * single shift table selected by lang. Language identifiers of
+ * KNOWN_VARIANTS or above select the default table.
+ *
+ * Returns the table value (0x1B in the high byte, extension code in the
+ * low byte), or GUND if the table has no entry for k.
+ */
+static unsigned short unicode_single_shift_lookup(unsigned short k,
+						unsigned char lang)
+{
+	const struct single_shift_table *ext;
+	struct codepoint needle;
+
+	needle.from = k;
+	needle.to = 0;
+
+	ext = &unicode_single_shift[lang < KNOWN_VARIANTS ? lang : 0];
+
+	return codepoint_lookup(&needle, ext->table, ext->len);
}
/*!
- * Converts text coded using GSM codec into UTF8 encoded text. If len
- * is less than 0, and terminator character is given, the length is
- * computed automatically.
+ * Converts text coded using GSM codec into UTF8 encoded text, using
+ * the given language identifiers for single shift and locking shift
+ * tables. If len is less than 0, and terminator character is given,
+ * the length is computed automatically.
*
* Returns newly-allocated UTF8 encoded string or NULL if the conversion
* could not be performed. Returns the number of bytes read from the
@@ -177,9 +560,11 @@ static unsigned short gsm_extension_table_lookup(unsigned char k)
* encoded string in items_written (if not NULL) not including the terminal
* '\0' character. The caller is responsible for freeing the returned value.
*/
-char *convert_gsm_to_utf8(const unsigned char *text, long len,
- long *items_read, long *items_written,
- unsigned char terminator)
+char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len,
+ long *items_read, long *items_written,
+ unsigned char terminator,
+ unsigned char locking_lang,
+ unsigned char single_lang)
{
char *res = NULL;
char *out;
@@ -209,12 +594,12 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len,
if (i >= len)
goto err_out;
- c = gsm_extension_table_lookup(text[i]);
+ c = gsm_single_shift_lookup(text[i], single_lang);
- if (c == 0)
+ if (c == GUND)
goto err_out;
} else {
- c = gsm_table[text[i]];
+ c = gsm_locking_shift_lookup(text[i], locking_lang);
}
res_length += UTF8_LENGTH(c);
@@ -232,9 +617,9 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len,
unsigned short c;
if (text[i] == 0x1b)
- c = gsm_extension_table_lookup(text[++i]);
+ c = gsm_single_shift_lookup(text[++i], single_lang);
else
- c = gsm_table[text[i]];
+ c = gsm_locking_shift_lookup(text[i], locking_lang);
out += g_unichar_to_utf8(c, out);
@@ -253,22 +638,13 @@ err_out:
return res;
}
-static unsigned short unicode_to_gsm(unsigned short c)
+char *convert_gsm_to_utf8(const unsigned char *text, long len,
+ long *items_read, long *items_written,
+ unsigned char terminator)
{ /* Convenience wrapper: same conversion as convert_gsm_to_utf8_with_lang()
   * with language identifier 0, i.e. the default tables, for both the
   * locking shift and the single shift. */
- static int greek_unicode_size = sizeof(greek_unicode_table) /
- sizeof(unsigned short);
- unsigned short converted = GUND;
-
- if (c == 0x20AC)
- converted = 0x1B65;
- else if (c < 256)
- converted = unicode_256_table[c];
- else if ((c >= greek_unicode_offset) &&
- (c < (greek_unicode_offset + greek_unicode_size))) {
- converted = greek_unicode_table[c-greek_unicode_offset];
- }
-
- return converted;
+ return convert_gsm_to_utf8_with_lang(text, len, items_read,
+ items_written,
+ terminator, 0, 0);
}
/*!
@@ -281,9 +657,11 @@ static unsigned short unicode_to_gsm(unsigned short c)
* the actual number of bytes read. If items_written is not NULL, contains
* the number of bytes written.
*/
-unsigned char *convert_utf8_to_gsm(const char *text, long len,
+unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len,
long *items_read, long *items_written,
- unsigned char terminator)
+ unsigned char terminator,
+ unsigned char locking_lang,
+ unsigned char single_lang)
{
long nchars = 0;
const char *in;
@@ -306,7 +684,10 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len,
if (c > 0xffff)
goto err_out;
- converted = unicode_to_gsm(c);
+ converted = unicode_locking_shift_lookup(c, locking_lang);
+
+ if (converted == GUND)
+ converted = unicode_single_shift_lookup(c, single_lang);
if (converted == GUND)
goto err_out;
@@ -332,7 +713,11 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len,
gunichar c = g_utf8_get_char(in);
- converted = unicode_to_gsm(c);
+ converted = unicode_locking_shift_lookup(c, locking_lang);
+
+ if (converted == GUND)
+ converted = unicode_single_shift_lookup(c, single_lang);
+
if (converted & 0x1b00) {
*out = 0x1b;
++out;
@@ -357,6 +742,15 @@ err_out:
return res;
}
+/*
+ * Converts UTF-8 text to GSM using the default tables. Convenience
+ * wrapper around convert_utf8_to_gsm_with_lang() that passes language
+ * identifier 0 for both the locking shift and the single shift.
+ */
+unsigned char *convert_utf8_to_gsm(const char *text, long len,
+					long *items_read, long *items_written,
+					unsigned char terminator)
+{
+	const unsigned char default_lang = 0;
+
+	return convert_utf8_to_gsm_with_lang(text, len, items_read,
+						items_written, terminator,
+						default_lang, default_lang);
+}
+
/*!
* Decodes the hex encoded data and converts to a byte array. If terminator
* is not 0, the terminator character is appended to the end of the result.
@@ -779,14 +1173,14 @@ char *sim_string_to_utf8(const unsigned char *buffer, int length)
if (i >= length)
return NULL;
- c = gsm_extension_table_lookup(buffer[i++]);
+ c = gsm_single_shift_lookup(buffer[i++], 0);
if (c == 0)
return NULL;
j += 2;
} else {
- c = gsm_table[buffer[i++]];
+ c = gsm_locking_shift_lookup(buffer[i++], 0);
j += 1;
}
@@ -816,9 +1210,9 @@ char *sim_string_to_utf8(const unsigned char *buffer, int length)
c = (buffer[i++] & 0x7f) + ucs2_offset;
else if (buffer[i] == 0x1b) {
++i;
- c = gsm_extension_table_lookup(buffer[i++]);
+ c = gsm_single_shift_lookup(buffer[i++], 0);
} else
- c = gsm_table[buffer[i++]];
+ c = gsm_locking_shift_lookup(buffer[i++], 0);
out += g_unichar_to_utf8(c, out);
}
diff --git a/src/util.h b/src/util.h
index 46bb3bab..47aa0662 100644
--- a/src/util.h
+++ b/src/util.h
@@ -21,9 +21,20 @@
char *convert_gsm_to_utf8(const unsigned char *text, long len, long *items_read,
long *items_written, unsigned char terminator);
+/*
+ * Converts GSM encoded text to a newly-allocated UTF-8 string, using the
+ * given language identifiers to select the locking shift and single shift
+ * tables. The caller frees the result.
+ */
+char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len, long *items_read,
+ long *items_written, unsigned char terminator,
+ unsigned char locking_shift_lang,
+ unsigned char single_shift_lang);
+
unsigned char *convert_utf8_to_gsm(const char *text, long len, long *items_read,
long *items_written, unsigned char terminator);
+/*
+ * Converts UTF-8 text to GSM, using the given language identifiers to
+ * select the locking shift and single shift tables. Each character is
+ * looked up in the locking shift table first and in the single shift
+ * table only if that lookup finds nothing.
+ */
+unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len, long *items_read,
+					long *items_written, unsigned char terminator,
+					unsigned char locking_shift_lang,
+					unsigned char single_shift_lang);
+
unsigned char *decode_hex_own_buf(const char *in, long len, long *items_written,
unsigned char terminator,
unsigned char *buf);