diff options
-rw-r--r-- | src/util.c | 608 | ||||
-rw-r--r-- | src/util.h | 11 |
2 files changed, 512 insertions, 107 deletions
@@ -26,6 +26,7 @@ #include <stdio.h> #include <string.h> #include <ctype.h> +#include <stdlib.h> #include <glib.h> @@ -58,27 +59,237 @@ attached. */ +#define GUND 0xFFFF +#define KNOWN_VARIANTS 4 + +#define UTF8_LENGTH(c) \ + ((c) < 0x80 ? 1 : \ + ((c) < 0x800 ? 2 : 3)) + +#define TABLE_SIZE(t) \ + (sizeof((t)) / sizeof(struct codepoint)) + +struct codepoint { + unsigned short from; + unsigned short to; +}; + +struct single_shift_table { + const struct codepoint *table; + unsigned int len; +}; + /* GSM to Unicode extension table, for GSM sequences starting with 0x1B */ -static unsigned short gsm_extension[] = -{ - 0x0A, 0x000C, /* See NOTE 3 in 23.038 */ - 0x14, 0x005E, - 0x1B, 0x0020, /* See NOTE 1 in 23.038 */ - 0x28, 0x007B, - 0x29, 0x007D, - 0x2F, 0x005C, - 0x3C, 0x005B, - 0x3D, 0x007E, - 0x3E, 0x005D, - 0x40, 0x007C, - 0x65, 0x20AC +static const struct codepoint default_ext_gsm[] = +{ + { 0x0A, 0x000C }, /* See NOTE 3 in 23.038 */ + { 0x14, 0x005E }, + { 0x1B, 0x0020 }, /* See NOTE 1 in 23.038 */ + { 0x28, 0x007B }, + { 0x29, 0x007D }, + { 0x2F, 0x005C }, + { 0x3C, 0x005B }, + { 0x3D, 0x007E }, + { 0x3E, 0x005D }, + { 0x40, 0x007C }, + { 0x65, 0x20AC } +}; + +static const struct codepoint default_ext_unicode[] = +{ + { 0x000C, 0x1B0A }, + { 0x005B, 0x1B3C }, + { 0x005C, 0x1B2F }, + { 0x005D, 0x1B3E }, + { 0x005E, 0x1B14 }, + { 0x007B, 0x1B28 }, + { 0x007C, 0x1B40 }, + { 0x007D, 0x1B29 }, + { 0x007E, 0x1B3D }, + { 0x20AC, 0x1B65 } +}; + +/* Appendix A.2.1. 
in 3GPP TS23.038, V.8.2.0 */ +static const struct codepoint turkish_ext_gsm[] = +{ + { 0x0A, 0x000C }, /* See NOTE 3 */ + { 0x14, 0x005E }, + { 0x1B, 0x0020 }, /* See NOTE 1 */ + { 0x28, 0x007B }, + { 0x29, 0x007D }, + { 0x2F, 0x005C }, + { 0x3C, 0x005B }, + { 0x3D, 0x007E }, + { 0x3E, 0x005D }, + { 0x40, 0x007C }, + { 0x47, 0x011E }, + { 0x49, 0x0130 }, + { 0x53, 0x015E }, + { 0x63, 0x00E7 }, + { 0x65, 0x20AC }, + { 0x67, 0x011F }, + { 0x69, 0x0131 }, + { 0x73, 0x015F } +}; + +static const struct codepoint turkish_ext_unicode[] = +{ + { 0x000C, 0x1B0A }, + { 0x005B, 0x1B3C }, + { 0x005C, 0x1B2F }, + { 0x005D, 0x1B3E }, + { 0x005E, 0x1B14 }, + { 0x007B, 0x1B28 }, + { 0x007C, 0x1B40 }, + { 0x007D, 0x1B29 }, + { 0x007E, 0x1B3D }, + { 0x00E7, 0x1B63 }, + { 0x011E, 0x1B47 }, + { 0x011F, 0x1B67 }, + { 0x0130, 0x1B49 }, + { 0x0131, 0x1B69 }, + { 0x015E, 0x1B53 }, + { 0x015F, 0x1B73 }, + { 0x20AC, 0x1B65 } +}; + +/* Appendix A.2.2. in 3GPP TS23.038 V.8.2.0*/ +static const struct codepoint spanish_ext_gsm[] = +{ + { 0x09, 0x00E7 }, + { 0x0A, 0x000C }, /* See NOTE 3 */ + { 0x14, 0x005E }, + { 0x1B, 0x0020 }, /* See NOTE 1 */ + { 0x28, 0x007B }, + { 0x29, 0x007D }, + { 0x2F, 0x005C }, + { 0x3C, 0x005B }, + { 0x3D, 0x007E }, + { 0x3E, 0x005D }, + { 0x40, 0x007C }, + { 0x41, 0x00C1 }, + { 0x49, 0x00CD }, + { 0x4F, 0x00D3 }, + { 0x55, 0x00DA }, + { 0x61, 0x00E1 }, + { 0x65, 0x20AC }, + { 0x69, 0x00ED }, + { 0x6F, 0x00F3 }, + { 0x75, 0x00FA } +}; + +static const struct codepoint spanish_ext_unicode[] = +{ + { 0x000C, 0x1B0A }, + { 0x005B, 0x1B3C }, + { 0x005C, 0x1B2F }, + { 0x005D, 0x1B3E }, + { 0x005E, 0x1B14 }, + { 0x007B, 0x1B28 }, + { 0x007C, 0x1B40 }, + { 0x007D, 0x1B29 }, + { 0x007E, 0x1B3D }, + { 0x00C1, 0x1B41 }, + { 0x00CD, 0x1B49 }, + { 0x00D3, 0x1B4F }, + { 0x00DA, 0x1B55 }, + { 0x00E1, 0x1B61 }, + { 0x00E7, 0x1B09 }, + { 0x00ED, 0x1B69 }, + { 0x00F3, 0x1B6F }, + { 0x00FA, 0x1B75 }, + { 0x20AC, 0x1B65 } +}; + +/* Appendix A.2.3. 
in 3GPP TS23.038 V.8.2.0 */ +static const struct codepoint portuguese_ext_gsm[] = +{ + { 0x05, 0x00EA }, + { 0x09, 0x00E7 }, + { 0x0A, 0x000C }, /* See NOTE 3 */ + { 0x0B, 0x00D4 }, + { 0x0C, 0x00F4 }, + { 0x0E, 0x00C1 }, + { 0x0F, 0x00E1 }, + { 0x12, 0x03A6 }, + { 0x13, 0x0393 }, + { 0x14, 0x005E }, + { 0x15, 0x03A9 }, + { 0x16, 0x03A0 }, + { 0x17, 0x03A8 }, + { 0x18, 0x03A3 }, + { 0x19, 0x0398 }, + { 0x1B, 0x0020 }, /* See NOTE 1 */ + { 0x1F, 0x00CA }, + { 0x28, 0x007B }, + { 0x29, 0x007D }, + { 0x2F, 0x005C }, + { 0x3C, 0x005B }, + { 0x3D, 0x007E }, + { 0x3E, 0x005D }, + { 0x40, 0x007C }, + { 0x41, 0x00C0 }, + { 0x49, 0x00CD }, + { 0x4F, 0x00D3 }, + { 0x55, 0x00DA }, + { 0x5B, 0x00C3 }, + { 0x5C, 0x00D5 }, + { 0x61, 0x00C2 }, + { 0x65, 0x20AC }, + { 0x69, 0x00ED }, + { 0x6F, 0x00F3 }, + { 0x75, 0x00FA }, + { 0x7B, 0x00E3 }, + { 0x7C, 0x00F5 }, + { 0x7F, 0x00E2 } +}; + +static const struct codepoint portuguese_ext_unicode[] = +{ + { 0x000C, 0x1B0A }, + { 0x005B, 0x1B3C }, + { 0x005C, 0x1B2F }, + { 0x005D, 0x1B3E }, + { 0x005E, 0x1B14 }, + { 0x007B, 0x1B28 }, + { 0x007C, 0x1B40 }, + { 0x007D, 0x1B29 }, + { 0x007E, 0x1B3D }, + { 0x00C0, 0x1B41 }, + { 0x00C1, 0x1B0E }, + { 0x00C2, 0x1B61 }, + { 0x00C3, 0x1B5B }, + { 0x00CA, 0x1B1F }, + { 0x00CD, 0x1B49 }, + { 0x00D3, 0x1B4F }, + { 0x00D4, 0x1B0B }, + { 0x00D5, 0x1B5C }, + { 0x00DA, 0x1B55 }, + { 0x00E1, 0x1B0F }, + { 0x00E2, 0x1B7F }, + { 0x00E3, 0x1B7B }, + { 0x00E7, 0x1B09 }, + { 0x00EA, 0x1B05 }, + { 0x00ED, 0x1B69 }, + { 0x00F3, 0x1B6F }, + { 0x00F4, 0x1B0C }, + { 0x00F5, 0x1B7C }, + { 0x00FA, 0x1B75 }, + { 0x0393, 0x1B13 }, + { 0x0398, 0x1B19 }, + { 0x03A0, 0x1B16 }, + { 0x03A3, 0x1B18 }, + { 0x03A6, 0x1B12 }, + { 0x03A8, 0x1B17 }, + { 0x03A9, 0x1B15 }, + { 0x20AC, 0x1B65 } }; /* Used for conversion of GSM to Unicode */ -static unsigned short gsm_table[] = +static const unsigned short default_gsm[] = { 0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC, /* 0x07 */ - 0x00F2, 0x00E7, 0x000A, 0x00D8, 
0x00F8, 0x000D, 0x00C5, 0x00E5, /* 0x0F */ + 0x00F2, 0x00C7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5, /* 0x0F */ 0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, /* 0x17 */ 0x03A3, 0x0398, 0x039E, 0x00A0, 0x00C6, 0x00E6, 0x00DF, 0x00C9, /* 0x1F */ 0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, /* 0x27 */ @@ -95,80 +306,252 @@ static unsigned short gsm_table[] = 0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0 /* 0x7F */ }; -#define GUND 0xFFFF - -/* 3GPP 27.005 Annex A */ -static unsigned short unicode_256_table[] = -{ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x07 */ - GUND, GUND, 0x0A, GUND, 0x1B0A, 0x0D, GUND, GUND, /* 0x0F */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x17 */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x1F */ - 0x20, 0x21, 0x22, 0x23, 0x02, 0x25, 0x26, 0x27, /* 0x27 */ - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x2F */ - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x37 */ - 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, /* 0x3F */ - 0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x47 */ - 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x4F */ - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x57 */ - 0x58, 0x59, 0x5A, 0x1B3C, 0x1B2F, 0x1B3E, 0x1B14, 0x11, /* 0x5F */ - GUND, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x67 */ - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x6F */ - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x77 */ - 0x78, 0x79, 0x7A, 0x1B28, 0x1B40, 0x1B29, 0x1B3D, GUND, /* 0x7F */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x87 */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x8F */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x97 */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x9F */ - GUND, 0x40, GUND, 0x01, 0x24, 0x03, GUND, 0x5f, /* 0xA7 */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0xAF */ - GUND, GUND, GUND, GUND, GUND, GUND, GUND, GUND, /* 0xB7 */ 
- GUND, GUND, GUND, GUND, GUND, GUND, GUND, 0x60, /* 0xBF */ - 0x41, 0x41, 0x41, 0x41, 0x5B, 0x0E, 0x1C, 0x09, /* 0xC7 */ - 0x45, 0x1F, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49, /* 0xCF */ - GUND, 0x5D, 0x4F, 0x4F, 0x4F, 0x4F, 0x5C, GUND, /* 0xD7 */ - 0x0B, 0x55, 0x55, 0x55, 0x5E, 0x59, GUND, 0x1E, /* 0xDF */ - 0x7F, 0x61, 0x61, 0x61, 0x7B, 0x0F, 0x1D, 0x09, /* 0xE7 */ - 0x04, 0x05, 0x65, 0x65, 0x07, 0x69, 0x69, 0x69, /* 0xEF */ - GUND, 0x7D, 0x08, 0x6F, 0x6F, 0x6F, 0x7C, GUND, /* 0xF7 */ - 0x0C, 0x06, 0x75, 0x75, 0x7E, 0x79, GUND, 0x79 /* 0xFF */ +static const struct codepoint default_unicode[] = +{ + { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 }, + { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 }, + { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 }, + { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D }, + { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 }, + { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 }, + { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 }, + { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D }, + { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 }, + { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 }, + { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 }, + { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D }, + { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 }, + { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 }, + { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 }, + { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 }, + { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 }, + { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A }, + { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 
0x6E }, + { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 }, + { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, + { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, + { 0x00A0, 0x20 }, { 0x00A1, 0x40 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 }, + { 0x00A5, 0x03 }, { 0x00A7, 0x5F }, { 0x00BF, 0x60 }, { 0x00C4, 0x5B }, + { 0x00C5, 0x0E }, { 0x00C6, 0x1C }, { 0x00C7, 0x09 }, { 0x00C9, 0x1F }, + { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00D8, 0x0B }, { 0x00DC, 0x5E }, + { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F }, + { 0x00E6, 0x1D }, { 0x00E8, 0x04 }, { 0x00E9, 0x05 }, { 0x00EC, 0x07 }, + { 0x00F1, 0x7D }, { 0x00F2, 0x08 }, { 0x00F6, 0x7C }, { 0x00F8, 0x0C }, + { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x0393, 0x13 }, { 0x0394, 0x10 }, + { 0x0398, 0x19 }, { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 }, + { 0x03A3, 0x18 }, { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 } }; -/* Starts at 0x0390 */ -static unsigned short greek_unicode_offset = 0x0390; +/* Appendix A.3.1 in 3GPP TS23.038 */ +static const unsigned short turkish_gsm[] = +{ + 0x0040, 0x00A3, 0x0024, 0x00A5, 0x20AC, 0x00E9, 0x00F9, 0x0131, /* 0x07 */ + 0x00F2, 0x00C7, 0x000A, 0x011E, 0x011F, 0x000D, 0x00C5, 0x00E5, /* 0x0F */ + 0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8, /* 0x17 */ + 0x03A3, 0x0398, 0x039E, 0x00A0, 0x015E, 0x015F, 0x00DF, 0x00C9, /* 0x1F */ + 0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027, /* 0x27 */ + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x2F */ + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x37 */ + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x3F */ + 0x0130, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x47 */ + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x4F */ + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x57 */ + 0x0058, 
0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7, /* 0x5F */ + 0x00E7, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x67 */ + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x6F */ + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x77 */ + 0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0 /* 0x7F */ +}; -static unsigned short greek_unicode_table[] = +static const struct codepoint turkish_unicode[] = { - GUND, GUND, GUND, 0x13, 0x10, GUND, GUND, GUND, /* 0x07 */ - 0x19, GUND, GUND, 0x14, GUND, GUND, 0x1A, GUND, /* 0x0F */ - 0x16, GUND, GUND, 0x18, GUND, GUND, 0x12, GUND, /* 0x17 */ - 0x17, 0x15, GUND, GUND, GUND, GUND, GUND, GUND, /* 0x1F */ + { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 }, + { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 }, + { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 }, + { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D }, + { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 }, + { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 }, + { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 }, + { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D }, + { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 }, + { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 }, + { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 }, + { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D }, + { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 }, + { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 }, + { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 }, + { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 }, + { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 }, + { 0x0067, 0x67 }, 
{ 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A }, + { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E }, + { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 }, + { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, + { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, + { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 }, { 0x00A5, 0x03 }, + { 0x00A7, 0x5F }, { 0x00C4, 0x5B }, { 0x00C5, 0x0E }, { 0x00C7, 0x09 }, + { 0x00C9, 0x1F }, { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00DC, 0x5E }, + { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F }, + { 0x00E7, 0x60 }, { 0x00E9, 0x05 }, { 0x00F1, 0x7D }, { 0x00F2, 0x08 }, + { 0x00F6, 0x7C }, { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x011E, 0x0B }, + { 0x011F, 0x0C }, { 0x0130, 0x40 }, { 0x0131, 0x07 }, { 0x015E, 0x1C }, + { 0x015F, 0x1D }, { 0x0393, 0x13 }, { 0x0394, 0x10 }, { 0x0398, 0x19 }, + { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 }, { 0x03A3, 0x18 }, + { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }, { 0x20AC, 0x04 } }; -#define UTF8_LENGTH(c) \ - ((c) < 0x80 ? 1 : \ - ((c) < 0x800 ? 
2 : 3)) +/* Appendix A.3.2 in 3GPP TS23.038 */ +static const unsigned short portuguese_gsm[] = +{ + 0x0040, 0x00A3, 0x0024, 0x00A5, 0x00EA, 0x00E9, 0x00FA, 0x00ED, /* 0x07 */ + 0x00F3, 0x00E7, 0x000A, 0x00D4, 0x00F4, 0x000D, 0x00C1, 0x00E1, /* 0x0F */ + 0x0394, 0x005F, 0x00AA, 0x00C7, 0x00C0, 0x221E, 0x005E, 0x005C, /* 0x17 */ + 0x20ac, 0x00D3, 0x007C, 0x00A0, 0x00C2, 0x00E2, 0x00CA, 0x00C9, /* 0x1F */ + 0x0020, 0x0021, 0x0022, 0x0023, 0x00BA, 0x0025, 0x0026, 0x0027, /* 0x27 */ + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x2F */ + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x37 */ + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x3F */ + 0x00A1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x47 */ + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x4F */ + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x57 */ + 0x0058, 0x0059, 0x005A, 0x00C3, 0x00D5, 0x00DA, 0x00DC, 0x00A7, /* 0x5F */ + 0x007E, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x67 */ + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x6F */ + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x77 */ + 0x0078, 0x0079, 0x007A, 0x00E3, 0x00F5, 0x0060, 0x00FC, 0x00E0 /* 0x7F */ +}; -static unsigned short gsm_extension_table_lookup(unsigned char k) +static const struct codepoint portuguese_unicode[] = { - static unsigned int ext_table_len = - (sizeof(gsm_extension) / sizeof(unsigned short)) >> 1; - unsigned int i; - unsigned short *t; + { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 }, + { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 }, + { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 }, + { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D }, + { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 }, + { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 
0x0034, 0x34 }, { 0x0035, 0x35 }, + { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 }, + { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D }, + { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 }, + { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 }, + { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 }, + { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D }, + { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 }, + { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 }, + { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 }, + { 0x005A, 0x5A }, { 0x005C, 0x17 }, { 0x005E, 0x16 }, { 0x005F, 0x11 }, + { 0x0060, 0x7D }, { 0x0061, 0x61 }, { 0x0062, 0x62 }, { 0x0063, 0x63 }, + { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 }, { 0x0067, 0x67 }, + { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A }, { 0x006B, 0x6B }, + { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E }, { 0x006F, 0x6F }, + { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 }, { 0x0073, 0x73 }, + { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, { 0x0077, 0x77 }, + { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, { 0x007C, 0x1A }, + { 0x007E, 0x60 }, { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A5, 0x03 }, + { 0x00A7, 0x5F }, { 0x00AA, 0x12 }, { 0x00BA, 0x24 }, { 0x00C0, 0x14 }, + { 0x00C1, 0x0E }, { 0x00C2, 0x1C }, { 0x00C3, 0x5B }, { 0x00C7, 0x13 }, + { 0x00C9, 0x1F }, { 0x00CA, 0x1E }, { 0x00CD, 0x40 }, { 0x00D3, 0x19 }, + { 0x00D4, 0x0B }, { 0x00D5, 0x5C }, { 0x00DA, 0x5D }, { 0x00DC, 0x5E }, + { 0x00E0, 0x7F }, { 0x00E1, 0x0F }, { 0x00E2, 0x1D }, { 0x00E3, 0x7B }, + { 0x00E7, 0x09 }, { 0x00E9, 0x05 }, { 0x00EA, 0x04 }, { 0x00ED, 0x07 }, + { 0x00F3, 0x08 }, { 0x00F4, 0x0C }, { 0x00F5, 0x7C }, { 0x00FA, 0x06 }, + { 0x00FC, 0x7E }, { 0x0394, 0x10 }, { 0x20AC, 0x18 }, { 0x221E, 0x15 } +}; - for (i = 0, t = gsm_extension; i < 
ext_table_len; i++) { - if (t[0] == k) - return t[1]; - t += 2; - } +static const struct single_shift_table gsm_single_shift[] = +{ + { default_ext_gsm, TABLE_SIZE(default_ext_gsm) }, + { turkish_ext_gsm, TABLE_SIZE(turkish_ext_gsm) }, + { spanish_ext_gsm, TABLE_SIZE(spanish_ext_gsm) }, + { portuguese_ext_gsm, TABLE_SIZE(portuguese_ext_gsm) } +}; + +static const struct single_shift_table unicode_single_shift[] = +{ + { default_ext_unicode, TABLE_SIZE(default_ext_unicode) }, + { turkish_ext_unicode, TABLE_SIZE(turkish_ext_unicode) }, + { spanish_ext_unicode, TABLE_SIZE(spanish_ext_unicode) }, + { portuguese_ext_unicode, TABLE_SIZE(portuguese_ext_unicode) } +}; + +static const unsigned short *gsm_locking_shift[] = +{ + default_gsm, + turkish_gsm, + default_gsm, + portuguese_gsm +}; + +static const struct codepoint *unicode_locking_shift[] = +{ + default_unicode, + turkish_unicode, + default_unicode, + portuguese_unicode +}; + +static int compare_codepoints(const void *a, const void *b) +{ + const struct codepoint *ca = (const struct codepoint *)a; + const struct codepoint *cb = (const struct codepoint *)b; + + return (ca->from > cb->from) - (ca->from < cb->from); +} + +static unsigned short codepoint_lookup(struct codepoint *key, + const struct codepoint *table, + unsigned int len) +{ + struct codepoint *result = NULL; + + result = bsearch(key, table, len, sizeof(struct codepoint), + compare_codepoints); + + return result ? result->to : GUND; +} + +static unsigned short gsm_locking_shift_lookup(unsigned char k, + unsigned char lang) +{ + /* If language is not defined in 3GPP TS 23.038, + * implementations are instructed to ignore it' */ + unsigned char variant = lang < KNOWN_VARIANTS ? lang : 0; + + return gsm_locking_shift[variant][k]; +} + +static unsigned short gsm_single_shift_lookup(unsigned char k, + unsigned char lang) +{ + struct codepoint key = { k, 0 }; + unsigned char variant = lang < KNOWN_VARIANTS ? 
lang : 0; + + return codepoint_lookup(&key, gsm_single_shift[variant].table, + gsm_single_shift[variant].len); +} + +static unsigned short unicode_locking_shift_lookup(unsigned short k, + unsigned char lang) +{ + struct codepoint key = { k, 0 }; + unsigned char variant = lang < KNOWN_VARIANTS ? lang : 0; - return 0; + return codepoint_lookup(&key, unicode_locking_shift[variant], 128); +} + +static unsigned short unicode_single_shift_lookup(unsigned short k, + unsigned char lang) +{ + struct codepoint key = { k, 0 }; + unsigned char variant = lang < KNOWN_VARIANTS ? lang : 0; + + return codepoint_lookup(&key, unicode_single_shift[variant].table, + unicode_single_shift[variant].len); } /*! - * Converts text coded using GSM codec into UTF8 encoded text. If len - * is less than 0, and terminator character is given, the length is - * computed automatically. + * Converts text coded using GSM codec into UTF8 encoded text, using + * the given language identifiers for single shift and locking shift + * tables. If len is less than 0, and terminator character is given, + * the length is computed automatically. * * Returns newly-allocated UTF8 encoded string or NULL if the conversion * could not be performed. Returns the number of bytes read from the @@ -177,9 +560,11 @@ static unsigned short gsm_extension_table_lookup(unsigned char k) * encoded string in items_written (if not NULL) not including the terminal * '\0' character. The caller is reponsible for freeing the returned value. 
*/ -char *convert_gsm_to_utf8(const unsigned char *text, long len, - long *items_read, long *items_written, - unsigned char terminator) +char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len, + long *items_read, long *items_written, + unsigned char terminator, + unsigned char locking_lang, + unsigned char single_lang) { char *res = NULL; char *out; @@ -209,12 +594,12 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len, if (i >= len) goto err_out; - c = gsm_extension_table_lookup(text[i]); + c = gsm_single_shift_lookup(text[i], single_lang); - if (c == 0) + if (c == GUND) goto err_out; } else { - c = gsm_table[text[i]]; + c = gsm_locking_shift_lookup(text[i], locking_lang); } res_length += UTF8_LENGTH(c); @@ -232,9 +617,9 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len, unsigned short c; if (text[i] == 0x1b) - c = gsm_extension_table_lookup(text[++i]); + c = gsm_single_shift_lookup(text[++i], single_lang); else - c = gsm_table[text[i]]; + c = gsm_locking_shift_lookup(text[i], locking_lang); out += g_unichar_to_utf8(c, out); @@ -253,22 +638,13 @@ err_out: return res; } -static unsigned short unicode_to_gsm(unsigned short c) +char *convert_gsm_to_utf8(const unsigned char *text, long len, + long *items_read, long *items_written, + unsigned char terminator) { - static int greek_unicode_size = sizeof(greek_unicode_table) / - sizeof(unsigned short); - unsigned short converted = GUND; - - if (c == 0x20AC) - converted = 0x1B65; - else if (c < 256) - converted = unicode_256_table[c]; - else if ((c >= greek_unicode_offset) && - (c < (greek_unicode_offset + greek_unicode_size))) { - converted = greek_unicode_table[c-greek_unicode_offset]; - } - - return converted; + return convert_gsm_to_utf8_with_lang(text, len, items_read, + items_written, + terminator, 0, 0); } /*! @@ -281,9 +657,11 @@ static unsigned short unicode_to_gsm(unsigned short c) * the actual number of bytes read. 
If items_written is not NULL, contains * the number of bytes written. */ -unsigned char *convert_utf8_to_gsm(const char *text, long len, +unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len, long *items_read, long *items_written, - unsigned char terminator) + unsigned char terminator, + unsigned char locking_lang, + unsigned char single_lang) { long nchars = 0; const char *in; @@ -306,7 +684,10 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len, if (c > 0xffff) goto err_out; - converted = unicode_to_gsm(c); + converted = unicode_locking_shift_lookup(c, locking_lang); + + if (converted == GUND) + converted = unicode_single_shift_lookup(c, single_lang); if (converted == GUND) goto err_out; @@ -332,7 +713,11 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len, gunichar c = g_utf8_get_char(in); - converted = unicode_to_gsm(c); + converted = unicode_locking_shift_lookup(c, locking_lang); + + if (converted == GUND) + converted = unicode_single_shift_lookup(c, single_lang); + if (converted & 0x1b00) { *out = 0x1b; ++out; @@ -357,6 +742,15 @@ err_out: return res; } +unsigned char *convert_utf8_to_gsm(const char *text, long len, + long *items_read, long *items_written, + unsigned char terminator) +{ + return convert_utf8_to_gsm_with_lang(text, len, items_read, + items_written, + terminator, 0, 0); +} + /*! * Decodes the hex encoded data and converts to a byte array. If terminator * is not 0, the terminator character is appended to the end of the result. 
@@ -779,14 +1173,14 @@ char *sim_string_to_utf8(const unsigned char *buffer, int length) if (i >= length) return NULL; - c = gsm_extension_table_lookup(buffer[i++]); + c = gsm_single_shift_lookup(buffer[i++], 0); - if (c == 0) + if (c == GUND) return NULL; j += 2; } else { - c = gsm_table[buffer[i++]]; + c = gsm_locking_shift_lookup(buffer[i++], 0); j += 1; } @@ -816,9 +1210,9 @@ char *sim_string_to_utf8(const unsigned char *buffer, int length) c = (buffer[i++] & 0x7f) + ucs2_offset; else if (buffer[i] == 0x1b) { ++i; - c = gsm_extension_table_lookup(buffer[i++]); + c = gsm_single_shift_lookup(buffer[i++], 0); } else - c = gsm_table[buffer[i++]]; + c = gsm_locking_shift_lookup(buffer[i++], 0); out += g_unichar_to_utf8(c, out); } @@ -21,9 +21,20 @@ char *convert_gsm_to_utf8(const unsigned char *text, long len, long *items_read, long *items_written, unsigned char terminator); + +char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len, long *items_read, + long *items_written, unsigned char terminator, + unsigned char locking_shift_lang, + unsigned char single_shift_lang); + unsigned char *convert_utf8_to_gsm(const char *text, long len, long *items_read, long *items_written, unsigned char terminator); +unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len, long *items_read, + long *items_written, unsigned char terminator, + unsigned char locking_shift_lang, + unsigned char single_shift_lang); + unsigned char *decode_hex_own_buf(const char *in, long len, long *items_written, unsigned char terminator, unsigned char *buf); |