diff options
Diffstat (limited to 'src/bplist.c')
| -rw-r--r-- | src/bplist.c | 123 |
1 files changed, 69 insertions, 54 deletions
diff --git a/src/bplist.c b/src/bplist.c index 8d50f2e..559830f 100644 --- a/src/bplist.c +++ b/src/bplist.c | |||
| @@ -1142,63 +1142,78 @@ static void write_string(bytearray_t * bplist, char *val, uint64_t size) | |||
| 1142 | write_raw_data(bplist, BPLIST_STRING, (uint8_t *) val, size); | 1142 | write_raw_data(bplist, BPLIST_STRING, (uint8_t *) val, size); |
| 1143 | } | 1143 | } |
| 1144 | 1144 | ||
| 1145 | static uint16_t *plist_utf8_to_utf16be(char *unistr, size_t size, size_t *items_read, size_t *items_written) | 1145 | static uint16_t *plist_utf8_to_utf16be(const unsigned char *unistr, size_t size, size_t *items_read, size_t *items_written) |
| 1146 | { | 1146 | { |
| 1147 | uint16_t *outbuf; | 1147 | uint16_t *outbuf; |
| 1148 | size_t p = 0; | 1148 | size_t p = 0; |
| 1149 | size_t i = 0; | 1149 | size_t i = 0; |
| 1150 | 1150 | ||
| 1151 | unsigned char c0; | 1151 | unsigned char c0; |
| 1152 | unsigned char c1; | 1152 | unsigned char c1; |
| 1153 | unsigned char c2; | 1153 | unsigned char c2; |
| 1154 | unsigned char c3; | 1154 | unsigned char c3; |
| 1155 | 1155 | ||
| 1156 | uint32_t w; | 1156 | outbuf = (uint16_t*)malloc(((size*2)+1)*sizeof(uint16_t)); |
| 1157 | 1157 | if (!outbuf) { | |
| 1158 | outbuf = (uint16_t*)malloc(((size*2)+1)*sizeof(uint16_t)); | 1158 | PLIST_BIN_ERR("%s: Could not allocate %" PRIu64 " bytes\n", __func__, (uint64_t)((size*2)+1)*sizeof(uint16_t)); |
| 1159 | if (!outbuf) { | 1159 | return NULL; |
| 1160 | PLIST_BIN_ERR("%s: Could not allocate %" PRIu64 " bytes\n", __func__, (uint64_t)((size*2)+1)*sizeof(uint16_t)); | 1160 | } |
| 1161 | return NULL; | ||
| 1162 | } | ||
| 1163 | 1161 | ||
| 1164 | while (i < size) { | 1162 | while (i < size) { |
| 1165 | c0 = unistr[i]; | 1163 | c0 = unistr[i]; |
| 1166 | c1 = (i < size-1) ? unistr[i+1] : 0; | 1164 | c1 = (i+1 < size) ? unistr[i+1] : 0; |
| 1167 | c2 = (i < size-2) ? unistr[i+2] : 0; | 1165 | c2 = (i+2 < size) ? unistr[i+2] : 0; |
| 1168 | c3 = (i < size-3) ? unistr[i+3] : 0; | 1166 | c3 = (i+3 < size) ? unistr[i+3] : 0; |
| 1169 | if ((c0 >= 0xF0) && (i < size-3) && (c1 >= 0x80) && (c2 >= 0x80) && (c3 >= 0x80)) { | 1167 | if ((c0 >= 0xF0 && c0 <= 0xF4) && (i+3 < size) && ((c1 & 0xC0) == 0x80) && ((c2 & 0xC0) == 0x80) && ((c3 & 0xC0) == 0x80)) { |
| 1170 | // 4 byte sequence. Need to generate UTF-16 surrogate pair | 1168 | // 4 byte sequence. Need to generate UTF-16 surrogate pair |
| 1171 | w = ((((c0 & 7) << 18) + ((c1 & 0x3F) << 12) + ((c2 & 0x3F) << 6) + (c3 & 0x3F)) & 0x1FFFFF) - 0x010000; | 1169 | /* lead-specific second-byte constraints */ |
| 1172 | outbuf[p++] = be16toh(0xD800 + (w >> 10)); | 1170 | if ((c0 == 0xF0 && c1 < 0x90) || /* overlong (< U+10000) */ |
| 1173 | outbuf[p++] = be16toh(0xDC00 + (w & 0x3FF)); | 1171 | (c0 == 0xF4 && c1 > 0x8F)) /* > U+10FFFF */ |
| 1174 | i+=4; | 1172 | { |
| 1175 | } else if ((c0 >= 0xE0) && (i < size-2) && (c1 >= 0x80) && (c2 >= 0x80)) { | 1173 | break; |
| 1176 | // 3 byte sequence | 1174 | } |
| 1177 | outbuf[p++] = be16toh(((c2 & 0x3F) + ((c1 & 3) << 6)) + (((c1 >> 2) & 15) << 8) + ((c0 & 15) << 12)); | 1175 | uint32_t w = ((uint32_t)(c3 & 0x3F)) | ((uint32_t)(c2 & 0x3F) << 6) | ((uint32_t)(c1 & 0x3F) << 12) | ((uint32_t)(c0 & 0x07) << 18); |
| 1178 | i+=3; | 1176 | if (w < 0x10000 || w > 0x10FFFF) break; |
| 1179 | } else if ((c0 >= 0xC0) && (i < size-1) && (c1 >= 0x80)) { | 1177 | w -= 0x10000; |
| 1180 | // 2 byte sequence | 1178 | outbuf[p++] = be16toh((uint16_t)(0xD800 + (w >> 10))); |
| 1181 | outbuf[p++] = be16toh(((c1 & 0x3F) + ((c0 & 3) << 6)) + (((c0 >> 2) & 7) << 8)); | 1179 | outbuf[p++] = be16toh((uint16_t)(0xDC00 + (w & 0x3FF))); |
| 1182 | i+=2; | 1180 | i+=4; |
| 1183 | } else if (c0 < 0x80) { | 1181 | } else if (((c0 & 0xF0) == 0xE0) && (i+2 < size) && ((c1 & 0xC0) == 0x80) && ((c2 & 0xC0) == 0x80)) { |
| 1184 | // 1 byte sequence | 1182 | // 3 byte sequence |
| 1185 | outbuf[p++] = be16toh(c0); | 1183 | if ((c0 == 0xE0 && c1 < 0xA0) || /* overlong (< U+0800) */ |
| 1186 | i+=1; | 1184 | (c0 == 0xED && c1 > 0x9F)) /* UTF-16 surrogate range */ |
| 1187 | } else { | 1185 | { |
| 1188 | // invalid character | 1186 | break; |
| 1189 | PLIST_BIN_ERR("%s: invalid utf8 sequence in string at index %zu\n", __func__, i); | 1187 | } |
| 1190 | break; | 1188 | uint32_t w = ((uint32_t)(c2 & 0x3F)) | ((uint32_t)(c1 & 0x3F) << 6) | ((uint32_t)(c0 & 0x0F) << 12); |
| 1191 | } | 1189 | if (w < 0x800) break; |
| 1192 | } | 1190 | if (w >= 0xD800 && w <= 0xDFFF) break; // invalid Unicode scalar values |
| 1193 | if (items_read) { | 1191 | outbuf[p++] = be16toh((uint16_t)w); |
| 1194 | *items_read = i; | 1192 | i+=3; |
| 1195 | } | 1193 | } else if ((c0 >= 0xC2 && c0 <= 0xDF) && (i+1 < size) && ((c1 & 0xC0) == 0x80)) { |
| 1196 | if (items_written) { | 1194 | // 2 byte sequence |
| 1197 | *items_written = p; | 1195 | uint32_t w = ((uint32_t)(c1 & 0x3F)) | ((uint32_t)(c0 & 0x1F) << 6); |
| 1198 | } | 1196 | outbuf[p++] = be16toh((uint16_t)w); |
| 1199 | outbuf[p] = 0; | 1197 | i+=2; |
| 1198 | } else if (c0 < 0x80) { | ||
| 1199 | // 1 byte sequence | ||
| 1200 | outbuf[p++] = be16toh((uint16_t)c0); | ||
| 1201 | i+=1; | ||
| 1202 | } else { | ||
| 1203 | // invalid character | ||
| 1204 | PLIST_BIN_ERR("%s: invalid utf8 sequence in string at index %zu\n", __func__, i); | ||
| 1205 | break; | ||
| 1206 | } | ||
| 1207 | } | ||
| 1208 | if (items_read) { | ||
| 1209 | *items_read = i; | ||
| 1210 | } | ||
| 1211 | if (items_written) { | ||
| 1212 | *items_written = p; | ||
| 1213 | } | ||
| 1214 | outbuf[p] = 0; | ||
| 1200 | 1215 | ||
| 1201 | return outbuf; | 1216 | return outbuf; |
| 1202 | } | 1217 | } |
| 1203 | 1218 | ||
| 1204 | static void write_unicode(bytearray_t * bplist, char *val, size_t size) | 1219 | static void write_unicode(bytearray_t * bplist, char *val, size_t size) |
