summaryrefslogtreecommitdiffstats
path: root/src/bplist.c
diff options
context:
space:
mode:
author: Nikias Bassen, 2026-01-21 18:22:13 +0100
committer: Nikias Bassen, 2026-01-21 18:22:13 +0100
commit: 80c2fe807308475d183ae62cc05766f3caee0463 (patch)
tree: 402447907966a436f6e1f490d842f66b26ec5d91 /src/bplist.c
parent: f06c4c6b6cf29c9e53637731fedd86a6e99e9882 (diff)
download: libplist-80c2fe807308475d183ae62cc05766f3caee0463.tar.gz
          libplist-80c2fe807308475d183ae62cc05766f3caee0463.tar.bz2
bplist: Fix UTF-8 to UTF-16 decoding and enforce strict validation
- Treat input as unsigned bytes
- Correct UTF-8 bit decoding for 2/3/4-byte sequences
- Add overlong, surrogate, and range checks
- Enforce lead/continuation byte constraints

This addresses issue #283. Credit to @hgarrereyn for reporting.
Diffstat (limited to 'src/bplist.c')
-rw-r--r-- src/bplist.c 123
1 file changed, 69 insertions, 54 deletions
diff --git a/src/bplist.c b/src/bplist.c
index 8d50f2e..559830f 100644
--- a/src/bplist.c
+++ b/src/bplist.c
@@ -1142,63 +1142,78 @@ static void write_string(bytearray_t * bplist, char *val, uint64_t size)
1142 write_raw_data(bplist, BPLIST_STRING, (uint8_t *) val, size); 1142 write_raw_data(bplist, BPLIST_STRING, (uint8_t *) val, size);
1143} 1143}
1144 1144
1145static uint16_t *plist_utf8_to_utf16be(char *unistr, size_t size, size_t *items_read, size_t *items_written) 1145static uint16_t *plist_utf8_to_utf16be(const unsigned char *unistr, size_t size, size_t *items_read, size_t *items_written)
1146{ 1146{
1147 uint16_t *outbuf; 1147 uint16_t *outbuf;
1148 size_t p = 0; 1148 size_t p = 0;
1149 size_t i = 0; 1149 size_t i = 0;
1150 1150
1151 unsigned char c0; 1151 unsigned char c0;
1152 unsigned char c1; 1152 unsigned char c1;
1153 unsigned char c2; 1153 unsigned char c2;
1154 unsigned char c3; 1154 unsigned char c3;
1155 1155
1156 uint32_t w; 1156 outbuf = (uint16_t*)malloc(((size*2)+1)*sizeof(uint16_t));
1157 1157 if (!outbuf) {
1158 outbuf = (uint16_t*)malloc(((size*2)+1)*sizeof(uint16_t)); 1158 PLIST_BIN_ERR("%s: Could not allocate %" PRIu64 " bytes\n", __func__, (uint64_t)((size*2)+1)*sizeof(uint16_t));
1159 if (!outbuf) { 1159 return NULL;
1160 PLIST_BIN_ERR("%s: Could not allocate %" PRIu64 " bytes\n", __func__, (uint64_t)((size*2)+1)*sizeof(uint16_t)); 1160 }
1161 return NULL;
1162 }
1163 1161
1164 while (i < size) { 1162 while (i < size) {
1165 c0 = unistr[i]; 1163 c0 = unistr[i];
1166 c1 = (i < size-1) ? unistr[i+1] : 0; 1164 c1 = (i+1 < size) ? unistr[i+1] : 0;
1167 c2 = (i < size-2) ? unistr[i+2] : 0; 1165 c2 = (i+2 < size) ? unistr[i+2] : 0;
1168 c3 = (i < size-3) ? unistr[i+3] : 0; 1166 c3 = (i+3 < size) ? unistr[i+3] : 0;
1169 if ((c0 >= 0xF0) && (i < size-3) && (c1 >= 0x80) && (c2 >= 0x80) && (c3 >= 0x80)) { 1167 if ((c0 >= 0xF0 && c0 <= 0xF4) && (i+3 < size) && ((c1 & 0xC0) == 0x80) && ((c2 & 0xC0) == 0x80) && ((c3 & 0xC0) == 0x80)) {
1170 // 4 byte sequence. Need to generate UTF-16 surrogate pair 1168 // 4 byte sequence. Need to generate UTF-16 surrogate pair
1171 w = ((((c0 & 7) << 18) + ((c1 & 0x3F) << 12) + ((c2 & 0x3F) << 6) + (c3 & 0x3F)) & 0x1FFFFF) - 0x010000; 1169 /* lead-specific second-byte constraints */
1172 outbuf[p++] = be16toh(0xD800 + (w >> 10)); 1170 if ((c0 == 0xF0 && c1 < 0x90) || /* overlong (< U+10000) */
1173 outbuf[p++] = be16toh(0xDC00 + (w & 0x3FF)); 1171 (c0 == 0xF4 && c1 > 0x8F)) /* > U+10FFFF */
1174 i+=4; 1172 {
1175 } else if ((c0 >= 0xE0) && (i < size-2) && (c1 >= 0x80) && (c2 >= 0x80)) { 1173 break;
1176 // 3 byte sequence 1174 }
1177 outbuf[p++] = be16toh(((c2 & 0x3F) + ((c1 & 3) << 6)) + (((c1 >> 2) & 15) << 8) + ((c0 & 15) << 12)); 1175 uint32_t w = ((uint32_t)(c3 & 0x3F)) | ((uint32_t)(c2 & 0x3F) << 6) | ((uint32_t)(c1 & 0x3F) << 12) | ((uint32_t)(c0 & 0x07) << 18);
1178 i+=3; 1176 if (w < 0x10000 || w > 0x10FFFF) break;
1179 } else if ((c0 >= 0xC0) && (i < size-1) && (c1 >= 0x80)) { 1177 w -= 0x10000;
1180 // 2 byte sequence 1178 outbuf[p++] = be16toh((uint16_t)(0xD800 + (w >> 10)));
1181 outbuf[p++] = be16toh(((c1 & 0x3F) + ((c0 & 3) << 6)) + (((c0 >> 2) & 7) << 8)); 1179 outbuf[p++] = be16toh((uint16_t)(0xDC00 + (w & 0x3FF)));
1182 i+=2; 1180 i+=4;
1183 } else if (c0 < 0x80) { 1181 } else if (((c0 & 0xF0) == 0xE0) && (i+2 < size) && ((c1 & 0xC0) == 0x80) && ((c2 & 0xC0) == 0x80)) {
1184 // 1 byte sequence 1182 // 3 byte sequence
1185 outbuf[p++] = be16toh(c0); 1183 if ((c0 == 0xE0 && c1 < 0xA0) || /* overlong (< U+0800) */
1186 i+=1; 1184 (c0 == 0xED && c1 > 0x9F)) /* UTF-16 surrogate range */
1187 } else { 1185 {
1188 // invalid character 1186 break;
1189 PLIST_BIN_ERR("%s: invalid utf8 sequence in string at index %zu\n", __func__, i); 1187 }
1190 break; 1188 uint32_t w = ((uint32_t)(c2 & 0x3F)) | ((uint32_t)(c1 & 0x3F) << 6) | ((uint32_t)(c0 & 0x0F) << 12);
1191 } 1189 if (w < 0x800) break;
1192 } 1190 if (w >= 0xD800 && w <= 0xDFFF) break; // invalid Unicode scalar values
1193 if (items_read) { 1191 outbuf[p++] = be16toh((uint16_t)w);
1194 *items_read = i; 1192 i+=3;
1195 } 1193 } else if ((c0 >= 0xC2 && c0 <= 0xDF) && (i+1 < size) && ((c1 & 0xC0) == 0x80)) {
1196 if (items_written) { 1194 // 2 byte sequence
1197 *items_written = p; 1195 uint32_t w = ((uint32_t)(c1 & 0x3F)) | ((uint32_t)(c0 & 0x1F) << 6);
1198 } 1196 outbuf[p++] = be16toh((uint16_t)w);
1199 outbuf[p] = 0; 1197 i+=2;
1198 } else if (c0 < 0x80) {
1199 // 1 byte sequence
1200 outbuf[p++] = be16toh((uint16_t)c0);
1201 i+=1;
1202 } else {
1203 // invalid character
1204 PLIST_BIN_ERR("%s: invalid utf8 sequence in string at index %zu\n", __func__, i);
1205 break;
1206 }
1207 }
1208 if (items_read) {
1209 *items_read = i;
1210 }
1211 if (items_written) {
1212 *items_written = p;
1213 }
1214 outbuf[p] = 0;
1200 1215
1201 return outbuf; 1216 return outbuf;
1202} 1217}
1203 1218
1204static void write_unicode(bytearray_t * bplist, char *val, size_t size) 1219static void write_unicode(bytearray_t * bplist, char *val, size_t size)