/* * xplist.c * XML plist implementation * * Copyright (c) 2010-2017 Nikias Bassen All Rights Reserved. * Copyright (c) 2010-2015 Martin Szulecki All Rights Reserved. * Copyright (c) 2008 Jonathan Beck All Rights Reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include #endif #ifdef HAVE_STRPTIME #define _XOPEN_SOURCE 600 #endif #include #include #include #include #include #include #include #include #include #include #include "plist.h" #include "base64.h" #include "strbuf.h" #include "time64.h" #define XPLIST_KEY "key" #define XPLIST_KEY_LEN 3 #define XPLIST_FALSE "false" #define XPLIST_FALSE_LEN 5 #define XPLIST_TRUE "true" #define XPLIST_TRUE_LEN 4 #define XPLIST_INT "integer" #define XPLIST_INT_LEN 7 #define XPLIST_REAL "real" #define XPLIST_REAL_LEN 4 #define XPLIST_DATE "date" #define XPLIST_DATE_LEN 4 #define XPLIST_DATA "data" #define XPLIST_DATA_LEN 4 #define XPLIST_STRING "string" #define XPLIST_STRING_LEN 6 #define XPLIST_ARRAY "array" #define XPLIST_ARRAY_LEN 5 #define XPLIST_DICT "dict" #define XPLIST_DICT_LEN 4 #define MAC_EPOCH 978307200 #define MAX_DATA_BYTES_PER_LINE(__i) (((76 - (__i << 3)) >> 2) * 3) static const char XML_PLIST_PROLOG[] = "\n\ \n\ \n"; static const char XML_PLIST_EPILOG[] = "\n"; #ifdef DEBUG static int plist_xml_debug = 0; #define PLIST_XML_ERR(...) if (plist_xml_debug) { fprintf(stderr, "libplist[xmlparser] ERROR: " __VA_ARGS__); } #else #define PLIST_XML_ERR(...) #endif void plist_xml_init(void) { /* init XML stuff */ #ifdef DEBUG char *env_debug = getenv("PLIST_XML_DEBUG"); if (env_debug && !strcmp(env_debug, "1")) { plist_xml_debug = 1; } #endif } void plist_xml_deinit(void) { /* deinit XML stuff */ } static size_t dtostr(char *buf, size_t bufsize, double realval) { double f = realval; double ip = 0.0; int64_t v; size_t len; size_t p; double CORR = 0.0000005; f = modf(f, &ip); v = (int64_t)ip; if (f < 0) { if (((int)((f - CORR) * -10.0f)) >= 10) { v--; f = 0; } } else { if (((int)((f + CORR) * 10.0f)) >= 10) { v++; f = 0; } } len = snprintf(buf, bufsize, "%s%"PRIi64, ((f < 0) && (ip >= 0)) ? "-" : "", v); if (len >= bufsize) { return 0; } if (f < 0) { f *= -1; } f += CORR; p = len; buf[p++] = '.'; while (p < bufsize && (p <= len+6)) { f = modf(f*10, &ip); v = (int)ip; buf[p++] = (v + 0x30); } buf[p] = '\0'; return p; } static void node_to_xml(node_t* node, bytearray_t **outbuf, uint32_t depth) { plist_data_t node_data = NULL; char isStruct = FALSE; char tagOpen = FALSE; const char *tag = NULL; size_t tag_len = 0; char *val = NULL; size_t val_len = 0; uint32_t i = 0; if (!node) return; node_data = plist_get_data(node); switch (node_data->type) { case PLIST_BOOLEAN: { if (node_data->boolval) { tag = XPLIST_TRUE; tag_len = XPLIST_TRUE_LEN; } else { tag = XPLIST_FALSE; tag_len = XPLIST_FALSE_LEN; } } break; case PLIST_UINT: tag = XPLIST_INT; tag_len = XPLIST_INT_LEN; val = (char*)malloc(64); if (node_data->length == 16) { val_len = snprintf(val, 64, "%"PRIu64, node_data->intval); } else { val_len = snprintf(val, 64, "%"PRIi64, node_data->intval); } break; case PLIST_REAL: tag = XPLIST_REAL; tag_len = XPLIST_REAL_LEN; val = (char*)malloc(64); val_len = dtostr(val, 64, node_data->realval); break; case PLIST_STRING: tag = XPLIST_STRING; tag_len = XPLIST_STRING_LEN; /* contents processed directly below */ break; case PLIST_KEY: tag = XPLIST_KEY; tag_len = XPLIST_KEY_LEN; /* contents processed directly below */ break; case PLIST_DATA: tag = XPLIST_DATA; tag_len = XPLIST_DATA_LEN; /* contents processed directly below */ break; case PLIST_ARRAY: tag = XPLIST_ARRAY; tag_len = XPLIST_ARRAY_LEN; isStruct = (node->children) ? TRUE : FALSE; break; case PLIST_DICT: tag = XPLIST_DICT; tag_len = XPLIST_DICT_LEN; isStruct = (node->children) ? TRUE : FALSE; break; case PLIST_DATE: tag = XPLIST_DATE; tag_len = XPLIST_DATE_LEN; { Time64_T timev = (Time64_T)node_data->realval + MAC_EPOCH; struct TM _btime; struct TM *btime = gmtime64_r(&timev, &_btime); if (btime) { val = (char*)malloc(24); memset(val, 0, 24); struct tm _tmcopy; copy_TM64_to_tm(btime, &_tmcopy); val_len = strftime(val, 24, "%Y-%m-%dT%H:%M:%SZ", &_tmcopy); if (val_len <= 0) { free (val); val = NULL; } } } break; case PLIST_UID: tag = XPLIST_DICT; tag_len = XPLIST_DICT_LEN; val = (char*)malloc(64); if (node_data->length == 16) { val_len = snprintf(val, 64, "%"PRIu64, node_data->intval); } else { val_len = snprintf(val, 64, "%"PRIi64, node_data->intval); } break; default: break; } for (i = 0; i < depth; i++) { str_buf_append(*outbuf, "\t", 1); } /* append tag */ str_buf_append(*outbuf, "<", 1); str_buf_append(*outbuf, tag, tag_len); if (node_data->type == PLIST_STRING || node_data->type == PLIST_KEY) { size_t j; size_t len; off_t start = 0; off_t cur = 0; str_buf_append(*outbuf, ">", 1); tagOpen = TRUE; /* make sure we convert the following predefined xml entities */ /* < = < > = > & = & */ len = node_data->length; for (j = 0; j < len; j++) { switch (node_data->strval[j]) { case '<': str_buf_append(*outbuf, node_data->strval + start, cur - start); str_buf_append(*outbuf, "<", 4); start = cur+1; break; case '>': str_buf_append(*outbuf, node_data->strval + start, cur - start); str_buf_append(*outbuf, ">", 4); start = cur+1; break; case '&': str_buf_append(*outbuf, node_data->strval + start, cur - start); str_buf_append(*outbuf, "&", 5); start = cur+1; break; default: break; } cur++; } str_buf_append(*outbuf, node_data->strval + start, cur - start); } else if (node_data->type == PLIST_DATA) { str_buf_append(*outbuf, ">", 1); tagOpen = TRUE; str_buf_append(*outbuf, "\n", 1); if (node_data->length > 0) { uint32_t j = 0; uint32_t indent = (depth > 8) ? 8 : depth; uint32_t maxread = MAX_DATA_BYTES_PER_LINE(indent); size_t count = 0; size_t amount = (node_data->length / 3 * 4) + 4 + (((node_data->length / maxread) + 1) * (indent+1)); if ((*outbuf)->len + amount > (*outbuf)->capacity) { str_buf_grow(*outbuf, amount); } while (j < node_data->length) { for (i = 0; i < indent; i++) { str_buf_append(*outbuf, "\t", 1); } count = (node_data->length-j < maxread) ? node_data->length-j : maxread; assert((*outbuf)->len + count < (*outbuf)->capacity); (*outbuf)->len += base64encode((char*)(*outbuf)->data + (*outbuf)->len, node_data->buff + j, count); str_buf_append(*outbuf, "\n", 1); j+=count; } } for (i = 0; i < depth; i++) { str_buf_append(*outbuf, "\t", 1); } } else if (node_data->type == PLIST_UID) { /* special case for UID nodes: create a DICT */ str_buf_append(*outbuf, ">", 1); tagOpen = TRUE; str_buf_append(*outbuf, "\n", 1); /* add CF$UID key */ for (i = 0; i < depth+1; i++) { str_buf_append(*outbuf, "\t", 1); } str_buf_append(*outbuf, "CF$UID", 17); str_buf_append(*outbuf, "\n", 1); /* add UID value */ for (i = 0; i < depth+1; i++) { str_buf_append(*outbuf, "\t", 1); } str_buf_append(*outbuf, "", 9); str_buf_append(*outbuf, val, val_len); str_buf_append(*outbuf, "", 10); str_buf_append(*outbuf, "\n", 1); for (i = 0; i < depth; i++) { str_buf_append(*outbuf, "\t", 1); } } else if (val) { str_buf_append(*outbuf, ">", 1); tagOpen = TRUE; str_buf_append(*outbuf, val, val_len); } else if (isStruct) { tagOpen = TRUE; str_buf_append(*outbuf, ">", 1); } else { tagOpen = FALSE; str_buf_append(*outbuf, "/>", 2); } free(val); if (isStruct) { /* add newline for structured types */ str_buf_append(*outbuf, "\n", 1); /* add child nodes */ if (node_data->type == PLIST_DICT && node->children) { assert((node->children->count % 2) == 0); } node_t *ch; for (ch = node_first_child(node); ch; ch = node_next_sibling(ch)) { node_to_xml(ch, outbuf, depth+1); } /* fix indent for structured types */ for (i = 0; i < depth; i++) { str_buf_append(*outbuf, "\t", 1); } } if (tagOpen) { /* add closing tag */ str_buf_append(*outbuf, "", 1); } str_buf_append(*outbuf, "\n", 1); return; } static void parse_date(const char *strval, struct TM *btime) { if (!btime) return; memset(btime, 0, sizeof(struct tm)); if (!strval) return; #ifdef HAVE_STRPTIME strptime((char*)strval, "%Y-%m-%dT%H:%M:%SZ", btime); #else #ifdef USE_TM64 #define PLIST_SSCANF_FORMAT "%lld-%d-%dT%d:%d:%dZ" #else #define PLIST_SSCANF_FORMAT "%d-%d-%dT%d:%d:%dZ" #endif sscanf(strval, PLIST_SSCANF_FORMAT, &btime->tm_year, &btime->tm_mon, &btime->tm_mday, &btime->tm_hour, &btime->tm_min, &btime->tm_sec); btime->tm_year-=1900; btime->tm_mon--; #endif btime->tm_isdst=0; } #define PO10i_LIMIT (INT64_MAX/10) /* based on https://stackoverflow.com/a/4143288 */ static int num_digits_i(int64_t i) { int n; int64_t po10; n=1; if (i < 0) { i = -i; n++; } po10=10; while (i>=po10) { n++; if (po10 > PO10i_LIMIT) break; po10*=10; } return n; } #define PO10u_LIMIT (UINT64_MAX/10) /* based on https://stackoverflow.com/a/4143288 */ static int num_digits_u(uint64_t i) { int n; uint64_t po10; n=1; po10=10; while (i>=po10) { n++; if (po10 > PO10u_LIMIT) break; po10*=10; } return n; } static void node_estimate_size(node_t *node, uint64_t *size, uint32_t depth) { plist_data_t data; if (!node) { return; } data = plist_get_data(node); if (node->children) { node_t *ch; for (ch = node_first_child(node); ch; ch = node_next_sibling(ch)) { node_estimate_size(ch, size, depth + 1); } switch (data->type) { case PLIST_DICT: *size += (XPLIST_DICT_LEN << 1) + 7; break; case PLIST_ARRAY: *size += (XPLIST_ARRAY_LEN << 1) + 7; break; default: break; } *size += (depth << 1); } else { uint32_t indent = (depth > 8) ? 8 : depth; switch (data->type) { case PLIST_DATA: { uint32_t req_lines = (data->length / MAX_DATA_BYTES_PER_LINE(indent)) + 1; uint32_t b64len = data->length + (data->length / 3); b64len += b64len % 4; *size += b64len; *size += (XPLIST_DATA_LEN << 1) + 5 + (indent+1) * (req_lines+1) + 1; } break; case PLIST_STRING: *size += data->length; *size += (XPLIST_STRING_LEN << 1) + 6; break; case PLIST_KEY: *size += data->length; *size += (XPLIST_KEY_LEN << 1) + 6; break; case PLIST_UINT: if (data->length == 16) { *size += num_digits_u(data->intval); } else { *size += num_digits_i((int64_t)data->intval); } *size += (XPLIST_INT_LEN << 1) + 6; break; case PLIST_REAL: *size += num_digits_i((int64_t)data->realval) + 7; *size += (XPLIST_REAL_LEN << 1) + 6; break; case PLIST_DATE: *size += 20; /* YYYY-MM-DDThh:mm:ssZ */ *size += (XPLIST_DATE_LEN << 1) + 6; break; case PLIST_BOOLEAN: *size += ((data->boolval) ? XPLIST_TRUE_LEN : XPLIST_FALSE_LEN) + 4; break; case PLIST_DICT: *size += XPLIST_DICT_LEN + 4; /* */ break; case PLIST_ARRAY: *size += XPLIST_ARRAY_LEN + 4; /* */ break; case PLIST_UID: *size += num_digits_i((int64_t)data->intval); *size += (XPLIST_DICT_LEN << 1) + 7; *size += indent + ((indent+1) << 1); *size += 18; /* CF$UID */ *size += (XPLIST_INT_LEN << 1) + 6; break; default: break; } *size += indent; } } PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length) { uint64_t size = 0; node_estimate_size(plist, &size, 0); size += sizeof(XML_PLIST_PROLOG) + sizeof(XML_PLIST_EPILOG) - 1; strbuf_t *outbuf = str_buf_new(size); str_buf_append(outbuf, XML_PLIST_PROLOG, sizeof(XML_PLIST_PROLOG)-1); node_to_xml(plist, &outbuf, 0); str_buf_append(outbuf, XML_PLIST_EPILOG, sizeof(XML_PLIST_EPILOG)); *plist_xml = outbuf->data; *length = outbuf->len - 1; outbuf->data = NULL; str_buf_free(outbuf); } struct _parse_ctx { const char *pos; const char *end; int err; }; typedef struct _parse_ctx* parse_ctx; static void parse_skip_ws(parse_ctx ctx) { while (ctx->pos < ctx->end && ((*(ctx->pos) == ' ') || (*(ctx->pos) == '\t') || (*(ctx->pos) == '\r') || (*(ctx->pos) == '\n'))) { ctx->pos++; } } static void find_char(parse_ctx ctx, char c, int skip_quotes) { while (ctx->pos < ctx->end && (*(ctx->pos) != c)) { if (skip_quotes && (c != '"') && (*(ctx->pos) == '"')) { ctx->pos++; find_char(ctx, '"', 0); if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while looking for matching double quote\n"); return; } if (*(ctx->pos) != '"') { PLIST_XML_ERR("Unmatched double quote\n"); return; } } ctx->pos++; } } static void find_str(parse_ctx ctx, const char *str, size_t len, int skip_quotes) { while (ctx->pos < (ctx->end - len)) { if (!strncmp(ctx->pos, str, len)) { break; } if (skip_quotes && (*(ctx->pos) == '"')) { ctx->pos++; find_char(ctx, '"', 0); if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while looking for matching double quote\n"); return; } if (*(ctx->pos) != '"') { PLIST_XML_ERR("Unmatched double quote\n"); return; } } ctx->pos++; } } static void find_next(parse_ctx ctx, const char *nextchars, int numchars, int skip_quotes) { int i = 0; while (ctx->pos < ctx->end) { if (skip_quotes && (*(ctx->pos) == '"')) { ctx->pos++; find_char(ctx, '"', 0); if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while looking for matching double quote\n"); return; } if (*(ctx->pos) != '"') { PLIST_XML_ERR("Unmatched double quote\n"); return; } } for (i = 0; i < numchars; i++) { if (*(ctx->pos) == nextchars[i]) { return; } } ctx->pos++; } } typedef struct { const char *begin; size_t length; int is_cdata; void *next; } text_part_t; static text_part_t* text_part_init(text_part_t* part, const char *begin, size_t length, int is_cdata) { part->begin = begin; part->length = length; part->is_cdata = is_cdata; part->next = NULL; return part; } static void text_parts_free(text_part_t *tp) { while (tp) { text_part_t *tmp = tp; tp = tp->next; free(tmp); } } static text_part_t* text_part_append(text_part_t* parts, const char *begin, size_t length, int is_cdata) { text_part_t* newpart = malloc(sizeof(text_part_t)); assert(newpart); parts->next = text_part_init(newpart, begin, length, is_cdata); return newpart; } static text_part_t* get_text_parts(parse_ctx ctx, const char* tag, size_t tag_len, int skip_ws, text_part_t *parts) { const char *p = NULL; const char *q = NULL; text_part_t *last = NULL; if (skip_ws) { parse_skip_ws(ctx); } do { p = ctx->pos; find_char(ctx, '<', 0); if (ctx->pos >= ctx->end || *ctx->pos != '<') { PLIST_XML_ERR("EOF while looking for closing tag\n"); ctx->err++; return NULL; } q = ctx->pos; ctx->pos++; if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while parsing '%s'\n", p); ctx->err++; return NULL; } if (*ctx->pos == '!') { ctx->pos++; if (ctx->pos >= ctx->end-1) { PLIST_XML_ERR("EOF while parsing err++; return NULL; } if (*ctx->pos == '-' && *(ctx->pos+1) == '-') { if (last) { last = text_part_append(last, p, q-p, 0); } else if (parts) { last = text_part_init(parts, p, q-p, 0); } ctx->pos += 2; find_str(ctx, "-->", 3, 0); if (ctx->pos > ctx->end-3 || strncmp(ctx->pos, "-->", 3) != 0) { PLIST_XML_ERR("EOF while looking for end of comment\n"); ctx->err++; return NULL; } ctx->pos += 3; } else if (*ctx->pos == '[') { ctx->pos++; if (ctx->pos >= ctx->end - 8) { PLIST_XML_ERR("EOF while parsing <[ tag\n"); ctx->err++; return NULL; } if (strncmp(ctx->pos, "CDATA[", 6) == 0) { if (q-p > 0) { if (last) { last = text_part_append(last, p, q-p, 0); } else if (parts) { last = text_part_init(parts, p, q-p, 0); } } ctx->pos+=6; p = ctx->pos; find_str(ctx, "]]>", 3, 0); if (ctx->pos > ctx->end-3 || strncmp(ctx->pos, "]]>", 3) != 0) { PLIST_XML_ERR("EOF while looking for end of CDATA block\n"); ctx->err++; return NULL; } q = ctx->pos; if (last) { last = text_part_append(last, p, q-p, 1); } else if (parts) { last = text_part_init(parts, p, q-p, 1); } ctx->pos += 3; } else { p = ctx->pos; find_next(ctx, " \r\n\t>", 5, 1); PLIST_XML_ERR("Invalid special tag <[%.*s> encountered inside <%s> tag\n", (int)(ctx->pos - p), p, tag); ctx->err++; return NULL; } } else { p = ctx->pos; find_next(ctx, " \r\n\t>", 5, 1); PLIST_XML_ERR("Invalid special tag encountered inside <%s> tag\n", (int)(ctx->pos - p), p, tag); ctx->err++; return NULL; } } else if (*ctx->pos == '/') { break; } else { p = ctx->pos; find_next(ctx, " \r\n\t>", 5, 1); PLIST_XML_ERR("Invalid tag <%.*s> encountered inside <%s> tag\n", (int)(ctx->pos - p), p, tag); ctx->err++; return NULL; } } while (1); ctx->pos++; if (ctx->pos >= ctx->end-tag_len || strncmp(ctx->pos, tag, tag_len)) { PLIST_XML_ERR("EOF or end tag mismatch\n"); ctx->err++; return NULL; } ctx->pos+=tag_len; parse_skip_ws(ctx); if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while parsing closing tag\n"); ctx->err++; return NULL; } else if (*ctx->pos != '>') { PLIST_XML_ERR("Invalid closing tag; expected '>', found '%c'\n", *ctx->pos); ctx->err++; return NULL; } ctx->pos++; if (q-p > 0) { if (last) { last = text_part_append(last, p, q-p, 0); } else if (parts) { last = text_part_init(parts, p, q-p, 0); } } return parts; } static int unescape_entities(char *str, size_t *length) { size_t i = 0; size_t len = *length; while (len > 0 && i < len-1) { if (str[i] == '&') { char *entp = str + i + 1; while (i < len && str[i] != ';') { i++; } if (i >= len) { PLIST_XML_ERR("Invalid entity sequence encountered (missing terminating ';')\n"); return -1; } if (str+i >= entp+1) { int entlen = str+i - entp; int bytelen = 1; if (!strncmp(entp, "amp", 3)) { /* the '&' is already there */ } else if (!strncmp(entp, "apos", 4)) { *(entp-1) = '\''; } else if (!strncmp(entp, "quot", 4)) { *(entp-1) = '"'; } else if (!strncmp(entp, "lt", 2)) { *(entp-1) = '<'; } else if (!strncmp(entp, "gt", 2)) { *(entp-1) = '>'; } else if (*entp == '#') { /* numerical character reference */ uint64_t val = 0; char* ep = NULL; if (entlen > 8) { PLIST_XML_ERR("Invalid numerical character reference encountered, sequence too long: &%.*s;\n", entlen, entp); return -1; } if (*(entp+1) == 'x' || *(entp+1) == 'X') { if (entlen < 3) { PLIST_XML_ERR("Invalid numerical character reference encountered, sequence too short: &%.*s;\n", entlen, entp); return -1; } val = strtoull(entp+2, &ep, 16); } else { if (entlen < 2) { PLIST_XML_ERR("Invalid numerical character reference encountered, sequence too short: &%.*s;\n", entlen, entp); return -1; } val = strtoull(entp+1, &ep, 10); } if (val == 0 || val > 0x10FFFF || ep-entp != entlen) { PLIST_XML_ERR("Invalid numerical character reference found: &%.*s;\n", entlen, entp); return -1; } /* convert to UTF8 */ if (val >= 0x10000) { /* four bytes */ *(entp-1) = (char)(0xF0 + ((val >> 18) & 0x7)); *(entp+0) = (char)(0x80 + ((val >> 12) & 0x3F)); *(entp+1) = (char)(0x80 + ((val >> 6) & 0x3F)); *(entp+2) = (char)(0x80 + (val & 0x3F)); entp+=3; bytelen = 4; } else if (val >= 0x800) { /* three bytes */ *(entp-1) = (char)(0xE0 + ((val >> 12) & 0xF)); *(entp+0) = (char)(0x80 + ((val >> 6) & 0x3F)); *(entp+1) = (char)(0x80 + (val & 0x3F)); entp+=2; bytelen = 3; } else if (val >= 0x80) { /* two bytes */ *(entp-1) = (char)(0xC0 + ((val >> 6) & 0x1F)); *(entp+0) = (char)(0x80 + (val & 0x3F)); entp++; bytelen = 2; } else { /* one byte */ *(entp-1) = (char)(val & 0x7F); } } else { PLIST_XML_ERR("Invalid entity encountered: &%.*s;\n", entlen, entp); return -1; } memmove(entp, str+i+1, len - i); i -= entlen+1 - bytelen; len -= entlen+2 - bytelen; continue; } else { PLIST_XML_ERR("Invalid empty entity sequence &;\n"); return -1; } } i++; } *length = len; return 0; } static char* text_parts_get_content(text_part_t *tp, int unesc_entities, size_t *length, int *requires_free) { char *str = NULL; size_t total_length = 0; if (!tp) { return NULL; } char *p; if (requires_free && !tp->next) { if (tp->is_cdata || !unesc_entities) { *requires_free = 0; if (length) { *length = tp->length; } return (char*)tp->begin; } } text_part_t *tmp = tp; while (tp && tp->begin) { total_length += tp->length; tp = tp->next; } str = malloc(total_length + 1); assert(str); p = str; tp = tmp; while (tp && tp->begin) { size_t len = tp->length; strncpy(p, tp->begin, len); p[len] = '\0'; if (!tp->is_cdata && unesc_entities) { if (unescape_entities(p, &len) < 0) { free(str); return NULL; } } p += len; tp = tp->next; } *p = '\0'; if (length) { *length = p - str; } if (requires_free) { *requires_free = 1; } return str; } static void node_from_xml(parse_ctx ctx, plist_t *plist) { char *tag = NULL; char *keyname = NULL; plist_t subnode = NULL; const char *p = NULL; plist_t parent = NULL; int has_content = 0; struct node_path_item { const char *type; void *prev; }; struct node_path_item* node_path = NULL; while (ctx->pos < ctx->end && !ctx->err) { parse_skip_ws(ctx); if (ctx->pos >= ctx->end) { break; } if (*ctx->pos != '<') { p = ctx->pos; find_next(ctx, " \t\r\n", 4, 0); PLIST_XML_ERR("Expected: opening tag, found: %.*s\n", (int)(ctx->pos - p), p); ctx->err++; goto err_out; } ctx->pos++; if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while parsing tag\n"); ctx->err++; goto err_out; } if (*(ctx->pos) == '?') { find_str(ctx, "?>", 2, 1); if (ctx->pos > ctx->end-2) { PLIST_XML_ERR("EOF while looking for err++; goto err_out; } if (strncmp(ctx->pos, "?>", 2)) { PLIST_XML_ERR("Couldn't find err++; goto err_out; } ctx->pos += 2; continue; } else if (*(ctx->pos) == '!') { /* comment or DTD */ if (((ctx->end - ctx->pos) > 3) && !strncmp(ctx->pos, "!--", 3)) { ctx->pos += 3; find_str(ctx,"-->", 3, 0); if (ctx->pos > ctx->end-3 || strncmp(ctx->pos, "-->", 3)) { PLIST_XML_ERR("Couldn't find end of comment\n"); ctx->err++; goto err_out; } ctx->pos+=3; } else if (((ctx->end - ctx->pos) > 8) && !strncmp(ctx->pos, "!DOCTYPE", 8)) { int embedded_dtd = 0; ctx->pos+=8; while (ctx->pos < ctx->end) { find_next(ctx, " \t\r\n[>", 6, 1); if (ctx->pos >= ctx->end) { PLIST_XML_ERR("EOF while parsing !DOCTYPE\n"); ctx->err++; goto err_out; } if (*ctx->pos == '[') { embedded_dtd = 1; break; } else if (*ctx->pos == '>') { /* end of DOCTYPE found already */ ctx->pos++; break; } else { parse_skip_ws(ctx); } } if (embedded_dtd) { find_str(ctx, "]>", 2, 1); if (ctx->pos > ctx->end-2 || strncmp(ctx->pos, "]>", 2)) { PLIST_XML_ERR("Couldn't find end of DOCTYPE\n"); ctx->err++; goto err_out; } ctx->pos += 2; } } else { p = ctx->pos; find_next(ctx, " \r\n\t>", 5, 1); PLIST_XML_ERR("Invalid or incomplete special tag <%.*s> encountered\n", (int)(ctx->pos - p), p); ctx->err++; goto err_out; } continue; } else { int is_empty = 0; int closing_tag = 0; p = ctx->pos; find_next(ctx," \r\n\t<>", 6, 0); if (ctx->pos >= ctx->end) { PLIST_XML_ERR("Unexpected EOF while parsing XML\n"); ctx->err++; goto err_out; } int taglen = ctx->pos - p; tag = malloc(taglen + 1); strncpy(tag, p, taglen); tag[taglen] = '\0'; if (*ctx->pos != '>') { find_next(ctx, "<>", 2, 1); } if (ctx->pos >= ctx->end) { PLIST_XML_ERR("Unexpected EOF while parsing XML\n"); ctx->err++; goto err_out; } if (*ctx->pos != '>') { PLIST_XML_ERR("Missing '>' for tag <%s\n", tag); ctx->err++; goto err_out; } if (*(ctx->pos-1) == '/') { int idx = ctx->pos - p - 1; if (idx < taglen) tag[idx] = '\0'; is_empty = 1; } ctx->pos++; if (!strcmp(tag, "plist")) { free(tag); tag = NULL; has_content = 0; if (!node_path && *plist) { /* we don't allow another top-level */ break; } if (is_empty) { PLIST_XML_ERR("Empty plist tag\n"); ctx->err++; goto err_out; } struct node_path_item *path_item = malloc(sizeof(struct node_path_item)); if (!path_item) { PLIST_XML_ERR("out of memory when allocating node path item\n"); ctx->err++; goto err_out; } path_item->type = "plist"; path_item->prev = node_path; node_path = path_item; continue; } else if (!strcmp(tag, "/plist")) { if (!has_content) { PLIST_XML_ERR("encountered empty plist tag\n"); ctx->err++; goto err_out; } if (!node_path) { PLIST_XML_ERR("node path is empty while trying to match closing tag with opening tag\n"); ctx->err++; goto err_out; } if (strcmp(node_path->type, tag+1) != 0) { PLIST_XML_ERR("mismatching closing tag <%s> found for opening tag <%s>\n", tag, node_path->type); ctx->err++; goto err_out; } struct node_path_item *path_item = node_path; node_path = node_path->prev; free(path_item); free(tag); tag = NULL; continue; } plist_data_t data = plist_new_plist_data(); subnode = plist_new_node(data); has_content = 1; if (!strcmp(tag, XPLIST_DICT)) { data->type = PLIST_DICT; } else if (!strcmp(tag, XPLIST_ARRAY)) { data->type = PLIST_ARRAY; } else if (!strcmp(tag, XPLIST_INT)) { if (!is_empty) { text_part_t first_part = { NULL, 0, 0, NULL }; text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part); if (!tp) { PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } if (tp->begin) { int requires_free = 0; char *str_content = text_parts_get_content(tp, 0, NULL, &requires_free); if (!str_content) { PLIST_XML_ERR("Could not get text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } char *str = str_content; int is_negative = 0; if ((str[0] == '-') || (str[0] == '+')) { if (str[0] == '-') { is_negative = 1; } str++; } data->intval = strtoull((char*)str, NULL, 0); if (is_negative || (data->intval <= INT64_MAX)) { uint64_t v = data->intval; if (is_negative) { v = -v; } data->intval = v; data->length = 8; } else { data->length = 16; } if (requires_free) { free(str_content); } } else { is_empty = 1; } text_parts_free(tp->next); } if (is_empty) { data->intval = 0; data->length = 8; } data->type = PLIST_UINT; } else if (!strcmp(tag, XPLIST_REAL)) { if (!is_empty) { text_part_t first_part = { NULL, 0, 0, NULL }; text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part); if (!tp) { PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } if (tp->begin) { int requires_free = 0; char *str_content = text_parts_get_content(tp, 0, NULL, &requires_free); if (!str_content) { PLIST_XML_ERR("Could not get text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } data->realval = atof(str_content); if (requires_free) { free(str_content); } } text_parts_free(tp->next); } data->type = PLIST_REAL; data->length = 8; } else if (!strcmp(tag, XPLIST_TRUE)) { if (!is_empty) { get_text_parts(ctx, tag, taglen, 1, NULL); } data->type = PLIST_BOOLEAN; data->boolval = 1; data->length = 1; } else if (!strcmp(tag, XPLIST_FALSE)) { if (!is_empty) { get_text_parts(ctx, tag, taglen, 1, NULL); } data->type = PLIST_BOOLEAN; data->boolval = 0; data->length = 1; } else if (!strcmp(tag, XPLIST_STRING) || !strcmp(tag, XPLIST_KEY)) { if (!is_empty) { text_part_t first_part = { NULL, 0, 0, NULL }; text_part_t *tp = get_text_parts(ctx, tag, taglen, 0, &first_part); char *str = NULL; size_t length = 0; if (!tp) { PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } str = text_parts_get_content(tp, 1, &length, NULL); text_parts_free(first_part.next); if (!str) { PLIST_XML_ERR("Could not get text content for '%s' node\n", tag); ctx->err++; goto err_out; } if (!strcmp(tag, "key") && !keyname && parent && (plist_get_node_type(parent) == PLIST_DICT)) { keyname = str; free(tag); tag = NULL; plist_free(subnode); subnode = NULL; continue; } else { data->strval = str; data->length = length; } } else { data->strval = strdup(""); data->length = 0; } data->type = PLIST_STRING; } else if (!strcmp(tag, XPLIST_DATA)) { if (!is_empty) { text_part_t first_part = { NULL, 0, 0, NULL }; text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part); if (!tp) { PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } if (tp->begin) { int requires_free = 0; char *str_content = text_parts_get_content(tp, 0, NULL, &requires_free); if (!str_content) { PLIST_XML_ERR("Could not get text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } size_t size = tp->length; if (size > 0) { data->buff = base64decode(str_content, &size); data->length = size; } if (requires_free) { free(str_content); } } text_parts_free(tp->next); } data->type = PLIST_DATA; } else if (!strcmp(tag, XPLIST_DATE)) { if (!is_empty) { text_part_t first_part = { NULL, 0, 0, NULL }; text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part); if (!tp) { PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } Time64_T timev = 0; if (tp->begin) { int requires_free = 0; size_t length = 0; char *str_content = text_parts_get_content(tp, 0, &length, &requires_free); if (!str_content) { PLIST_XML_ERR("Could not get text content for '%s' node\n", tag); text_parts_free(first_part.next); ctx->err++; goto err_out; } if ((length >= 11) && (length < 32)) { /* we need to copy here and 0-terminate because sscanf will read the entire string (whole rest of XML data) which can be huge */ char strval[32]; struct TM btime; strncpy(strval, str_content, length); strval[tp->length] = '\0'; parse_date(strval, &btime); timev = timegm64(&btime); } else { PLIST_XML_ERR("Invalid text content in date node\n"); } if (requires_free) { free(str_content); } } text_parts_free(tp->next); data->realval = (double)(timev - MAC_EPOCH); } data->length = sizeof(double); data->type = PLIST_DATE; } else if (tag[0] == '/') { closing_tag = 1; } else { PLIST_XML_ERR("Unexpected tag <%s%s> encountered\n", tag, (is_empty) ? "/" : ""); ctx->pos = ctx->end; ctx->err++; goto err_out; } if (subnode && !closing_tag) { if (!*plist) { /* first node, make this node the parent node */ *plist = subnode; if (data->type != PLIST_DICT && data->type != PLIST_ARRAY) { /* if the first node is not a structered node, we're done */ subnode = NULL; goto err_out; } parent = subnode; } else if (parent) { switch (plist_get_node_type(parent)) { case PLIST_DICT: if (!keyname) { PLIST_XML_ERR("missing key name while adding dict item\n"); ctx->err++; goto err_out; } plist_dict_set_item(parent, keyname, subnode); break; case PLIST_ARRAY: plist_array_append_item(parent, subnode); break; default: /* should not happen */ PLIST_XML_ERR("parent is not a structured node\n"); ctx->err++; goto err_out; } } if (!is_empty && (data->type == PLIST_DICT || data->type == PLIST_ARRAY)) { struct node_path_item *path_item = malloc(sizeof(struct node_path_item)); if (!path_item) { PLIST_XML_ERR("out of memory when allocating node path item\n"); ctx->err++; goto err_out; } path_item->type = (data->type == PLIST_DICT) ? XPLIST_DICT : XPLIST_ARRAY; path_item->prev = node_path; node_path = path_item; parent = subnode; } subnode = NULL; } else if (closing_tag) { if (!node_path) { PLIST_XML_ERR("node path is empty while trying to match closing tag with opening tag\n"); ctx->err++; goto err_out; } if (strcmp(node_path->type, tag+1) != 0) { PLIST_XML_ERR("unexpected %s found (for opening %s)\n", tag, node_path->type); ctx->err++; goto err_out; } struct node_path_item *path_item = node_path; node_path = node_path->prev; free(path_item); parent = ((node_t*)parent)->parent; if (!parent) { goto err_out; } } free(tag); tag = NULL; free(keyname); keyname = NULL; plist_free(subnode); subnode = NULL; } } if (node_path) { PLIST_XML_ERR("EOF encountered while was expected\n", node_path->type); ctx->err++; } err_out: free(tag); free(keyname); plist_free(subnode); /* clean up node_path if required */ while (node_path) { struct node_path_item *path_item = node_path; node_path = path_item->prev; free(path_item); } if (ctx->err) { plist_free(*plist); *plist = NULL; } } PLIST_API void plist_from_xml(const char *plist_xml, uint32_t length, plist_t * plist) { if (!plist_xml || (length == 0)) { *plist = NULL; return; } struct _parse_ctx ctx = { plist_xml, plist_xml + length, 0 }; node_from_xml(&ctx, plist); }