diff options
Diffstat (limited to 'ucs.c')
-rw-r--r-- | ucs.c | 71 |
1 files changed, 71 insertions, 0 deletions
@@ -0,0 +1,71 @@ +/** + * ucs.c + */ + +#include <stdio.h> +#include <stddef.h> +#include "ucs.h" + +u32 beucs2(u16 *ucs2) { + u16 ch; + u16 *ptr; + u32 len; + + ptr = ucs2; + len = 0; + while ((ch = be16((u8*)ptr)) != 0) { + *ptr = ch; + ptr++; + len++; + } + + return len; +} + +u32 ucs2len(u16 *ucs2) { + u32 len = 0; + while (ucs2[len] != 0) { + len++; + } + + return len; +} + +char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; + +/* Convert UCS-2 string to UTF-8 */ +void print_ucs2_as_utf8(u16 *ucs2) { + const u32 byteMask = 0xBF; + const u32 byteMark = 0x80; + u32 blen = 0; + u32 i = 0; + u16 *ptr_ucs; + u16 ch; + char utf8[8]= {0,0,0,0,0,0,0,0}; + + ptr_ucs = ucs2; + while (*ptr_ucs != 0) { + ch = *ptr_ucs; + + if (ch < 0x80) { + blen = 1; + } else if (ch < 0x800) { + blen = 2; + } else { + blen = 3; + } + + i = blen; + utf8[i--] = 0; + switch (blen) { + case 3: utf8[i--] = ((ch | byteMark) & byteMask); ch >>= 6; + case 2: utf8[i--] = ((ch | byteMark) & byteMask); ch >>= 6; + case 1: utf8[i--] = (ch | firstByteMark[blen]); + } + + fwrite(utf8, blen, 1, stdout); + + ptr_ucs++; + } +} + |