From 9c1ded3b0ae8e540177ee0c0baa1f9c8fcf91989 Mon Sep 17 00:00:00 2001 From: Martin Szulecki Date: Wed, 18 Mar 2009 20:52:11 +0100 Subject: Initial commit of sources --- ucs.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 ucs.c (limited to 'ucs.c') diff --git a/ucs.c b/ucs.c new file mode 100644 index 0000000..cd5a9f7 --- /dev/null +++ b/ucs.c @@ -0,0 +1,71 @@ +/** + * ucs.c + */ + +#include +#include +#include "ucs.h" + +u32 beucs2(u16 *ucs2) { + u16 ch; + u16 *ptr; + u32 len; + + ptr = ucs2; + len = 0; + while ((ch = be16((u8*)ptr)) != 0) { + *ptr = ch; + ptr++; + len++; + } + + return len; +} + +u32 ucs2len(u16 *ucs2) { + u32 len = 0; + while (ucs2[len] != 0) { + len++; + } + + return len; +} + +char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; + +/* Convert UCS-2 string to UTF-8 */ +void print_ucs2_as_utf8(u16 *ucs2) { + const u32 byteMask = 0xBF; + const u32 byteMark = 0x80; + u32 blen = 0; + u32 i = 0; + u16 *ptr_ucs; + u16 ch; + char utf8[8]= {0,0,0,0,0,0,0,0}; + + ptr_ucs = ucs2; + while (*ptr_ucs != 0) { + ch = *ptr_ucs; + + if (ch < 0x80) { + blen = 1; + } else if (ch < 0x800) { + blen = 2; + } else { + blen = 3; + } + + i = blen; + utf8[i--] = 0; + switch (blen) { + case 3: utf8[i--] = ((ch | byteMark) & byteMask); ch >>= 6; + case 2: utf8[i--] = ((ch | byteMark) & byteMask); ch >>= 6; + case 1: utf8[i--] = (ch | firstByteMark[blen]); + } + + fwrite(utf8, blen, 1, stdout); + + ptr_ucs++; + } +} + -- cgit v1.1-32-gdbae