summary | refs | log | tree | commit | diff | stats
path: root/ucs.c
diff options
context:
space:
mode:
Diffstat (limited to 'ucs.c')
-rw-r--r-- ucs.c 71
1 files changed, 71 insertions, 0 deletions
diff --git a/ucs.c b/ucs.c
new file mode 100644
index 0000000..cd5a9f7
--- /dev/null
+++ b/ucs.c
@@ -0,0 +1,71 @@
+/**
+ * ucs.c
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include "ucs.h"
+
+/**
+ * Replace every unit of a zero-terminated string with be16() of its bytes.
+ *
+ * Runs in place and stops at the first unit that be16() decodes as 0, which
+ * is left untouched. Returns the number of units rewritten (terminator excluded).
+ */
+u32 beucs2(u16 *ucs2) {
+    u32 count = 0;
+    u16 unit = be16((u8*)ucs2);
+    while (unit != 0) {
+        *ucs2++ = unit;
+        count++;
+        unit = be16((u8*)ucs2);
+    }
+    return count;
+}
+
+/* Count the units that precede the terminating zero unit (terminator excluded). */
+u32 ucs2len(u16 *ucs2) {
+    u32 count;
+    for (count = 0; ucs2[count] != 0; count++) {
+        /* nothing to do per unit; the loop header does the counting */
+    }
+    return count;
+}
+
+char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; /* indexed by UTF-8 sequence length in bytes; the entry is OR-ed into the first byte of the sequence by print_ucs2_as_utf8() */
+
+/**
+ * Write a zero-terminated UCS-2 string to stdout as UTF-8.
+ *
+ * Each unit is encoded on its own: values below 0x80 become one byte,
+ * values below 0x800 two bytes, and everything else three bytes.
+ * Units are read as stored (no byte swapping happens here) and no value
+ * range gets special treatment.
+ * The result of fwrite() is not checked.
+ *
+ * ucs2: string to print; reading stops at the first zero unit.
+ */
+void print_ucs2_as_utf8(u16 *ucs2) {
+    const u32 contMark = 0x80;  /* tag bit of a continuation byte: 10xxxxxx */
+    const u32 contMask = 0xBF;  /* clears bit 6 and everything above bit 7 */
+    char out[8] = {0};
+    u32 nbytes;
+    u32 pos;
+    u16 unit;
+
+    for (; *ucs2 != 0; ucs2++) {
+        unit = *ucs2;
+        /* sequence length is picked from the magnitude of the unit */
+        nbytes = (unit < 0x80) ? 1 : (unit < 0x800) ? 2 : 3;
+        out[nbytes] = 0;
+        /* fill from the last byte backwards, six payload bits at a time */
+        for (pos = nbytes - 1; pos > 0; pos--) {
+            out[pos] = ((unit | contMark) & contMask);
+            unit >>= 6;
+        }
+        /* the remaining high bits share the first byte with the length marker */
+        out[0] = (unit | firstByteMark[nbytes]);
+
+        fwrite(out, nbytes, 1, stdout);
+    }
+}
+