/* Source: root/ucs.c (blob cd5a9f7b60abc78c02c180f7f380bf5f78825218) */
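/**
 * ucs.c
 *
 * Helpers for 0-terminated strings of 16-bit units: in-place conversion
 * through be16(), length counting, and UTF-8 output to stdout.
 */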

#include <stdio.h>
#include <stddef.h>
#include "ucs.h"

/**
 * Rewrite a 0-terminated string of 16-bit units in place, replacing every
 * unit with the value be16() produces from that unit's bytes.
 *
 * Scanning stops at the first unit for which be16() yields 0; that unit is
 * left as it was.
 *
 * NOTE(review): be16() is defined outside this block; presumably it decodes
 * a big-endian 16-bit value -- confirm against ucs.h.
 *
 * @param ucs2  string to convert; modified in place
 * @return      number of units rewritten (the terminator is not counted)
 */
u32 beucs2(u16 *ucs2) {
	u16 *cursor = ucs2;

	for (;;) {
		u16 unit = be16((u8*)cursor);

		if (unit == 0) {
			break;
		}
		*cursor++ = unit;
	}

	/* distance walked == number of units stored */
	return (u32)(cursor - ucs2);
}

/**
 * Count the 16-bit units in a 0-terminated string.
 *
 * @param ucs2  string to measure; must end with a 0 unit
 * @return      number of units before the terminating 0
 */
u32 ucs2len(u16 *ucs2) {
	u32 count;

	/* advance until the terminator is reached; the loop body is empty */
	for (count = 0; ucs2[count] != 0; count++) {
	}

	return count;
}

/*
 * Marker bits OR-ed into the first byte of a UTF-8 sequence, indexed by the
 * sequence length in bytes. print_ucs2_as_utf8() only uses indices 1..3.
 *
 * NOTE(review): the entries from 0xC0 upward do not fit in a signed char;
 * where plain char is signed they are stored as negative values. The user
 * below assigns the OR result back into a char, which keeps the low 8 bits
 * on common platforms -- confirm before changing the element type, since
 * this symbol is not static and may be declared elsewhere.
 */
char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};

/*
 * Write a 0-terminated string of 16-bit units to stdout as UTF-8.
 *
 * Every unit is encoded on its own: values below 0x80 become one byte,
 * values below 0x800 two bytes, everything else three bytes. Each encoded
 * sequence is handed to fwrite() separately; its result is not checked.
 *
 * ucs2: string to print; read until the first 0 unit, never modified.
 */
void print_ucs2_as_utf8(u16 *ucs2) {
	const u32 contMark = 0x80;	/* sets the top bit of a trailing byte */
	const u32 contMask = 0xBF;	/* clears bit 6 and everything above bit 7 */
	char out[8] = {0,0,0,0,0,0,0,0};
	u16 *cur;

	for (cur = ucs2; *cur != 0; cur++) {
		u16 code = *cur;
		u32 nbytes;
		u32 pos;

		if (code >= 0x800) {
			nbytes = 3;
		} else if (code >= 0x80) {
			nbytes = 2;
		} else {
			nbytes = 1;
		}

		out[nbytes] = 0;

		/* Trailing bytes are filled last-to-first, 6 payload bits each. */
		for (pos = nbytes - 1; pos > 0; pos--) {
			out[pos] = ((code | contMark) & contMask);
			code >>= 6;
		}

		/* What is left of the code goes into the lead byte with its marker. */
		out[0] = (code | firstByteMark[nbytes]);

		fwrite(out, nbytes, 1, stdout);
	}
}