31
31
/*
 * Number of bytes in the UTF-8 sequence introduced by leader byte "ch".
 *
 * The upper four bits of "ch" select a two-bit field inside the constant
 * 0xe5000000, which acts as a packed lookup table; the field value plus one
 * is the sequence length:
 *   1111xxxx -> 4, 1110xxxx -> 3, 110xxxxx -> 2, everything else -> 1
 * (so plain ASCII and stray continuation bytes 10xxxxxx both yield 1).
 */
#define UTF8_SEQ_LENGTH (ch ) (((0xe5000000 >> (((ch) >> 3 ) & 0x1e )) & 3 ) + 1 )
32
32
33
33
/*
 * Append the six payload bits of a UTF-8 continuation byte to the code point
 * being accumulated in "unicode": shift the accumulator left by six bits,
 * then OR in the low six bits of "byte".
 *
 * The two statements are wrapped in do { } while (0) so the macro behaves as
 * a single statement, even as the unbraced body of an if/else/for/while.
 * Invoke it with a trailing semicolon.
 *
 * Note: "unicode" is evaluated twice; do not pass an expression with side
 * effects.
 */
#define UTF8_SHIFT_AND_MASK(unicode, byte) \
    do {                                   \
        (unicode) <<= 6;                   \
        (unicode) |= (0x3f & (byte));      \
    } while (0)
36
37
37
38
/*
 * Largest code point that strcpy8to16() encodes as a surrogate pair; decoded
 * values above this limit are written out as UTF16_REPLACEMENT_CHAR instead.
 * NOTE(review): this is two below the Unicode maximum U+10FFFF, presumably to
 * exclude the final two noncharacters -- confirm the intent.
 */
#define UNICODE_UPPER_LIMIT 0x10fffd
38
39
41
42
* length of the UTF-16 string (which may contain embedded \0's)
42
43
*/
43
44
44
/*
 * Convert a NUL-terminated UTF-8 string into a freshly malloc()ed UTF-16
 * buffer.
 *
 * The required number of UTF-16 units is obtained from strlen8to16(), a
 * buffer of exactly that many char16_t is allocated, and strcpy8to16() does
 * the conversion. No terminator slot is allocated: the result is not
 * NUL-terminated.
 *
 * s:        UTF-8 input string; NULL yields a NULL result.
 * out_len:  forwarded unchanged to strcpy8to16(); it is not written on the
 *           failure paths that return NULL from this function.
 *
 * Returns the value returned by strcpy8to16(), or NULL when "s" is NULL, the
 * byte size would overflow size_t, or the allocation fails. The caller owns
 * the buffer and releases it with free().
 */
extern char16_t *strdup8to16(const char *s, size_t *out_len) {
    char16_t *ret;
    size_t len;

    if (s == NULL) return NULL;

    len = strlen8to16(s);

    // fail if len * sizeof(char16_t) would overflow size_t
    if (len > SIZE_MAX / sizeof(char16_t)) return NULL;

    // no plus-one here. UTF-16 strings are not null terminated
    ret = (char16_t *) malloc(sizeof(char16_t) * len);

    // A NULL result for a non-empty request is an allocation failure; do not
    // let strcpy8to16() write through it. malloc(0) may legitimately return
    // NULL, so the zero-length case is still passed through as before.
    if (ret == NULL && len != 0) return NULL;

    return strcpy8to16(ret, s, out_len);
}
62
61
63
62
/* *
@@ -66,8 +65,7 @@ extern char16_t * strdup8to16 (const char* s, size_t *out_len)
66
65
* The value returned is the number of UTF-16 characters required
67
66
* to represent this string.
68
67
*/
69
- extern size_t strlen8to16 (const char * utf8Str)
70
- {
68
+ extern size_t strlen8to16 (const char * utf8Str) {
71
69
size_t len = 0 ;
72
70
int ic;
73
71
int expected = 0 ;
@@ -99,8 +97,6 @@ extern size_t strlen8to16 (const char* utf8Str)
99
97
return len;
100
98
}
101
99
102
-
103
-
104
100
/*
105
101
* Retrieve the next UTF-32 character from a UTF-8 string.
106
102
*
@@ -110,8 +106,7 @@ extern size_t strlen8to16 (const char* utf8Str)
110
106
*
111
107
* Advances "*pUtf8Ptr" to the start of the next character.
112
108
*/
113
- static inline uint32_t getUtf32FromUtf8 (const char ** pUtf8Ptr)
114
- {
109
+ static inline uint32_t getUtf32FromUtf8 (const char ** pUtf8Ptr) {
115
110
uint32_t ret;
116
111
int seq_len;
117
112
int i;
@@ -128,12 +123,12 @@ static inline uint32_t getUtf32FromUtf8(const char** pUtf8Ptr)
128
123
/* note we tolerate invalid leader 11111xxx here */
129
124
seq_len = UTF8_SEQ_LENGTH (**pUtf8Ptr);
130
125
131
- ret = (**pUtf8Ptr) & leaderMask [seq_len - 1 ];
126
+ ret = (**pUtf8Ptr) & leaderMask[seq_len - 1 ];
132
127
133
128
if (**pUtf8Ptr == ' \0 ' ) return ret;
134
129
135
130
(*pUtf8Ptr)++;
136
- for (i = 1 ; i < seq_len ; i++, (*pUtf8Ptr)++) {
131
+ for (i = 1 ; i < seq_len; i++, (*pUtf8Ptr)++) {
137
132
if ((**pUtf8Ptr) == ' \0 ' ) return UTF16_REPLACEMENT_CHAR;
138
133
if (((**pUtf8Ptr) & 0xc0 ) != 0x80 ) return UTF16_REPLACEMENT_CHAR;
139
134
@@ -143,30 +138,29 @@ static inline uint32_t getUtf32FromUtf8(const char** pUtf8Ptr)
143
138
return ret;
144
139
}
145
140
146
-
147
141
/* *
148
142
* out_len is an out parameter (which may not be null) containing the
149
143
* length of the UTF-16 string (which may contain embedded \0's)
150
144
*/
151
145
152
- extern char16_t * strcpy8to16 (char16_t *utf16Str, const char *utf8Str,
153
- size_t *out_len)
154
- {
155
- char16_t *dest = utf16Str;
146
+ extern char16_t * strcpy8to16 (char16_t * utf16Str, const char * utf8Str, size_t * out_len) {
147
+ char16_t * dest = utf16Str;
156
148
157
149
while (*utf8Str != ' \0 ' ) {
158
150
uint32_t ret;
159
151
160
152
ret = getUtf32FromUtf8 (&utf8Str);
161
153
162
154
if (ret <= 0xffff ) {
163
- *dest++ = (char16_t ) ret;
164
- } else if (ret <= UNICODE_UPPER_LIMIT) {
155
+ *dest++ = (char16_t )ret;
156
+ } else if (ret <= UNICODE_UPPER_LIMIT) {
165
157
/* Create surrogate pairs */
166
- /* See http://en.wikipedia.org/wiki/UTF-16/UCS-2#Method_for_code_points_in_Plane_1.2C_Plane_2 */
158
+ /* See
159
+ * http://en.wikipedia.org/wiki/UTF-16/UCS-2#Method_for_code_points_in_Plane_1.2C_Plane_2
160
+ */
167
161
168
162
*dest++ = 0xd800 | ((ret - 0x10000 ) >> 10 );
169
- *dest++ = 0xdc00 | ((ret - 0x10000 ) & 0x3ff );
163
+ *dest++ = 0xdc00 | ((ret - 0x10000 ) & 0x3ff );
170
164
} else {
171
165
*dest++ = UTF16_REPLACEMENT_CHAR;
172
166
}
0 commit comments