-
Notifications
You must be signed in to change notification settings - Fork 0
/
lib-utf8.c
209 lines (184 loc) · 4.07 KB
/
lib-utf8.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/*
* Copyright Neil Brown ©2017-2023 <[email protected]>
* May be distributed under terms of GPLv2 - see file:COPYING
*
* Filter a view on a document to convert utf-8 sequences into
* the relevant unicode characters.
*/
#include <unistd.h>
#include <stdlib.h>
#define DOC_NEXT utf8_next
#define DOC_PREV utf8_prev
#define PANE_DATA_VOID
#include "core.h"
/* Key map for this module; allocated and filled in by edlib_init(). */
static struct map *utf8_map safe;
/*
 * utf8_handle is the handler passed to pane_register() in utf8_attach().
 * NOTE(review): DEF_LOOKUP_CMD is not defined in this file; presumably it
 * makes utf8_handle dispatch keys through utf8_map - confirm in core.h.
 */
DEF_LOOKUP_CMD(utf8_handle, utf8_map);
/*
 * Fetch the character after 'mark' from the parent of 'home', combining
 * a multi-byte utf-8 sequence into a single value.
 *
 * If 'r' is the mark's own reference the mark itself is stepped with
 * doc_move(); otherwise the first byte is obtained with doc_pending()
 * and any further stepping is done on a duplicate mark which is freed
 * before returning.  'bytes' is not used.
 *
 * Returns the first value unchanged if it is WEOF or fits in 7 bits.
 * Otherwise returns the result of get_utf8() on the collected bytes,
 * or the first byte if get_utf8() reports WERR.
 */
static inline wint_t utf8_next(struct pane *home safe, struct mark *mark safe,
			       struct doc_ref *r, bool bytes)
{
	struct pane *doc = home->parent;
	struct mark *cursor = mark;
	bool moving = (r == &mark->ref);
	char seq[10];
	const char *pos = seq;
	int len = 0;
	wint_t byte;
	wint_t result;

	if (moving)
		byte = doc_move(doc, cursor, 1);
	else
		byte = doc_pending(doc, cursor, 1);
	if (byte == WEOF || (byte & 0x7f) == byte)
		return byte;

	if (!moving) {
		/* Caller's mark must stay put: walk a private copy. */
		cursor = mark_dup(cursor);
		doc_move(doc, cursor, 1);
	}

	seq[len++] = byte;
	/* Gather the continuation bytes (10xxxxxx) that follow. */
	for (;;) {
		byte = doc_following(doc, cursor);
		if (byte == WEOF)
			break;
		if ((byte & 0xc0) != 0x80)
			break;
		if (len >= (int)sizeof(seq))
			break;
		seq[len++] = byte;
		doc_next(doc, cursor);
	}

	result = get_utf8(&pos, seq + len);
	if (result == WERR)
		/* Undecodable: fall back to the first byte. */
		result = (unsigned char)seq[0];

	if (!moving)
		mark_free(cursor);
	return result;
}
/*
 * Fetch the character before 'mark' from the parent of 'home', combining
 * a multi-byte utf-8 sequence into a single value.
 *
 * If 'r' is the mark's own reference the mark itself is stepped with
 * doc_move(); otherwise the first byte is obtained with doc_pending()
 * and any further stepping is done on a duplicate mark which is freed
 * before returning.  'bytes' is not used.
 *
 * Bytes are stored from the end of 'buf' towards the start so that they
 * end up in document order for get_utf8().
 *
 * Returns the first value unchanged if it is WEOF or fits in 7 bits.
 * Otherwise returns the result of get_utf8() on the collected bytes,
 * or the earliest collected byte if get_utf8() reports WERR.
 */
static inline wint_t utf8_prev(struct pane *home safe, struct mark *mark safe,
			       struct doc_ref *r, bool bytes)
{
	int move = r == &mark->ref;
	struct pane *p = home->parent;
	wint_t ch;
	struct mark *m = mark;
	char buf[10];
	const char *b;
	int i;
	wint_t ret;

	if (move)
		ch = doc_move(p, m, -1);
	else
		ch = doc_pending(p, m, -1);
	if (ch == WEOF || (ch & 0x7f) == ch)
		return ch;
	if (!move) {
		/* Caller's mark must stay put: walk a private copy. */
		m = mark_dup(m);
		doc_move(p, m, -1);
	}
	i = sizeof(buf);
	buf[--i] = ch;
	/* Step back until a lead byte (11xxxxxx) has been stored or the
	 * buffer is full.
	 * NOTE(review): (ch & 0xc0) != 0xc0 is also true for 7-bit bytes,
	 * so with no lead byte present this keeps stepping back over plain
	 * ASCII - confirm whether that is intended.
	 */
	while ((ch & 0xc0) != 0xc0 && i > 0) {
		ch = doc_prev(p, m);
		if (ch == WEOF)
			/* Nothing more to read.  WEOF is a sentinel, not
			 * a byte, so it must not be stored in buf where
			 * it could be decoded or returned as a character.
			 */
			break;
		buf[--i] = ch;
	}
	b = buf + i;
	ret = get_utf8(&b, buf + sizeof(buf));
	if (ret == WERR)
		/* Undecodable: fall back to the earliest byte collected. */
		ret = (unsigned char)buf[i];
	if (!move)
		mark_free(m);
	return ret;
}
/*
 * Handler registered for "doc:char" in edlib_init(); it delegates
 * entirely to do_char_byte().
 * NOTE(review): do_char_byte() is not defined in this file; presumably it
 * is provided via core.h and steps with DOC_NEXT/DOC_PREV, i.e.
 * utf8_next()/utf8_prev() - confirm.
 */
DEF_CMD(utf8_char)
{
return do_char_byte(ci);
}
/*
 * Handler registered for "doc:byte" in edlib_init().  The request is
 * passed to the parent pane as "doc:char" with every numeric, mark,
 * string and x/y argument forwarded unchanged, and the parent's result
 * is returned.
 */
DEF_CMD(utf8_byte)
{
	struct pane *below = ci->home->parent;

	return call("doc:char", below,
		    ci->num, ci->mark, ci->str,
		    ci->num2, ci->mark2, ci->str2,
		    ci->x, ci->y);
}
/*
 * State for one "doc:content" request, shared between utf8_content()
 * and utf8_content_cb().
 */
struct utf8cb {
	/* Command given to the parent; utf8_content_cb() recovers the
	 * enclosing structure from it with container_of().
	 */
	struct command c;
	/* The caller's callback, which receives the assembled characters. */
	struct command *cb safe;
	/* Pane passed to 'cb' with each character. */
	struct pane *p safe;
	/* Bytes of the multi-byte sequence currently being assembled. */
	char b[5];
	/* Number of bytes currently held in 'b'. */
	short have;
	/* Length of the sequence being assembled, 0 when none is pending. */
	short expect;
	/* Most recent non-zero ci->x seen; passed on with the next
	 * character delivered to 'cb' and then cleared.
	 */
	int size;
};
/*
 * Callback handed to the parent's "doc:content" by utf8_content().
 *
 * Each call supplies one value in ci->num.  Values that fit in 7 bits
 * are passed straight on to c->cb.  A lead byte starts a new sequence
 * and continuation bytes (10xxxxxx) are accumulated until the expected
 * length is reached, at which point the sequence is decoded with
 * get_utf8() and the result passed to c->cb.  A continuation byte that
 * arrives when no sequence is pending is dropped.
 *
 * Returns the result of c->cb when a character was delivered, and 1
 * otherwise.
 */
DEF_CMD(utf8_content_cb)
{
	struct utf8cb *c = container_of(ci->comm, struct utf8cb, c);
	wint_t wc = ci->num;
	int ret = 1;

	/* Hold on to the latest non-zero ci->x until a character is
	 * actually delivered.
	 */
	if (ci->x)
		c->size = ci->x;

	if ((wc & ~0x7f) == 0) {
		/* 7bit char - easy.  Pass following string too,
		 * utf8 is expected.
		 */
		if (c->expect)
			/* Abandon any partly assembled sequence */
			c->expect = c->have = 0;
		ret = comm_call(c->cb, ci->key, c->p, wc, ci->mark, ci->str,
				ci->num2, NULL, NULL, c->size, 0);
		c->size = 0;
		return ret;
	}

	if ((wc & 0xc0) == 0x80) {
		/* Continuation char */
		if (!c->expect)
			/* Ignore it */
			return 1;
		c->b[c->have++] = wc;
		if (c->have >= c->expect) {
			const char *b = c->b;

			wc = get_utf8(&b, b + c->have);
			if (wc == WERR)
				/* Fall back to the lead byte.  It is >= 0x80,
				 * so go through unsigned char: a plain (possibly
				 * signed) char would sign-extend into a bogus
				 * value.  utf8_next()/utf8_prev() do the same.
				 */
				wc = (unsigned char)c->b[0];
			c->expect = 0;
			ret = comm_call(c->cb, ci->key, c->p,
					wc, ci->mark, ci->str,
					ci->num2, NULL, NULL, c->size, 0);
			c->size = 0;
		}
		return ret;
	}

	/* First char of multi-byte: the value determines how many bytes
	 * the sequence is expected to contain.
	 */
	c->have = 1;
	c->b[0] = wc;
	if (wc < 0xe0)
		c->expect = 2;
	else if (wc < 0xf0)
		c->expect = 3;
	else if (wc < 0xf8)
		c->expect = 4;
	else
		c->expect = 5;
	return 1;
}
DEF_CMD(utf8_content)
{
struct utf8cb c;
if (!ci->comm2 || !ci->mark)
return Enoarg;
c.c = utf8_content_cb;
c.cb = ci->comm2;
c.p = ci->focus;
c.size = 0;
c.expect = 0;
return home_call_comm(ci->home->parent, ci->key, ci->home,
&c.c, 1, ci->mark, NULL, 0, ci->mark2);
}
/*
 * Registered globally by edlib_init() under the attach names.
 * Registers a new pane on ci->focus with utf8_handle as its handler and
 * reports it to ci->comm2 as "callback:attach".  Returns Efail if the
 * pane could not be registered.
 */
DEF_CMD(utf8_attach)
{
	struct pane *filter = pane_register(ci->focus, 0, &utf8_handle.c);

	if (filter)
		return comm_call(ci->comm2, "callback:attach", filter);
	return Efail;
}
/*
 * Module initialisation: build the key map used by utf8_handle and
 * register utf8_attach globally under both of its attach names.
 */
void edlib_init(struct pane *ed safe)
{
	static const char *const attach_names[] = {
		"attach-charset-utf-8",
		"attach-utf8",
	};
	unsigned int n;

	utf8_map = key_alloc();
	key_add(utf8_map, "doc:char", &utf8_char);
	key_add(utf8_map, "doc:byte", &utf8_byte);
	key_add(utf8_map, "doc:content", &utf8_content);
	/* "doc:content-bytes" is deliberately absent: it would make no
	 * sense for this filter.
	 */

	for (n = 0; n < sizeof(attach_names) / sizeof(attach_names[0]); n++)
		call_comm("global-set-command", ed, &utf8_attach, 0, NULL,
			  attach_names[n]);
}