-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtoken.c
199 lines (188 loc) · 5.93 KB
/
token.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
// Forward declaration; not defined in this part of the file.
// get_token() calls it when the input source reaches EOF.
void revertinput(void);
// utf-8 char buffer: 5 bytes, which matches the largest output that
// utf8_encode() writes (a 4-byte sequence plus the terminating NUL).
char utf8_buf[5];
// Encode the code point `utf` as UTF-8 into `out` and NUL-terminate it.
//
// `out` must have room for 5 bytes (up to 4 encoded bytes plus the NUL).
// Returns the number of encoded bytes written, not counting the NUL.
//
// Values that are not Unicode scalar values -- the UTF-16 surrogate range
// 0xD800..0xDFFF and anything above 0x10FFFF -- have no valid UTF-8 form,
// so they are replaced by U+FFFD (bytes EF BF BD) and 3 is returned.
long utf8_encode(char *out, uint64_t utf) {
    // Surrogates would otherwise fall into the 3-byte branch and produce
    // a byte sequence that conforming UTF-8 decoders reject.
    int is_surrogate = (utf >= 0xD800 && utf <= 0xDFFF);

    if (utf <= 0x7F)
    {
        // Plain ASCII
        out[0] = (char) utf;
        out[1] = 0;
        return 1;
    }
    else if (utf <= 0x07FF)
    {
        // 2-byte unicode
        out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0);
        out[1] = (char) (((utf >> 0) & 0x3F) | 0x80);
        out[2] = 0;
        return 2;
    }
    else if (utf <= 0xFFFF && !is_surrogate)
    {
        // 3-byte unicode
        out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0);
        out[1] = (char) (((utf >>  6) & 0x3F) | 0x80);
        out[2] = (char) (((utf >>  0) & 0x3F) | 0x80);
        out[3] = 0;
        return 3;
    }
    else if (utf > 0xFFFF && utf <= 0x10FFFF)
    {
        // 4-byte unicode
        out[0] = (char) (((utf >> 18) & 0x07) | 0xF0);
        out[1] = (char) (((utf >> 12) & 0x3F) | 0x80);
        out[2] = (char) (((utf >>  6) & 0x3F) | 0x80);
        out[3] = (char) (((utf >>  0) & 0x3F) | 0x80);
        out[4] = 0;
        return 4;
    }
    else {
        // error (surrogate or beyond U+10FFFF) - use replacement character
        out[0] = (char) 0xEF;
        out[1] = (char) 0xBF;
        out[2] = (char) 0xBD;
        out[3] = 0;
        return 3;
    }
}
// Read exactly `usize - 1` hexadecimal digits from `*line_ptr`, interpret
// them as a code point, and store its UTF-8 encoding (NUL-terminated) in
// `chbuf` via utf8_encode(). `chbuf` needs room for 5 bytes.
//
// `*line_ptr` is advanced past every digit consumed. On failure it is left
// pointing at the offending character (or at the terminating NUL when the
// line ended early).
//
// Returns 1 on success, 0 when the line ends early or a non-hex character
// is found. Only [0-9a-fA-F] is accepted: no sign, whitespace or "0x"
// prefix, all of which strtol() would have silently allowed.
int get_unicode_by_hex(char *chbuf, int usize, char **line_ptr) {
    uint64_t ucode = 0;
    for (int i = 0; i < usize - 1; i++) {
        char c = **line_ptr;
        int digit;
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            digit = c - 'A' + 10;
        } else {
            return 0; // End of line reached unexpectedly, or not a hex digit
        }
        ucode = (ucode << 4) | (uint64_t) digit;
        (*line_ptr)++;
    }
    // Out-of-range values are turned into U+FFFD by utf8_encode() itself.
    long num_bytes_ret = utf8_encode(chbuf, ucode);
    return num_bytes_ret > 0 ? 1 : 0;
}
// Read exactly `usize - 1` hexadecimal digits from `*line_ptr` and store the
// resulting value as a single byte in `chbuf[0]`, followed by a NUL in
// `chbuf[1]`.
//
// `*line_ptr` is advanced past every digit consumed. On failure it is left
// pointing at the offending character (or at the terminating NUL when the
// line ended early).
//
// Returns 1 on success, 0 when the line ends early or a non-hex character
// is found. Only [0-9a-fA-F] is accepted: no sign, whitespace or "0x"
// prefix, all of which strtol() would have silently allowed.
int get_ascii(char *chbuf, int usize, char **line_ptr) {
    unsigned int acode = 0;
    for (int i = 0; i < usize - 1; i++) {
        char c = **line_ptr;
        unsigned int digit;
        if (c >= '0' && c <= '9') {
            digit = (unsigned int) (c - '0');
        } else if (c >= 'a' && c <= 'f') {
            digit = (unsigned int) (c - 'a') + 10u;
        } else if (c >= 'A' && c <= 'F') {
            digit = (unsigned int) (c - 'A') + 10u;
        } else {
            return 0; // End of line reached unexpectedly, or not a hex digit
        }
        acode = (acode << 4) | digit;
        (*line_ptr)++;
    }
    chbuf[0] = (char) acode;
    chbuf[1] = 0;
    return 1;
}
// Hand out the next input character from the current line.
//
// While the line held in `line_buf` still has unread characters, the one at
// `line_ptr` is returned and the pointer moves forward. Once the line is
// used up, a fresh line of at most 255 characters is read from `ifp`; in
// live REPL mode a prompt is written to `ofp` first (the "continue" prompt
// while inside a string or a definition, the normal prompt otherwise).
//
// Returns EOF when no further line can be read.
char get_char() {
    static char *prompt_fresh = "🚀dclang=> ";
    static char *prompt_more = "🔗...=> ";

    // Fast path: the current line is not exhausted yet.
    if (*line_ptr != '\0') {
        return *line_ptr++;
    }

    if (live_repl) {
        // Interactive session: tell the user which state we are in.
        char *prompt = prompt_fresh;
        if (in_string || def_mode) {
            prompt = prompt_more;
        }
        fprintf(ofp, "%s", prompt);
        fflush(ofp);
    }

    if (!fgets(line_buf, 256, ifp)) {
        return EOF; // CTRL+D, or the input file ran out
    }
    line_ptr = line_buf;

    if (*line_ptr == '\0') {
        return EOF;
    }
    return *line_ptr++;
}
// Read a string literal from the input, starting just after the opening
// double quote and ending at the closing one.
//
// Characters are appended to `memory_pool` starting at `unused_mem_idx`.
// Backslash escapes are translated: \b \t \n \r, \xHH (one byte),
// \uHHHH and \UHHHHHHHH (code point, stored as UTF-8); any other escaped
// character is taken literally.
//
// On success the string is NUL-terminated, `unused_mem_idx` is advanced past
// it (rounded up to a multiple of 16), the MIN_STR/MAX_STR range is widened
// to include it, and its address is either compiled as an OP_PUSH (in
// definition mode) or pushed on the stack.
//
// On an illegal escape a message is printed and the function returns without
// committing the partial string. EOF inside a string terminates the process.
void stringfunc() {
    char ch, escape_ch, chbuf[5];
    int stat = -1;
    char *scratch = &memory_pool[unused_mem_idx];
    char *scratch_start = scratch;
    in_string = 1;
    // Get the first character
    if ((ch = get_char()) == EOF) exit(0);
    while (ch != '"') {
        if (ch == '\\') {
            // Handle escape sequences
            if ((escape_ch = get_char()) == EOF) exit(0);
            switch (escape_ch) {
                case 'b': chbuf[0] = 8; break;   // Backspace
                case 't': chbuf[0] = 9; break;   // Tab
                case 'n': chbuf[0] = 10; break;  // Newline
                case 'r': chbuf[0] = 13; break;  // Carriage return
                // The hex readers fill and terminate chbuf themselves, so
                // they skip the single-character termination below.
                case 'x': stat = get_ascii(chbuf, 3, &line_ptr); goto check_valid;
                case 'u': stat = get_unicode_by_hex(chbuf, 5, &line_ptr); goto check_valid;
                case 'U': stat = get_unicode_by_hex(chbuf, 9, &line_ptr); goto check_valid;
                default: chbuf[0] = escape_ch; break; // Literal char
            }
            chbuf[1] = 0;
        } else {
            // Regular character
            chbuf[0] = ch;
            chbuf[1] = 0;
        }
    check_valid:
        if (stat == 0) {
            printf("Illegal escape sequence in string.\n");
            // We are abandoning the string: clear the flag so get_char()
            // does not keep showing the continuation prompt afterwards.
            in_string = 0;
            return;
        }
        scratch = mempcpy(scratch, chbuf, strlen(chbuf));
        if ((ch = get_char()) == EOF) exit(0);
    }
    *scratch = '\0'; // Null-terminate string
    int chr_cnt = (scratch - scratch_start) + 1;
    // Reserve the bytes just written, keeping the index 16-byte aligned
    unused_mem_idx = (unused_mem_idx + chr_cnt + 0x0f) & ~0x0f;
    // Register string memory range
    DCLANG_PTR string_dest_ptr = (DCLANG_PTR) scratch_start;
    DCLANG_PTR buflen = (DCLANG_PTR) chr_cnt;
    MIN_STR = (MIN_STR == 0 || string_dest_ptr < MIN_STR) ? string_dest_ptr : MIN_STR;
    MAX_STR = (MAX_STR == 0 || string_dest_ptr + buflen > MAX_STR) ? string_dest_ptr + buflen : MAX_STR;
    // Handle stack or program storage
    if (def_mode) {
        prog[iptr].opcode = OP_PUSH;
        prog[iptr++].param = string_dest_ptr;
    } else {
        push(string_dest_ptr);
    }
    in_string = 0;
}
// Helpers for `get_token()`
// Append `ch` to the token buffer `buf`.
// EOF markers are ignored, and so is anything that would not leave one
// free slot at the end of the IBUFSIZE-sized buffer.
void add_to_buf(char ch) {
    if (ch == EOF) {
        return;
    }
    if (bufused >= IBUFSIZE - 1) {
        return; // buffer full: drop the character
    }
    buf[bufused] = ch;
    bufused++;
}
// Terminate the token collected in `buf` and hand back a copy of it made
// with dclang_strdup(). `bufused` is bumped past the terminator.
char *buf2str() {
    buf[bufused] = '\0';
    bufused += 1;
    return dclang_strdup(buf);
}
// End helpers for `get_token()`
// Return the next whitespace-delimited token from the input.
//
// Leading whitespace is skipped. A '#' starts a comment that runs to the end
// of the line. A '"' starts a string literal, which is consumed by
// stringfunc() and does not itself produce a token.
//
// Returns a copy of the token (made by buf2str()), or the literal string
// "EOF" after calling revertinput() when the input is exhausted before any
// token character is seen. A token that is cut off by EOF instead of
// whitespace is still returned; the EOF is then reported by the next call.
char *get_token() {
    DCLANG_LONG ch;
    bufused = 0;
    // Skip leading spaces and handle comments
    while ((ch = get_char()) != EOF) {
        // Cast first: passing a negative char value (e.g. a UTF-8 byte on a
        // signed-char platform) to isspace() is undefined behaviour.
        if (isspace((unsigned char) ch)) continue;
        switch (ch) {
            case '#': // Comment detected, skip to end of line
                while ((ch = get_char()) != EOF && ch != '\n');
                continue;
            case '"': // String detected, handle it separately
                stringfunc();
                continue;
            default:
                add_to_buf(ch);
                goto read_token;
        }
    }
    // Handle EOF case
    revertinput();
    return "EOF";
read_token:
    // Read remaining characters until whitespace or EOF
    while ((ch = get_char()) != EOF) {
        if (isspace((unsigned char) ch)) {
            return buf2str();
        }
        add_to_buf(ch);
    }
    // EOF right after token characters (e.g. a file whose last line has no
    // trailing newline): deliver the token instead of dropping it.
    if (bufused > 0) {
        return buf2str();
    }
    // Handle EOF case at the end
    revertinput();
    return "EOF";
}