@@ -64,6 +64,25 @@ int be_re_compile(bvm *vm) {
64
64
be_raise (vm , "type_error" , NULL );
65
65
}
66
66
67
+ // Native functions be_const_func()
68
+ // Berry: `re.compilebytes(pattern:string) -> instance(bytes)`
69
+ int be_re_compilebytes (bvm * vm ) {
70
+ int32_t argc = be_top (vm ); // Get the number of arguments
71
+ if (argc >= 1 && be_isstring (vm , 1 )) {
72
+ const char * regex_str = be_tostring (vm , 1 );
73
+ int sz = re1_5_sizecode (regex_str );
74
+ if (sz < 0 ) {
75
+ be_raise (vm , "internal_error" , "error in regex" );
76
+ }
77
+
78
+ be_pushbytes (vm , NULL , sizeof (ByteProg ) + sz );
79
+ ByteProg * code = (ByteProg * ) be_tobytes (vm , -1 , NULL );
80
+ re1_5_compilecode (code , regex_str );
81
+ be_return (vm );
82
+ }
83
+ be_raise (vm , "type_error" , NULL );
84
+ }
85
+
67
86
// pushes either a list if matched, else `nil`
68
87
// return index of next offset, or -1 if not found
69
88
const char * be_re_match_search_run (bvm * vm , ByteProg * code , const char * hay , bbool is_anchored , bbool size_only ) {
@@ -99,9 +118,10 @@ const char *be_re_match_search_run(bvm *vm, ByteProg *code, const char *hay, bbo
99
118
100
119
int be_re_match_search (bvm * vm , bbool is_anchored , bbool size_only ) {
101
120
int32_t argc = be_top (vm ); // Get the number of arguments
102
- if (argc >= 2 && be_isstring (vm , 1 ) && be_isstring (vm , 2 )) {
103
- const char * regex_str = be_tostring (vm , 1 );
121
+ if (argc >= 2 && (be_isstring (vm , 1 ) || be_isbytes (vm , 1 )) && be_isstring (vm , 2 )) {
104
122
const char * hay = be_tostring (vm , 2 );
123
+ ByteProg * code = NULL ;
124
+
105
125
int32_t offset = 0 ;
106
126
if (argc >= 3 && be_isint (vm , 3 )) {
107
127
offset = be_toint (vm , 3 );
@@ -111,49 +131,64 @@ int be_re_match_search(bvm *vm, bbool is_anchored, bbool size_only) {
111
131
if (offset >= hay_len ) { be_return_nil (vm ); } // any match of empty string returns nil, this catches implicitly when hay_len == 0
112
132
hay += offset ; // shift to offset
113
133
114
- int sz = re1_5_sizecode (regex_str );
115
- if (sz < 0 ) {
116
- be_raise (vm , "internal_error" , "error in regex" );
117
- }
134
+ if (be_isstring (vm , 1 )) {
135
+ const char * regex_str = be_tostring (vm , 1 );
136
+ int sz = re1_5_sizecode (regex_str );
137
+ if (sz < 0 ) {
138
+ be_raise (vm , "internal_error" , "error in regex" );
139
+ }
118
140
119
- ByteProg * code = be_os_malloc (sizeof (ByteProg ) + sz );
120
- if (code == NULL ) {
121
- be_throw (vm , BE_MALLOC_FAIL ); /* lack of heap space */
141
+ code = be_os_malloc (sizeof (ByteProg ) + sz );
142
+ if (code == NULL ) {
143
+ be_throw (vm , BE_MALLOC_FAIL ); /* lack of heap space */
144
+ }
145
+ int ret = re1_5_compilecode (code , regex_str );
146
+ if (ret != 0 ) {
147
+ be_os_free (code );
148
+ be_raise (vm , "internal_error" , "error in regex" );
149
+ }
150
+ } else {
151
+ code = (ByteProg * ) be_tobytes (vm , 1 , NULL );
122
152
}
123
- int ret = re1_5_compilecode (code , regex_str );
124
- if (ret != 0 ) {
153
+ // do the match
154
+ be_re_match_search_run (vm , code , hay , is_anchored , size_only );
155
+ // cleanup
156
+ if (be_isstring (vm , 1 )) {
125
157
be_os_free (code );
126
- be_raise (vm , "internal_error" , "error in regex" );
127
158
}
128
- be_re_match_search_run (vm , code , hay , is_anchored , size_only );
129
- be_os_free (code );
130
159
be_return (vm );
131
160
}
132
161
be_raise (vm , "type_error" , NULL );
133
162
}
134
163
135
164
int be_re_match_search_all (bvm * vm , bbool is_anchored ) {
136
165
int32_t argc = be_top (vm ); // Get the number of arguments
137
- if (argc >= 2 && be_isstring (vm , 1 ) && be_isstring (vm , 2 )) {
138
- const char * regex_str = be_tostring (vm , 1 );
166
+ if (argc >= 2 && (be_isstring (vm , 1 ) || be_isbytes (vm , 1 )) && be_isstring (vm , 2 )) {
139
167
const char * hay = be_tostring (vm , 2 );
168
+ ByteProg * code = NULL ;
140
169
int limit = -1 ;
141
170
if (argc >= 3 ) {
142
171
limit = be_toint (vm , 3 );
143
172
}
144
- int sz = re1_5_sizecode (regex_str );
145
- if (sz < 0 ) {
146
- be_raise (vm , "internal_error" , "error in regex" );
147
- }
148
173
149
- ByteProg * code = be_os_malloc (sizeof (ByteProg ) + sz );
150
- if (code == NULL ) {
151
- be_throw (vm , BE_MALLOC_FAIL ); /* lack of heap space */
152
- }
153
- int ret = re1_5_compilecode (code , regex_str );
154
- if (ret != 0 ) {
155
- be_os_free (code );
156
- be_raise (vm , "internal_error" , "error in regex" );
174
+ if (be_isstring (vm , 1 )) {
175
+ const char * regex_str = be_tostring (vm , 1 );
176
+ int sz = re1_5_sizecode (regex_str );
177
+ if (sz < 0 ) {
178
+ be_raise (vm , "internal_error" , "error in regex" );
179
+ }
180
+
181
+ code = be_os_malloc (sizeof (ByteProg ) + sz );
182
+ if (code == NULL ) {
183
+ be_throw (vm , BE_MALLOC_FAIL ); /* lack of heap space */
184
+ }
185
+ int ret = re1_5_compilecode (code , regex_str );
186
+ if (ret != 0 ) {
187
+ be_os_free (code );
188
+ be_raise (vm , "internal_error" , "error in regex" );
189
+ }
190
+ } else {
191
+ code = (ByteProg * ) be_tobytes (vm , 1 , NULL );
157
192
}
158
193
159
194
be_newobject (vm , "list" );
@@ -165,7 +200,10 @@ int be_re_match_search_all(bvm *vm, bbool is_anchored) {
165
200
be_pop (vm , 1 );
166
201
}
167
202
be_pop (vm , 1 );
168
- be_os_free (code );
203
+ // cleanup
204
+ if (be_isstring (vm , 1 )) {
205
+ be_os_free (code );
206
+ }
169
207
be_return (vm );
170
208
}
171
209
be_raise (vm , "type_error" , NULL );
@@ -329,29 +367,36 @@ int re_pattern_split(bvm *vm) {
329
367
// Berry: `re.split(pattern:string|bytes, s:string [, split_limit:int]) -> list(string)`
330
368
int be_re_split (bvm * vm ) {
331
369
int32_t argc = be_top (vm ); // Get the number of arguments
332
- if (argc >= 2 && be_isstring (vm , 1 ) && be_isstring (vm , 2 )) {
333
- const char * regex_str = be_tostring (vm , 1 );
370
+ if (argc >= 2 && (be_isstring (vm , 1 ) || be_isbytes (vm , 1 )) && be_isstring (vm , 2 )) {
334
371
const char * hay = be_tostring (vm , 2 );
372
+ ByteProg * code = NULL ;
335
373
int split_limit = -1 ;
336
374
if (argc >= 3 ) {
337
375
split_limit = be_toint (vm , 3 );
338
376
}
339
- int sz = re1_5_sizecode (regex_str );
340
- if (sz < 0 ) {
341
- be_raise (vm , "internal_error" , "error in regex" );
342
- }
377
+ if (be_isstring (vm , 1 )) {
378
+ const char * regex_str = be_tostring (vm , 1 );
379
+ int sz = re1_5_sizecode (regex_str );
380
+ if (sz < 0 ) {
381
+ be_raise (vm , "internal_error" , "error in regex" );
382
+ }
343
383
344
- ByteProg * code = be_os_malloc (sizeof (ByteProg ) + sz );
345
- if (code == NULL ) {
346
- be_throw (vm , BE_MALLOC_FAIL ); /* lack of heap space */
384
+ code = be_os_malloc (sizeof (ByteProg ) + sz );
385
+ if (code == NULL ) {
386
+ be_throw (vm , BE_MALLOC_FAIL ); /* lack of heap space */
387
+ }
388
+ int ret = re1_5_compilecode (code , regex_str );
389
+ if (ret != 0 ) {
390
+ be_os_free (code );
391
+ be_raise (vm , "internal_error" , "error in regex" );
392
+ }
393
+ } else {
394
+ code = (ByteProg * ) be_tobytes (vm , 1 , NULL );
347
395
}
348
- int ret = re1_5_compilecode ( code , regex_str );
349
- if (ret != 0 ) {
396
+ int ret = re_pattern_split_run ( vm , code , hay , split_limit );
397
+ if (be_isstring ( vm , 1 ) ) {
350
398
be_os_free (code );
351
- be_raise (vm , "internal_error" , "error in regex" );
352
399
}
353
- ret = re_pattern_split_run (vm , code , hay , split_limit );
354
- be_os_free (code );
355
400
return ret ;
356
401
}
357
402
be_raise (vm , "type_error" , NULL );
@@ -363,6 +408,7 @@ int be_re_split(bvm *vm) {
363
408
@const_object_info_begin
364
409
module re (scope: global) {
365
410
compile, func(be_re_compile)
411
+ compilebytes, func(be_re_compilebytes)
366
412
search, func(be_re_search)
367
413
searchall, func(be_re_search_all)
368
414
match, func(be_re_match)
0 commit comments