-
Notifications
You must be signed in to change notification settings - Fork 122
String matching module
Wang Renxin edited this page May 31, 2022
·
11 revisions
This is an implementation of a string matching module.
Add implementation functions. A strict matching algorithm as follow:
static int _find(struct mb_interpreter_t* s, void** l) {
int result = MB_FUNC_OK;
char* haystack = 0;
char* needle = 0;
int hlen = 0, nlen = 0, found = 0;
int i = 0, j = 0;
mb_value_t arg;
mb_assert(s && l);
mb_check(mb_attempt_open_bracket(s, l));
mb_check(mb_pop_string(s, l, &haystack));
mb_check(mb_pop_string(s, l, &needle));
if(mb_has_arg(s, l)) {
mb_check(mb_pop_int(s, l, &i));
}
mb_check(mb_attempt_close_bracket(s, l));
found = -1;
hlen = (int)strlen(haystack);
nlen = (int)strlen(needle);
for( ; i < hlen; i++) {
for(j = 0; j < nlen; j++) {
if(haystack[i + j] != needle[j])
break;
if(j == nlen - 1) {
found = i;
goto _exit;
}
}
}
_exit:
arg.type = MB_DT_INT;
arg.value.integer = found;
mb_check(mb_push_int(s, l, arg.value.integer));
return result;
}
And a wildcard matching algorithm:
static int _wildcard_find(struct mb_interpreter_t* s, void** l) {
int result = MB_FUNC_OK;
char* tame_text = 0;
char* wild_text = 0;
char* tame_bookmark = 0;
char* wild_bookmark = 0;
mb_value_t arg;
mb_assert(s && l);
mb_check(mb_attempt_open_bracket(s, l));
mb_check(mb_pop_string(s, l, &tame_text));
mb_check(mb_pop_string(s, l, &wild_text));
mb_check(mb_attempt_close_bracket(s, l));
arg.type = MB_DT_INT;
do {
if(*wild_text == '*') {
while(*(++wild_text) == '*') {
}
if(!*wild_text) {
arg.value.integer = true;
goto _exit;
}
if(*wild_text != '?') {
while(*tame_text != *wild_text) {
if(!(*(++tame_text))) {
arg.value.integer = false;
goto _exit;
}
}
}
wild_bookmark = wild_text;
tame_bookmark = tame_text;
} else if(*tame_text != *wild_text && *wild_text != '?') {
if(wild_bookmark) {
if(wild_text != wild_bookmark) {
wild_text = wild_bookmark;
if(*tame_text != *wild_text) {
tame_text = ++tame_bookmark;
continue;
} else {
wild_text++;
}
}
if(*tame_text) {
tame_text++;
continue;
}
}
arg.value.integer = false;
goto _exit;
}
tame_text++;
wild_text++;
if(!*tame_text) {
while(*wild_text == '*')
wild_text++;
if(!*wild_text) {
arg.value.integer = true;
goto _exit;
}
arg.value.integer = false;
goto _exit;
}
} while(1);
_exit:
mb_check(mb_push_int(s, l, arg.value.integer));
return result;
}
Register them:
mb_register_func(bas, "FIND", _find);
mb_register_func(bas, "WILDCARD_FIND", _wildcard_find);
The FIND
function accepts arguments as "(text, pattern, [begin index])". The WILDCARD_FIND
function accepts "(text, pattern)", and the pattern allows both ?
for single character placeholder and *
for none or multiple character placeholder. See the BASIC usage as follow:
print find("aabbccddaabb", "aabb"); ' Search from the beginning, result 0
print find("aabbccddaabb", "aabb", 1); ' Search from index 1, result 8
print find("aabbccddaabb", "bbaa"); ' Result -1
print wildcard_find("aabbccddaabb", "?a?b?c*"); ' Result 1
print wildcard_find("aabbccddaabb", "a*b*a*b"); ' Result 1
print wildcard_find("aabbccddaabb", "abcd"); ' Result 0
References
- Principles
- Coding
- Data types
- Standalone shell
- Integration
- Customization
- More scripting API
- FAQ