Skip to content

Commit ca009c7

Browse files
committed
Improve handling of some literals that normally don't occur in a BASIC program when detokenizing. Remove the special DATA/REM mode when tokenizing. This should fix the complete round trip for many 'strange' cases.
git-svn-id: https://svn.code.sf.net/p/vice-emu/code/trunk@45960 379a1393-f5fb-40a0-bcee-ef074d9b53f7
1 parent afe5a00 commit ca009c7

File tree

1 file changed

+16
-33
lines changed

1 file changed

+16
-33
lines changed

vice/src/tools/petcat/petcat.c

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2056,8 +2056,18 @@ static int p_expand(int version, int addr, int ctrls)
20562056
continue;
20572057
}
20582058

2059-
if (c == 13) {
2060-
/* return outside quotes, this can only be a control code */
2059+
/* some codes must always be converted to control codes, else they can't
2060+
be tokenized into the exact same thing again */
2061+
if ((c == 0x0d) || /* return */
2062+
(c == 0x2a) || /* literal "*" (else converts into a token) */
2063+
(c == 0x2b) || /* literal "+" (else converts into a token) */
2064+
(c == 0x2d) || /* literal "-" (else converts into a token) */
2065+
(c == 0x2f) || /* literal "/" (else converts into a token) */
2066+
(c == 0x3c) || /* literal "<" (else converts into a token) */
2067+
(c == 0x3d) || /* literal "=" (else converts into a token) */
2068+
(c == 0x3e) || /* literal ">" (else converts into a token) */
2069+
(c == 0x5e) /* literal "^" (else converts into a token) */
2070+
){
20612071
out_ctrl((int)c); /* output as control code */
20622072
} else {
20632073
_p_toascii((int)c, version, ctrls, quote); /* convert character */
@@ -2103,6 +2113,7 @@ static unsigned char* check_leading_space(int version, unsigned char* p)
21032113
return p;
21042114
}
21052115

2116+
/* this converts ASCII to BASIC */
21062117
static void p_tokenize(int version, unsigned int addr, int ctrls)
21072118
{
21082119
static char line[MAX_INLINE_LEN + 1];
@@ -2114,8 +2125,6 @@ static void p_tokenize(int version, unsigned int addr, int ctrls)
21142125
int c;
21152126
int ctmp = -1;
21162127
int kwlentmp = -1;
2117-
unsigned char rem_data_mode;
2118-
unsigned char rem_data_endchar = '\0';
21192128
unsigned int len = 0;
21202129
unsigned int match;
21212130
unsigned int match2;
@@ -2140,7 +2149,6 @@ static void p_tokenize(int version, unsigned int addr, int ctrls)
21402149
DBG(("line: %u [%s]\n", linum, line));
21412150

21422151
quote = 0;
2143-
rem_data_mode = 0;
21442152

21452153
p2 = check_leading_space(version, p2);
21462154

@@ -2154,7 +2162,7 @@ static void p_tokenize(int version, unsigned int addr, int ctrls)
21542162

21552163
match = 0;
21562164
match2 = 0;
2157-
if (quote) {
2165+
21582166
/*
21592167
* control code evaluation
21602168
* only strings that appear inside quotes are
@@ -2258,15 +2266,10 @@ static void p_tokenize(int version, unsigned int addr, int ctrls)
22582266
fprintf(stderr, "error: line %u - unknown control code: %s\n",
22592267
linum, p);
22602268
exit(-1);
2261-
}
2269+
22622270
/* DBG(("controlcode end\n")); */
2263-
} else if (rem_data_mode) {
2264-
/* if we have already encountered a REM or a DATA,
2265-
simply copy the char */
22662271

2267-
/* DO NOTHING! As we do not set "match", the if (!match) will be true,
2268-
* and this part will copy the char over to the new buffer */
2269-
} else if (isalpha((unsigned char)*p2) || strchr("+-*/^>=<", *p2)) {
2272+
} else if (!quote && (isalpha((unsigned char)*p2) || strchr("+-*/^>=<", *p2))) {
22702273
/* FE and CE prefixes are checked first */
22712274
if (version == B_7 || version == B_71 || version == B_10 || version == B_65 || version == B_SXC || version == B_SIMON) {
22722275
switch (version) {
@@ -2372,21 +2375,6 @@ static void p_tokenize(int version, unsigned int addr, int ctrls)
23722375
if ((version == B_35) || (ctmp != 0x4e)) { /* Skip prefix */
23732376
kwlentmp = (int)kwlen;
23742377
match++;
2375-
2376-
/* Check if the keyword is a REM or a DATA */
2377-
switch (ctmp) {
2378-
case TOKEN_DATA:
2379-
rem_data_mode = 1;
2380-
rem_data_endchar = ':';
2381-
break;
2382-
2383-
case TOKEN_REM:
2384-
rem_data_mode = 1;
2385-
rem_data_endchar = '\0';
2386-
break;
2387-
default:
2388-
break;
2389-
}
23902378
}
23912379
}
23922380
}
@@ -2462,11 +2450,6 @@ static void p_tokenize(int version, unsigned int addr, int ctrls)
24622450
/* convert character */
24632451
*p1++ = (unsigned char)(_a_topetscii(*p2 & 0xff, ctrls));
24642452

2465-
/* check if the REM/DATA mode has to be stopped: */
2466-
if (*p2 == rem_data_endchar) {
2467-
rem_data_mode = 0;
2468-
}
2469-
24702453
p3 = p2;
24712454
++p2;
24722455

0 commit comments

Comments
 (0)