From 7742b5c671f28cf296cc57a6138e467c1a79bd05 Mon Sep 17 00:00:00 2001 From: Thierry Arnoux Date: Fri, 24 Dec 2021 14:53:12 +0800 Subject: [PATCH 1/9] Structured Typesetting (STS) generation --- .gitignore | 2 + Makefile.am | 4 +- makefile | 11 + metamath.c | 87 ++++- mmcmdl.c | 35 +- mmcmds.c | 39 +- mmdata.c | 76 ++++ mmdata.h | 13 + mmhlpa.c | 18 + mmhlpb.c | 19 +- mmhtbl.c | 184 +++++++++ mmhtbl.h | 57 +++ mmwsts.c | 1069 +++++++++++++++++++++++++++++++++++++++++++++++++++ mmwsts.h | 38 ++ mmwtex.c | 39 +- mmwtex.h | 5 + 16 files changed, 1656 insertions(+), 40 deletions(-) create mode 100644 .gitignore create mode 100644 makefile create mode 100644 mmhtbl.c create mode 100644 mmhtbl.h create mode 100644 mmwsts.c create mode 100644 mmwsts.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..77ecc6d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +metamath diff --git a/Makefile.am b/Makefile.am index d49c5d47..c8f773e8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -15,7 +15,8 @@ noinst_HEADERS = \ mmveri.h \ mmvstr.h \ mmword.h \ - mmwtex.h + mmwtex.h \ + mmwsts.h metamath_SOURCES = \ metamath.c \ @@ -34,6 +35,7 @@ metamath_SOURCES = \ mmvstr.c \ mmword.c \ mmwtex.c \ + mmwsts.c \ $(noinst_HEADERS) dist_pkgdata_DATA = \ diff --git a/makefile b/makefile new file mode 100644 index 00000000..10bb970e --- /dev/null +++ b/makefile @@ -0,0 +1,11 @@ +objects = metamath.o mmcmdl.o mmcmds.o mmdata.o mmhlpa.o mmhlpb.o mminou.o mmmaci.o mmpars.o mmpfas.o mmunif.o mmutil.o mmveri.o mmvstr.o \ + mmword.o mmwtex.o mmhtbl.o mmwsts.o + + +metamath: $(objects) + cc -o metamath $(CFLAGS) $(objects) + +%.o: %.c + cc -c $*.c -std=gnu99 $(CFLAGS) + + diff --git a/metamath.c b/metamath.c index 77fee4a1..b8ed3728 100644 --- a/metamath.c +++ b/metamath.c @@ -604,6 +604,7 @@ mmutil.c - Miscellaneous I/O utilities (reserved for future use) mmveri.c - Proof verifier for source file mmvstr.c - BASIC-like string functions + mmwsts.c - Structured typesetting (MathML) generation mmwtex.c - LaTeX/HTML source generation mmword.c - File revision utility (for TOOLS> UPDATE) (not generally useful) */ @@ -638,6 +639,7 @@ #include "mmunif.h" #include "mmword.h" #include "mmwtex.h" +#include "mmwsts.h" #ifdef THINK_C #include "mmmaci.h" #endif @@ -2289,14 +2291,25 @@ void command(int argc, char *argv[]) htmlFlag = 1; /* If not specified, for backwards compatibility in scripts leave altHtmlFlag at current value */ + /* 7-Jul-17 - MathML/STS handling */ if (switchPos("/ HTML") != 0) { - if (switchPos("/ ALT_HTML") != 0) { - print2("?Please specify only one of / HTML and / ALT_HTML.\n"); + if (switchPos("/ ALT_HTML") != 0 || switchPos("/ STS") != 0 ) { + print2("?Please specify only one of / HTML , / ALT_HTML and / STS.\n"); continue; } altHtmlFlag = 0; - } else { - if (switchPos("/ ALT_HTML") != 0) altHtmlFlag = 1; + } else if (switchPos("/ ALT_HTML") != 0) { + if (switchPos ("/ STS") != 0) { + print2 ("?Please specify only one of / HTML , / ALT_HTML and / STS.\n"); + continue; + } + altHtmlFlag = 1; + } else if (switchPos ("/ STS") != 0) { + stsFlag = 1; + let(&stsOutput, fullArg[5]); + + /* Parse the STS rules corresponding to the expected output . */ + parsetSTSRules(stsOutput); } if (2/*error*/ == readTexDefs(0 /* 1 = check errors only */, @@ -2861,6 +2874,8 @@ void command(int argc, char *argv[]) switchPos("/ HTML") || switchPos("/ BRIEF_HTML") || switchPos("/ ALT_HTML") + /* 7-Jul-2017 added MathML/STS */ + || switchPos("/ STS") || switchPos("/ BRIEF_ALT_HTML"))) { /* Special processing for the / HTML qualifiers - for each matching statement, a .html file is opened, the statement is output, @@ -2873,6 +2888,8 @@ void command(int argc, char *argv[]) i = 5; /* # arguments with only / HTML or / ALT_HTML */ if (noVersioning) i = i + 2; if (switchPos("/ TIME")) i = i + 2; + /* 7-Jul-2017 added MathML/STS */ + if (switchPos("/ STS")) i = i + 1; if (rawArgs != i) { printLongLine(cat("?The HTML qualifiers may not be combined with", " others except / NO_VERSIONING and / TIME.\n", NULL), " ", " "); @@ -2925,6 +2942,15 @@ void command(int argc, char *argv[]) altHtmlFlag = 0; } + /* 7-Jul-2017 added MathML/STS */ + if (switchPos("/ STS")) { + stsFlag = 1; + let(&stsOutput, fullArg[5]); + + /* Parse the STS rules corresponding to the expected output. */ + parsetSTSRules(stsOutput); + } + q = 0; /* Special feature: if the match statement starts with "*", we @@ -2996,20 +3022,32 @@ void command(int argc, char *argv[]) let(&texFileName, cat(statement[showStatement].labelName, ".html", NULL)); } - print2("Creating HTML file \"%s\"...\n", texFileName); - texFilePtr = fSafeOpen(texFileName, "w", /* 17-Jul-2019 nm */ - noVersioning /*noVersioningFlag*/); - /****** old code before 17-Jul-2019 ******* - if (switchPos("/ NO_VERSIONING") == 0) { - texFilePtr = fSafeOpen(texFileName, "w", 0/@noVersioningFlag@/); - } else { - /@ 6-Jul-2008 nm Added / NO_VERSIONING @/ - /@ Don't create the backup versions ~1, ~2,... @/ - texFilePtr = fopen(texFileName, "w"); - if (!texFilePtr) print2("?Could not open the file \"%s\".\n", - texFileName); + /* 29-Sep-17 Thierry Arnoux - Post processing (for pre-rendering) */ + if(stsFlag && strlen(postProcess) != 0) { + vstring pipeCommand = ""; + print2("Creating and processing HTML file \"%s\"...\n", texFileName); + let(&pipeCommand, cat(postProcess, " > ", texFileName, NULL)); + texFilePtr = popen(pipeCommand, "w"); + if (!texFilePtr) print2("?Could not execute the command \"%s\".\n", + pipeCommand); + let(&pipeCommand, ""); + } + else { + print2("Creating HTML file \"%s\"...\n", texFileName); + texFilePtr = fSafeOpen(texFileName, "w", /* 17-Jul-2019 nm */ + noVersioning /*noVersioningFlag*/); + /****** old code before 17-Jul-2019 ******* + if (switchPos("/ NO_VERSIONING") == 0) { + texFilePtr = fSafeOpen(texFileName, "w", 0/@noVersioningFlag@/); + } else { + /@ 6-Jul-2008 nm Added / NO_VERSIONING @/ + /@ Don't create the backup versions ~1, ~2,... @/ + texFilePtr = fopen(texFileName, "w"); + if (!texFilePtr) print2("?Could not open the file \"%s\".\n", + texFileName); + } + ********* end of old code before 17-Jul-2019 *******/ } - ********* end of old code before 17-Jul-2019 *******/ if (!texFilePtr) goto htmlDone; /* Couldn't open it (err msg was provided) */ texFileOpenFlag = 1; @@ -3144,7 +3182,10 @@ void command(int argc, char *argv[]) if (!instr(1, str3, cat(" ", str1, " ", NULL))) { let(&str3, cat(str3, " ", str1, " ", NULL)); let(&str2, ""); - str2 = tokenToTex(mathToken[(statement[i].mathString)[j] + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str2 = stsToken((statement[i].mathString)[j], i); + else + str2 = tokenToTex(mathToken[(statement[i].mathString)[j] ].tokenName, i/*stmt# for error msgs*/); /* 2/9/02 Skip any tokens (such as |- in QL Explorer) that may be suppressed */ @@ -3294,7 +3335,8 @@ void command(int argc, char *argv[]) ABORT_S: /*** Close the html file ***/ printTexTrailer(1 /*texHeaderFlag*/); - fclose(texFilePtr); + if(stsFlag && strlen(postProcess) != 0) pclose(texFilePtr); + else fclose(texFilePtr); texFileOpenFlag = 0; let(&texFileName,""); @@ -7913,6 +7955,13 @@ void command(int argc, char *argv[]) continue; } + if (cmdMatches("VERIFY STS")) { + /* 12-Dec-17 - Go through all non-definition axioms, + * and check whether there is a corresponding STS scheme */ + verifySts(fullArg[2]); + continue; + } + print2("?This command has not been implemented.\n"); continue; diff --git a/mmcmdl.c b/mmcmdl.c index 6208154f..627715ee 100644 --- a/mmcmdl.c +++ b/mmcmdl.c @@ -90,7 +90,7 @@ flag processCommandLine(void) "MARKUP|ASSIGN|REPLACE|MATCH|UNIFY|LET|INITIALIZE|DELETE|IMPROVE|", /* 11-Sep-2016 nm Added EXPAND */ "MINIMIZE_WITH|EXPAND|UNDO|REDO|SAVE|DEMO|INVOKE|CLI|EXPLORE|TEX|", - "LATEX|HTML|COMMENTS|MORE|", + "LATEX|HTML|STS|COMMENTS|MORE|", "TOOLS|MIDI|$|<$>", NULL))) goto pclbad; if (cmdMatches("HELP OPEN")) { /*if (!getFullArg(2, "LOG|TEX|HTML|")) goto pclbad;*/ @@ -124,7 +124,7 @@ flag processCommandLine(void) goto pclgood; } if (cmdMatches("HELP VERIFY")) { - if (!getFullArg(2, "PROOF|MARKUP|")) + if (!getFullArg(2, "PROOF|MARKUP|STS|")) goto pclbad; goto pclgood; } @@ -570,8 +570,20 @@ flag processCommandLine(void) if (!getFullArg(i, cat( "FULL|COMMENT|TEX|OLD_TEX|HTML|ALT_HTML|TIME|BRIEF_HTML", /* 12-May-2009 sa Added MNEMONICS */ - "|BRIEF_ALT_HTML|MNEMONICS|NO_VERSIONING|", NULL))) + "|BRIEF_ALT_HTML|MNEMONICS|NO_VERSIONING|" + /* 7-Jul-2017 added MATHML/STS */ + "|STS", NULL))) goto pclbad; + if (lastArgMatches("STS")) { + i++; + if (strlen(stsOutput)) { + if (!getFullArg(i,cat("* Using which output mode <",stsOutput,">? ",NULL))) + goto pclbad; + } else { + if (!getFullArg(i,"* Using which output mode ? ")) + goto pclbad; + } + } } else { break; } @@ -1345,7 +1357,7 @@ flag processCommandLine(void) if (cmdMatches("VERIFY")) { if (!getFullArg(1, - "PROOF|MARKUP|")) + "PROOF|MARKUP|STS|")) goto pclbad; if (cmdMatches("VERIFY PROOF")) { if (sourceHasBeenRead == 0) { @@ -1405,6 +1417,21 @@ flag processCommandLine(void) goto pclgood; } + + if (cmdMatches("VERIFY STS")) { + if (statements == 0) { + print2("?No source file has been read in. Use READ first.\n"); + goto pclbad; + } + if (strlen(stsOutput)) { + if (!getFullArg(2,cat("* Using which output mode <",stsOutput,">? ",NULL))) + goto pclbad; + } else { + if (!getFullArg(2,"* Using which output mode ? ")) + goto pclbad; + } + goto pclgood; + } } if (cmdMatches("DBG")) { diff --git a/mmcmds.c b/mmcmds.c index ed7cb81d..b5167a2f 100644 --- a/mmcmds.c +++ b/mmcmds.c @@ -24,6 +24,7 @@ #include "mmpfas.h" #include "mmunif.h" /* 26-Sep-2010 nm For bracketMatchInit, minSubstLen */ /* 1-Oct-2017 nm ...and firstConst */ +#include "mmwsts.h" /* 27 Jul 2017 tar For MathML/STS */ /* 12-Nov-2018 nm */ /* Local prototypes */ @@ -342,10 +343,11 @@ void typeStatement(long showStmt, } htmlDistinctVarsCommaFlag = 1; let(&str2, ""); - str2 = tokenToTex(mathToken[nmbrTmpPtr1[k]].tokenName, showStmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str2 = stsToken(nmbrTmpPtr1[k], showStmt); + else str2 = tokenToTex(mathToken[nmbrTmpPtr1[k]].tokenName, showStmt); /* tokenToTex allocates str2; we must deallocate it */ let(&htmlDistinctVars, cat(htmlDistinctVars, str2, NULL)); - } nmbrLet(&nmbrDDList, nmbrAddElement(nmbrDDList, nmbrTmpPtr1[k])); } @@ -361,7 +363,9 @@ void typeStatement(long showStmt, } htmlDistinctVarsCommaFlag = 1; let(&str2, ""); - str2 = tokenToTex(mathToken[nmbrTmpPtr2[k]].tokenName, showStmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str2 = stsToken(nmbrTmpPtr2[k], showStmt); + else str2 = tokenToTex(mathToken[nmbrTmpPtr2[k]].tokenName, showStmt); /* tokenToTex allocates str2; we must deallocate it */ let(&htmlDistinctVars, cat(htmlDistinctVars, str2, NULL)); @@ -620,11 +624,15 @@ void typeStatement(long showStmt, } else { if (htmlFlg && texFlag) { let(&str2, ""); - str2 = tokenToTex(mathToken[nmbrTmpPtr1[k]].tokenName, showStmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str2 = stsToken(nmbrTmpPtr1[k], showStmt); + else str2 = tokenToTex(mathToken[nmbrTmpPtr1[k]].tokenName, showStmt); /* tokenToTex allocates str2; we must deallocate it */ let(&str1, cat(str1, "   ", str2, NULL)); let(&str2, ""); - str2 = tokenToTex(mathToken[nmbrTmpPtr2[k]].tokenName, showStmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str2 = stsToken(nmbrTmpPtr2[k], showStmt); + else str2 = tokenToTex(mathToken[nmbrTmpPtr2[k]].tokenName, showStmt); let(&str1, cat(str1, ",", str2, NULL)); } } @@ -1301,7 +1309,9 @@ vstring htmlDummyVars(long showStmt) /* tokenToTex allocates str1; must deallocate it first */ let(&str1, ""); /* Convert token to htmldef/althtmldef string */ - str1 = tokenToTex(mathToken[dummyVar].tokenName, + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str1 = stsToken(dummyVar, showStmt); + else str1 = tokenToTex(mathToken[dummyVar].tokenName, showStmt); let(&htmlDummyVarList, cat(htmlDummyVarList, " ", str1, NULL)); break; /* Found a match, so stop further checking */ @@ -1482,7 +1492,9 @@ vstring htmlAllowedSubst(long showStmt) if (found == 0) continue; /* All set vars have $d with this wff or class */ let(&str1, ""); - str1 = tokenToTex(mathToken[wffOrClassVar].tokenName, showStmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str1 = stsToken(wffOrClassVar, showStmt); + else str1 = tokenToTex(mathToken[wffOrClassVar].tokenName, showStmt); /* tokenToTex allocates str1; we must deallocate it eventually */ countInfo++; let(&htmlAllowedList, cat(htmlAllowedList, "   ", @@ -1491,7 +1503,9 @@ vstring htmlAllowedSubst(long showStmt) for (j = 0; j < setVars; j++) { if (setVarDVFlag[j] == 'N') { let(&str1, ""); - str1 = tokenToTex(mathToken[setVar[j]].tokenName, showStmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) str1 = stsToken(setVar[j], showStmt); + else str1 = tokenToTex(mathToken[setVar[j]].tokenName, showStmt); let(&htmlAllowedList, cat(htmlAllowedList, (first == 0) ? "," : "", str1, NULL)); if (first == 0) countInfo++; @@ -2437,9 +2451,12 @@ void typeProof(long statemNum, && (!strcmp(statement[stmt].labelName, "cmpt") || !strcmp(statement[stmt].labelName, "cmpt2"))) ) { - tmpStr1 = - tokenToTex(mathToken[(statement[stmt].mathString)[i] - ].tokenName, stmt); + /* 27 Jul 2017 tar For MathML/STS */ + if(stsFlag) tmpStr1 = + stsToken((statement[stmt].mathString)[i], stmt); + else tmpStr1 = + tokenToTex(mathToken[(statement[stmt].mathString)[i] + ].tokenName, stmt); /* 14-Jan-2016 nm */ let(&tmpStr1, cat( (altHtmlFlag ? cat("", NULL) : ""), diff --git a/mmdata.c b/mmdata.c index 4770b4e8..6df6a659 100644 --- a/mmdata.c +++ b/mmdata.c @@ -1061,6 +1061,39 @@ int nmbrEq(nmbrString *s,nmbrString *t) } +/* Compute a hash of a string */ +/* This simply computes a XOR of the first numbers */ +int nmbrHash(nmbrString *s) +{ + static long salt[] = { 4938, 48977, 6897, 7293, 2663, 7925, 2999, 12238, + 40033, 14038, 10699, 29746, 56108, 34526, 63576, 52053, 61949, 41177, 43740, 22822 + }; + long i; + long hash = 0; + i = -1; + while (i < 13 && s[i] != -1) { + hash ^= ( s[i] + salt[i] ) << i; + i++; + } + return hash; +} + + +/* Compare two sub-strings */ +/* Unlike strcmp, this returns a 1 if the strings are equal + and 0 otherwise. */ +/* Only the token is compared. The whiteSpace string is + ignored. */ +int nmbrSubEq(nmbrString *s,long sstart, nmbrString *t, long tstart, long len) +{ + long i; + if (sstart - 1 + len > nmbrLen(s) + || tstart - 1 + len > nmbrLen(t)) return 0; + for (i = 0; s[sstart-1+i] == t[tstart-1+i] && i 0;occ--) { + long ls1, i, j; + ls1 = nmbrLen(string1); + if(start_position++ >= ls1) return 0; + for (i = start_position - 1; i <= ls1 - length2; i++) { + flag found = 1; + for (j = 0; j < length2; j++) { + if (string1[i+j] != string2[start2-1+j]) { + found = 0; + break; + } + } + if (found) { + start_position = i+1; + break; + } + } + if (i == ls1 - length2 + 1) return 0; + } + return start_position; +} + /* Search for string2 in string1 starting at start_position */ long nmbrInstr(long start_position,nmbrString *string1, nmbrString *string2) @@ -1508,6 +1569,21 @@ nmbrString *nmbrAddElement(nmbrString *g, long element) } +/* Add a single number to start of a nmbrString - faster than nmbrCat */ +nmbrString *nmbrUnshiftElement(nmbrString *g, long element) +{ + long length; + nmbrString *v; + length = nmbrLen(g); + v = nmbrTempAlloc(length + 2); /* Allow for end of string */ + nmbrCpy(v+1, g); + v[0] = element; + v[length + 1] = *NULL_NMBRSTRING; /* End of string */ +/*E*/if(db9)getPoolStats(&i1,&j1_,&k1); if(db9)printf("bbg2: pool %ld stat %ld\n",poolTotalFree,i1+j1_); + return(v); +} + + /* Get the set union of two math token strings (presumably variable lists) */ nmbrString *nmbrUnion(nmbrString *m1, nmbrString *m2) diff --git a/mmdata.h b/mmdata.h index adbec398..08b68784 100644 --- a/mmdata.h +++ b/mmdata.h @@ -274,6 +274,10 @@ long nmbrLen(nmbrString *s); long nmbrAllocLen(nmbrString *s); void nmbrZapLen(nmbrString *s, long length); +/* Search for the nth occurrence of string2 in string1 */ +long nmbrInstrN(long start, long occ,nmbrString *string1, + nmbrString *string2, long start2, long length2); + /* Search for string2 in string 1 starting at start_position */ long nmbrInstr(long start, nmbrString *sin, nmbrString *s); @@ -285,6 +289,12 @@ long nmbrRevInstr(long start_position,nmbrString *string1, /* 1 if strings are equal, 0 otherwise */ int nmbrEq(nmbrString *sout,nmbrString *sin); +/* Compute a hash of a string */ +int nmbrHash(nmbrString *s); + +/* 1 if substrings are equal, 0 otherwise */ +int nmbrSubEq(nmbrString *s,long sstart, nmbrString *t, long tstart, long len); + /* Converts mString to a vstring with one space between tokens */ vstring nmbrCvtMToVString(nmbrString *s); @@ -310,6 +320,9 @@ long nmbrElementIn(long start, nmbrString *g, long element); /* Add a single number to end of a nmbrString - faster than nmbrCat */ nmbrString *nmbrAddElement(nmbrString *g, long element); +/* Add a single number to start of a nmbrString - faster than nmbrCat */ +nmbrString *nmbrUnshiftElement(nmbrString *g, long element); + /* Get the set union of two math token strings (presumably variable lists) */ nmbrString *nmbrUnion(nmbrString *m1,nmbrString *m2); diff --git a/mmhlpa.c b/mmhlpa.c index 67bdf53a..950e4df4 100644 --- a/mmhlpa.c +++ b/mmhlpa.c @@ -790,6 +790,24 @@ H("\"exthtmlhome\" is used instead of that assigned to \"htmltitle\" and"); H("\"htmlhome\" respectively."); H(""); +/* 13-Jan-2018 tar Added STS */ +printHelp = !strcmp(saveHelpCmd, "HELP STS"); +H("To create an HTML output file for a $a or $p statement, use"); +H(" SHOW STATEMENT