From bdcc8f7cf380b8539273e6a54ba78258b606bade Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 14:54:29 +0200
Subject: [PATCH 1/7] Added Python backend for use with PLY

---
 docs/user_guide.rst                           |  87 ++++
 document/BNF_Converter_Python_Mode.html       | 218 ++++++++++
 source/BNFC.cabal                             |   9 +
 source/main/Main.hs                           |   3 +
 source/src/BNFC/Backend/Python.hs             | 149 +++++++
 source/src/BNFC/Backend/Python/CFtoPyAbs.hs   | 382 ++++++++++++++++
 source/src/BNFC/Backend/Python/CFtoPyLex.hs   | 189 ++++++++
 .../Backend/Python/CFtoPyPrettyPrinter.hs     | 409 ++++++++++++++++++
 source/src/BNFC/Backend/Python/CFtoPySkele.hs | 114 +++++
 source/src/BNFC/Backend/Python/PyHelpers.hs   |  80 ++++
 source/src/BNFC/Backend/Python/RegToFlex.hs   |  97 +++++
 source/src/BNFC/Options.hs                    |   7 +
 testing/src/ParameterizedTests.hs             |  12 +
 13 files changed, 1756 insertions(+)
 create mode 100644 document/BNF_Converter_Python_Mode.html
 create mode 100644 source/src/BNFC/Backend/Python.hs
 create mode 100644 source/src/BNFC/Backend/Python/CFtoPyAbs.hs
 create mode 100644 source/src/BNFC/Backend/Python/CFtoPyLex.hs
 create mode 100644 source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
 create mode 100644 source/src/BNFC/Backend/Python/CFtoPySkele.hs
 create mode 100644 source/src/BNFC/Backend/Python/PyHelpers.hs
 create mode 100644 source/src/BNFC/Backend/Python/RegToFlex.hs

diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 041c508a..8273f1d5 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -284,3 +284,90 @@ BNFC adds the grammar name as a file extension. So if the grammar file is named
 ``Calc.cf``, the lexer will be associated to the file extension ``.calc``. To
 associate other file extensions to a generated lexer, you need to modify (or
 subclass) the lexer.
+
+Python Backend
+===============
+
+The BNF Converter's Python Backend generates a Python frontend, that uses
+`PLY <https://www.dabeaz.com/ply/ply.html>`_ (Python Lex Yacc), to parse
+input into an AST (abstract syntax tree).
+
+Python 3.10 or higher is needed.
+
+Example usage: ::
+
+    bnfc --python Calc.cf
+
+
+.. list-table:: The result is a set of files:
+   :widths: 25 25
+   :header-rows: 1
+
+   * - Filename
+     - Description
+   * - bnfcPyGenCalc/Absyn.py
+     - Provides the classes for the abstract syntax.
+   * - bnfcPyGenCalc/LexTokens.py
+     - Provides PLY with the information needed to build the lexer.
+   * - bnfcPyGenCalc/ParsingDefs.py
+     - Provides PLY with the information needed to build the parser.
+   * - bnfcPyGenCalc/PrettyPrinter.py
+     - Provides printing for both the AST and the linearized tree.
+   * - genTest.py
+     - A ready-made test file that uses the generated frontend to convert input into an AST.
+   * - skele.py
+     - Provides skeleton code to deconstruct an AST, using structural pattern matching.
+
+Optionally one may with ``-m``` also create a makefile that contains the target
+"distclean" to remove the generated files.
+
+Testing the frontend
+....................
+
+It's possible to pipe input, like::
+
+    echo "(1 + 2) * 3" | python3.10 genTest.py
+
+or::
+
+    python3.10 genTest.py < file.txt
+
+and it's possible to just use an argument::
+
+    python3.10 genTest.py file.txt
+
+
+Caveats
+.......
+
+Presentation of conflicts in a grammar:
+
+  A symbol-to-unicode transformation is made for the terminals in the grammar,
+  for example from "++" to "S_43_43". This however obfuscates PLY's generated
+  information about the grammar in the "parser.out" file. Users are hence
+  encouraged to use the Haskell backend to debug grammars and identify
+  conflicts.
+
+Several entrypoints:
+
+  At the top of the ParsingDefs.py file an additional rule is added that has
+  every defined entrypoint as a possible production. This may create warnings
+  for conflicts, as it may introduce ambiguity. Therefore the added
+  parsing rule is by default removed beneath the function with the statement
+  "del p__Start", and it can be included again by commenting out the removal
+  of "p__Start".
+
+Special cases for special characters:
+
+  Using non-special characters instead of, say, parentheses when defining
+  rules may not yield the expected behaviour. With the below rule, an
+  expression such as "a1+2a" cannot be parsed, since the a's are classified
+  as reserved keywords, like "int", instead of symbols like "+"::
+
+    _. Exp1 ::= "a" Exp "a" ;
+
+Results from the parameterized tests:
+
+  While the Python backend generates working frontends for the example
+  grammars, four "failures" and six "errors" among the regression
+  tests are reported.
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
new file mode 100644
index 00000000..429b5c44
--- /dev/null
+++ b/document/BNF_Converter_Python_Mode.html
@@ -0,0 +1,218 @@
+
+
+  BNF Converter Python Mode
+
+
+
+

BNF Converter

+

Python Mode

+
+

By Björn Werner

+ +

2024

+

+ The BNF Converter's Python Backend generates a Python frontend that uses
+ PLY (Python Lex Yacc) to parse input into an AST (abstract syntax tree).
+

+

+ BNFC on Github:
+ https://github.com/BNFC/bnfc +

+

+ PLY homepage:
+ https://www.dabeaz.com/ply/ply.html +

+

+ Python 3.10 or higher is needed. +

+

Usage

+
+ bnfc --python NAME.cf
+
+

+The result is a set of files: +

+ + + + + + + + + + + + + + + + + + + + + + +
Filename:  Description:
bnfcPyGenNAME/LexTokens.py  Provides PLY with the information needed to build the lexer.
bnfcPyGenNAME/Absyn.py  Provides the classes for the abstract syntax.
bnfcPyGenNAME/ParsingDefs.py  Provides PLY with the information needed to build the parser.
bnfcPyGenNAME/PrettyPrinter.py  Provides printing for both the AST and the linearized tree.
genTest.py  A ready-made test file that uses the generated frontend to convert input into an AST.
skele.py  Provides skeleton code to deconstruct an AST, using structural pattern matching.
+ +
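+
+ The generated modules can also be used programmatically, roughly as in the
+ generated genTest.py (a sketch; "NAME" and the entrypoint 'Exp' are
+ placeholders for the grammar name and one of its entrypoints):
+
+
+ from ply.lex import lex
+ from ply.yacc import yacc
+ from bnfcPyGenNAME.LexTokens import *
+ from bnfcPyGenNAME.ParsingDefs import *
+
+ lexer = lex.lex()            # build the lexer from the LexTokens definitions
+ parser = yacc(start='Exp')   # build the parser for a chosen entrypoint
+ ast = parser.parse('1 + 2', lexer=lexer)
+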

Testing the frontend

+

+ The following example uses a frontend that is generated from a C-like grammar. +

+

+ $ python3.10 genTest.py < hello.c +

+

+ Generating LALR tables
+ Parse Successful!
+
+ [Abstract Syntax]
+ (PDefs [(DFun Type_int "main" [] [(SExp (EApp "printString" [(EString "Hello world")])), (SReturn (EInt 0))])])
+
+ [Linearized Tree]
+ int main ()
+ {
+  printString ("Hello world");
+  return 0;
+ }
+

+

+ The LALR tables are cached in a file called "parsetab.py", and PLY's
+ description of the grammar is stored in a file called "parser.out".
+

+

The Abstract Syntax Tree

+

+ The AST is built up from instances of Python classes that use the dataclass
+ decorator, such as:
+

+

+@dataclass
+class EAdd:
+ exp_1: Exp
+ exp_2: Exp
+ _ann_type: _AnnType = field(default_factory=_AnnType) +

+
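+
+ An expression such as "1 + 2" would hence, assuming an integer-literal rule
+ named EInt in the grammar (a hypothetical name), parse into an instance
+ tree like:
+
+
+ EAdd(exp_1=EInt(1), exp_2=EInt(2))
+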

+ The "_ann_type" variable is a placeholder that can be used to store useful
+ information, for example type information in order to create a
+ type-annotated AST.
+

+
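+
+ For example, an annotation can be stored with the "s" (set) method and read
+ back with the "g" (get) method; the node "exp" below is assumed to be some
+ parsed expression:
+
+
+ exp._ann_type.s('Int')    # raises if a different value is already stored
+ print(exp._ann_type.g())  # prints: Int
+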

Using the skeleton file

+

+ The skeleton file serves as a template, for example for creating an
+ interpreter. Two kinds of matchers are generated: the first with all the
+ value categories together, and a second kind where each matcher handles one
+ individual value category, as in the example below:
+

+

+def matcherExp(exp_: Exp):
+ match exp_:
+  case EAdd(exp_1, exp_2, _ann_type):
+   # Exp "+" Exp1
+   raise Exception('EAdd not implemented')
+  case ESub(exp_1, exp_2, _ann_type):
+   ... +

+

+ This can be modified, in order to return the sum of the two evaluated
+ subexpressions, into:
+

+

+ def matcherExp(exp_: Exp):
+  match exp_:
+   case EAdd(exp_1, exp_2, _ann_type):
+    # Exp "+" Exp1
+    return matcherExp(exp_1) + matcherExp(exp_2)
+   case ESub(exp_1, exp_2, _ann_type):
+    ... +

+

+ The function can now be imported and used in the generated test file + (similarly to how the pretty printer is imported and used): +

+

+ from skele import matcherExp
+ ...
+ print(matcherExp(ast)) +

+ +

Known issues

+

+ Presentation of conflicts in a grammar: +

+

+ A symbol-to-unicode transformation is made for the terminals in the grammar,
+ for example from "++" to "S_43_43". This however obfuscates PLY's generated
+ information about the grammar in the "parser.out" file. Users are hence
+ encouraged to use, say, the Haskell backend to debug their
+ grammars and identify conflicts.
+

+

+ Several entrypoints: +

+

+ At the top of the ParsingDefs.py file an additional rule is added that has
+ every defined entrypoint as a possible production. This may create warnings
+ for conflicts if it introduces ambiguity, and warnings for unused rules if
+ the "_Start" category is not used as the entrypoint. Therefore the added
+ parsing rule is by default removed beneath the function with the statement
+ "del p__Start", and it is enabled by commenting out that removal, as shown
+ below:
+

+
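+
+ For a grammar with the hypothetical entrypoints Exp and Stm, the generated
+ rule and its removal look roughly like:
+
+
+ def p__Start(p):
+     '''
+     _Start : Exp
+            | Stm
+     '''
+     p[0] = p[1]
+
+ # Comment the below line to enable the '_Start' entrypoint.
+ del p__Start
+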

+ Skeleton code for using lists as entrypoints: +

+

+ Matchers for list categories, such as [Exp], are not generated in the
+ skeleton code, as they may confuse users if the grammar uses several
+ different list categories. Users are instead encouraged to use a non-list
+ entrypoint.
+

+

+ The improper way to iterate over lists, as the value category is unknown: +

+

+  case list():
+   for ele in ast:
+    ... +

+

+ The proper way to deconstruct lists, where we know the value category: +

+

+  case RuleName(listexp_):
+   for exp in listexp_:
+    ... +

+

+ Special cases for special characters +

+

+ Using non-special characters instead of, say, parentheses when defining
+ rules may not yield the expected behaviour. With the below rule, an
+ expression such as "a1+2a" cannot be parsed, since the a's are classified
+ as reserved keywords instead of symbols.
+

+

+ _. Exp1 ::= "a" Exp "a" ; +

+

+ Using multiple separators +

+

+ Using multiple separators for the same category, such as below, generates + Python functions with overlapping names, causing runtime errors. +

+

+ separator Exp1 "," ;
+ separator Exp1 ";" ; +

\ No newline at end of file diff --git a/source/BNFC.cabal b/source/BNFC.cabal index 7300a8d2..c232b401 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -280,6 +280,15 @@ library BNFC.Backend.TreeSitter.CFtoTreeSitter BNFC.Backend.TreeSitter.RegToJSReg + -- Python backend + BNFC.Backend.Python + BNFC.Backend.Python.CFtoPyAbs + BNFC.Backend.Python.CFtoPyLex + BNFC.Backend.Python.CFtoPyPrettyPrinter + BNFC.Backend.Python.RegToFlex + BNFC.Backend.Python.PyHelpers + BNFC.Backend.Python.CFtoPySkele + ----- Testing -------------------------------------------------------------- test-suite unit-tests diff --git a/source/main/Main.hs b/source/main/Main.hs index 754bf268..6377611f 100644 --- a/source/main/Main.hs +++ b/source/main/Main.hs @@ -26,6 +26,7 @@ import BNFC.Backend.Latex import BNFC.Backend.OCaml import BNFC.Backend.Pygments import BNFC.Backend.TreeSitter +import BNFC.Backend.Python import BNFC.CF (CF) import BNFC.GetCF import BNFC.Options hiding (make, Backend) @@ -83,3 +84,5 @@ maketarget = \case TargetPygments -> makePygments TargetCheck -> error "impossible" TargetTreeSitter -> makeTreeSitter + TargetPython -> makePython + \ No newline at end of file diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs new file mode 100644 index 00000000..e0a3da35 --- /dev/null +++ b/source/src/BNFC/Backend/Python.hs @@ -0,0 +1,149 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} + +{- + BNF Converter: Python main file + Copyright (C) 2004 Author: Bjorn Werner +-} + +module BNFC.Backend.Python (makePython) where + +import Prelude hiding ((<>)) +import System.FilePath (()) +import BNFC.CF (CF, firstEntry) +import BNFC.Options (SharedOptions, optMake, lang) +import BNFC.Backend.Base (MkFiles, mkfile) +import BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) +import BNFC.Backend.Python.CFtoPyLex (cf2PyLex) +import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty) +import BNFC.Backend.Python.CFtoPySkele (cf2PySkele) +import BNFC.Backend.Python.PyHelpers + +import BNFC.PrettyPrint -- For Doc +import qualified BNFC.Backend.Common.Makefile as Makefile + +-- | Entrypoint for BNFC to use the Python backend. +makePython :: SharedOptions -> CF -> MkFiles () +makePython opts cf = do + let pkgName = "bnfcPyGen" ++ name + let (lexerDefs, tokensPly) = cf2PyLex cf + let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf tokensPly + let prettyPrinter = cf2PyPretty pkgName cf + let skeletonCode = cf2PySkele pkgName cf + mkPyFile (pkgName ++ "/LexTokens.py") lexerDefs + mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs + mkPyFile (pkgName ++ "/Absyn.py") abstractClasses + mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter + mkPyFile "skele.py" skeletonCode + mkPyFile "genTest.py" (pyTest pkgName cf) + Makefile.mkMakefile (optMake opts) $ makefile pkgName (optMake opts) + where + name :: String + name = lang opts + mkPyFile x = mkfile x comment + + +-- | A makefile with distclean and clean specifically for the testsuite. No +-- "all" is needed as bnfc has already generated the necessary Python files. 
+makefile :: String -> Maybe String -> String -> Doc +makefile pkgName optMakefileName basename = vcat + [ + Makefile.mkRule "all" [] + [ " " ] + , Makefile.mkRule "clean" [] + [ "rm -f parser.out parsetab.py" ] + , Makefile.mkRule "distclean" [ "vclean" ] [] + , Makefile.mkRule "vclean" [] + [ "rm -f " ++ unwords + [ + pkgName ++ "/LexTokens.py", + pkgName ++ "/ParsingDefs.py", + pkgName ++ "/Absyn.py", + pkgName ++ "/PrettyPrinter.py", + pkgName ++ "/LexTokens.py.bak", + pkgName ++ "/ParsingDefs.py.bak", + pkgName ++ "/Absyn.py.bak", + pkgName ++ "/PrettyPrinter.py.bak", + "skele.py", + "genTest.py", + "skele.py.bak", + "genTest.py.bak" + ], + "rm -f " ++ pkgName ++ "/__pycache__/*.pyc", + "rm -fd " ++ pkgName ++ "/__pycache__", + "rmdir " ++ pkgName, + "rm -f __pycache__/*.pyc", + "rm -fd __pycache__", + "rm -f parser.out parsetab.py", + "rm -f " ++ makefileName, + "rm -f " ++ makefileName ++ ".bak" + ] + ] + where + makefileName = case optMakefileName of + Just s -> s + Nothing -> "None" -- No makefile will be created. + + +-- | Put string into a comment. +comment :: String -> String +comment x = "# " ++ x + + +-- Produces the content for the testing file, genTest.py. +pyTest :: String -> CF -> String +pyTest pkgName cf = unlines + [ + "from ply.lex import lex", + "from ply.yacc import yacc", + "import sys", + "from " ++ pkgName ++ ".LexTokens import *", + "from " ++ pkgName ++ ".ParsingDefs import *", + "from " ++ pkgName ++ ".PrettyPrinter import *", + "", + "", + "# Suggested input options:", + "# python3.10 genTest.py < sourcefile", + "# python3.10 genTest.py sourcefile inputfile (i.e. for interpreters).", + "inputFile = None", + "if len(sys.argv) > 1:", + "\tf = open(sys.argv[1], 'r')", + "\tinp = f.read()", + "\tf.close()", + "\tif len(sys.argv) > 2:", + "\t\tinputFile = sys.argv[2]", + "else:", + "\tinp = ''", + "\tfor line in sys.stdin:", + "\t\tinp += line", + "", + "", + "# Customizable error handling for the parsing", + "def p_error(p: lex.LexToken):", + "\tif p is None:", + "\t\tprint('No rule could reduce the tokenized input')", + "\telse:", + "\t\tprint('line:', p.lineno, 'lexpos:', p.lexpos, f'Syntax error at {p.value!r}')", + "\t\tp.lexer.syntaxError = True", + "", + "", + "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.", + "lexer = lex.lex()", + "parser = yacc(start=" ++ defaultEntry ++ ")", + "lexer.syntaxError = False", + "ast = parser.parse(inp, lexer=lexer)", + "if ast and not lexer.syntaxError:", + "\tprint('Parse Successful!\\n')", + "\tprint('[Abstract Syntax]')", + "\tprint(printAST(ast))", + "\tprint('\\n[Linearized Tree]')", + "\tlinTree = lin(ast)", + "\tprint(renderC(linTree))", + "\tprint()", + "else:", + "\tprint('Parse failed')", + "\tquit(1)" + ] + where + defaultEntry = (addCitationSigns . translateToList . show . 
firstEntry) cf + diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs new file mode 100644 index 00000000..a4712fb0 --- /dev/null +++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs @@ -0,0 +1,382 @@ + +{- + BNF Converter: Python abstract syntax and parsing definitions generator + Copyright (C) 2024 Author: Bjorn Werner + Based on CFtoCAbs.hs, Copyright (C) 2004 Michael Pellauer +-} + +module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where +import Data.List ( nub, intercalate ) +import BNFC.CF +import BNFC.Backend.Python.PyHelpers +import BNFC.Backend.Common.NamedVariables +import Text.PrettyPrint (Doc, render) +import Data.Either (lefts) +import Data.Char (toLower) +import qualified Data.List.NonEmpty as List1 + +-- | The result is ParsingDefs.py & Absyn.py +cf2PyAbs + :: String + -> CF -- ^ Grammar. + -> [(String, String)] -- Tokens to unicode mapping + -> (String, String) -- ParsingDefs.py, Absyn.py. +cf2PyAbs pkgName cf tokensPly = ( unlines + [ "from " ++ pkgName ++ ".Absyn import *" + , "\n\n" ++ createCommonEntrypointDef cf + , "\n\n" ++ (unlines parsingDefs) + , if length definesParsingDefs > 0 + then "\n\n# Parsing rules from defines" + else "" + , "\n\n" ++ unlines definesParsingDefs + ] + , "from typing import List as _List" ++ + "\n\n# Value categories (no coercsions):" ++ + "\n\n" ++ unlines valueCatsClasses ++ + "\n\n" ++ placeholderVariableClass ++ + "\n\n# Rules:" ++ + "\n" ++ "from dataclasses import dataclass, field" ++ + "\n\n" ++ (unlines dataClasses) + ) + where + rules = cfgRules cf + + -- To create ParsingDefs.py + parsingDefs :: [String] + parsingDefs = map (ruleToParsingDef cf tokensPly) + [r | r <- rules, isParsable r, not (isDefinedRule r)] + + definesParsingDefs = makeDefineParsingDefs cf tokensPly + + -- To create Absyn.py + dataClasses :: [String] + dataClasses = map makePythonClass + [ r | r <- rules, not (isDefinedRule r) + , not (isNilCons r) + , not (isCoercion r) + ] + + rulesNoListConstructors = + [r | r <- (cfgRules cf), not (isNilCons r), not (isCoercion r) ] + + -- Note: Custom tokens are set to inherit "str". + valueCatNames = nub $ + (map (show . normCat . valCat) rulesNoListConstructors) ++ + (map (++"(str)") (tokenNames cf)) ++ + [ "String(str)" + , "Char(str)" + , "Ident(str)" + , "Integer(int)" + , "Double(float)" + ] + + valueCatsClasses = map createValueCatClass valueCatNames + + +placeholderVariableClass :: String +placeholderVariableClass = unlines + [ "# Placeholder to add additional information to a node in the AST," ++ + " like type information." + , "class _AnnType:" + , " def __init__(self):" + , " self.__v = None" + , "" + , " def s(self, val):" + , " if not self.__v == None:" + , " if self.__v != val:" + , " raise Exception('already has type: ' + str(self.__v)" ++ + " + ' and tried to set to ' + str(val))" + , " self.__v = val" + , "" + , " def g(self):" + , " return self.__v" + , "" + , " def __str__(self):" + , " return str(self.__v.__class__)" + , "" + , " def __repr__(self):" + , " return str(self.__v.__class__)" + ] + +-- | Creates a parsing definition that points to all entrypoints. +createCommonEntrypointDef :: CF -> String +createCommonEntrypointDef cf = unlines + [ "def p__Start(p):" + , " '''" + , " _Start : " ++ (translateToList . show . head) cats ++ + concat (map createCase (tail cats)) + , " '''" + , " p[0] = p[1]" + , "" + , "" + , "# Comment the below line to enable the '_Start' entrypoint (may yield" + ++ " conflict warnings)." 
+ , "del p__Start" + , "" + ] + where + cats = (List1.toList . allEntryPoints) cf + + createCase :: Cat -> String + createCase c = "\n | " ++ translateToList (show c) + + +-- | The value categories become abstract classes, for type hinting. +createValueCatClass :: String -> String +createValueCatClass s = "class " ++ s ++ ":\n\tpass\n" + + +-- | Creates a parsing definition, by checking what type of rule it is and +-- calling the corresponding make function. +ruleToParsingDef :: CF -> [(String, String)] -> Rul RFun -> String +ruleToParsingDef cf tokensPly rule + | isCoercion funcRStr = + makeParseCoercion cf tokensPly funcCat (fName, sentForm) + | isNilFun funcRStr = + makeParseNil tokensPly funcCat (fNameTranslated, sentForm) + | isOneFun funcRStr = + makeParseOne cf tokensPly funcCat (fNameTranslated, sentForm) + | isConsFun funcRStr = + makeParseCons cf tokensPly funcCat (fNameTranslated, sentForm) + | isDefinedRule rule = + error "Should not generate define rules in this step" + | otherwise = + makeParseFunc cf tokensPly funcCat (fName, sentForm) + where + funcRStr = funRule rule :: RString + fName = wpThing funcRStr :: String + + funcCat = valCat rule :: Cat + catStr = show (valCat rule) :: String + + fNameTranslated :: String + fNameTranslated + | isNilFun funcRStr = catStr + | otherwise = fName + + sentForm = rhsRule rule :: [Either Cat String] + + +-- | Make a Python class from a rule's name and production. +makePythonClass :: Rul RFun -> String +makePythonClass rule = + "@dataclass\n" ++ + "class " ++ name ++ ":\n" ++ + if length cats == 0 then "\tpass\n" else classBody + where + name = funName rule + sentForm = rhsRule rule + cats = lefts sentForm + nvCats = numVars sentForm :: [Either (Cat, Doc) String] + + enumeratedVarsWithType = [render d ++ ": " ++ + strCatToPyTyping (show (normCat c)) | (c, d) <- lefts nvCats] + + classBody = unlines $ map ("\t" ++) (enumeratedVarsWithType ++ + ["_ann_type: _AnnType = field(default_factory=_AnnType)"]) + + + +-- | Creates the corresponding type hinting for some member variable. +strCatToPyTyping :: String -> String +strCatToPyTyping s = + if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s + + +-- | It could be this is only guarding against list categories. +literalsToPytypeMaybe :: CF -> String -> Maybe String +literalsToPytypeMaybe cf s = case s of + "Integer" -> Just "Integer" + "Double" -> Just "Double" + "Char" -> Just "Char" + "String" -> Just "String" + "Ident" -> Just "Ident" + _ -> if s `elem` (tokenNames cf) then Just s else Nothing + + +-- | The following makeParse functions create their corresponding parsing +-- definitions for some rule. +makeParseFunc :: CF -> [(String, String)] -> Cat -> (String, SentForm) + -> String +makeParseFunc cf tokensPly dataCat (name, sentForm) = unlines + [ "def " ++ "p_" ++ name ++ "(p):\n" ++ "\t" ++ "\"\"\"" + , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm) + , "\t" ++ "\"\"\"" + , "\t" ++ "p[0] = " ++ rhs ++ "\n" + ] + where + rhs = name ++ "(" ++ (addCommas (getLeftIndexes cf 1 sentForm)) ++ ")" + + +makeParseCoercion :: CF -> [(String, String)] -> Cat -> (String, SentForm) + -> String +makeParseCoercion cf tokensPly dataCat (_, sentForm) = unlines + [ "def " ++ "p_" ++ (show sourceCat) ++ "(p):\n" ++ "\t" ++ "\"\"\"" + , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm) + , "\t" ++ "\"\"\"" + , "\t" ++ "p[0] = " ++ strP ++ "\n" + ] + where + strP = head (getLeftIndexes cf 1 sentForm) + sourceCat = (head . 
lefts) sentForm + + +makeParseNil :: [(String, String)] -> Cat -> (String, SentForm) -> String +makeParseNil tokensPly dataCat (_, sentForm) = unlines + [ "def " ++ "p_" ++ "Nil" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\"" + , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) + , "\t" ++ "\"\"\"" + , "\t" ++ "p[0] = []\n" + ] + where + translatedCat = translateToList $ show dataCat + + +makeParseOne :: CF -> [(String, String)] -> Cat -> (String, SentForm) -> String +makeParseOne cf tokensPly dataCat (_, sentForm) = unlines + [ "def " ++ "p_" ++ "One" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\"" + , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) + , "\t" ++ "\"\"\"" + , "\t" ++ "p[0] = " ++ rhs ++ "\n" + ] + where + translatedCat = translateToList $ show dataCat + rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm) + + +makeParseCons :: CF -> [(String, String)] -> Cat -> (String, SentForm) + -> String +makeParseCons cf tokensPly dataCat (_, sentForm) = unlines + [ "def " ++ "p_" ++ "Cons" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\"" + , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) + , "\t" ++ "\"\"\"" ++ "\n" + , "\t" ++ "p[0] = " ++ rhs ++ "\n" + ] + where + translatedCat = translateToList $ show dataCat + rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm) + + +-- | Produces a list of the elements in the code production, where the indices +-- match the argument categories. +getLeftIndexesLists :: [(String, String)] -> CF -> Int -> [Either Cat String] + -> [String] +getLeftIndexesLists _ _ _ [] = [] +getLeftIndexesLists tokensPly cf n (Left c:ecs) + | isList c = [typedPTerm] ++ (getLeftIndexesLists tokensPly cf (n+1) ecs) + | otherwise = ["[" ++ typedPTerm ++ "]"] ++ + (getLeftIndexesLists tokensPly cf (n+1) ecs) + where + pTerm = "p[" ++ (show n) ++ "]" + typedPTerm = case literalsToPytypeMaybe cf (show c) of + Just s -> s ++ "(" ++ pTerm ++ ")" + Nothing -> pTerm +getLeftIndexesLists tokensPly cf n (Right strOp:ecs) + | separatorIsEmpty tokensPly strOp = getLeftIndexesLists tokensPly cf n ecs + | otherwise = getLeftIndexesLists tokensPly cf (n+1) ecs + + +-- | In case the deliminator is "" or is not defined for the lexer, like +-- ignored characters. +separatorIsEmpty :: [(String, String)] -> String -> Bool +separatorIsEmpty tokensPly strOp + | length strOp > 0 = case lookup strOp tokensPly of + Just _ -> False + Nothing -> True + | otherwise = True + + +-- | Produces a list of the elements in the code production, where the indices +-- match the argument categories. +getLeftIndexes :: CF -> Int -> [Either Cat String] -> [String] +getLeftIndexes _ _ [] = [] +getLeftIndexes cf n (Left c:ecs) = [typedPTerm] ++ + (getLeftIndexes cf (n+1) ecs) + where + pTerm = "p[" ++ (show n) ++ "]" + typedPTerm = case literalsToPytypeMaybe cf (show c) of + Just s -> s ++ "(" ++ pTerm ++ ")" + Nothing -> pTerm +getLeftIndexes cf n (Right _:ecs) = getLeftIndexes cf (n+1) ecs + + +-- | Produces the production in the docstring for the parsing definitions. +prodToDocStr :: [(String, String)] -> [Either Cat String] -> String +prodToDocStr _ [] = "" +prodToDocStr tokensPly (ec:[]) = ecsToDocStr tokensPly ec +prodToDocStr tokensPly (ec:ecs) = + ecsToDocStr tokensPly ec ++ " " ++ prodToDocStr tokensPly ecs + + +-- Converts a single element in the production. 
+ecsToDocStr :: [(String, String)] -> Either Cat String -> String +ecsToDocStr _ (Left c) = translateToList $ show c +ecsToDocStr tokensPly (Right strOp) = case lookup strOp tokensPly of + (Just s) -> s + Nothing -> ("") -- We assume it is no token, this affects getLeftIndexes + + +-- | Creating the parsing definitions for the defines. +makeDefineParsingDefs :: CF -> [(String, String)] -> [String] +makeDefineParsingDefs cf tokensPly = defFuncsPy + where + rules = cfgRules cf + + definedRules :: [Rul RFun] + definedRules = [r | r <- rules, isDefinedRule r] + + pairs :: [(Rul RFun, Define)] + pairs = [(dr, d) | dr <- definedRules, d <- definitions cf, + nameCorresponds ((wpThing . defName) d) (funName dr)] + + -- Adds a number to the name to make each define separate. + numberedPairs = zip [1..] pairs + defFuncsPy = map (makeDefineParsingDef cf tokensPly) numberedPairs + + +-- | To compare names for defines. The first letter needs to be lowered, so +-- "while" == "While". +nameCorresponds :: String -> String -> Bool +nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys) +nameCorresponds _ _ = error "Names can't be empty" + + +-- | Creates a define parsing definition. +makeDefineParsingDef :: + CF -> [(String, String)] -> (Int, (Rul RFun, Define)) -> String +makeDefineParsingDef cf tokensPly (n, (defRule, defi)) = unlines + [ "def p_D" ++ (show n) ++ name ++ "(p):" + , "\t\"\"\"" + , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) + , "\t\"\"\"" + , "\t# " ++ show env + , "\tp[0] = " ++ expToDef env (defBody defi) + , "" + ] + where + name = (wpThing . defName) defi + translatedCat = translateToList $ (catToStr . valCat) defRule + sentForm = rhsRule defRule + indexes = getLeftIndexes cf 1 sentForm + args = map fst (defArgs defi) + env = zip args indexes + + +-- | Converts the production of a define, called an expression, to a +-- production for the parsing definition. +expToDef :: [(String, String)] -> Exp -> String +expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]" +expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++ + expToDef env recList +expToDef _ (App "[]" _ _) = "[]" +expToDef env (App fName _ exps) = + fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")" +expToDef env (Var s) = case lookup s env of + Just p -> p + Nothing -> error "Missing variable in define enviroment" +expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")" +expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")" +expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")" +expToDef _ (LitString s) = "String('" ++ show s ++ "')" + + diff --git a/source/src/BNFC/Backend/Python/CFtoPyLex.hs b/source/src/BNFC/Backend/Python/CFtoPyLex.hs new file mode 100644 index 00000000..fd1a532a --- /dev/null +++ b/source/src/BNFC/Backend/Python/CFtoPyLex.hs @@ -0,0 +1,189 @@ + +{- + BNF Converter: Python lexer generator + Copyright (C) 2024 Author: Bjorn Werner +-} + +module BNFC.Backend.Python.CFtoPyLex ( cf2PyLex ) where + +import BNFC.CF + +import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) +import BNFC.Backend.Python.PyHelpers + + +-- | The entrypoint, returns LexTokens.py and the unicode mapping. 
+cf2PyLex :: CF -> (String, [(String, String)]) +cf2PyLex cf = (, tokensPly) $ unlines + [ "import ply.lex as lex\n" + , "" + , createReservedMap reservedWordsEnv + , "# PLY tokens:\n" ++ plyTokens ++ "\n" + , "# PLY tokens with RegEx:" + , unlines plyTokensRegEx + , "# Literals:" + , plyLiterals cf + , "# Comments:" + , unlines singleComments + , unlines multiComments + , footer + ] + where + -- The reserved keywords and the symbols are zipped with a + -- unicode representation, which are needed for the parsing. + + -- Reserved keywords -> [("int", "R_...")] + reservedWordsVar :: [String] + reservedWordsVar = reservedWords cf + + reservedWordsEnv :: [(String, String)] + reservedWordsEnv = + zip reservedWordsVar (map (("R" ++) . toOrd) reservedWordsVar) + + -- Symbols -> [("+", "S_43")] + literalsVar :: [String] + literalsVar = literals cf + + strOps :: [String] + strOps = map fst (cfTokens cf) + + strOpsFiltered = filterOut strOps reservedWordsVar + strOpsFilteredSymbols = map (("S" ++) . toOrd) strOpsFiltered + + strOpsAndSymbols :: [(String, String)] + strOpsAndSymbols = zip strOpsFiltered strOpsFilteredSymbols + + presentSymbols :: [String] + presentSymbols = + map addCitationSigns (strOpsFilteredSymbols ++ literalsVar) + + -- Defining the variables for the lexer. + plyTokens = + "tokens = reserved + (" ++ concat (map (++ ",") presentSymbols) ++ ")" + plyTokensRegEx = map createRegEx strOpsAndSymbols + + tokensPly :: [(String, String)] + tokensPly = reservedWordsEnv ++ strOpsAndSymbols + + -- Comments + (multiMatchers, singleMatchers) = comments cf + singleComments = map createLineCommentMatcher singleMatchers + multiComments = map createMultiLineCommentMatcher multiMatchers + + +-- | Creates tokens for the lexer, such as "t_S_43 = r'\+'". +createRegEx :: (String, String) -> String +createRegEx (s, u) = "t_" ++ u ++ " = r'" ++ concat (map escapeChar s) ++ "'" + + +-- | For single-line comments +createLineCommentMatcher :: String -> String +createLineCommentMatcher r = unlines + [ "def t_C" ++ (toOrd r) ++ "(t):" + , "\tr'" ++ concat (map escapeChar r) ++ ".*'" + , "\tpass" + ] + + +-- | For multi-line comments +createMultiLineCommentMatcher :: (String, String) -> String +createMultiLineCommentMatcher (s, e) = unlines + [ "def t_C" ++ (toOrd (s ++ e)) ++ "(t):" + , "\tr'" ++ (escaped s) ++ "([\\s\\S]*?)" ++ (escaped e) ++ "'" + , "\tpass" + ] + where + escaped s = concat $ map escapeChar s + + +-- | The reserved_map contains mappings for reserved keywords, +-- such as 'int' : 'R_105_110_116'. 
+createReservedMap :: [(String, String)] -> String +createReservedMap xs = unlines + [ "reserved_map = {" + , unlines rows + , "}" + , "" + , "reserved = (" + , unlines rowsSnd + , ")" + ] + where + rows :: [String] + rows = ["\t'" ++ w ++ "' : '" ++ u ++ "'," | (w, u) <- xs] + + rowsSnd = ["\t'" ++ u ++ "'," | (_, u) <- xs] + + +-- | Creates lexer definitions for the lexer which are interpreted using +-- the inspect module to retrieve useful information, for example: +-- def t_String(t): +-- r'"[^"]+"' +-- t.type = reserved_map.get(t.value, ’String’) +-- return t +plyLiterals :: CF -> String +plyLiterals cf = unlines $ concat + [ + ifC catString [createLexFunc "String" "\"(\\\\\"|[^\"])*\""] + , ifC catChar + [createLexFunc "Char" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"] + , ifC catDouble [createLexFunc "Double" "\\d+\\.\\d+(e-?\\d+)?"] + , ifC catInteger [createLexFunc "Integer" "\\d+"] + -- Prolog requires user defined tokens to have priority over Ident; C + -- requires Double to have priority over user defined tokens, as C has + -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order + -- for priority, not the length. + , userDefTokens + , ifC catIdent [createLexFunc "Ident" "[A-Za-z]\\w*"] + -- If there is no Ident present, we need a lexer definition for reserved + -- words: + , if not (isUsedCat cf (TokenCat catIdent)) && length (reservedWords cf) > 0 + then [createLexFunc "" "[A-Za-z]\\w*"] + else [] + ] + where + ifC :: TokenCat -> [String] -> [String] + ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + + userDefTokens :: [String] + userDefTokens = [ + createLexFunc name (printRegFlex exp) | (name, exp) <- tokenPragmas cf + ] + + +-- | Creates a Lexing definition for a Literal +-- If no Literal name is used, this is just a reserved_map lookup. 
+createLexFunc :: String -> String -> String +createLexFunc name regex = unlines + [ "def t_" ++ (if name /= "" then name else "_NoIdentPresent") ++ "(t):" + , "\tr'" ++ regex ++ "'" + , if name /= "" + then "\tt.type = reserved_map.get(t.value, '" ++ name ++ "')" + else "\tt.type = reserved_map.get(t.value)" + , "\treturn t" + ] + + +-- | Adds lexer definitions to ignore whitespaces, and a testing block +-- which attempts tokenize some input, like: python3 LexTokens.py < input +footer :: String +footer = unlines + [ "# Ignored characters:" + , "t_ignore = ' \\t'" + , "" + , "# Ignored token with an action associated with it:" + , "def t_ignore_newline(t):" + , "\tr'\\n+'" + , "\tt.lexer.lineno += t.value.count('\\n')" + , "" + , "# Error handler for illegal characters:" + , "def t_error(t):" + , "\tprint('Illegal character', 'line', str(t.lineno) + ':', t.value[0], 'ascii:', ord(t.value[0]))" + , "\tquit()" + , "" + , "if __name__ == \"__main__\":" + , "\tlexer = lex.lex()" + , "\tlex.runmain(lexer)" + ] + + diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs new file mode 100644 index 00000000..20255c19 --- /dev/null +++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs @@ -0,0 +1,409 @@ + +{- + BNF Converter: Python pretty-printer generator + Copyright (C) 2024 Author: Bjorn Werner + Based on CFtoCPrinter.hs, Copyright (C) 2004 Michael Pellauer +-} + +module BNFC.Backend.Python.CFtoPyPrettyPrinter ( cf2PyPretty ) where +import Data.List ( intercalate, nub ) +import BNFC.CF +import BNFC.Backend.Python.PyHelpers +import BNFC.Backend.Common.NamedVariables +import Text.PrettyPrint (Doc, render) +import Data.Either (lefts) +import BNFC.Backend.Common.StrUtils +import qualified Data.List.NonEmpty as List1 + +-- | Used to create PrettyPrinter.py, that contains the functionality +-- to print the AST and the linearized tree. +cf2PyPretty :: String -> CF -> String +cf2PyPretty pkgName cf = unlines + [ "from " ++ pkgName ++ ".Absyn import *" + , "" + , makePrintAST cf + , "" + , makeListDecons cf + , "" + , makeRenderC + , "" + , makeCoercCompare cf + , "" + , makeCompareFunc + , "" + , makeLinFunc cf + ] + + +-- | Creates the print AST function. +makePrintAST :: CF -> String +makePrintAST cf = concat + [ "def printAST(ast: object) -> list:\n" + , " match ast:\n" + , concat + [ ifUsedThen catInteger + [ " case Integer():" + , " return str(ast)" + ] + , ifUsedThen catDouble + [ " case Double():" + , " if ast.is_integer():" + , " return str(int(ast))" + , " else:" + , " return str(ast)" + ] + , ifUsedThen catString + [ " case String():" + , " return str(ast)" + ] + , ifUsedThen catChar + [ " case Char():" + , " return str(ast)" + ] + , ifUsedThen catIdent + [ " case Ident():" + , " return '\"' + str(ast) + '\"'" + ] + ] + , if length (tokenNames cf) > 0 + then unlines + [ " case (" ++ intercalate " | " (map (++"()") (tokenNames cf)) + ++ "):" + , " return '\"' + str(ast) + '\"'" + ] + else "" + , " case list():\n" + , " return '[' + ', '.join([printAST(a) for a in ast]) + ']'\n" + , "\n" + , " if len(vars(ast)) > 0:\n" + , " return '(' + ast.__class__.__name__ + ' ' + ' '.join([printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n" + , " else:\n" + , " return ast.__class__.__name__\n" + ] + where + ifUsedThen :: TokenCat -> [String] -> String + ifUsedThen cat ss + | isUsedCat cf (TokenCat cat) = unlines ss + | otherwise = "" + + +-- Creates deconstructors for all list categories. 
+makeListDecons :: CF -> String +makeListDecons cf = unlines $ map (makeListDecon cf) listCats + where + rules = cfgRules cf + valCats = nub $ map valCat rules + listCats = [c | c <- valCats, isList c] + + +-- Creates a deconstructor for some list category. +makeListDecon :: CF -> Cat -> String +makeListDecon cf c = concat + [ "def list" ++ name ++ "Decon(xs):\n" + , oneRuleStr + , nilRuleStr + , consRuleStr + , "\n" + ] + where + name = show $ catOfList c + listRulesForCat = rulesForCat cf c + + nilRule = case [r | r <- listRulesForCat, isNilFun r] of + [] -> Nothing + rs -> Just (head rs) + oneRule = case [r | r <- listRulesForCat, isOneFun r] of + [] -> Nothing + rs -> Just (head rs) + consRule = case [r | r <- listRulesForCat, isConsFun r] of + [] -> Nothing + rs -> Just (head rs) + + -- List rules are of the form: + -- [C] ::= symbols.. C symbols.. [C] + -- The production, in Python, is concatenated recursively: + -- symbols.. + lin(xs[0]) + symbols.. + listCDecon(xs[1:]) + symbols.. + sentFormToArgs :: Int -> [Either Cat String] -> String + sentFormToArgs _ [] = "[]" + sentFormToArgs v (Right strOp:ecss) = + "['" ++ escapeChars strOp ++ "'] + " ++ + sentFormToArgs v ecss + sentFormToArgs v (Left _:ecss) + | v == 0 = "lin(xs[0]) + " ++ sentFormToArgs (v+1) ecss + | v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++ + sentFormToArgs (v+1) ecss + | otherwise = error "A list production can max have C and [C]." + + nilRuleStr = case nilRule of + Nothing -> "" + Just r -> unlines + [ " if len(xs) == 0:" + , " return " ++ sentFormToArgs 0 (rhsRule r) + ] + + oneRuleStr = case oneRule of + Nothing -> "" + Just r -> unlines + [ " if len(xs) == 1:" + , " return " ++ sentFormToArgs 0 (rhsRule r) + ] + + consRuleStr = case consRule of + Nothing -> "" + Just r -> " return " ++ sentFormToArgs 0 (rhsRule r) ++ "\n" + + +-- | Creates the renderC function, which creates a string of a list of +-- strings, and inserts white-spaces to render the language in a C-like +-- manner. +makeRenderC :: String +makeRenderC = unlines + [ "def renderC(ss: list):" + , " def br(i):" + , " return '\\n' + ' ' * iLevel" + , "" + , " def ident(i):" + , " return ' ' * iLevel" + , "" + , " def removeTrailingWhitespace(tot):" + , " i = len(tot)" + , " while i > 0:" + , " if tot[i] == ' ':" + , " i -= 1" + , " else:" + , " break" + , "" + , " return tot[:i]" + , "" + , " def oneEmptyLine(tot):" + , " tot = tot.rstrip(' ')" + , " if len(tot) > 0 and tot[-1] != '\\n':" + , " tot += '\\n'" + , " tot += ident(iLevel)" + , " return tot" + , "" + , " tot = ''" + , " iLevel = 0" + , " for i in range(len(ss)):" + , " s = ss[i]" + , " match s:" + , " case '{':" + , " tot = oneEmptyLine(tot)" + , " iLevel += 1" + , " tot += '{' + br(iLevel)" + , " case ('(' | '['):" + , " tot += s" + , " case (')' | ']'):" + , " tot = tot.rstrip()" + , " tot += s + ' '" + , " case '}':" + , " iLevel -= 1" + , " tot = oneEmptyLine(tot)" + , " tot += s + br(iLevel)" + , " case ',':" + , " tot = tot.rstrip()" + , " tot += s + ' '" + , " case ';':" + , " tot = tot.rstrip()" + , " tot += s + br(iLevel)" + , " case '':" + , " tot += ''" + , " case ' ':" + , " tot += s" + , " case _:" + , " tot += s + ' '" + , "" + , " return tot" + ] + + +-- Provides a mapping from a rule to its value category. +makeCoercCompare :: CF -> String +makeCoercCompare cf = concat + [ "cdict = {\n" + , unlines (map (\(fs, cs) -> " " ++ fs ++ " : '" ++ cs ++ "',") scs) + , "}" + ] + where + scs :: [(String, String)] + scs = [(funName r, (show . wpThing . 
valRCat) r) | r <- cfgRules cf, + not (isCoercion r), not (isNilCons r), not (isDefinedRule r)] + + +-- | Creates a function that attempts to figure out if +-- parentheses are required, for example: +-- 1 + (2 * 3) +-- The precedence for the addition is low, say Exp, but the multiplication +-- has a higher precedence, say Exp1, so parantheses are needed. +makeCompareFunc :: String +makeCompareFunc = unlines + [ "def c(ast, cat: str) -> list:" + , " cl = ast.__class__" + , " if cl in cdict:" + , " clCat = cdict[cl]" + , " clCatAlphas = ''.join(filter(str.isalpha, clCat))" + , " catAlphas = ''.join(filter(str.isalpha, cat))" + , " clCatNums = ''.join(filter(str.isnumeric, clCat))" + , " catNums = ''.join(filter(str.isnumeric, cat))" + , " clCatNum = 0" + , " catNum = 0" + , " if clCatAlphas == catAlphas:" + , " if len(clCatNums) > 0:" + , " clCatNum = int(clCatNums)" + , " if len(catNums) > 0:" + , " catNum = int(catNums)" + , " if clCatNum < catNum:" + , " return ['('] + lin(ast) + [')']" + , " return lin(ast)" + ] + + +-- | Returns the AST as a list of characters, which can be sent into the +-- renderC.function. +makeLinFunc :: CF -> String +makeLinFunc cf = unlines + [ "def lin(ast: object) -> list:" + , " match ast:" + , concat + [ ifUsedThen catInteger + [ " case Integer():" + , " return [str(ast)]" + ] + , ifUsedThen catDouble + [ " case Double():" + , " if ast.is_integer():" + , " return [str(int(ast))]" + , " else:" + , " return [str(ast)]" + ] + , ifUsedThen catString + [ " case String():" + , " return [ast, ' ']" + ] + , ifUsedThen catIdent + [ " case Ident():" + , " return [ast]" + ] + , ifUsedThen catChar + [ " case Char():" + , " return [ast]" + ] + ] + , " # skeleTokenCases:" + , unlines skeleTokenCases + , " # skeleRuleCases:" + , unlines skeleRuleCases + , -- Deals with cases where the entrypoint is say [Stm] or [Exp], + -- with pattern matching on the first object in the list. + " case " ++ "list():" + , " if len(ast) == 0:" + , " return []" + , " else:" + , " match ast[0]:" + , unlines listEntrypointCases + , " case _:" + , " raise Exception(ast[0].__class__.__name__, " ++ + "'unmatched ast[0]')" + , " case _:" + , " raise Exception(str(ast.__class__) + ' unmatched')" + ] + where + -- Used to include standard literals, if needed. + ifUsedThen :: TokenCat -> [String] -> String + ifUsedThen cat ss + | isUsedCat cf (TokenCat cat) = unlines ss + | otherwise = "" + + -- Figures out the deliminators for the separators and terminators, + -- to further process a deconstructed object that contains list(s). + rules = [r | r <- cfgRules cf + , not (isCoercion r) + , not (isDefinedRule r) + , not (isNilCons r) + ] + + skeleTokenCases = map makeSkeleTokenCase (tokenNames cf) + skeleRuleCases = map makeSkeleRuleCase rules + + catEntrypointsForLists = + [catOfList c | c <- (List1.toList . allEntryPoints) cf, isList c] + + -- The Haskell backend defaults to the production for the lowest + -- precedence for lists that are defined. Like ``separator Exp1 ","``. 
+ lowestPrecListCats = [c | c <- catEntrypointsForLists, + precCat c == (minimum (map precCat + [c2 | c2 <- catEntrypointsForLists, normCat c == normCat c2] + ) + ) + ] + + listEntrypointCases = + map (makeListEntrypointCase cf) lowestPrecListCats + + +-- | Creates cases that checks what class individual nodes might be, meaning +-- the rule names, or the token categories +makeListEntrypointCase :: CF -> Cat -> String +makeListEntrypointCase cf c = concat + [ " case " ++ intercalate "|" constructors ++ ":\n" + , " return list" ++ show c ++ "Decon(ast)" + ] + where + constructors = if isTokenCat c + then [show c ++ "()"] + else map ((++ "()") . funName) + [ + r | r <- rulesForNormalizedCat cf (normCat c), + not (isCoercion r), + not (isDefinedRule r) + ] + + +-- Creates a case for a user defined literal, which inherits str. +makeSkeleTokenCase :: String -> String +makeSkeleTokenCase tokenName = concat + [ " case " ++ tokenName ++ "():\n" + , " return [ast]" + ] + + +-- | Creates a case for some rule, with the additional information of what +-- separator- and terminator-delimiters there are. +makeSkeleRuleCase :: Rul RFun -> String +makeSkeleRuleCase rule = concat + [ " case " ++ fName ++ "(" ++ varNamesCommad ++ "):\n" + , " # " ++ (showEcss sentForm) ++ "\n" + , " return " ++ if (length args > 0) + then (intercalate " + " args) + else "[]" + ] + where + fName = wpThing (funRule rule) + sentForm = rhsRule rule + + nvCats = numVars sentForm :: [Either (Cat, Doc) String] + + enumeratedVarNames = [render d | (c, d) <- lefts nvCats] + + varNamesCommad = if length enumeratedVarNames > 0 + then addCommas (enumeratedVarNames ++ ["_ann_type"]) + else "" + + args = ecssAndVarsToList + sentForm + enumeratedVarNames + + +-- | Creates a list of a production with both terminals and non-terminals. +ecssAndVarsToList :: [Either Cat String] -> [String] -> [String] +ecssAndVarsToList [] _ = [] +ecssAndVarsToList (Left c:ecss) (s:ss) + | isList c = ["list" ++ name ++ "Decon(" ++ s ++ ")"] ++ + ecssAndVarsToList ecss ss + | otherwise = ["c(" ++ s ++ ", '" ++ (show c) ++ "')"] ++ + ecssAndVarsToList ecss ss + where + name = show $ catOfList c +ecssAndVarsToList (Right strOp:ecss) ss = + ["['" ++ escapeChars strOp ++ "']"] ++ ecssAndVarsToList ecss ss + diff --git a/source/src/BNFC/Backend/Python/CFtoPySkele.hs b/source/src/BNFC/Backend/Python/CFtoPySkele.hs new file mode 100644 index 00000000..5297fa02 --- /dev/null +++ b/source/src/BNFC/Backend/Python/CFtoPySkele.hs @@ -0,0 +1,114 @@ + +{- + BNF Converter: Python skeleton-code generator + Copyright (C) 2024 Author: Bjorn Werner +-} + +module BNFC.Backend.Python.CFtoPySkele where +import BNFC.CF +import BNFC.Backend.Python.PyHelpers +import Data.Char (toLower) +import BNFC.Backend.Common.NamedVariables +import Text.PrettyPrint (Doc, render) +import Data.Either (lefts) + +-- | Entrypoint. +cf2PySkele :: String -> CF -> String +cf2PySkele pkgName cf = unlines + [ "from ply.lex import lex" + , "from ply.yacc import yacc" + , "import sys" + , "from " ++ pkgName ++ ".LexTokens import *" + , "from " ++ pkgName ++ ".ParsingDefs import *" + , "from " ++ pkgName ++ ".PrettyPrinter import *" + , "" + , makeSkele cf + ] + + +-- Creates first a matcher with all value categories, and underneath one +-- matcher for each value category. 
+makeSkele :: CF -> String +makeSkele cf = unlines + [ "# Categories combined into one matcher" + , "def skeleMatcher(ast: object):" + , "\tmatch ast:" + , unlines skeleLiteralCases + , unlines skeleTokenCases + , unlines skeleRuleCases + , "\t\tcase _:" + , "\t\t\traise Exception(str(ast.__class__) + ' unmatched')" + , "" + , "# Categories split into their own matchers" + , unlines matchersOnCats + ] + where + rules = + [ r | r <- cfgRules cf + , not (isCoercion r) + , not (isDefinedRule r) + , not (isNilCons r) + ] + + presentLiterals = ifC catInteger ++ + ifC catDouble ++ + ifC catString ++ + ifC catIdent ++ + ifC catChar + + skeleLiteralCases = map makeSkeleTokenCase presentLiterals + skeleTokenCases = map makeSkeleTokenCase (tokenNames cf) + skeleRuleCases = map makeSkeleRuleCase rules + + parserCats = filter (not . isList) (allParserCatsNorm cf) :: [Cat] + rulesfornormalizedcat = map (rulesForNormalizedCat cf) parserCats + parserCatsWithRules = zip parserCats rulesfornormalizedcat + + matchersOnCats = map makeMatcherOnCat parserCatsWithRules + + ifC :: TokenCat -> [String] + ifC cat = if isUsedCat cf (TokenCat cat) then [cat] else [] + + +-- Creates a matcher for some value category. +makeMatcherOnCat :: (Cat, [Rul RFun]) -> String +makeMatcherOnCat (c, rules) = unlines + [ "def matcher" ++ show c ++ "(" ++ varName ++ ": " ++ show c ++ "):" + , "\tmatch " ++ varName ++ ":" + , unlines cases + ,"\t\tcase _:" + ,"\t\t\traise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')" + ] + where + varName = map toLower (show c) ++ "_" + cases = map makeSkeleRuleCase (filter + (\r -> not (isCoercion r) && not (isDefinedRule r)) + rules) + + +-- | Creates a case for some rule. +makeSkeleRuleCase :: Rul RFun -> String +makeSkeleRuleCase rule = concat + [ "\t\tcase " ++ fName ++ "(" ++ varNamesCommad ++ "):\n" + , "\t\t\t# " ++ (showEcss sentForm) ++ "\n" + , "\t\t\traise Exception('" ++ fName ++ " not implemented')" + ] + where + funcRStr = funRule rule :: RString + fName = wpThing funcRStr :: String + sentForm = rhsRule rule + + nvCats = numVars sentForm :: [Either (Cat, Doc) String] + + enumeratedVarNames = [render d | (_, d) <- lefts nvCats] + + varNamesCommad = addCommas (enumeratedVarNames ++ ["_ann_type"]) + + +-- | Creates a case for a user-defined token. +makeSkeleTokenCase :: String -> String +makeSkeleTokenCase tokenName = concat + [ "\t\tcase " ++ tokenName ++ "():\n" + , "\t\t\traise Exception('not implemented')" + ] + diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs new file mode 100644 index 00000000..300c1d9f --- /dev/null +++ b/source/src/BNFC/Backend/Python/PyHelpers.hs @@ -0,0 +1,80 @@ + +{- + BNF Converter: Python backend helper functions + Copyright (C) 2024 Author: Bjorn Werner +-} + +module BNFC.Backend.Python.PyHelpers where +import Data.List ( intercalate ) +import Data.Char +import BNFC.CF + + +addCommas :: [String] -> String +addCommas ss = intercalate ", " ss + + +addCitationSigns :: String -> String +addCitationSigns ss = "'" ++ ss ++ "'" + + +filterOut :: Eq a => [a] -> [a] -> [a] +filterOut xs ys = filter (\x -> not (elem x ys)) xs + + +-- Converts every character to unicode with an underscore in front. +toOrd :: String -> String +toOrd s = concat (map (("_" ++) . show . ord) s) + + +-- | Converts a string of underscores and unicode numbers such as "_53_53" +-- into "++". 
+toChr :: String -> String
+toChr "" = ""
+toChr xs = map chr nrs
+  where
+    nrsStr = tail $ split '_' xs :: [String]
+    nrs = map read nrsStr :: [Int]
+
+
+split :: Char -> String -> [String]
+split c s = split' c s ""
+
+
+split' :: Char -> String -> String -> [String]
+split' _ [] ps = [ps]
+split' c (s:ss) ps
+  | c == s = [ps] ++ split' c ss ""
+  | otherwise = split' c ss (ps ++ [s])
+
+
+-- Converts [Cat] into ListCat, which is mainly used in the parser.
+translateToList :: String -> String
+translateToList s
+  | strIsList s = "List" ++ (tail $ init s)
+  | otherwise = s
+
+
+strIsList :: String -> Bool
+strIsList s = head s == '[' && last s == ']'
+
+
+firstRight :: [Either a b] -> Maybe b
+firstRight [] = Nothing
+firstRight (Left _:es) = firstRight es
+firstRight (Right r:_) = Just r
+
+
+-- Retrieves the first character from strings such as "[Stm]" or "Stm".
+firstAlpha :: String -> Char
+firstAlpha s
+  | strIsList s = head $ tail s
+  | otherwise = head s
+
+
+-- | Converts a production into a string, for comments.
+showEcss :: [Either Cat String] -> String
+showEcss [] = ""
+showEcss (Left c:ecss) = show c ++ " " ++ (showEcss ecss)
+showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
+
diff --git a/source/src/BNFC/Backend/Python/RegToFlex.hs b/source/src/BNFC/Backend/Python/RegToFlex.hs
new file mode 100644
index 00000000..37e357b4
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/RegToFlex.hs
@@ -0,0 +1,97 @@
+{-# LANGUAGE LambdaCase #-}
+
+{-
+    Due to the almost full similarity, the name RegToFlex remains from the
+    C backend (2024).
+-}
+
+module BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) where
+
+-- modified from pretty-printer generated by the BNF converter
+
+import Data.Char (ord, showLitChar)
+import qualified Data.List as List
+import BNFC.Abs (Reg(..), Identifier(Identifier))
+import BNFC.Backend.Common (flexEps)
+
+
+-- the top-level printing method
+printRegFlex :: Reg -> String
+printRegFlex = render . prt 0
+
+
+-- you may want to change render and parenth
+render :: [String] -> String
+render = rend (0::Int) where
+  rend i ss = case ss of
+    "[" :ts -> cons "[" $ rend i ts
+    "(" :ts -> cons "(" $ rend i ts
+    t : "," :ts -> cons t $ space "," $ rend i ts
+    t : ")" :ts -> cons t $ cons ")" $ rend i ts
+    t : "]" :ts -> cons t $ cons "]" $ rend i ts
+    t :ts -> space t $ rend i ts
+    _ -> ""
+  cons s t = s ++ t
+  space t s = if null s then t else t ++ s
+
+
+parenth :: [String] -> [String]
+parenth ss = ["("] ++ ss ++ [")"]
+
+
+-- the printer class does the job
+class Print a where
+  prt :: Int -> a -> [String]
+
+
+prPrec :: Int -> Int -> [String] -> [String]
+prPrec i j = if j < i then parenth else id
+
+
+instance Print Identifier where
+  prt _ (Identifier (_, i)) = [i]
+
+
+instance Print Reg where
+  prt i = \case
+    RSeq reg0 reg -> prPrec i 2 (concat [prt 2 reg0 , prt 3 reg])
+    RAlt reg0 reg -> prPrec i 1 (concat [prt 1 reg0 , ["|"] , prt 2 reg])
+
+    -- Flex does not support set difference. See link for valid patterns.
+    -- https://westes.github.io/flex/manual/Patterns.html#Patterns
+    -- RMinus reg0 reg -> prPrec i 1 (concat [prt 2 reg0 , ["#"] , prt 2 reg])
+    RMinus reg0 REps -> prt i reg0  -- REps is identity for set difference
+    RMinus RAny (RChar c) -> [ concat [ "[^", escapeChar c, "]" ] ]
+    RMinus RAny (RAlts str) -> [ concat [ "[^", concatMap escapeChar str, "]" ] ]
+    -- FIXME: unicode inside brackets [...] is not accepted by flex
+    -- FIXME: maybe we could add cases for char - RDigit, RLetter etc.
+ RMinus _ _ -> error "Flex does not support general set difference" + + RStar reg -> concat [ prt 3 reg , ["*"] ] + RPlus reg -> concat [ prt 3 reg , ["+"] ] + ROpt reg -> concat [ prt 3 reg , ["?"] ] + REps -> [ flexEps ] + RChar c -> [ escapeChar c ] + -- Unicode characters cannot be inside [...] so we use | instead. + RAlts str -> prPrec i 1 $ List.intersperse "|" $ map escapeChar str + -- RAlts str -> concat [["["], prt 0 $ concatMap escapeChar str, ["]"]] + RSeqs str -> prPrec i 2 $ map escapeChar str + RDigit -> [ "\\d" ] + RLetter -> [ "[A-Za-z]" ] -- add underscore ? + RUpper -> [ "[A-Z]" ] + RLower -> [ "[a-z]" ] + RAny -> [ "." ] + + +-- | Handle special characters in regular expressions. +escapeChar :: Char -> String +escapeChar c + | c `elem` reserved = '\\':[c] + | let x = ord c, x >= 256 = [c] + -- keep unicode characters -- "\x" ++ showHex x "" + | otherwise = showLitChar c "" + where + reserved :: String + reserved = " '$+-*=<>[](){}!?.,;:^~|&%#/\\$_@\"" + + diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index ac5fdbf6..74a1c757 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -64,6 +64,7 @@ data Target = TargetC | TargetCpp | TargetCppNoStl | TargetHaskell | TargetHaskellGadt | TargetLatex | TargetJava | TargetOCaml | TargetPygments | TargetTreeSitter + | TargetPython | TargetCheck deriving (Eq, Bounded, Enum, Ord) @@ -83,6 +84,7 @@ instance Show Target where show TargetPygments = "Pygments" show TargetTreeSitter = "Tree-sitter" show TargetCheck = "Check LBNF file" + show TargetPython = "Python" -- | Which version of Alex is targeted? data AlexVersion = Alex3 @@ -261,6 +263,7 @@ printTargetOption = ("--" ++) . \case TargetOCaml -> "ocaml" TargetPygments -> "pygments" TargetTreeSitter -> "tree-sitter" + TargetPython -> "python" TargetCheck -> "check" printAlexOption :: AlexVersion -> String @@ -314,6 +317,8 @@ targetOptions = "Output a Python lexer for Pygments" , Option "" ["tree-sitter"] (NoArg (\o -> o {target = TargetTreeSitter})) "Output grammar.js file for use with tree-sitter" + , Option "" ["python"] (NoArg (\ o -> o{target = TargetPython })) + "Output Python code for use with PLY" , Option "" ["check"] (NoArg (\ o -> o{target = TargetCheck })) "No output. 
Just check input LBNF file" ] @@ -530,6 +535,7 @@ instance Maintained Target where TargetOCaml -> True TargetPygments -> True TargetTreeSitter -> True + TargetPython -> True TargetCheck -> True instance Maintained AlexVersion where @@ -661,4 +667,5 @@ translateOldOptions = mapM $ \ o -> do , ("--ghc" , "--generic") , ("--deriveGeneric" , "--generic") , ("--deriveDataTypeable" , "--generic") + , ("-python" , "--python") ] diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs index ce0c945c..13d85e7d 100644 --- a/testing/src/ParameterizedTests.hs +++ b/testing/src/ParameterizedTests.hs @@ -421,6 +421,10 @@ parameters = concat , javaParams { tpName = "Java (with jflex and line numbers)" , tpBnfcOptions = ["--java", "--jflex", "-l"] } ] + -- Python + , [ pythonParams { tpName = "Python" + , tpBnfcOptions = ["--python"] } + ] ] where base = baseParameters @@ -444,6 +448,14 @@ parameters = concat , tpBnfcOptions = ["--ocaml"] , tpRunTestProg = haskellRunTestProg } + pythonParams = base + { tpBuild = do + return () -- nothing to make or compile + , + tpRunTestProg = \ _lang args -> do + pyFile_ <- findFile "genTest.py" + cmd "python3.10" $ pyFile_ : args + } -- | Helper function that runs bnfc with the context's options and an -- option to generate 'tpMakefile'. From 59181603b0fcfb50b5b0782b988e1fefb7558ecc Mon Sep 17 00:00:00 2001 From: AiStudent <8739546+AiStudent@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:57:22 +0200 Subject: [PATCH 2/7] Update user_guide.rst --- docs/user_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 8273f1d5..90e9ed97 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -318,7 +318,7 @@ Example usage: :: * - skele.py - Provides skeleton code to deconstruct an AST, using structural pattern matching. -Optionally one may with ``-m``` also create a makefile that contains the target +Optionally one may with ``-m`` also create a makefile that contains the target "distclean" to remove the generated files. Testing the frontend From 1f9a9179c22b97aea9c003ca37908efa725faa86 Mon Sep 17 00:00:00 2001 From: AiStudent Date: Tue, 27 Aug 2024 15:46:45 +0200 Subject: [PATCH 3/7] changed python3.10 commands to python3 --- docs/user_guide.rst | 6 +++--- document/BNF_Converter_Python_Mode.html | 2 +- testing/src/ParameterizedTests.hs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 8273f1d5..534eb1d2 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -326,15 +326,15 @@ Testing the frontend It's possible to pipe input, like:: - echo "(1 + 2) * 3" | python3.10 genTest.py + echo "(1 + 2) * 3" | python3 genTest.py or:: - python3.10 genTest.py < file.txt + python3 genTest.py < file.txt and it's possible to just use an argument:: - python3.10 genTest.py file.txt + python3 genTest.py file.txt Caveats diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html index 429b5c44..8021b3e1 100644 --- a/document/BNF_Converter_Python_Mode.html +++ b/document/BNF_Converter_Python_Mode.html @@ -76,7 +76,7 @@

Testing the frontend

The following example uses a frontend that is generated from a C-like grammar.

- $ python3.10 genTest.py < hello.c
+ $ python3 genTest.py < hello.c

Generating LALR tables
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs index 13d85e7d..8231c8eb 100644 --- a/testing/src/ParameterizedTests.hs +++ b/testing/src/ParameterizedTests.hs @@ -454,7 +454,7 @@ parameters = concat , tpRunTestProg = \ _lang args -> do pyFile_ <- findFile "genTest.py" - cmd "python3.10" $ pyFile_ : args + cmd "python3" $ pyFile_ : args } -- | Helper function that runs bnfc with the context's options and an From 8ee25b37f1472cd9a1f17c5ed0a2987f101e7613 Mon Sep 17 00:00:00 2001 From: AiStudent Date: Tue, 27 Aug 2024 15:54:26 +0200 Subject: [PATCH 4/7] Cleanup --- docs/user_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 780cd4db..4e517da9 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -290,7 +290,7 @@ Python Backend The BNF Converter's Python Backend generates a Python frontend, that uses `PLY `_ (Python Lex Yacc), to parse -input into an AST (abstract syntax tree). +input into an abstract syntax tree. Python 3.10 or higher is needed. From c8d5ede6851a99e52897854c3fe98177ff22ae54 Mon Sep 17 00:00:00 2001 From: AiStudent Date: Tue, 5 Nov 2024 13:45:29 +0100 Subject: [PATCH 5/7] Updated to Lark --- source/BNFC.cabal | 1 - source/src/BNFC/Backend/Python.hs | 41 +- source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 563 +++++++++++--------- source/src/BNFC/Backend/Python/CFtoPyLex.hs | 189 ------- 4 files changed, 319 insertions(+), 475 deletions(-) delete mode 100644 source/src/BNFC/Backend/Python/CFtoPyLex.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index c232b401..9280a8f3 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -283,7 +283,6 @@ library -- Python backend BNFC.Backend.Python BNFC.Backend.Python.CFtoPyAbs - BNFC.Backend.Python.CFtoPyLex BNFC.Backend.Python.CFtoPyPrettyPrinter BNFC.Backend.Python.RegToFlex BNFC.Backend.Python.PyHelpers diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs index e0a3da35..13568e38 100644 --- a/source/src/BNFC/Backend/Python.hs +++ b/source/src/BNFC/Backend/Python.hs @@ -14,23 +14,20 @@ import BNFC.CF (CF, firstEntry) import BNFC.Options (SharedOptions, optMake, lang) import BNFC.Backend.Base (MkFiles, mkfile) import BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) -import BNFC.Backend.Python.CFtoPyLex (cf2PyLex) import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty) import BNFC.Backend.Python.CFtoPySkele (cf2PySkele) import BNFC.Backend.Python.PyHelpers - -import BNFC.PrettyPrint -- For Doc +import BNFC.PrettyPrint import qualified BNFC.Backend.Common.Makefile as Makefile + -- | Entrypoint for BNFC to use the Python backend. 
makePython :: SharedOptions -> CF -> MkFiles () makePython opts cf = do let pkgName = "bnfcPyGen" ++ name - let (lexerDefs, tokensPly) = cf2PyLex cf - let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf tokensPly + let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf let prettyPrinter = cf2PyPretty pkgName cf let skeletonCode = cf2PySkele pkgName cf - mkPyFile (pkgName ++ "/LexTokens.py") lexerDefs mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs mkPyFile (pkgName ++ "/Absyn.py") abstractClasses mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter @@ -56,11 +53,9 @@ makefile pkgName optMakefileName basename = vcat , Makefile.mkRule "vclean" [] [ "rm -f " ++ unwords [ - pkgName ++ "/LexTokens.py", pkgName ++ "/ParsingDefs.py", pkgName ++ "/Absyn.py", pkgName ++ "/PrettyPrinter.py", - pkgName ++ "/LexTokens.py.bak", pkgName ++ "/ParsingDefs.py.bak", pkgName ++ "/Absyn.py.bak", pkgName ++ "/PrettyPrinter.py.bak", @@ -74,7 +69,6 @@ makefile pkgName optMakefileName basename = vcat "rmdir " ++ pkgName, "rm -f __pycache__/*.pyc", "rm -fd __pycache__", - "rm -f parser.out parsetab.py", "rm -f " ++ makefileName, "rm -f " ++ makefileName ++ ".bak" ] @@ -94,17 +88,14 @@ comment x = "# " ++ x pyTest :: String -> CF -> String pyTest pkgName cf = unlines [ - "from ply.lex import lex", - "from ply.yacc import yacc", "import sys", - "from " ++ pkgName ++ ".LexTokens import *", "from " ++ pkgName ++ ".ParsingDefs import *", "from " ++ pkgName ++ ".PrettyPrinter import *", "", "", "# Suggested input options:", - "# python3.10 genTest.py < sourcefile", - "# python3.10 genTest.py sourcefile inputfile (i.e. for interpreters).", + "# python3 genTest.py < sourcefile", + "# python3 genTest.py sourcefile inputfile (i.e. for interpreters).", "inputFile = None", "if len(sys.argv) > 1:", "\tf = open(sys.argv[1], 'r')", @@ -117,22 +108,14 @@ pyTest pkgName cf = unlines "\tfor line in sys.stdin:", "\t\tinp += line", "", - "", - "# Customizable error handling for the parsing", - "def p_error(p: lex.LexToken):", - "\tif p is None:", - "\t\tprint('No rule could reduce the tokenized input')", - "\telse:", - "\t\tprint('line:', p.lineno, 'lexpos:', p.lexpos, f'Syntax error at {p.value!r}')", - "\t\tp.lexer.syntaxError = True", - "", + "def onError(e):", + " print(e)", + " print('Parse failed')", + " quit(1)", "", "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.", - "lexer = lex.lex()", - "parser = yacc(start=" ++ defaultEntry ++ ")", - "lexer.syntaxError = False", - "ast = parser.parse(inp, lexer=lexer)", - "if ast and not lexer.syntaxError:", + "ast = parser.parse(inp, on_error=onError)", + "if ast: # and not lexer.syntaxError:", "\tprint('Parse Successful!\\n')", "\tprint('[Abstract Syntax]')", "\tprint(printAST(ast))", @@ -144,6 +127,4 @@ pyTest pkgName cf = unlines "\tprint('Parse failed')", "\tquit(1)" ] - where - defaultEntry = (addCitationSigns . translateToList . show . 
firstEntry) cf diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs index a4712fb0..adad97f3 100644 --- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs +++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs @@ -9,45 +9,47 @@ module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where import Data.List ( nub, intercalate ) import BNFC.CF import BNFC.Backend.Python.PyHelpers +import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) import BNFC.Backend.Common.NamedVariables import Text.PrettyPrint (Doc, render) import Data.Either (lefts) -import Data.Char (toLower) +import Data.Char (toLower, toUpper) import qualified Data.List.NonEmpty as List1 + -- | The result is ParsingDefs.py & Absyn.py cf2PyAbs :: String -> CF -- ^ Grammar. - -> [(String, String)] -- Tokens to unicode mapping -> (String, String) -- ParsingDefs.py, Absyn.py. -cf2PyAbs pkgName cf tokensPly = ( unlines - [ "from " ++ pkgName ++ ".Absyn import *" - , "\n\n" ++ createCommonEntrypointDef cf - , "\n\n" ++ (unlines parsingDefs) - , if length definesParsingDefs > 0 - then "\n\n# Parsing rules from defines" - else "" - , "\n\n" ++ unlines definesParsingDefs +cf2PyAbs pkgName cf = ( unlines + [ "from lark import Lark, Transformer, v_args" + , "from dataclasses import dataclass" + , "from " ++ pkgName ++ ".Absyn import *" + , "" + , createGrammar cf + , createTransformer cf + , "" + , "# Create Lark parser with the given grammar" + , "parser = Lark(grammar, start='start', parser='lalr', lexer='basic', " ++ + "transformer=TreeTransformer())" + , "" + ] + , unlines + ["from typing import List as _List" + ,"# Value categories (no coercsions):" + , unlines valueCatsClasses + , "" + , placeholderVariableClass + , "" + ,"# Rules:" + ,"from dataclasses import dataclass, field" + ,"\n" ++ (unlines dataClasses) ] - , "from typing import List as _List" ++ - "\n\n# Value categories (no coercsions):" ++ - "\n\n" ++ unlines valueCatsClasses ++ - "\n\n" ++ placeholderVariableClass ++ - "\n\n# Rules:" ++ - "\n" ++ "from dataclasses import dataclass, field" ++ - "\n\n" ++ (unlines dataClasses) ) where rules = cfgRules cf - - -- To create ParsingDefs.py - parsingDefs :: [String] - parsingDefs = map (ruleToParsingDef cf tokensPly) - [r | r <- rules, isParsable r, not (isDefinedRule r)] - definesParsingDefs = makeDefineParsingDefs cf tokensPly - -- To create Absyn.py dataClasses :: [String] dataClasses = map makePythonClass @@ -69,10 +71,289 @@ cf2PyAbs pkgName cf tokensPly = ( unlines , "Integer(int)" , "Double(float)" ] - valueCatsClasses = map createValueCatClass valueCatNames - + +-- Creates a grammar for Lark. Not that it is a real string (r"..."). +createGrammar :: CF -> String +createGrammar cf = unlines + [ "grammar = r\"\"\"" + , " ?start: " ++ map toLower ((translateToList .show . firstEntry) cf) + , "" + , unlines orClauses + , larkLiterals cf + , unlines singleComments + , unlines multiComments + , " %import common.WS" + , " %ignore WS" + , "\"\"\"" + ] + where + aCats = reallyAllCats cf + rs = cfgRules cf + + enumeratedRules :: [(Int, Rul RFun)] + enumeratedRules = enumerateAllDefinedRules rs 1 [] + orClauses = map (createOrClause cf enumeratedRules) aCats + + (multiMatchers, singleMatchers) = comments cf + singleComments = map createLineCommentMatcher singleMatchers + multiComments = map createMultiLineCommentMatcher multiMatchers + + +-- Enumerates all (only defined relevant) rules to prevent naming overlap. 
+enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)] + -> [(Int, Rul RFun)] +enumerateAllDefinedRules [] _ irs = irs +enumerateAllDefinedRules (r:rs) n irs + | isDefinedRule r = enumerateAllDefinedRules rs (n+1) (irs ++ [(n, r)]) + | otherwise = enumerateAllDefinedRules rs n (irs ++ [(0, r)]) + + +-- Creates an or clause with all rules for a given category. +createOrClause :: CF -> [(Int, Rul RFun)] -> Cat -> String +createOrClause cf irs c = unlines + [ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++ + intercalate "\n | " + (map createProdAndNameForRule catsIrs) + ] + where + catsIrs = [(n, removeWhiteSpaceSeparators r) | (n, r) <- irs, + valCat r == c, isParsable r] + + +-- Creates an entry for an or clause. +createProdAndNameForRule :: (Int, Rul RFun) -> String +createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++ + if (not (isCoercion r)) then " -> " ++ map toLower name else "" + where + name + | isNilFun r = "nil" ++ (identCat . valCat) r + | isOneFun r = "one" ++ (identCat . valCat) r + | isConsFun r = "cons" ++ (identCat . valCat) r + | isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r + | otherwise = "r_" ++ funName r + + +-- Creates the literals for a grammar for Lark. +larkLiterals :: CF -> String +larkLiterals cf = unlines $ concat + [ + ifC catString [createLiteral "String.2" "\"(\\\\.|[^\"])*\""] + , ifC catChar [createLiteral "Char.2" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"] + , ifC catDouble [createLiteral "Double.2" "\\d+\\.\\d+(e-?\\d+)?"] + , ifC catInteger [createLiteral "Integer.2" "\\d+"] + -- Prolog requires user defined tokens to have priority over Ident; C + -- requires Double to have priority over user defined tokens, as C has + -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order + -- for priority, not the length. + , userDefTokens + , ifC catIdent [createLiteral "Ident" "[A-Za-z]\\w*"] + ] + where + ifC :: TokenCat -> [String] -> [String] + ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + + userDefTokens :: [String] + userDefTokens = [ + createLiteral (name) (printRegFlex exp) | (name, exp) <- tokenPragmas cf + ] + + createLiteral :: String -> String -> String + createLiteral name regex = + " " ++ map toUpper name ++ ": /" ++ regex ++ "/" + + +-- Creates the class transformer, where each member method tells Lark how +-- to transform some parsed node in the tree. +createTransformer :: CF -> String +createTransformer cf = unlines + [ "#transformer" + , "class TreeTransformer(Transformer):" + , unlines (map createRuleTransform rs) + , unlines (map (makeDefineTransform cf) enumeratedRDs) + , unlines (map createListTransform listRules) + , createTokenTransformers cf + ] + where + enumeratedRules :: [(Int, Rul RFun)] + enumeratedRules = enumerateAllDefinedRules (cfgRules cf) 1 [] + + rs = [r | r <- cfgRules cf + , not (isCoercion r) + , not (isNilCons r) + , not (isDefinedRule r)] + listRules = [r | r <- cfgRules cf, isNilCons r] + + enumeratedRDs = [(n, r, d) | (n, r) <- enumeratedRules, d <- definitions cf + , not (isCoercion r) + , not (isNilCons r) + , isDefinedRule r + , nameCorresponds ((wpThing . 
defName) d) (funName r)] + + +-- Creates a transform for a rule +createRuleTransform :: Rul RFun -> String +createRuleTransform r = unlines + [ " @v_args(inline=True)" + , " def r_" ++ map toLower (funName r) ++ "(self" ++ + concat (map (", " ++) enumeratedVars) ++ "):" + , " return " ++ funName r ++ "(" ++ intercalate ", " enumeratedVars ++ ")" + ] + where + sentForm = rhsRule r + nvCats = numVars sentForm :: [Either (Cat, Doc) String] + enumeratedVars = [render d | (c, d) <- lefts nvCats] + + +-- Creates a transform for a list rule. +createListTransform :: Rul RFun -> String +createListTransform r = unlines + [ " @v_args(inline=True)" + , " def " ++ map toLower name ++ "(self" ++ + concat (map (", " ++) enumeratedVars) ++ "):" + , " return " ++ args + ] + where + name + | isNilFun r = "nil" ++ (identCat . valCat) r + | isOneFun r = "one" ++ (identCat . valCat) r + | isConsFun r = "cons" ++ (identCat . valCat) r + | otherwise = funName r + + sentForm = rhsRule r + nvCats = numVars sentForm :: [Either (Cat, Doc) String] + enumeratedVars = [render d | (c, d) <- lefts nvCats] + + args :: String + | isNilFun r = "[]" + | isOneFun r = "[" ++ head enumeratedVars ++ "]" + | isConsFun r = "[" ++ head enumeratedVars ++ "] + " ++ + last enumeratedVars + | otherwise = error "Should be a list function" + + +-- Creates the transformer functions for the tokens. +createTokenTransformers :: CF -> String +createTokenTransformers cf = unlines $ concat + [ + ifC catString [createTokenTransform "String"] + , ifC catChar [createTokenTransform "Char"] + , ifC catDouble [createTokenTransform "Double"] + , ifC catInteger [createTokenTransform "Integer"] + -- Prolog requires user defined tokens to have priority over Ident; C + -- requires Double to have priority over user defined tokens, as C has + -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order + -- for priority, not the length. + , userDefTokens + , ifC catIdent [createTokenTransform "Ident"] + ] + where + ifC :: TokenCat -> [String] -> [String] + ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + + userDefTokens :: [String] + userDefTokens = [ + createTokenTransform name | (name, exp) <- tokenPragmas cf + ] + + +-- Creates a transform for a token. +createTokenTransform :: String -> String +createTokenTransform name = unlines + [ " @v_args(inline=True)" + , " def " ++ map toUpper name ++ "(self, token):" + , " return " ++ name ++ "(token.value)" + ] + + +-- | Produces the production in the docstring for the parsing definitions. +prodToDocStr ::[Either Cat String] -> String +prodToDocStr [] = "" +prodToDocStr (ec:[]) = ecsToDocStr ec +prodToDocStr (ec:ecs) = + ecsToDocStr ec ++ " " ++ prodToDocStr ecs + + +-- Converts a single element in the production. 
+ecsToDocStr :: Either Cat String -> String +ecsToDocStr (Left (TokenCat t)) = map toUpper t +ecsToDocStr (Left c) = map toLower (translateToList (show c)) +ecsToDocStr (Right strOp) = "\"" ++ concat (map escapeBackslash strOp) ++ "\"" + + +-- | For single-line comments +createLineCommentMatcher :: String -> String +createLineCommentMatcher r = unlines + [ " C" ++ toOrd r ++ ": /" ++ concat (map escapeChar r) ++ "[^\\n]*/" + , " %ignore C" ++ toOrd r + ] + + +-- | For multi-line comments +createMultiLineCommentMatcher :: (String, String) -> String +createMultiLineCommentMatcher (s, e) = unlines + [ " C" ++ toOrd (s ++ e) ++ ": /" ++ escaped s ++ "([\\s\\S]*?)" ++ + escaped e ++ "/" + , " %ignore C" ++ toOrd (s ++ e) + ] + where + escaped s = concat $ map escapeChar s + + +-- Since we're using a real string for the grammar, r""" ... """ it seems +-- we can't escape everything in strOp from regflex. Only backslashes. +escapeBackslash :: Char -> String +escapeBackslash '\\' = "\\\\" +escapeBackslash c = [c] + + +-- | To compare names for defines. The first letter needs to be lowered, so +-- "while" == "While". +nameCorresponds :: String -> String -> Bool +nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys) +nameCorresponds _ _ = error "Names can't be empty" + + +-- Creates a transformer for a rule with its corresponding define. +makeDefineTransform :: + CF -> (Int, Rul RFun, Define) -> String +makeDefineTransform cf (n, defRule, defi) = unlines + [ " @v_args(inline=True)" + , " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++ + concat (map (", " ++) enumeratedVars) ++ "):" + , " return " ++ expToDef env2 (defBody defi) + , "" + ] + where + name = (wpThing . defName) defi + sentForm = rhsRule defRule + args = map fst (defArgs defi) + nvCats = numVars sentForm :: [Either (Cat, Doc) String] + enumeratedVars = [render d | (c, d) <- lefts nvCats] + env2 = zip args enumeratedVars + + +-- | Converts the production of a define, called an expression, to a +-- production for the parsing definition. +expToDef :: [(String, String)] -> Exp -> String +expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]" +expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++ + expToDef env recList +expToDef _ (App "[]" _ _) = "[]" +expToDef env (App fName _ exps) = + fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")" +expToDef env (Var s) = case lookup s env of + Just p -> p + Nothing -> error "Missing variable in define enviroment" +expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")" +expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")" +expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")" +expToDef _ (LitString s) = "String('" ++ show s ++ "')" + + +-- A placeholder variable to store additional information, for say type +-- annotation. placeholderVariableClass :: String placeholderVariableClass = unlines [ "# Placeholder to add additional information to a node in the AST," ++ @@ -98,6 +379,7 @@ placeholderVariableClass = unlines , " return str(self.__v.__class__)" ] + -- | Creates a parsing definition that points to all entrypoints. createCommonEntrypointDef :: CF -> String createCommonEntrypointDef cf = unlines @@ -126,37 +408,6 @@ createValueCatClass :: String -> String createValueCatClass s = "class " ++ s ++ ":\n\tpass\n" --- | Creates a parsing definition, by checking what type of rule it is and --- calling the corresponding make function. 
-ruleToParsingDef :: CF -> [(String, String)] -> Rul RFun -> String -ruleToParsingDef cf tokensPly rule - | isCoercion funcRStr = - makeParseCoercion cf tokensPly funcCat (fName, sentForm) - | isNilFun funcRStr = - makeParseNil tokensPly funcCat (fNameTranslated, sentForm) - | isOneFun funcRStr = - makeParseOne cf tokensPly funcCat (fNameTranslated, sentForm) - | isConsFun funcRStr = - makeParseCons cf tokensPly funcCat (fNameTranslated, sentForm) - | isDefinedRule rule = - error "Should not generate define rules in this step" - | otherwise = - makeParseFunc cf tokensPly funcCat (fName, sentForm) - where - funcRStr = funRule rule :: RString - fName = wpThing funcRStr :: String - - funcCat = valCat rule :: Cat - catStr = show (valCat rule) :: String - - fNameTranslated :: String - fNameTranslated - | isNilFun funcRStr = catStr - | otherwise = fName - - sentForm = rhsRule rule :: [Either Cat String] - - -- | Make a Python class from a rule's name and production. makePythonClass :: Rul RFun -> String makePythonClass rule = @@ -176,207 +427,9 @@ makePythonClass rule = ["_ann_type: _AnnType = field(default_factory=_AnnType)"]) - -- | Creates the corresponding type hinting for some member variable. strCatToPyTyping :: String -> String strCatToPyTyping s = if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s --- | It could be this is only guarding against list categories. -literalsToPytypeMaybe :: CF -> String -> Maybe String -literalsToPytypeMaybe cf s = case s of - "Integer" -> Just "Integer" - "Double" -> Just "Double" - "Char" -> Just "Char" - "String" -> Just "String" - "Ident" -> Just "Ident" - _ -> if s `elem` (tokenNames cf) then Just s else Nothing - - --- | The following makeParse functions create their corresponding parsing --- definitions for some rule. -makeParseFunc :: CF -> [(String, String)] -> Cat -> (String, SentForm) - -> String -makeParseFunc cf tokensPly dataCat (name, sentForm) = unlines - [ "def " ++ "p_" ++ name ++ "(p):\n" ++ "\t" ++ "\"\"\"" - , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm) - , "\t" ++ "\"\"\"" - , "\t" ++ "p[0] = " ++ rhs ++ "\n" - ] - where - rhs = name ++ "(" ++ (addCommas (getLeftIndexes cf 1 sentForm)) ++ ")" - - -makeParseCoercion :: CF -> [(String, String)] -> Cat -> (String, SentForm) - -> String -makeParseCoercion cf tokensPly dataCat (_, sentForm) = unlines - [ "def " ++ "p_" ++ (show sourceCat) ++ "(p):\n" ++ "\t" ++ "\"\"\"" - , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm) - , "\t" ++ "\"\"\"" - , "\t" ++ "p[0] = " ++ strP ++ "\n" - ] - where - strP = head (getLeftIndexes cf 1 sentForm) - sourceCat = (head . 
lefts) sentForm - - -makeParseNil :: [(String, String)] -> Cat -> (String, SentForm) -> String -makeParseNil tokensPly dataCat (_, sentForm) = unlines - [ "def " ++ "p_" ++ "Nil" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\"" - , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) - , "\t" ++ "\"\"\"" - , "\t" ++ "p[0] = []\n" - ] - where - translatedCat = translateToList $ show dataCat - - -makeParseOne :: CF -> [(String, String)] -> Cat -> (String, SentForm) -> String -makeParseOne cf tokensPly dataCat (_, sentForm) = unlines - [ "def " ++ "p_" ++ "One" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\"" - , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) - , "\t" ++ "\"\"\"" - , "\t" ++ "p[0] = " ++ rhs ++ "\n" - ] - where - translatedCat = translateToList $ show dataCat - rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm) - - -makeParseCons :: CF -> [(String, String)] -> Cat -> (String, SentForm) - -> String -makeParseCons cf tokensPly dataCat (_, sentForm) = unlines - [ "def " ++ "p_" ++ "Cons" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\"" - , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) - , "\t" ++ "\"\"\"" ++ "\n" - , "\t" ++ "p[0] = " ++ rhs ++ "\n" - ] - where - translatedCat = translateToList $ show dataCat - rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm) - - --- | Produces a list of the elements in the code production, where the indices --- match the argument categories. -getLeftIndexesLists :: [(String, String)] -> CF -> Int -> [Either Cat String] - -> [String] -getLeftIndexesLists _ _ _ [] = [] -getLeftIndexesLists tokensPly cf n (Left c:ecs) - | isList c = [typedPTerm] ++ (getLeftIndexesLists tokensPly cf (n+1) ecs) - | otherwise = ["[" ++ typedPTerm ++ "]"] ++ - (getLeftIndexesLists tokensPly cf (n+1) ecs) - where - pTerm = "p[" ++ (show n) ++ "]" - typedPTerm = case literalsToPytypeMaybe cf (show c) of - Just s -> s ++ "(" ++ pTerm ++ ")" - Nothing -> pTerm -getLeftIndexesLists tokensPly cf n (Right strOp:ecs) - | separatorIsEmpty tokensPly strOp = getLeftIndexesLists tokensPly cf n ecs - | otherwise = getLeftIndexesLists tokensPly cf (n+1) ecs - - --- | In case the deliminator is "" or is not defined for the lexer, like --- ignored characters. -separatorIsEmpty :: [(String, String)] -> String -> Bool -separatorIsEmpty tokensPly strOp - | length strOp > 0 = case lookup strOp tokensPly of - Just _ -> False - Nothing -> True - | otherwise = True - - --- | Produces a list of the elements in the code production, where the indices --- match the argument categories. -getLeftIndexes :: CF -> Int -> [Either Cat String] -> [String] -getLeftIndexes _ _ [] = [] -getLeftIndexes cf n (Left c:ecs) = [typedPTerm] ++ - (getLeftIndexes cf (n+1) ecs) - where - pTerm = "p[" ++ (show n) ++ "]" - typedPTerm = case literalsToPytypeMaybe cf (show c) of - Just s -> s ++ "(" ++ pTerm ++ ")" - Nothing -> pTerm -getLeftIndexes cf n (Right _:ecs) = getLeftIndexes cf (n+1) ecs - - --- | Produces the production in the docstring for the parsing definitions. -prodToDocStr :: [(String, String)] -> [Either Cat String] -> String -prodToDocStr _ [] = "" -prodToDocStr tokensPly (ec:[]) = ecsToDocStr tokensPly ec -prodToDocStr tokensPly (ec:ecs) = - ecsToDocStr tokensPly ec ++ " " ++ prodToDocStr tokensPly ecs - - --- Converts a single element in the production. 
-ecsToDocStr :: [(String, String)] -> Either Cat String -> String -ecsToDocStr _ (Left c) = translateToList $ show c -ecsToDocStr tokensPly (Right strOp) = case lookup strOp tokensPly of - (Just s) -> s - Nothing -> ("") -- We assume it is no token, this affects getLeftIndexes - - --- | Creating the parsing definitions for the defines. -makeDefineParsingDefs :: CF -> [(String, String)] -> [String] -makeDefineParsingDefs cf tokensPly = defFuncsPy - where - rules = cfgRules cf - - definedRules :: [Rul RFun] - definedRules = [r | r <- rules, isDefinedRule r] - - pairs :: [(Rul RFun, Define)] - pairs = [(dr, d) | dr <- definedRules, d <- definitions cf, - nameCorresponds ((wpThing . defName) d) (funName dr)] - - -- Adds a number to the name to make each define separate. - numberedPairs = zip [1..] pairs - defFuncsPy = map (makeDefineParsingDef cf tokensPly) numberedPairs - - --- | To compare names for defines. The first letter needs to be lowered, so --- "while" == "While". -nameCorresponds :: String -> String -> Bool -nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys) -nameCorresponds _ _ = error "Names can't be empty" - - --- | Creates a define parsing definition. -makeDefineParsingDef :: - CF -> [(String, String)] -> (Int, (Rul RFun, Define)) -> String -makeDefineParsingDef cf tokensPly (n, (defRule, defi)) = unlines - [ "def p_D" ++ (show n) ++ name ++ "(p):" - , "\t\"\"\"" - , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm) - , "\t\"\"\"" - , "\t# " ++ show env - , "\tp[0] = " ++ expToDef env (defBody defi) - , "" - ] - where - name = (wpThing . defName) defi - translatedCat = translateToList $ (catToStr . valCat) defRule - sentForm = rhsRule defRule - indexes = getLeftIndexes cf 1 sentForm - args = map fst (defArgs defi) - env = zip args indexes - - --- | Converts the production of a define, called an expression, to a --- production for the parsing definition. -expToDef :: [(String, String)] -> Exp -> String -expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]" -expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++ - expToDef env recList -expToDef _ (App "[]" _ _) = "[]" -expToDef env (App fName _ exps) = - fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")" -expToDef env (Var s) = case lookup s env of - Just p -> p - Nothing -> error "Missing variable in define enviroment" -expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")" -expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")" -expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")" -expToDef _ (LitString s) = "String('" ++ show s ++ "')" - - diff --git a/source/src/BNFC/Backend/Python/CFtoPyLex.hs b/source/src/BNFC/Backend/Python/CFtoPyLex.hs deleted file mode 100644 index fd1a532a..00000000 --- a/source/src/BNFC/Backend/Python/CFtoPyLex.hs +++ /dev/null @@ -1,189 +0,0 @@ - -{- - BNF Converter: Python lexer generator - Copyright (C) 2024 Author: Bjorn Werner --} - -module BNFC.Backend.Python.CFtoPyLex ( cf2PyLex ) where - -import BNFC.CF - -import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) -import BNFC.Backend.Python.PyHelpers - - --- | The entrypoint, returns LexTokens.py and the unicode mapping. 
-cf2PyLex :: CF -> (String, [(String, String)]) -cf2PyLex cf = (, tokensPly) $ unlines - [ "import ply.lex as lex\n" - , "" - , createReservedMap reservedWordsEnv - , "# PLY tokens:\n" ++ plyTokens ++ "\n" - , "# PLY tokens with RegEx:" - , unlines plyTokensRegEx - , "# Literals:" - , plyLiterals cf - , "# Comments:" - , unlines singleComments - , unlines multiComments - , footer - ] - where - -- The reserved keywords and the symbols are zipped with a - -- unicode representation, which are needed for the parsing. - - -- Reserved keywords -> [("int", "R_...")] - reservedWordsVar :: [String] - reservedWordsVar = reservedWords cf - - reservedWordsEnv :: [(String, String)] - reservedWordsEnv = - zip reservedWordsVar (map (("R" ++) . toOrd) reservedWordsVar) - - -- Symbols -> [("+", "S_43")] - literalsVar :: [String] - literalsVar = literals cf - - strOps :: [String] - strOps = map fst (cfTokens cf) - - strOpsFiltered = filterOut strOps reservedWordsVar - strOpsFilteredSymbols = map (("S" ++) . toOrd) strOpsFiltered - - strOpsAndSymbols :: [(String, String)] - strOpsAndSymbols = zip strOpsFiltered strOpsFilteredSymbols - - presentSymbols :: [String] - presentSymbols = - map addCitationSigns (strOpsFilteredSymbols ++ literalsVar) - - -- Defining the variables for the lexer. - plyTokens = - "tokens = reserved + (" ++ concat (map (++ ",") presentSymbols) ++ ")" - plyTokensRegEx = map createRegEx strOpsAndSymbols - - tokensPly :: [(String, String)] - tokensPly = reservedWordsEnv ++ strOpsAndSymbols - - -- Comments - (multiMatchers, singleMatchers) = comments cf - singleComments = map createLineCommentMatcher singleMatchers - multiComments = map createMultiLineCommentMatcher multiMatchers - - --- | Creates tokens for the lexer, such as "t_S_43 = r'\+'". -createRegEx :: (String, String) -> String -createRegEx (s, u) = "t_" ++ u ++ " = r'" ++ concat (map escapeChar s) ++ "'" - - --- | For single-line comments -createLineCommentMatcher :: String -> String -createLineCommentMatcher r = unlines - [ "def t_C" ++ (toOrd r) ++ "(t):" - , "\tr'" ++ concat (map escapeChar r) ++ ".*'" - , "\tpass" - ] - - --- | For multi-line comments -createMultiLineCommentMatcher :: (String, String) -> String -createMultiLineCommentMatcher (s, e) = unlines - [ "def t_C" ++ (toOrd (s ++ e)) ++ "(t):" - , "\tr'" ++ (escaped s) ++ "([\\s\\S]*?)" ++ (escaped e) ++ "'" - , "\tpass" - ] - where - escaped s = concat $ map escapeChar s - - --- | The reserved_map contains mappings for reserved keywords, --- such as 'int' : 'R_105_110_116'. 
-createReservedMap :: [(String, String)] -> String -createReservedMap xs = unlines - [ "reserved_map = {" - , unlines rows - , "}" - , "" - , "reserved = (" - , unlines rowsSnd - , ")" - ] - where - rows :: [String] - rows = ["\t'" ++ w ++ "' : '" ++ u ++ "'," | (w, u) <- xs] - - rowsSnd = ["\t'" ++ u ++ "'," | (_, u) <- xs] - - --- | Creates lexer definitions for the lexer which are interpreted using --- the inspect module to retrieve useful information, for example: --- def t_String(t): --- r'"[^"]+"' --- t.type = reserved_map.get(t.value, ’String’) --- return t -plyLiterals :: CF -> String -plyLiterals cf = unlines $ concat - [ - ifC catString [createLexFunc "String" "\"(\\\\\"|[^\"])*\""] - , ifC catChar - [createLexFunc "Char" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"] - , ifC catDouble [createLexFunc "Double" "\\d+\\.\\d+(e-?\\d+)?"] - , ifC catInteger [createLexFunc "Integer" "\\d+"] - -- Prolog requires user defined tokens to have priority over Ident; C - -- requires Double to have priority over user defined tokens, as C has - -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order - -- for priority, not the length. - , userDefTokens - , ifC catIdent [createLexFunc "Ident" "[A-Za-z]\\w*"] - -- If there is no Ident present, we need a lexer definition for reserved - -- words: - , if not (isUsedCat cf (TokenCat catIdent)) && length (reservedWords cf) > 0 - then [createLexFunc "" "[A-Za-z]\\w*"] - else [] - ] - where - ifC :: TokenCat -> [String] -> [String] - ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] - - userDefTokens :: [String] - userDefTokens = [ - createLexFunc name (printRegFlex exp) | (name, exp) <- tokenPragmas cf - ] - - --- | Creates a Lexing definition for a Literal --- If no Literal name is used, this is just a reserved_map lookup. 
-createLexFunc :: String -> String -> String -createLexFunc name regex = unlines - [ "def t_" ++ (if name /= "" then name else "_NoIdentPresent") ++ "(t):" - , "\tr'" ++ regex ++ "'" - , if name /= "" - then "\tt.type = reserved_map.get(t.value, '" ++ name ++ "')" - else "\tt.type = reserved_map.get(t.value)" - , "\treturn t" - ] - - --- | Adds lexer definitions to ignore whitespaces, and a testing block --- which attempts tokenize some input, like: python3 LexTokens.py < input -footer :: String -footer = unlines - [ "# Ignored characters:" - , "t_ignore = ' \\t'" - , "" - , "# Ignored token with an action associated with it:" - , "def t_ignore_newline(t):" - , "\tr'\\n+'" - , "\tt.lexer.lineno += t.value.count('\\n')" - , "" - , "# Error handler for illegal characters:" - , "def t_error(t):" - , "\tprint('Illegal character', 'line', str(t.lineno) + ':', t.value[0], 'ascii:', ord(t.value[0]))" - , "\tquit()" - , "" - , "if __name__ == \"__main__\":" - , "\tlexer = lex.lex()" - , "\tlex.runmain(lexer)" - ] - - From 283dfd833168260c5512cd86f085583456a80904 Mon Sep 17 00:00:00 2001 From: AiStudent Date: Tue, 5 Nov 2024 16:09:08 +0100 Subject: [PATCH 6/7] Retargeted to Lark --- docs/user_guide.rst | 55 ++++++------- document/BNF_Converter_Python_Mode.html | 68 ++++++----------- source/src/BNFC/Backend/Python.hs | 85 +++++++++++---------- source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 10 +-- 4 files changed, 98 insertions(+), 120 deletions(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 4e517da9..c4e1e287 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -289,10 +289,9 @@ Python Backend =============== The BNF Converter's Python Backend generates a Python frontend, that uses -`PLY `_ (Python Lex Yacc), to parse -input into an abstract syntax tree. +Lark, to parse input into an AST (abstract syntax tree). -Python 3.10 or higher is needed. +Lark and Python 3.10 or higher is needed. Example usage: :: @@ -307,10 +306,8 @@ Example usage: :: - Description * - bnfcPyGenCalc/Absyn.py - Provides the classes for the abstract syntax. - * - bnfcPyGenCalc/LexTokens.py - - Provides PLY with the information needed to build the lexer. * - bnfcPyGenCalc/ParserDefs.py - - Provides PLY with the information needed to build the parser. + - Provides Lark with the information needed to build the lexer and parser. * - bnfcPyGenCalc/PrettyPrinter.py - Provides printing for both the AST and the linearized tree. * - genTest.py @@ -318,7 +315,7 @@ Example usage: :: * - skele.py - Provides skeleton code to deconstruct an AST, using structural pattern matching. -Optionally one may with ``-m`` also create a makefile that contains the target +Optionally one may with ``-m``` also create a makefile that contains the target "distclean" to remove the generated files. Testing the frontend @@ -340,34 +337,28 @@ and it's possible to just use an argument:: Caveats ....... -Presentation of conflicts in a grammar: - - A symbol-to-unicode transformation is made for the terminals in the grammar, - for example from "++" to "S_43_43". This however obfuscates PLYs generated - information of the grammar in the "parser.out" file. Users are hence - encouraged to use the Haskell backend to debug grammars and identify - conflicts. - Several entrypoints: + The testfile genTest.py only uses the first entrypoint used by default. To + use all entrypoints, set the start parameter to "start_". If the + entrypoints cause reduce/reduce conflicts, a lark GrammarError will be + produced. 
- At the top of the ParserDefs.py file an additional rule is added, that has - every defined entrypoint as a possible production. This may create warnings - for conflicts, as it may introduce ambiguity. Therefore the added - parsing rule is by default removed beneath the function, with the statement - "del p__Start", and included if the user comments out the removal of - "p__Start". - -Special cases for special characters: +Results from the parameterized tests: + While the Python backend generates working frontends for the example + grammars, five "failures" and six "errors" among the regression + tests are reported. - Using non-special characters, instead of say parentheses when defining rules, - may not yield the expected behaviour. Using the below rule, an expression - such as "a1+2a" can not be parsed since the a's are classified as reserved - keywords, like "int", instead of symbols like "+":: +Skeleton code for using lists as entrypoints: + Matchers for using lists, such as [Exp], are not generated in the + skeleton code as it may confuse users if the grammar uses several different + list categories, as a user may then try to pattern match lists without + checking what type the elements have. Users are instead encouraged to use + non-list entrypoints. - _. Exp1 ::= "a" Exp "a" ; +Using multiple separators + Using multiple separators for the same category, such as below, generates + Python functions with overlapping names, causing runtime errors.:: -Results from the parameterized tests: + separator Exp1 "," ; + separator Exp1 ";" ; - While the Python backend generates working frontends for the example - grammars, four "failures" and six "errors" among the regression - tests are reported. diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html index 8021b3e1..4ffb46f6 100644 --- a/document/BNF_Converter_Python_Mode.html +++ b/document/BNF_Converter_Python_Mode.html @@ -27,15 +27,15 @@

By Björn Werner

2024

  The BNF Converter's Python Backend generates a Python frontend, that uses
- PLY (Python Lex Yacc), to parse input into an AST (abstract syntax tree).
+ Lark, to parse input into an AST (abstract syntax tree).

BNFC on Github:
https://github.com/BNFC/bnfc

- PLY homepage:
- https://www.dabeaz.com/ply/ply.html
+ Lark on Github:
+ https://github.com/lark-parser/lark
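  (Lark is available from PyPI; typically "pip install lark" is all that is
  needed. Older releases were published under the name "lark-parser".)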

  Python 3.10 or higher is needed.
@@ -51,14 +51,11 @@

Usage

  Filename:                      Description:
-
- bnfcGenNAME/LexTokens.py       Provides PLY with the information needed to build the lexer.
  bnfcGenNAME/Absyn.py           Provides the classes for the abstract syntax.
- bnfcGenNAME/ParserDefs.py      Provides PLY with the information needed to build the parser.
+ bnfcGenNAME/ParserDefs.py      Provides Lark with the information needed to build the lexer and parser.
  bnfcGenNAME/PrettyPrinter.py   Provides printing for both the AST and the linearized tree.
@@ -79,7 +76,6 @@

Testing the frontend

$ python3 genTest.py < hello.c

- Generating LALR tables
Parse Successful!

[Abstract Syntax]
@@ -92,9 +88,6 @@

Testing the frontend

 return 0;
}
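
  Under the hood, genTest.py builds the Lark parser from the generated
  definitions. A minimal sketch, assuming a grammar named Calc (so the
  generated package is bnfcPyGenCalc) and an entrypoint category Exp; the
  package and entrypoint names are illustrative:

    from lark import Lark
    from bnfcPyGenCalc.ParsingDefs import grammar, TreeTransformer

    # The transformer maps Lark's parse tree onto the Absyn dataclasses,
    # so parse() returns an AST node rather than a raw Lark tree.
    parser = Lark(grammar, start='exp', parser='lalr', lexer='basic',
                  transformer=TreeTransformer())

    ast = parser.parse('(1 + 2) * 3')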

-

- The LALR tables are cached in a file called "parsetab.py", and a description by PLY of the grammar is stored in a file called "parser.out". -

The Abstract Syntax Tree

  The AST is built up using instances of Python classes, using the dataclass decorator, such as:
@@ -150,34 +143,15 @@
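  A sketch of such a generated class (the rule name EAdd and its fields are
  illustrative; _AnnType is the generated annotation placeholder class):

    from dataclasses import dataclass, field

    @dataclass
    class EAdd:
        exp_1: 'Exp'
        exp_2: 'Exp'
        _ann_type: _AnnType = field(default_factory=_AnnType)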

Using the skeleton file

Known issues

-

- Presentation of conflicts in a grammar: -

-

- A symbol-to-unicode transformation is made for the terminals in the grammar,
- for example from "++" to "S_43_43". This however obfuscates PLYs generated
- information of the grammar, inside the "parser.out" file. Users are hence
- encouraged to use say the Haskell backend to debug their
- grammars and identify conflicts.

-

- Several entrypoints: -

-

- At the top of the ParserDefs.py file an additional rule is added, that has
- every defined entrypoint as a possible production. This may create warnings
- for conflicts if it introduces ambiguity, and warnings for unused rules if
- the "_Start" category is not used as the entrypoint. Therefore the added
- parsing rule is by default removed beneath the function, "del p__Start",
- and included if the user comments out the removal:

Skeleton code for using lists as entrypoints:

  Matchers for using lists, such as [Exp], are not generated in the
  skeleton code as it may confuse users if the grammar uses several different
- list categories. Users are instead encouraged to use a non-list entrypoint.
+ list categories, as a user may then try to pattern match lists without
+ checking what type the elements have. Users are instead encouraged to use
+ non-list entrypoints.

The improper way to iterate over lists, as the value category is unknown: @@ -195,16 +169,6 @@

  for exp in listexp_:
   ...
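
  A sketch of the encouraged alternative, matching each element against its
  constructors before deconstructing it (EAdd and EInt are hypothetical rule
  names):

    for exp in listexp_:
        match exp:
            case EAdd(exp_1, exp_2):
                ...  # exp is known to be an addition node here
            case EInt(integer_):
                ...  # exp is known to be an integer literal here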

-

- Special cases for special characters -

-

- Using non-special characters instead of say parentheses when defining rules, may not yield the expected
- behaviour. Using the below rule, an expression such as "a1+2a" can not be parsed.

-

- _. Exp1 ::= "a" Exp "a" ; -

Using multiple separators

@@ -215,4 +179,20 @@

  separator Exp1 "," ;
  separator Exp1 ";" ;
-
separator Exp1 ";" ; -

\ No newline at end of file +

+

Several entrypoints:

+

+ The testfile genTest.py uses only the first entrypoint by default. To
+ use all entrypoints, set the start parameter to "start_". If the
+ entrypoints cause reduce/reduce conflicts, a Lark GrammarError will be
+ produced.
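  For example, a sketch mirroring the parser construction in genTest.py:

    parser = Lark(grammar, start='start_', parser='lalr', lexer='basic',
                  transformer=TreeTransformer())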

+

+Results from the parameterized tests: +

+

+ While the Python backend generates working frontends for the example
+ grammars, five "failures" and six "errors" among the regression
+ tests are reported.

+ diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs index 13568e38..87ce07bb 100644 --- a/source/src/BNFC/Backend/Python.hs +++ b/source/src/BNFC/Backend/Python.hs @@ -18,6 +18,7 @@ import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty) import BNFC.Backend.Python.CFtoPySkele (cf2PySkele) import BNFC.Backend.Python.PyHelpers import BNFC.PrettyPrint +import Data.Char (toLower) import qualified BNFC.Backend.Common.Makefile as Makefile @@ -87,44 +88,50 @@ comment x = "# " ++ x -- Produces the content for the testing file, genTest.py. pyTest :: String -> CF -> String pyTest pkgName cf = unlines - [ - "import sys", - "from " ++ pkgName ++ ".ParsingDefs import *", - "from " ++ pkgName ++ ".PrettyPrinter import *", - "", - "", - "# Suggested input options:", - "# python3 genTest.py < sourcefile", - "# python3 genTest.py sourcefile inputfile (i.e. for interpreters).", - "inputFile = None", - "if len(sys.argv) > 1:", - "\tf = open(sys.argv[1], 'r')", - "\tinp = f.read()", - "\tf.close()", - "\tif len(sys.argv) > 2:", - "\t\tinputFile = sys.argv[2]", - "else:", - "\tinp = ''", - "\tfor line in sys.stdin:", - "\t\tinp += line", - "", - "def onError(e):", - " print(e)", - " print('Parse failed')", - " quit(1)", - "", - "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.", - "ast = parser.parse(inp, on_error=onError)", - "if ast: # and not lexer.syntaxError:", - "\tprint('Parse Successful!\\n')", - "\tprint('[Abstract Syntax]')", - "\tprint(printAST(ast))", - "\tprint('\\n[Linearized Tree]')", - "\tlinTree = lin(ast)", - "\tprint(renderC(linTree))", - "\tprint()", - "else:", - "\tprint('Parse failed')", - "\tquit(1)" + [ "import sys" + , "from " ++ pkgName ++ ".ParsingDefs import *" + , "from " ++ pkgName ++ ".PrettyPrinter import *" + , "" + , "# Suggested input options:" + , "# python3 genTest.py < sourcefile" + , "# python3 genTest.py sourcefile inputfile (i.e. for interpreters)." + , "inputFile = None" + , "if len(sys.argv) > 1:" + , "\tf = open(sys.argv[1], 'r')" + , "\tinp = f.read()" + , "\tf.close()" + , "\tif len(sys.argv) > 2:" + , "\t\tinputFile = sys.argv[2]" + , "else:" + , "\tinp = ''" + , "\tfor line in sys.stdin:" + , "\t\tinp += line" + , "" + , "def onError(e):" + , " print(e)" + , " print('Parse failed')" + , " quit(1)" + , "" + , "# Creates the Lark parser with the given grammar. By default to the first" + , "# entrypoint. Other entrypoints exist in ParsingDefs.py." + , "parser = Lark(grammar, start='" ++ defaultEntrypoint ++ "', parser='lalr', lexer='basic', transformer=TreeTransformer())" + , "" + , "# By default the first entrypoint is used. See ParsingDefs.py for alternatives." + , "ast = parser.parse(inp, on_error=onError)" + , "if ast: # and not lexer.syntaxError:" + , "\tprint('Parse Successful!\\n')" + , "\tprint('[Abstract Syntax]')" + , "\tprint(printAST(ast))" + , "\tprint('\\n[Linearized Tree]')" + , "\tlinTree = lin(ast)" + , "\tprint(renderC(linTree))" + , "\tprint()" + , "else:" + , "\tprint('Parse failed')" + , "\tquit(1)" ] + where + defaultEntrypoint = map toLower + ((translateToList . show . 
firstEntry) cf) + diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs index adad97f3..3daefee6 100644 --- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs +++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs @@ -30,10 +30,6 @@ cf2PyAbs pkgName cf = ( unlines , createGrammar cf , createTransformer cf , "" - , "# Create Lark parser with the given grammar" - , "parser = Lark(grammar, start='start', parser='lalr', lexer='basic', " ++ - "transformer=TreeTransformer())" - , "" ] , unlines ["from typing import List as _List" @@ -78,7 +74,7 @@ cf2PyAbs pkgName cf = ( unlines createGrammar :: CF -> String createGrammar cf = unlines [ "grammar = r\"\"\"" - , " ?start: " ++ map toLower ((translateToList .show . firstEntry) cf) + , " ?start_: " ++ entryOrClause , "" , unlines orClauses , larkLiterals cf @@ -100,6 +96,10 @@ createGrammar cf = unlines singleComments = map createLineCommentMatcher singleMatchers multiComments = map createMultiLineCommentMatcher multiMatchers + strListEntryPoints = map ((map toLower) . translateToList . show) + ((List1.toList . allEntryPoints) cf) + entryOrClause = intercalate "\n | " strListEntryPoints + -- Enumerates all (only defined relevant) rules to prevent naming overlap. enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)] From f49fa38aa4a8a3d4f544fd8d12c6d839567d9612 Mon Sep 17 00:00:00 2001 From: AiStudent Date: Mon, 11 Nov 2024 02:18:44 +0100 Subject: [PATCH 7/7] 0 failures and 3 errors left due to an example with 1000+ recursion and to the lex prio not working as expected --- source/src/BNFC/Backend/Python.hs | 50 +++---- source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 133 ++++++++---------- .../Backend/Python/CFtoPyPrettyPrinter.hs | 43 +++--- source/src/BNFC/Backend/Python/CFtoPySkele.hs | 55 ++++---- source/src/BNFC/Backend/Python/PyHelpers.hs | 56 ++++++++ 5 files changed, 188 insertions(+), 149 deletions(-) diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs index 87ce07bb..e0dc012d 100644 --- a/source/src/BNFC/Backend/Python.hs +++ b/source/src/BNFC/Backend/Python.hs @@ -18,14 +18,14 @@ import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty) import BNFC.Backend.Python.CFtoPySkele (cf2PySkele) import BNFC.Backend.Python.PyHelpers import BNFC.PrettyPrint -import Data.Char (toLower) +import Data.Char (toLower, isLetter) import qualified BNFC.Backend.Common.Makefile as Makefile -- | Entrypoint for BNFC to use the Python backend. makePython :: SharedOptions -> CF -> MkFiles () makePython opts cf = do - let pkgName = "bnfcPyGen" ++ name + let pkgName = "bnfcPyGen" ++ filter isLetter name let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf let prettyPrinter = cf2PyPretty pkgName cf let skeletonCode = cf2PySkele pkgName cf @@ -47,7 +47,7 @@ makefile :: String -> Maybe String -> String -> Doc makefile pkgName optMakefileName basename = vcat [ Makefile.mkRule "all" [] - [ " " ] + [ "@echo \"Doing nothing: No compilation of the parser needed.\"" ] , Makefile.mkRule "clean" [] [ "rm -f parser.out parsetab.py" ] , Makefile.mkRule "distclean" [ "vclean" ] [] @@ -90,27 +90,27 @@ pyTest :: String -> CF -> String pyTest pkgName cf = unlines [ "import sys" , "from " ++ pkgName ++ ".ParsingDefs import *" - , "from " ++ pkgName ++ ".PrettyPrinter import *" + , "from " ++ pkgName ++ ".PrettyPrinter import printAST, lin, renderC" , "" , "# Suggested input options:" , "# python3 genTest.py < sourcefile" , "# python3 genTest.py sourcefile inputfile (i.e. for interpreters)." 
, "inputFile = None" , "if len(sys.argv) > 1:" - , "\tf = open(sys.argv[1], 'r')" - , "\tinp = f.read()" - , "\tf.close()" - , "\tif len(sys.argv) > 2:" - , "\t\tinputFile = sys.argv[2]" + , " f = open(sys.argv[1], 'r')" + , " inp = f.read()" + , " f.close()" + , " if len(sys.argv) > 2:" + , " inputFile = sys.argv[2]" , "else:" - , "\tinp = ''" - , "\tfor line in sys.stdin:" - , "\t\tinp += line" + , " inp = ''" + , " for line in sys.stdin:" + , " inp += line" , "" , "def onError(e):" - , " print(e)" - , " print('Parse failed')" - , " quit(1)" + , " print(e)" + , " print('Parse failed')" + , " quit(1)" , "" , "# Creates the Lark parser with the given grammar. By default to the first" , "# entrypoint. Other entrypoints exist in ParsingDefs.py." @@ -118,17 +118,17 @@ pyTest pkgName cf = unlines , "" , "# By default the first entrypoint is used. See ParsingDefs.py for alternatives." , "ast = parser.parse(inp, on_error=onError)" - , "if ast: # and not lexer.syntaxError:" - , "\tprint('Parse Successful!\\n')" - , "\tprint('[Abstract Syntax]')" - , "\tprint(printAST(ast))" - , "\tprint('\\n[Linearized Tree]')" - , "\tlinTree = lin(ast)" - , "\tprint(renderC(linTree))" - , "\tprint()" + , "if ast:" + , " print('Parse Successful!\\n')" + , " print('[Abstract Syntax]')" + , " print(printAST(ast))" + , " print('\\n[Linearized Tree]')" + , " linTree = lin(ast)" + , " print(renderC(linTree))" + , " print()" , "else:" - , "\tprint('Parse failed')" - , "\tquit(1)" + , " print('Parse failed')" + , " quit(1)" ] where defaultEntrypoint = map toLower diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs index 3daefee6..dd346984 100644 --- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs +++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs @@ -13,7 +13,7 @@ import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) import BNFC.Backend.Common.NamedVariables import Text.PrettyPrint (Doc, render) import Data.Either (lefts) -import Data.Char (toLower, toUpper) +import Data.Char (toLower, toUpper, isLower) import qualified Data.List.NonEmpty as List1 @@ -29,6 +29,7 @@ cf2PyAbs pkgName cf = ( unlines , "" , createGrammar cf , createTransformer cf + , createDefineFunctions cf , "" ] , unlines @@ -59,8 +60,8 @@ cf2PyAbs pkgName cf = ( unlines -- Note: Custom tokens are set to inherit "str". valueCatNames = nub $ - (map (show . normCat . valCat) rulesNoListConstructors) ++ - (map (++"(str)") (tokenNames cf)) ++ + (map (unkw . show . normCat . valCat) rulesNoListConstructors) ++ + (map ((++ "(str)") . unkw) (tokenNames cf)) ++ [ "String(str)" , "Char(str)" , "Ident(str)" @@ -90,7 +91,7 @@ createGrammar cf = unlines enumeratedRules :: [(Int, Rul RFun)] enumeratedRules = enumerateAllDefinedRules rs 1 [] - orClauses = map (createOrClause cf enumeratedRules) aCats + orClauses = map (createOrClause enumeratedRules) aCats (multiMatchers, singleMatchers) = comments cf singleComments = map createLineCommentMatcher singleMatchers @@ -111,8 +112,8 @@ enumerateAllDefinedRules (r:rs) n irs -- Creates an or clause with all rules for a given category. -createOrClause :: CF -> [(Int, Rul RFun)] -> Cat -> String -createOrClause cf irs c = unlines +createOrClause :: [(Int, Rul RFun)] -> Cat -> String +createOrClause irs c = unlines [ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++ intercalate "\n | " (map createProdAndNameForRule catsIrs) @@ -132,23 +133,25 @@ createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++ | isOneFun r = "one" ++ (identCat . 
valCat) r | isConsFun r = "cons" ++ (identCat . valCat) r | isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r - | otherwise = "r_" ++ funName r + | otherwise = "r_" ++ map toLower (funName r) ++ toOrd (funName r) --- Creates the literals for a grammar for Lark. +-- Creates the literals for a grammar for Lark.Priority is set after the +-- dot, such as "Name.PRIO". For literals with the same priority, it appears +-- that Lark (with basic mode) prioritizes the longest regular +-- expression, not the longest matched literal. larkLiterals :: CF -> String larkLiterals cf = unlines $ concat [ - ifC catString [createLiteral "String.2" "\"(\\\\.|[^\"])*\""] - , ifC catChar [createLiteral "Char.2" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"] - , ifC catDouble [createLiteral "Double.2" "\\d+\\.\\d+(e-?\\d+)?"] - , ifC catInteger [createLiteral "Integer.2" "\\d+"] + ifC catString [createLiteral "String.1" "\"(\\\\.|[^\"])*\""] + , ifC catChar [createLiteral "Char.1" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"] + , ifC catDouble [createLiteral "Double.1" "\\d+\\.\\d+(e-?\\d+)?"] + , ifC catInteger [createLiteral "Integer.1" "\\d+"] -- Prolog requires user defined tokens to have priority over Ident; C -- requires Double to have priority over user defined tokens, as C has - -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order - -- for priority, not the length. + -- "CDouble" matching "3." in 3.14. , userDefTokens - , ifC catIdent [createLiteral "Ident" "[A-Za-z]\\w*"] + , ifC catIdent [createLiteral "Ident" "[A-Za-z_]\\w*"] ] where ifC :: TokenCat -> [String] -> [String] @@ -156,7 +159,7 @@ larkLiterals cf = unlines $ concat userDefTokens :: [String] userDefTokens = [ - createLiteral (name) (printRegFlex exp) | (name, exp) <- tokenPragmas cf + createLiteral name (printRegFlex exp) | (name, exp) <- tokenPragmas cf ] createLiteral :: String -> String -> String @@ -171,7 +174,7 @@ createTransformer cf = unlines [ "#transformer" , "class TreeTransformer(Transformer):" , unlines (map createRuleTransform rs) - , unlines (map (makeDefineTransform cf) enumeratedRDs) + , unlines (map makeDefineTransform enumeratedRDs) , unlines (map createListTransform listRules) , createTokenTransformers cf ] @@ -196,14 +199,16 @@ createTransformer cf = unlines createRuleTransform :: Rul RFun -> String createRuleTransform r = unlines [ " @v_args(inline=True)" - , " def r_" ++ map toLower (funName r) ++ "(self" ++ + , " def r_" ++ nameWithUnicode ++ "(self" ++ concat (map (", " ++) enumeratedVars) ++ "):" - , " return " ++ funName r ++ "(" ++ intercalate ", " enumeratedVars ++ ")" + , " return " ++ className ++ "(" ++ intercalate ", " enumeratedVars ++ ")" ] where + nameWithUnicode = map toLower (funName r) ++ toOrd (funName r) + className = unkw (funName r) sentForm = rhsRule r nvCats = numVars sentForm :: [Either (Cat, Doc) String] - enumeratedVars = [render d | (c, d) <- lefts nvCats] + enumeratedVars = [render d | (_, d) <- lefts nvCats] -- Creates a transform for a list rule. 
@@ -223,7 +228,7 @@ createListTransform r = unlines sentForm = rhsRule r nvCats = numVars sentForm :: [Either (Cat, Doc) String] - enumeratedVars = [render d | (c, d) <- lefts nvCats] + enumeratedVars = [render d | (_, d) <- lefts nvCats] args :: String | isNilFun r = "[]" @@ -241,10 +246,6 @@ createTokenTransformers cf = unlines $ concat , ifC catChar [createTokenTransform "Char"] , ifC catDouble [createTokenTransform "Double"] , ifC catInteger [createTokenTransform "Integer"] - -- Prolog requires user defined tokens to have priority over Ident; C - -- requires Double to have priority over user defined tokens, as C has - -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order - -- for priority, not the length. , userDefTokens , ifC catIdent [createTokenTransform "Ident"] ] @@ -254,7 +255,7 @@ createTokenTransformers cf = unlines $ concat userDefTokens :: [String] userDefTokens = [ - createTokenTransform name | (name, exp) <- tokenPragmas cf + createTokenTransform name | (name, _) <- tokenPragmas cf ] @@ -263,7 +264,7 @@ createTokenTransform :: String -> String createTokenTransform name = unlines [ " @v_args(inline=True)" , " def " ++ map toUpper name ++ "(self, token):" - , " return " ++ name ++ "(token.value)" + , " return " ++ unkw name ++ "(token.value)" ] @@ -316,40 +317,38 @@ nameCorresponds _ _ = error "Names can't be empty" -- Creates a transformer for a rule with its corresponding define. -makeDefineTransform :: - CF -> (Int, Rul RFun, Define) -> String -makeDefineTransform cf (n, defRule, defi) = unlines +makeDefineTransform :: (Int, Rul RFun, Define) -> String +makeDefineTransform (n, defRule, defi) = unlines [ " @v_args(inline=True)" , " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++ concat (map (", " ++) enumeratedVars) ++ "):" - , " return " ++ expToDef env2 (defBody defi) + , " return d_" ++ name ++ "(" ++ intercalate ", " enumeratedVars ++ ")" , "" ] where name = (wpThing . defName) defi sentForm = rhsRule defRule - args = map fst (defArgs defi) nvCats = numVars sentForm :: [Either (Cat, Doc) String] - enumeratedVars = [render d | (c, d) <- lefts nvCats] - env2 = zip args enumeratedVars + enumeratedVars = [render d | (_, d) <- lefts nvCats] -- | Converts the production of a define, called an expression, to a -- production for the parsing definition. 
-expToDef :: [(String, String)] -> Exp -> String -expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]" -expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++ - expToDef env recList +expToDef :: CF -> Exp -> String +expToDef cf (App "(:)" _ (e:[App "[]" _ _])) = expToDef cf e ++ "]" +expToDef cf (App "(:)" _ (e:[recList])) = "[" ++ expToDef cf e ++ ", " ++ + expToDef cf recList expToDef _ (App "[]" _ _) = "[]" -expToDef env (App fName _ exps) = - fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")" -expToDef env (Var s) = case lookup s env of - Just p -> p - Nothing -> error "Missing variable in define enviroment" +expToDef cf (App fName _ exps) + | isLower (head fName) = + "d_" ++ fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")" + | otherwise = + unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")" +expToDef _ (Var s) = unkw s expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")" expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")" expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")" -expToDef _ (LitString s) = "String('" ++ show s ++ "')" +expToDef _ (LitString s) = "String('" ++ s ++ "')" -- A placeholder variable to store additional information, for say type @@ -380,29 +379,6 @@ placeholderVariableClass = unlines ] --- | Creates a parsing definition that points to all entrypoints. -createCommonEntrypointDef :: CF -> String -createCommonEntrypointDef cf = unlines - [ "def p__Start(p):" - , " '''" - , " _Start : " ++ (translateToList . show . head) cats ++ - concat (map createCase (tail cats)) - , " '''" - , " p[0] = p[1]" - , "" - , "" - , "# Comment the below line to enable the '_Start' entrypoint (may yield" - ++ " conflict warnings)." - , "del p__Start" - , "" - ] - where - cats = (List1.toList . allEntryPoints) cf - - createCase :: Cat -> String - createCase c = "\n | " ++ translateToList (show c) - - -- | The value categories become abstract classes, for type hinting. createValueCatClass :: String -> String createValueCatClass s = "class " ++ s ++ ":\n\tpass\n" @@ -412,10 +388,10 @@ createValueCatClass s = "class " ++ s ++ ":\n\tpass\n" makePythonClass :: Rul RFun -> String makePythonClass rule = "@dataclass\n" ++ - "class " ++ name ++ ":\n" ++ + "class " ++ className ++ ":\n" ++ if length cats == 0 then "\tpass\n" else classBody where - name = funName rule + className = unkw (funName rule) sentForm = rhsRule rule cats = lefts sentForm nvCats = numVars sentForm :: [Either (Cat, Doc) String] @@ -429,7 +405,22 @@ makePythonClass rule = -- | Creates the corresponding type hinting for some member variable. strCatToPyTyping :: String -> String -strCatToPyTyping s = - if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s +strCatToPyTyping s = if strIsList s + then "_List['" ++ (unkw . tail . init) s ++ "']" + else unkw s + + +-- | Creates functions for the defines. +createDefineFunctions :: CF -> String +createDefineFunctions cf = unlines + (map (createDefineFunction cf) (definitions cf)) +createDefineFunction :: CF -> Define -> String +createDefineFunction cf d = unlines + [ "def d_" ++ (wpThing . defName) d ++ "(" ++ addCommas args ++ "):" + , " return " ++ expToDef cf (defBody d) + ] + where + args = map (unkw . 
fst) (defArgs d)
+

diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
index 20255c19..352f12c1 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -67,8 +67,8 @@ makePrintAST cf = concat
       ]
     , if length (tokenNames cf) > 0
       then unlines
-        [ "        case (" ++ intercalate " | " (map (++"()") (tokenNames cf))
-          ++ "):"
+        [ "        case (" ++ intercalate " | "
+          (map ((++ "()") . unkw) (tokenNames cf)) ++ "):"
         , "            return '\"' + str(ast) + '\"'"
         ]
       else ""
@@ -76,7 +76,8 @@ makePrintAST cf = concat
     , "        return '[' + ', '.join([printAST(a) for a in ast]) + ']'\n"
     , "\n"
     , "    if len(vars(ast)) > 0:\n"
-    , "        return '(' + ast.__class__.__name__ + ' ' + ' '.join([printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
+    , "        return '(' + ast.__class__.__name__ + ' ' + ' '.join(" ++
+      "[printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
     , "    else:\n"
     , "        return ast.__class__.__name__\n"
     ]
@@ -107,7 +108,7 @@ makeListDecon cf c = concat
   ]
   where
     name = show $ catOfList c
-    listRulesForCat = rulesForCat cf c
+    listRulesForCat = [ r | r <- cfgRules cf, isParsable r, valCat r == c]
 
     nilRule = case [r | r <- listRulesForCat, isNilFun r] of
       [] -> Nothing
@@ -122,14 +123,14 @@ makeListDecon cf c = concat
     -- List rules are of the form:
     --   [C] ::= symbols.. C symbols.. [C]
     -- The production, in Python, is concatenated recursively:
-    --   symbols.. + lin(xs[0]) + symbols.. + listCDecon(xs[1:]) + symbols..
+    --   symbols.. + c(xs[0], 'C1') + symbols.. + listCDecon(xs[1:]) + symbols..
     sentFormToArgs :: Int -> [Either Cat String] -> String
     sentFormToArgs _ [] = "[]"
     sentFormToArgs v (Right strOp:ecss) =
       "['" ++ escapeChars strOp ++ "'] + " ++
       sentFormToArgs v ecss
     sentFormToArgs v (Left _:ecss)
-      | v == 0 = "lin(xs[0]) + " ++ sentFormToArgs (v+1) ecss
+      | v == 0 = "c(xs[0], '" ++ name ++ "') + " ++ sentFormToArgs (v+1) ecss
       | v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
         sentFormToArgs (v+1) ecss
       | otherwise = error "A list production can max have C and [C]."
@@ -165,14 +166,4 @@ makeRenderC = unlines
   , "    def ident(i):"
   , "        return ' ' * iLevel"
   , ""
-  , "    def removeTrailingWhitespace(tot):"
-  , "        i = len(tot)"
-  , "        while i > 0:"
-  , "            if tot[i] == ' ':"
-  , "                i -= 1"
-  , "            else:"
-  , "                break"
-  , ""
-  , "        return tot[:i]"
-  , ""
   , "    def oneEmptyLine(tot):"
@@ -211,7 +204,11 @@ makeRenderC = unlines
   , "            case ' ':"
   , "                tot += s"
   , "            case _:"
-  , "                tot += s + ' '"
+  , "                if s[-1] == ' ':" -- To not extend separators of spaces.
+  , "                    tot = tot.rstrip()"
+  , "                    tot += s"
+  , "                else:"
+  , "                    tot += s + ' '"
   , ""
   , "    return tot"
   ]
@@ -221,7 +218,7 @@
 makeCoercCompare :: CF -> String
 makeCoercCompare cf = concat
   [ "cdict = {\n"
-  , unlines (map (\(fs, cs) -> "  " ++ fs ++ " : '" ++ cs ++ "',") scs)
+  , unlines (map (\(fs, cs) -> "  " ++ unkw fs ++ " : '" ++ cs ++ "',") scs)
   , "}"
   ]
   where
@@ -278,7 +275,7 @@ makeLinFunc cf = unlines
     ]
   , ifUsedThen catString
     [ "        case String():"
-    , "            return [ast, ' ']"
+    , "            return [ast]"
     ]
   , ifUsedThen catIdent
     [ "        case Ident():"
@@ -350,8 +347,8 @@ makeListEntrypointCase cf c = concat
   ]
   where
     constructors = if isTokenCat c
-      then [show c ++ "()"]
-      else map ((++ "()") . funName)
+      then [unkw (show c) ++ "()"]
+      else map ((++ "()") . unkw . 
funName) [ r | r <- rulesForNormalizedCat cf (normCat c), not (isCoercion r), @@ -362,7 +359,7 @@ makeListEntrypointCase cf c = concat -- Creates a case for a user defined literal, which inherits str. makeSkeleTokenCase :: String -> String makeSkeleTokenCase tokenName = concat - [ " case " ++ tokenName ++ "():\n" + [ " case " ++ unkw tokenName ++ "():\n" , " return [ast]" ] @@ -371,7 +368,7 @@ makeSkeleTokenCase tokenName = concat -- separator- and terminator-delimiters there are. makeSkeleRuleCase :: Rul RFun -> String makeSkeleRuleCase rule = concat - [ " case " ++ fName ++ "(" ++ varNamesCommad ++ "):\n" + [ " case " ++ unkw fName ++ "(" ++ varNamesCommad ++ "):\n" , " # " ++ (showEcss sentForm) ++ "\n" , " return " ++ if (length args > 0) then (intercalate " + " args) @@ -382,8 +379,7 @@ makeSkeleRuleCase rule = concat sentForm = rhsRule rule nvCats = numVars sentForm :: [Either (Cat, Doc) String] - - enumeratedVarNames = [render d | (c, d) <- lefts nvCats] + enumeratedVarNames = [render d | (_, d) <- lefts nvCats] varNamesCommad = if length enumeratedVarNames > 0 then addCommas (enumeratedVarNames ++ ["_ann_type"]) @@ -406,4 +402,5 @@ ecssAndVarsToList (Left c:ecss) (s:ss) name = show $ catOfList c ecssAndVarsToList (Right strOp:ecss) ss = ["['" ++ escapeChars strOp ++ "']"] ++ ecssAndVarsToList ecss ss +ecssAndVarsToList ((Left _):_) [] = error "Missing variable name" diff --git a/source/src/BNFC/Backend/Python/CFtoPySkele.hs b/source/src/BNFC/Backend/Python/CFtoPySkele.hs index 5297fa02..26904764 100644 --- a/source/src/BNFC/Backend/Python/CFtoPySkele.hs +++ b/source/src/BNFC/Backend/Python/CFtoPySkele.hs @@ -11,16 +11,13 @@ import Data.Char (toLower) import BNFC.Backend.Common.NamedVariables import Text.PrettyPrint (Doc, render) import Data.Either (lefts) +import Data.List (intercalate) -- | Entrypoint. cf2PySkele :: String -> CF -> String cf2PySkele pkgName cf = unlines - [ "from ply.lex import lex" - , "from ply.yacc import yacc" - , "import sys" - , "from " ++ pkgName ++ ".LexTokens import *" - , "from " ++ pkgName ++ ".ParsingDefs import *" - , "from " ++ pkgName ++ ".PrettyPrinter import *" + ["from " ++ pkgName ++ ".Absyn import *" + , "" , "" , makeSkele cf ] @@ -32,15 +29,16 @@ makeSkele :: CF -> String makeSkele cf = unlines [ "# Categories combined into one matcher" , "def skeleMatcher(ast: object):" - , "\tmatch ast:" - , unlines skeleLiteralCases - , unlines skeleTokenCases - , unlines skeleRuleCases - , "\t\tcase _:" - , "\t\t\traise Exception(str(ast.__class__) + ' unmatched')" + , ind 1 "match ast:" + , intercalate "\n" skeleLiteralCases + , intercalate "\n" skeleTokenCases + , intercalate "\n" skeleRuleCases + , ind 2 "case _:" + , ind 3 "raise Exception(str(ast.__class__) + ' unmatched')" + , "" , "" , "# Categories split into their own matchers" - , unlines matchersOnCats + , unlines matchersOnCats ] where rules = @@ -72,12 +70,13 @@ makeSkele cf = unlines -- Creates a matcher for some value category. 
makeMatcherOnCat :: (Cat, [Rul RFun]) -> String -makeMatcherOnCat (c, rules) = unlines +makeMatcherOnCat (c, rules) = unlines [ "def matcher" ++ show c ++ "(" ++ varName ++ ": " ++ show c ++ "):" - , "\tmatch " ++ varName ++ ":" - , unlines cases - ,"\t\tcase _:" - ,"\t\t\traise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')" + , ind 1 "match " ++ varName ++ ":" + , intercalate "\n" cases + , ind 2 "case _:" + , ind 3 "raise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')" + , "" ] where varName = map toLower (show c) ++ "_" @@ -88,27 +87,23 @@ makeMatcherOnCat (c, rules) = unlines -- | Creates a case for some rule. makeSkeleRuleCase :: Rul RFun -> String -makeSkeleRuleCase rule = concat - [ "\t\tcase " ++ fName ++ "(" ++ varNamesCommad ++ "):\n" - , "\t\t\t# " ++ (showEcss sentForm) ++ "\n" - , "\t\t\traise Exception('" ++ fName ++ " not implemented')" +makeSkeleRuleCase rule = intercalate "\n" + [ ind 2 "case " ++ name ++ "(" ++ varNamesCommad ++ "):" + , ind 3 "# " ++ (showEcss sentForm) + , ind 3 "raise Exception('" ++ name ++ " not implemented')" ] where - funcRStr = funRule rule :: RString - fName = wpThing funcRStr :: String + name = unkw (funName rule) sentForm = rhsRule rule - nvCats = numVars sentForm :: [Either (Cat, Doc) String] - enumeratedVarNames = [render d | (_, d) <- lefts nvCats] - varNamesCommad = addCommas (enumeratedVarNames ++ ["_ann_type"]) -- | Creates a case for a user-defined token. makeSkeleTokenCase :: String -> String -makeSkeleTokenCase tokenName = concat - [ "\t\tcase " ++ tokenName ++ "():\n" - , "\t\t\traise Exception('not implemented')" +makeSkeleTokenCase tokenName = intercalate "\n" + [ ind 2 "case " ++ unkw tokenName ++ "():" + , ind 3 "raise Exception('" ++ unkw tokenName ++ " not implemented')" ] diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs index 300c1d9f..f68abe13 100644 --- a/source/src/BNFC/Backend/Python/PyHelpers.hs +++ b/source/src/BNFC/Backend/Python/PyHelpers.hs @@ -10,6 +10,12 @@ import Data.Char import BNFC.CF +-- Indents by four spaces +ind :: Int -> String -> String +ind 0 s = s +ind n s = ind (n-1) (" " ++ s) + + addCommas :: [String] -> String addCommas ss = intercalate ", " ss @@ -78,3 +84,53 @@ showEcss [] = "" showEcss (Left c:ecss) = show c ++ " " ++ (showEcss ecss) showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss) + +-- | Adds an underscore if the string overlaps with a keyword. +unkw :: String -> String +unkw s = if s `elem` kwListWithSoftKeywords then s ++ "_" else s + + +-- To add an extra underscore if something overlaps with a keyword. +kwListWithSoftKeywords :: [String] +kwListWithSoftKeywords = + [ "False" + , "None" + , "True" + , "and" + , "as" + , "assert" + , "async" + , "await" + , "break" + , "class" + , "continue" + , "def" + , "del" + , "elif" + , "else" + , "except" + , "finally" + , "for" + , "from" + , "global" + , "if" + , "import" + , "in" + , "is" + , "lambda" + , "nonlocal" + , "not" + , "or" + , "pass" + , "raise" + , "return" + , "try" + , "while" + , "with" + , "yield" + , "_" + , "case" + , "match" + , "type" + ] +
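As a closing illustration of ``unkw`` and the keyword list above: a grammar
name that collides with one of these entries gets a trailing underscore in
every generated Python identifier. For a rule labelled ``True``, for
instance, ``makePythonClass`` would emit roughly the following in Absyn.py
(a sketch; names and spacing are illustrative, and the rule is assumed to
have no nonterminals on its right-hand side)::

    # Sketch of the generated abstract-syntax class for a rule labelled
    # "True"; unkw appends "_" to avoid clashing with the Python keyword.
    from dataclasses import dataclass

    @dataclass
    class True_:
        pass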