Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #302: Keep internal token types out of lexer #303

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Agda.hs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ cf2AgdaAST time tokenText mod amod pmod cf = vsep $
-- getAbstractSyntax also includes list categories, which isn't what we need
-- The user-defined token categories (including Ident).
tcats :: [(TokenCat, Bool)]
tcats = (if hasIdent cf then ((catIdent, False) :) else id)
tcats = (if hasIdent Internal cf then ((catIdent, False) :) else id)
[ (wpThing name, b) | TokenReg name b _ <- cfgPragmas cf ]
-- Bind printers for the following categories (involves lists and literals).
printerCats :: [Cat]
Expand Down
10 changes: 5 additions & 5 deletions source/src/BNFC/Backend/C.hs
Original file line number Diff line number Diff line change
Expand Up @@ -235,19 +235,19 @@ mkHeaderFile _ cf env = unlines $ concat
where
mkDefines n [] = mkString n
mkDefines n (s:ss) = ("#define " ++ s +++ (show n) ++ "\n") ++ (mkDefines (n+1) ss)
mkString n = if isUsedCat cf (TokenCat catString)
mkString n = if isUsedCat Parsable cf (TokenCat catString)
then ("#define _STRING_ " ++ show n ++ "\n") ++ mkChar (n+1)
else mkChar n
mkChar n = if isUsedCat cf (TokenCat catChar)
mkChar n = if isUsedCat Parsable cf (TokenCat catChar)
then ("#define _CHAR_ " ++ show n ++ "\n") ++ mkInteger (n+1)
else mkInteger n
mkInteger n = if isUsedCat cf (TokenCat catInteger)
mkInteger n = if isUsedCat Parsable cf (TokenCat catInteger)
then ("#define _INTEGER_ " ++ show n ++ "\n") ++ mkDouble (n+1)
else mkDouble n
mkDouble n = if isUsedCat cf (TokenCat catDouble)
mkDouble n = if isUsedCat Parsable cf (TokenCat catDouble)
then ("#define _DOUBLE_ " ++ show n ++ "\n") ++ mkIdent(n+1)
else mkIdent n
mkIdent n = if isUsedCat cf (TokenCat catIdent)
mkIdent n = if isUsedCat Parsable cf (TokenCat catIdent)
then ("#define _IDENT_ " ++ show n ++ "\n")
else ""
-- Andreas, 2019-04-29, issue #210: generate parsers also for coercions
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/C/CFtoBisonC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ specialToks cf = unlines $ concat
, ifC catIdent "%token<_string> _IDENT_"
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then [s] else []
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then [s] else []

startSymbol :: CF -> String
startSymbol cf = "%start" +++ identCat (firstEntry cf)
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/C/CFtoFlexC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ restOfFlex cf env = unlines $ concat
, footer
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else []
userDefTokens =
[ "<YYINITIAL>" ++ printRegFlex exp ++
" \t yylval._string = strdup(yytext); return " ++ sName name ++ ";"
Expand Down
10 changes: 5 additions & 5 deletions source/src/BNFC/Backend/CPP/NoSTL.hs
Original file line number Diff line number Diff line change
Expand Up @@ -163,19 +163,19 @@ mkHeaderFile cf cats eps env = unlines $ concat
mkVar _ = []
mkDefines n [] = mkString n
mkDefines n (s:ss) = "#define " ++ s +++ show n ++ "\n" ++ mkDefines (n+1) ss
mkString n = if isUsedCat cf (TokenCat catString)
mkString n = if isUsedCat Parsable cf (TokenCat catString)
then ("#define _STRING_ " ++ show n ++ "\n") ++ mkChar (n+1)
else mkChar n
mkChar n = if isUsedCat cf (TokenCat catChar)
mkChar n = if isUsedCat Parsable cf (TokenCat catChar)
then ("#define _CHAR_ " ++ show n ++ "\n") ++ mkInteger (n+1)
else mkInteger n
mkInteger n = if isUsedCat cf (TokenCat catInteger)
mkInteger n = if isUsedCat Parsable cf (TokenCat catInteger)
then ("#define _INTEGER_ " ++ show n ++ "\n") ++ mkDouble (n+1)
else mkDouble n
mkDouble n = if isUsedCat cf (TokenCat catDouble)
mkDouble n = if isUsedCat Parsable cf (TokenCat catDouble)
then ("#define _DOUBLE_ " ++ show n ++ "\n") ++ mkIdent(n+1)
else mkIdent n
mkIdent n = if isUsedCat cf (TokenCat catIdent)
mkIdent n = if isUsedCat Parsable cf (TokenCat catIdent)
then "#define _IDENT_ " ++ show n ++ "\n"
else ""
mkFunc s = identCat (normCat s) ++ "*" +++ "p" ++ identCat s ++ "(FILE *inp);"
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ restOfFlex inPackage cf env = unlines $ concat
, footer
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else []
ns = nsString inPackage
userDefTokens =
[ "<YYINITIAL>" ++ printRegFlex exp ++
Expand Down
10 changes: 5 additions & 5 deletions source/src/BNFC/Backend/CPP/STL.hs
Original file line number Diff line number Diff line change
Expand Up @@ -204,19 +204,19 @@ mkHeaderFile inPackage cf cats eps env = unlines $ concat
mkVar _ = []
mkDefines n [] = mkString n
mkDefines n (s:ss) = "#define " ++ s +++ show n ++ "\n" ++ mkDefines (n+1) ss -- "nsDefine inPackage s" not needed (see cf2flex::makeSymEnv)
mkString n = if isUsedCat cf (TokenCat catString)
mkString n = if isUsedCat Parsable cf (TokenCat catString)
then ("#define " ++ nsDefine inPackage "_STRING_ " ++ show n ++ "\n") ++ mkChar (n+1)
else mkChar n
mkChar n = if isUsedCat cf (TokenCat catChar)
mkChar n = if isUsedCat Parsable cf (TokenCat catChar)
then ("#define " ++ nsDefine inPackage "_CHAR_ " ++ show n ++ "\n") ++ mkInteger (n+1)
else mkInteger n
mkInteger n = if isUsedCat cf (TokenCat catInteger)
mkInteger n = if isUsedCat Parsable cf (TokenCat catInteger)
then ("#define " ++ nsDefine inPackage "_INTEGER_ " ++ show n ++ "\n") ++ mkDouble (n+1)
else mkDouble n
mkDouble n = if isUsedCat cf (TokenCat catDouble)
mkDouble n = if isUsedCat Parsable cf (TokenCat catDouble)
then ("#define " ++ nsDefine inPackage "_DOUBLE_ " ++ show n ++ "\n") ++ mkIdent(n+1)
else mkIdent n
mkIdent n = if isUsedCat cf (TokenCat catIdent)
mkIdent n = if isUsedCat Parsable cf (TokenCat catIdent)
then "#define " ++ nsDefine inPackage "_IDENT_ " ++ show n ++ "\n"
else ""
mkFuncs s =
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/CSharp/CFtoGPLEX.hs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ gplex namespace cf env = concat [
[("<YYINITIAL>." , "return (int)Tokens.error;")]
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else []
userDefTokens = map tokenline (tokenPragmas cf)
where
tokenline (name, exp) = ("<YYINITIAL>" ++ printRegGPLEX exp , action name)
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/CSharp/CFtoGPPG.hs
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ specialToks cf = unlinesInline [
ifC catIdent "%token<string_> IDENT_"
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else ""

--The following functions are a (relatively) straightforward translation
--of the ones in CFtoHappy.hs
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Haskell/CFtoAlex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ restOfAlex cf = [
]
where
ifC :: TokenCat -> String -> String
ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else ""
lexComments ([],[]) = []
lexComments (xs,s1:ys) = "<> ::= " ++ ('^':intersperse '^' s1) ++ " [.]* ^n\n" ++ lexComments (xs,ys)
lexComments (([l1,l2],[r1,r2]):xs,[]) = concat
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Haskell/CFtoAlex2.hs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ restOfAlex _ shareStrings tokenText cf = [
TextToken -> ("Data.Text.Text", "Data.Text.take", "Data.Text.uncons", "Data.Text.pack", "Data.Text.unpack", "Nothing", "Just (c,s)")

ifC :: TokenCat -> String -> String
ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else ""
lexComments ([],[]) = []
lexComments (xs,s1:ys) = '\"' : s1 ++ "\"" ++ " [.]* ; -- Toss single line comments\n" ++ lexComments (xs, ys)
lexComments (([l1,l2],[r1,r2]):xs,[]) = concat
Expand Down
5 changes: 4 additions & 1 deletion source/src/BNFC/Backend/Haskell/CFtoAlex3.hs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ restOfAlex _ shareStrings tokenText cf = [
applyP f s = f ++ " (" ++ s ++ ")"

ifC :: TokenCat -> String -> String
ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else ""

lexComments
:: ( [(String, String)] -- block comment delimiters
Expand Down Expand Up @@ -308,16 +308,19 @@ restOfAlex _ shareStrings tokenText cf = [
[ printRegAlex exp ++
"\n { tok (\\p s -> PT p (eitherResIdent (T_" ++ name ++ " . share) s)) }"
| (name,exp) <- tokenPragmas cf
, isUsedCat Parsable cf $ TokenCat name
]

userDefTokenConstrs = unlines
[ " | T_" ++ name ++ " !"++stringType
| name <- tokenNames cf
, isUsedCat Parsable cf $ TokenCat name
]

userDefTokenPrint = unlines
[ " PT _ (T_" ++ name ++ " s) -> s"
| name <- tokenNames cf
, isUsedCat Parsable cf $ TokenCat name
]

ident =
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Haskell/CFtoPrinter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ cf2Printer tokenText functor useGadt name absMod cf = unlines $ concat $
[ prologue tokenText useGadt name absMod
, integerRule absMod cf
, doubleRule absMod cf
, if hasIdent cf then identRule absMod tokenText cf else []
, if hasIdent Internal cf then identRule absMod tokenText cf else []
] ++ [ ownPrintRule absMod tokenText cf own | (own,_) <- tokenPragmas cf ] ++
[ rules absMod functor cf
]
Expand Down
3 changes: 2 additions & 1 deletion source/src/BNFC/Backend/Haskell/ToCNF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,14 @@ genTokTable units cf = vcat
, "tokenToCats p t = error (\"unknown token: \" ++ show t)"
]

tokInfo :: CFG f -> [ (TokenCat, Doc, Exp) ]
tokInfo cf = concat $
[ [ (catChar , "TC", Con "head")
, (catString , "TL", Id)
, (catInteger, "TI", Con "readInteger")
, (catDouble , "TD", Con "readDouble")
]
, [ (catIdent,"TV", Con "Ident") | hasIdent cf ]
, [ (catIdent,"TV", Con "Ident") | hasIdent Parsable cf ]
, [ (t, "T_" <> text t, Con t) | (t, _) <- tokenPragmas cf ]
]

Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ restOfLexerGrammar cf = vcat
, ifChar charmodes
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then vcat s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then vcat s else ""
ifString = ifC catString
ifChar = ifC catChar
strdec = [ "// String token type"
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Java/CFtoCup15.hs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ specialToks cf = unlines
, ifC catIdent "terminal String _IDENT_;"
]
where
ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else ""

specialRules:: CF -> String
specialRules cf =
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Java/CFtoJLex15.hs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ restOfJLex jflex rp cf = vcat
]
where
ifC :: TokenCat -> Doc -> Doc
ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else ""
userDefTokens = vcat
[ "<YYINITIAL>" <> text (printRegJLex jflex exp)
<+> "{ return cf.newSymbol(\"\", sym." <> text name
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/Latex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ prtTerminals name cf = unlines $

identSection :: CF -> [String]
identSection cf
| hasIdent cf = [ "\\subsection*{Identifiers}" ] ++ prtIdentifiers
| hasIdent Parsable cf = [ "\\subsection*{Identifiers}" ] ++ prtIdentifiers
| otherwise = []

prtIdentifiers :: [String]
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/OCaml/CFtoOCamlPrinter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ cf2Printer _name absMod cf = unlines [
integerRule cf,
doubleRule cf,
stringRule cf,
if hasIdent cf then identRule absMod cf else "",
if hasIdent Internal cf then identRule absMod cf else "",
unlines [ownPrintRule absMod cf own | (own,_) <- tokenPragmas cf],
rules absMod cf
]
Expand Down
2 changes: 1 addition & 1 deletion source/src/BNFC/Backend/OCaml/CFtoOCamlShow.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ cf2show _name absMod cf = unlines
[ prologue
, integerRule
, doubleRule
, if hasIdent cf then identRule absMod cf else ""
, if hasIdent Internal cf then identRule absMod cf else ""
, unlines [ ownPrintRule absMod cf own | (own,_) <- tokenPragmas cf ]
, rules absMod cf
]
Expand Down
3 changes: 2 additions & 1 deletion source/src/BNFC/Backend/Txt2Tag.hs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ prtTerminals name cf = unlines $
, prtComments $ comments cf
]

identSection cf = if not (hasIdent cf) then [] else
identSection :: CF -> String
identSection cf = if not (hasIdent Parsable cf) then [] else
unlines [
"===Identifiers===",
prtIdentifiers
Expand Down
4 changes: 2 additions & 2 deletions source/src/BNFC/Backend/XML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ cf2DTD typ name cf = unlines [
elemEmp "Integer",
elemEmp "Double",
elemEmp "String",
if hasIdent cf then elemEmp "Ident" else "",
if hasIdent Internal cf then elemEmp "Ident" else "",
unlines [elemEmp own | own <- tokenNames cf],
unlines (map (elemData typ cf) (cf2data cf)),
"]>"
Expand Down Expand Up @@ -141,7 +141,7 @@ cf2XMLPrinter typ opts absMod cf = unlines [
integerRule cf,
doubleRule cf,
stringRule cf,
if hasIdent cf then identRule cf else "",
if hasIdent Internal cf then identRule cf else "",
unlines [ownPrintRule cf own | (own,_) <- tokenPragmas cf],
rules cf
]
Expand Down
30 changes: 22 additions & 8 deletions source/src/BNFC/CF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ data InternalRule
| Parsable -- ^ ordinary rule (also for parser)
deriving (Eq)

-- | @'Parsable' < 'Internal'@.
-- This allows to select only the parsable rule by @(<= Parsable)@.
instance Ord InternalRule where
Internal <= Parsable = False
_ <= _ = True

instance (Show function) => Show (Rul function) where
show (Rule f cat rhs internal) = unwords $
(if internal == Internal then ("internal" :) else id) $
Expand Down Expand Up @@ -613,10 +619,15 @@ allParserCatsNorm :: CFG f -> [Cat]
allParserCatsNorm = nub . map normCat . allParserCats

-- | Is the category is used on an rhs?
-- Includes internal rules.
isUsedCat :: CFG f -> Cat -> Bool
isUsedCat cf = (`elem` [ c | Rule _ _ rhs _ <- cfgRules cf, Left c <- rhs ])
-- TODO: isUsedCat is used in some places where the internal rules should be ignored.
--
-- * @isUsedCat Parsable@ only looks at the rules that generate the parser.
--
-- * @isUsedCat Internal@ also takes the internal rules into account
-- (relevant for AST and Printer).
--
isUsedCat :: InternalRule -> CFG f -> Cat -> Bool
isUsedCat internal cf = flip elem
[ c | Rule _ _ rhs i <- cfgRules cf, Left c <- rhs, i <= internal ]

-- | All token categories used in the grammar.
-- Includes internal rules.
Expand Down Expand Up @@ -660,16 +671,18 @@ numberOfBlockCommentForms = length . fst . comments
-- built-in categories (corresponds to lexer)

-- | Whether the grammar uses the predefined Ident type.
hasIdent :: CFG f -> Bool
hasIdent cf = isUsedCat cf $ TokenCat catIdent
hasIdent :: InternalRule -> CFG f -> Bool
hasIdent internal cf = isUsedCat internal cf $ TokenCat catIdent


-- these need new datatypes

-- | Categories corresponding to tokens. These end up in the
-- AST. (unlike tokens returned by 'cfTokens')
specialCats :: CF -> [TokenCat]
specialCats cf = (if hasIdent cf then (catIdent:) else id) (map fst (tokenPragmas cf))
specialCats cf =
(if hasIdent Internal cf then (catIdent:) else id) $
map fst (tokenPragmas cf)


-- * abstract syntax trees: data type definitions
Expand Down Expand Up @@ -797,9 +810,10 @@ precCF :: CF -> Bool
precCF cf = length (precLevels cf) > 1

-- | Defines or uses the grammar token types like @Ident@?
-- Includes internal rules.
-- Excludes position tokens.
hasIdentLikeTokens :: CFG g -> Bool
hasIdentLikeTokens cf = hasIdent cf || or [ not b | TokenReg _ b _ <- cfgPragmas cf ]
hasIdentLikeTokens cf = hasIdent Internal cf || or [ not b | TokenReg _ b _ <- cfgPragmas cf ]

-- | Is there a @position token@ declaration in the grammar?
hasPositionTokens :: CFG g -> Bool
Expand Down
9 changes: 7 additions & 2 deletions testing/regression-tests/204_InternalToken/test.cf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
-- Tokens from internal rules ended up in lexer.
-- Andreas, 2019-11-24, issue #264.
-- Internal non-terminals ended up as entrypoints.
-- Andreas, 2020-10-09, issue #302
-- Internal token types end up in lexer and shadow parsable token types.

internal Internal. Foo ::= "Internal";
Main. Prg ::= Ident;
Main. Prg ::= Ident ;
internal IId. Prg ::= Id ;

-- Should accept input "Internal".
token Id letter+ ; -- This overlaps with Ident, but should not confuse the parser.

-- Should accept input `Internal`.