Skip to content

Improve the performance of the Kore parser #2296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion kore/app/format/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module Main (main) where

import Prelude.Kore

import qualified Data.Text.IO as Text
import Options.Applicative
import System.IO
( stdout
Expand Down Expand Up @@ -78,4 +79,5 @@ main =
-- | Read a 'KoreDefinition' from the given file name or signal an error.
readKoreOrDie :: FilePath -> IO ParsedDefinition
readKoreOrDie fileName =
readFile fileName >>= either error return . parseKoreDefinition fileName
Text.readFile fileName
>>= either error return . parseKoreDefinition fileName
7 changes: 5 additions & 2 deletions kore/app/share/GlobalMain.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import Data.Text
( Text
, pack
)
import qualified Data.Text.IO as Text
import Data.Time.Format
( defaultTimeLocale
, formatTime
Expand Down Expand Up @@ -477,12 +478,14 @@ parseDefinition :: FilePath -> Main ParsedDefinition
parseDefinition = mainParse parseKoreDefinition

mainParse
:: (FilePath -> String -> Either String a)
:: (FilePath -> Text -> Either String a)
-> String
-> Main a
mainParse parser fileName = do
contents <-
clockSomethingIO "Reading the input file" $ liftIO $ readFile fileName
Text.readFile fileName
& liftIO
& clockSomethingIO "Reading the input file"
Comment on lines +486 to +488
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ttuegel Why are these the other way round now?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed it from ($) to (&) so that the action (Text.readFile) would appear before the wrappers.

parseResult <-
clockSomething "Parsing the file" (parser fileName contents)
case parseResult of
Expand Down
8 changes: 4 additions & 4 deletions kore/src/Kore/Log/KoreLogOptions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,17 @@ parseEntryTypes =

parseCommaSeparatedEntries :: Options.ReadM EntryTypes
parseCommaSeparatedEntries =
Options.maybeReader $ Parser.parseMaybe parseEntryTypes'
Options.maybeReader $ Parser.parseMaybe parseEntryTypes' . Text.pack
where
parseEntryTypes' :: Parser.Parsec String String EntryTypes
parseEntryTypes' :: Parser.Parsec String Text EntryTypes
parseEntryTypes' = Set.fromList <$> Parser.sepEndBy parseSomeTypeRep comma

comma = void (Parser.char ',')

parseSomeTypeRep :: Parser.Parsec String String SomeTypeRep
parseSomeTypeRep :: Parser.Parsec String Text SomeTypeRep
parseSomeTypeRep =
Parser.takeWhile1P (Just "SomeTypeRep") (flip notElem [',', ' '])
>>= parseEntryType . Text.pack
>>= parseEntryType

parseSeverity :: Parser Severity
parseSeverity =
Expand Down
2 changes: 1 addition & 1 deletion kore/src/Kore/Log/Registry.hs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ lookupTextFromTypeWithError type' =
<> show type'
<> " It should be added to Kore.Log.Registry.registry."

parseEntryType :: Ord e => Text -> Parser.Parsec e String SomeTypeRep
parseEntryType :: Ord e => Text -> Parser.Parsec e Text SomeTypeRep
parseEntryType entryText =
maybe empty return
$ Map.lookup entryText (textToType registry)
Expand Down
7 changes: 5 additions & 2 deletions kore/src/Kore/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ module Kore.Parser

import Prelude.Kore

import Data.Text
( Text
)
import Text.Megaparsec
( eof
)
Expand All @@ -58,7 +61,7 @@ else.
-}
parseKoreDefinition
:: FilePath -- ^ Filename used for error messages
-> String -- ^ The concrete syntax of a valid Kore definition
-> Text -- ^ The concrete syntax of a valid Kore definition
-> Either String ParsedDefinition
parseKoreDefinition = parseOnly (Lexer.space *> koreParser)

Expand All @@ -70,6 +73,6 @@ message otherwise. The input must contain a valid Kore pattern and nothing else.
-}
parseKorePattern
:: FilePath -- ^ Filename used for error messages
-> String -- ^ The concrete syntax of a valid Kore pattern
-> Text -- ^ The concrete syntax of a valid Kore pattern
-> Either String ParsedPattern
parseKorePattern = parseOnly (Lexer.space *> Parser.parsePattern)
107 changes: 41 additions & 66 deletions kore/src/Kore/Parser/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ module Kore.Parser.Lexer
, parseId
, parseAnyId, parseSetId, isSymbolId
, isElementVariableId, isSetVariableId
, elementVariableIdParser
, setVariableIdParser
, parseSortId
, parseSymbolId
, parseModuleName
Expand All @@ -48,6 +46,9 @@ import Data.Map.Strict
( Map
)
import qualified Data.Map.Strict as Map
import Data.Text
( Text
)
import qualified Data.Text as Text
import Text.Megaparsec
( SourcePos (..)
Expand Down Expand Up @@ -84,6 +85,7 @@ space = L.space Parser.space1 lineComment blockComment
where
lineComment = L.skipLineComment "//"
blockComment = L.skipBlockComment "/*" "*/"
{-# INLINE space #-}

{- | Parse the character, but skip its result.
-}
Expand All @@ -97,7 +99,7 @@ skipChar = Monad.void . Parser.char
See also: 'L.symbol', 'space'

-}
symbol :: String -> Parser ()
symbol :: Text -> Parser ()
symbol = Monad.void . L.symbol space

colon :: Parser ()
Expand Down Expand Up @@ -163,7 +165,7 @@ consumes any trailing whitespace.
See also: 'space'

-}
keyword :: String -> Parser ()
keyword :: Text -> Parser ()
keyword s = lexeme $ do
_ <- Parser.chunk s
-- Check that the next character cannot be part of an @id@, i.e. check that
Expand All @@ -183,19 +185,16 @@ sourcePosToFileLocation
, column = unPos column'
}

{- Takes a parser for the string of the identifier
and returns an 'Id' annotated with position.
-}
stringParserToIdParser :: Parser String -> Parser Id
stringParserToIdParser stringRawParser = do
{- | Annotate a 'Text' parser with an 'AstLocation'.
-}
parseIntoId :: Parser Text -> Parser Id
parseIntoId stringRawParser = do
!pos <- sourcePosToFileLocation <$> getSourcePos
name <- lexeme stringRawParser
return Id
{ getId = Text.pack name
, idLocation = AstLocationFile pos
}
getId <- lexeme stringRawParser
return Id { getId, idLocation = AstLocationFile pos }
{-# INLINE parseIntoId #-}

koreKeywordsSet :: HashSet String
koreKeywordsSet :: HashSet Text
koreKeywordsSet = HashSet.fromList keywords
where
keywords =
Expand Down Expand Up @@ -224,17 +223,17 @@ genericIdRawParser
:: (Char -> Bool) -- ^ contains the characters allowed for @⟨prefix-char⟩@.
-> (Char -> Bool) -- ^ contains the characters allowed for @⟨body-char⟩@.
-> IdKeywordParsing
-> Parser String
-> Parser Text
genericIdRawParser isFirstChar isBodyChar idKeywordParsing = do
c <- Parser.satisfy isFirstChar <?> "first identifier character"
cs <- Parser.takeWhileP (Just "identifier character") isBodyChar
let genericId = c : cs
keywordsForbidden = idKeywordParsing == KeywordsForbidden
(genericId, _) <- Parser.match
$ (Parser.satisfy isFirstChar <?> "first identifier character")
>> Parser.takeWhileP (Just "identifier character") isBodyChar
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is easier to read:

Suggested change
(genericId, _) <- Parser.match
$ (Parser.satisfy isFirstChar <?> "first identifier character")
>> Parser.takeWhileP (Just "identifier character") isBodyChar
(genericId, _) <- Parser.match $ do
_ <- Parser.satisfy isFirstChar <?> "first identifier character"
_ <- Parser.takeWhileP (Just "identifier character") isBodyChar
pure ()

let keywordsForbidden = idKeywordParsing == KeywordsForbidden
isKeyword = HashSet.member genericId koreKeywordsSet
when (keywordsForbidden && isKeyword)
$ fail
( "Identifiers should not be keywords: '"
++ genericId
++ Text.unpack genericId
++ "'."
)
return genericId
Expand Down Expand Up @@ -293,11 +292,14 @@ isIdChar c = isIdFirstChar c || isIdOtherChar c
An identifier cannot be a keyword.
-}
parseId :: Parser Id
parseId = stringParserToIdParser (parseIdRaw KeywordsForbidden)
parseId = parseIntoId parseIdText

parseIdRaw :: IdKeywordParsing -> Parser String
parseIdRaw :: IdKeywordParsing -> Parser Text
parseIdRaw = genericIdRawParser isIdFirstChar isIdChar

parseIdText :: Parser Text
parseIdText = parseIdRaw KeywordsForbidden

{- | Parse a module name.

@
Expand All @@ -309,7 +311,7 @@ parseModuleName = lexeme moduleNameRawParser

moduleNameRawParser :: Parser ModuleName
moduleNameRawParser =
ModuleName . Text.pack <$> parseIdRaw KeywordsForbidden
ModuleName <$> parseIdRaw KeywordsForbidden

{- | Parses a 'Sort' 'Id'

Expand All @@ -321,7 +323,9 @@ parseSortId :: Parser Id
parseSortId = parseId <?> "sort identifier"

parseAnyId :: Parser Id
parseAnyId = (parseSpecialId <|> parseSetId <|> parseId) <?> "identifier"
parseAnyId = parseIntoId
(parseSpecialIdText <|> parseSetIdText <|> parseIdText)
<?> "identifier"

isSymbolId :: Id -> Bool
isSymbolId Id { getId } =
Expand All @@ -336,19 +340,16 @@ isElementVariableId Id { getId } =
isSetVariableId :: Id -> Bool
isSetVariableId Id { getId } = Text.head getId == '@'

parseSpecialId :: Parser Id
parseSpecialId =
stringParserToIdParser parseSpecialIdString
where
parseSpecialIdString =
(:) <$> Parser.char '\\' <*> parseIdRaw KeywordsPermitted
parseSpecialIdText :: Parser Text
parseSpecialIdText = fst <$> Parser.match
(Parser.char '\\' >> parseIdRaw KeywordsPermitted)

parseSetIdText :: Parser Text
parseSetIdText = fst <$> Parser.match
(Parser.char '@' >> parseIdRaw KeywordsPermitted)

parseSetId :: Parser Id
parseSetId =
stringParserToIdParser parseSetIdString
where
parseSetIdString =
(:) <$> Parser.char '@' <*> parseIdRaw KeywordsPermitted
parseSetId = parseIntoId parseSetIdText

{- | Parses a 'Symbol' 'Id'

Expand All @@ -357,42 +358,16 @@ parseSetId =
@
-}
parseSymbolId :: Parser Id
parseSymbolId =
stringParserToIdParser symbolIdRawParser <?> "symbol or alias identifier"
parseSymbolId = parseIntoId symbolIdRawParser <?> "symbol or alias identifier"

symbolIdRawParser :: Parser String
symbolIdRawParser :: Parser Text
symbolIdRawParser = do
c <- peekChar'
if c == '\\'
then do
skipChar '\\'
(c :) <$> parseIdRaw KeywordsPermitted
then fst <$> Parser.match
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using match here makes the look-ahead (above) redundant.

(Parser.char '\\' >> parseIdRaw KeywordsPermitted)
else parseIdRaw KeywordsForbidden

{-|Parses a @set-variable-id@, which always starts with @\@@.

@
<set-variable-id> ::= ['@'] <id>
@
-}
setVariableIdParser :: Parser Id
setVariableIdParser = stringParserToIdParser setVariableIdRawParser

setVariableIdRawParser :: Parser String
setVariableIdRawParser = do
start <- Parser.char '@'
end <- parseIdRaw KeywordsPermitted
return (start:end)

{-| Parses an @element-variable-id@

@
<element-variable-id> ::= <id>
@
-}
elementVariableIdParser :: Parser Id
elementVariableIdParser = parseId

{- | Parses a C-style string literal, unescaping it.

@
Expand Down
7 changes: 5 additions & 2 deletions kore/src/Kore/Parser/ParserUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import Prelude.Kore hiding
( takeWhile
)

import Data.Text
( Text
)
import Data.Void
( Void
)
Expand All @@ -34,7 +37,7 @@ import Text.Megaparsec.Error
( errorBundlePretty
)

type Parser = Parsec Void String
type Parser = Parsec Void Text

{-|'peekChar' is similar to Attoparsec's 'peekChar'. It returns the next
available character in the input, without consuming it. Returns 'Nothing'
Expand All @@ -55,7 +58,7 @@ peekChar' = lookAhead anySingle
a FilePath that is used for generating error messages and an input string
and produces either a parsed object, or an error message.
-}
parseOnly :: Parser a -> FilePath -> String -> Either String a
parseOnly :: Parser a -> FilePath -> Text -> Either String a
parseOnly parser filePathForErrors input =
case parse parser filePathForErrors input of
Left err -> Left (errorBundlePretty err)
Expand Down
3 changes: 2 additions & 1 deletion kore/src/Kore/Repl.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import Data.List
)
import qualified Data.Map.Strict as Map
import qualified Data.Sequence as Seq
import qualified Data.Text as Text
import Kore.Attribute.RuleIndex
( RuleIndex (..)
)
Expand Down Expand Up @@ -176,7 +177,7 @@ runRepl
repl0 = do
str <- prompt
let command =
fromMaybe ShowUsage $ parseMaybe commandParser str
fromMaybe ShowUsage $ parseMaybe commandParser (Text.pack str)
when (shouldStore command) $ field @"commands" Lens.%= (Seq.|> str)
void $ replInterpreter printIfNotEmpty command

Expand Down
4 changes: 2 additions & 2 deletions kore/src/Kore/Repl/Interpreter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,7 @@ tryAlias replAlias@ReplAlias { name } printAux printKore = do
parsedCommand =
fromMaybe
ShowUsage
$ parseMaybe commandParser command
$ parseMaybe commandParser (Text.pack command)
config <- ask
(cont, st') <- get >>= runInterpreter parsedCommand config
put st'
Expand Down Expand Up @@ -1500,7 +1500,7 @@ parseEvalScript file scriptModeOutput = do
if exists
then do
contents <- lift . liftIO $ readFile file
let result = runParser scriptParser file contents
let result = runParser scriptParser file (Text.pack contents)
either parseFailed executeScript result
else lift . liftIO . putStrLn $ "Cannot find " <> file

Expand Down
7 changes: 5 additions & 2 deletions kore/src/Kore/Repl/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ import qualified Data.Set as Set
import Data.String
( IsString (..)
)
import Data.Text
( Text
)
import qualified Data.Text as Text
import Text.Megaparsec
( Parsec
Expand All @@ -57,7 +60,7 @@ import qualified Kore.Log as Log
import qualified Kore.Log.Registry as Log
import Kore.Repl.Data

type Parser = Parsec ReplParseError String
type Parser = Parsec ReplParseError Text

newtype ReplParseError = ReplParseError { unReplParseError :: String }
deriving (Eq, Ord)
Expand Down Expand Up @@ -449,7 +452,7 @@ spaceNoNewline :: Parser ()
spaceNoNewline =
void . many $ oneOf [' ', '\t', '\r', '\f', '\v']

literal :: String -> Parser ()
literal :: Text -> Parser ()
literal str = void $ Char.string str <* spaceNoNewline

decimal :: Integral a => Parser a
Expand Down
Loading