Skip to content

Improve the performance of the Kore parser #2296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion kore/app/format/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module Main (main) where

import Prelude.Kore

import qualified Data.Text.IO as Text
import Options.Applicative
import System.IO
( stdout
Expand Down Expand Up @@ -78,4 +79,5 @@ main =
-- | Read a 'KoreDefinition' from the given file name or signal an error.
readKoreOrDie :: FilePath -> IO ParsedDefinition
readKoreOrDie fileName =
readFile fileName >>= either error return . parseKoreDefinition fileName
Text.readFile fileName
>>= either error return . parseKoreDefinition fileName
7 changes: 5 additions & 2 deletions kore/app/share/GlobalMain.hs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import Data.Text
( Text
, pack
)
import qualified Data.Text.IO as Text
import Data.Time.Format
( defaultTimeLocale
, formatTime
Expand Down Expand Up @@ -511,12 +512,14 @@ parseDefinition :: FilePath -> Main ParsedDefinition
parseDefinition = mainParse parseKoreDefinition

mainParse
:: (FilePath -> String -> Either String a)
:: (FilePath -> Text -> Either String a)
-> String
-> Main a
mainParse parser fileName = do
contents <-
clockSomethingIO "Reading the input file" $ liftIO $ readFile fileName
Text.readFile fileName
& liftIO
& clockSomethingIO "Reading the input file"
Comment on lines +486 to +488
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ttuegel Why are these the other way round now?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed it from ($) to (&) so that the action (Text.readFile) would appear before the wrappers.

parseResult <-
clockSomething "Parsing the file" (parser fileName contents)
case parseResult of
Expand Down
7 changes: 5 additions & 2 deletions kore/src/Kore/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ module Kore.Parser

import Prelude.Kore

import Data.Text
( Text
)
import Text.Megaparsec
( eof
)
Expand All @@ -58,7 +61,7 @@ else.
-}
parseKoreDefinition
:: FilePath -- ^ Filename used for error messages
-> String -- ^ The concrete syntax of a valid Kore definition
-> Text -- ^ The concrete syntax of a valid Kore definition
-> Either String ParsedDefinition
parseKoreDefinition = parseOnly (Lexer.space *> koreParser)

Expand All @@ -70,6 +73,6 @@ message otherwise. The input must contain a valid Kore pattern and nothing else.
-}
parseKorePattern
:: FilePath -- ^ Filename used for error messages
-> String -- ^ The concrete syntax of a valid Kore pattern
-> Text -- ^ The concrete syntax of a valid Kore pattern
-> Either String ParsedPattern
parseKorePattern = parseOnly (Lexer.space *> Parser.parsePattern)
80 changes: 27 additions & 53 deletions kore/src/Kore/Parser/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ module Kore.Parser.Lexer
, parseId
, parseAnyId, parseSetId, isSymbolId
, isElementVariableId, isSetVariableId
, elementVariableIdParser
, setVariableIdParser
, parseSortId
, parseSymbolId
, parseModuleName
Expand All @@ -48,6 +46,9 @@ import Data.Map.Strict
( Map
)
import qualified Data.Map.Strict as Map
import Data.Text
( Text
)
import qualified Data.Text as Text
import Text.Megaparsec
( SourcePos (..)
Expand Down Expand Up @@ -84,6 +85,7 @@ space = L.space Parser.space1 lineComment blockComment
where
lineComment = L.skipLineComment "//"
blockComment = L.skipBlockComment "/*" "*/"
{-# INLINE space #-}

{- | Parse the character, but skip its result.
-}
Expand All @@ -97,7 +99,7 @@ skipChar = Monad.void . Parser.char
See also: 'L.symbol', 'space'

-}
symbol :: String -> Parser ()
symbol :: Text -> Parser ()
symbol = Monad.void . L.symbol space

colon :: Parser ()
Expand Down Expand Up @@ -163,7 +165,7 @@ consumes any trailing whitespace.
See also: 'space'

-}
keyword :: String -> Parser ()
keyword :: Text -> Parser ()
keyword s = lexeme $ do
_ <- Parser.chunk s
-- Check that the next character cannot be part of an @id@, i.e. check that
Expand All @@ -183,19 +185,16 @@ sourcePosToFileLocation
, column = unPos column'
}

{- Takes a parser for the string of the identifier
and returns an 'Id' annotated with position.
-}
stringParserToIdParser :: Parser String -> Parser Id
stringParserToIdParser stringRawParser = do
{- | Annotate a 'Text' parser with an 'AstLocation'.
-}
parseIntoId :: Parser Text -> Parser Id
parseIntoId stringRawParser = do
!pos <- sourcePosToFileLocation <$> getSourcePos
name <- lexeme stringRawParser
return Id
{ getId = Text.pack name
, idLocation = AstLocationFile pos
}
getId <- lexeme stringRawParser
return Id { getId, idLocation = AstLocationFile pos }
{-# INLINE parseIntoId #-}

koreKeywordsSet :: HashSet String
koreKeywordsSet :: HashSet Text
koreKeywordsSet = HashSet.fromList keywords
where
keywords =
Expand Down Expand Up @@ -224,17 +223,17 @@ genericIdRawParser
:: (Char -> Bool) -- ^ contains the characters allowed for @⟨prefix-char⟩@.
-> (Char -> Bool) -- ^ contains the characters allowed for @⟨body-char⟩@.
-> IdKeywordParsing
-> Parser String
-> Parser Text
genericIdRawParser isFirstChar isBodyChar idKeywordParsing = do
c <- Parser.satisfy isFirstChar <?> "first identifier character"
cs <- Parser.takeWhileP (Just "identifier character") isBodyChar
let genericId = c : cs
let genericId = Text.cons c cs
keywordsForbidden = idKeywordParsing == KeywordsForbidden
isKeyword = HashSet.member genericId koreKeywordsSet
when (keywordsForbidden && isKeyword)
$ fail
( "Identifiers should not be keywords: '"
++ genericId
++ Text.unpack genericId
++ "'."
)
return genericId
Expand Down Expand Up @@ -293,9 +292,9 @@ isIdChar c = isIdFirstChar c || isIdOtherChar c
An identifier cannot be a keyword.
-}
parseId :: Parser Id
parseId = stringParserToIdParser (parseIdRaw KeywordsForbidden)
parseId = parseIntoId (parseIdRaw KeywordsForbidden)

parseIdRaw :: IdKeywordParsing -> Parser String
parseIdRaw :: IdKeywordParsing -> Parser Text
parseIdRaw = genericIdRawParser isIdFirstChar isIdChar

{- | Parse a module name.
Expand All @@ -309,7 +308,7 @@ parseModuleName = lexeme moduleNameRawParser

moduleNameRawParser :: Parser ModuleName
moduleNameRawParser =
ModuleName . Text.pack <$> parseIdRaw KeywordsForbidden
ModuleName <$> parseIdRaw KeywordsForbidden

{- | Parses a 'Sort' 'Id'

Expand Down Expand Up @@ -338,17 +337,17 @@ isSetVariableId Id { getId } = Text.head getId == '@'

parseSpecialId :: Parser Id
parseSpecialId =
stringParserToIdParser parseSpecialIdString
parseIntoId parseSpecialIdString
where
parseSpecialIdString =
(:) <$> Parser.char '\\' <*> parseIdRaw KeywordsPermitted
Text.cons <$> Parser.char '\\' <*> parseIdRaw KeywordsPermitted

parseSetId :: Parser Id
parseSetId =
stringParserToIdParser parseSetIdString
parseIntoId parseSetIdString
where
parseSetIdString =
(:) <$> Parser.char '@' <*> parseIdRaw KeywordsPermitted
Text.cons <$> Parser.char '@' <*> parseIdRaw KeywordsPermitted

{- | Parses a 'Symbol' 'Id'

Expand All @@ -357,42 +356,17 @@ parseSetId =
@
-}
parseSymbolId :: Parser Id
parseSymbolId =
stringParserToIdParser symbolIdRawParser <?> "symbol or alias identifier"
parseSymbolId = parseIntoId symbolIdRawParser <?> "symbol or alias identifier"

symbolIdRawParser :: Parser String
symbolIdRawParser :: Parser Text
symbolIdRawParser = do
c <- peekChar'
if c == '\\'
then do
skipChar '\\'
(c :) <$> parseIdRaw KeywordsPermitted
Text.cons c <$> parseIdRaw KeywordsPermitted
else parseIdRaw KeywordsForbidden

{-|Parses a @set-variable-id@, which always starts with @\@@.

@
<set-variable-id> ::= ['@'] <id>
@
-}
setVariableIdParser :: Parser Id
setVariableIdParser = stringParserToIdParser setVariableIdRawParser

setVariableIdRawParser :: Parser String
setVariableIdRawParser = do
start <- Parser.char '@'
end <- parseIdRaw KeywordsPermitted
return (start:end)

{-| Parses an @element-variable-id@

@
<element-variable-id> ::= <id>
@
-}
elementVariableIdParser :: Parser Id
elementVariableIdParser = parseId

{- | Parses a C-style string literal, unescaping it.

@
Expand Down
7 changes: 5 additions & 2 deletions kore/src/Kore/Parser/ParserUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import Prelude.Kore hiding
( takeWhile
)

import Data.Text
( Text
)
import Data.Void
( Void
)
Expand All @@ -34,7 +37,7 @@ import Text.Megaparsec.Error
( errorBundlePretty
)

type Parser = Parsec Void String
type Parser = Parsec Void Text

{-|'peekChar' is similar to Attoparsec's 'peekChar'. It returns the next
available character in the input, without consuming it. Returns 'Nothing'
Expand All @@ -55,7 +58,7 @@ peekChar' = lookAhead anySingle
a FilePath that is used for generating error messages and an input string
and produces either a parsed object, or an error message.
-}
parseOnly :: Parser a -> FilePath -> String -> Either String a
parseOnly :: Parser a -> FilePath -> Text -> Either String a
parseOnly parser filePathForErrors input =
case parse parser filePathForErrors input of
Left err -> Left (errorBundlePretty err)
Expand Down