runtimeverification · ttuegel · Dec 9, 2020 · Dec 2, 2020 · Dec 2, 2020 · Dec 2, 2020
diff --git a/kore/app/format/Main.hs b/kore/app/format/Main.hs
@@ -2,6 +2,7 @@ module Main (main) where
 
 import Prelude.Kore
 
+import qualified Data.Text.IO as Text
 import Options.Applicative
 import System.IO
     ( stdout
@@ -78,4 +79,5 @@ main =
 -- | Read a 'KoreDefinition' from the given file name or signal an error.
 readKoreOrDie :: FilePath -> IO ParsedDefinition
 readKoreOrDie fileName =
-    readFile fileName >>= either error return . parseKoreDefinition fileName
+    Text.readFile fileName
+    >>= either error return . parseKoreDefinition fileName
diff --git a/kore/app/share/GlobalMain.hs b/kore/app/share/GlobalMain.hs
@@ -48,6 +48,7 @@ import Data.Text
     ( Text
     , pack
     )
+import qualified Data.Text.IO as Text
 import Data.Time.Format
     ( defaultTimeLocale
     , formatTime
@@ -477,12 +478,14 @@ parseDefinition :: FilePath -> Main ParsedDefinition
 parseDefinition = mainParse parseKoreDefinition
 
 mainParse
-    :: (FilePath -> String -> Either String a)
+    :: (FilePath -> Text -> Either String a)
     -> String
     -> Main a
 mainParse parser fileName = do
     contents <-
-        clockSomethingIO "Reading the input file" $ liftIO $ readFile fileName
+        Text.readFile fileName
+        & liftIO
+        & clockSomethingIO "Reading the input file"
     parseResult <-
         clockSomething "Parsing the file" (parser fileName contents)
     case parseResult of

diff --git a/kore/src/Kore/Log/KoreLogOptions.hs b/kore/src/Kore/Log/KoreLogOptions.hs
@@ -210,17 +210,17 @@ parseEntryTypes =
 
 parseCommaSeparatedEntries :: Options.ReadM EntryTypes
 parseCommaSeparatedEntries =
-    Options.maybeReader $ Parser.parseMaybe parseEntryTypes'
+    Options.maybeReader $ Parser.parseMaybe parseEntryTypes' . Text.pack
   where
-    parseEntryTypes' :: Parser.Parsec String String EntryTypes
+    parseEntryTypes' :: Parser.Parsec String Text EntryTypes
     parseEntryTypes' = Set.fromList <$> Parser.sepEndBy parseSomeTypeRep comma
 
     comma = void (Parser.char ',')
 
-    parseSomeTypeRep :: Parser.Parsec String String SomeTypeRep
+    parseSomeTypeRep :: Parser.Parsec String Text SomeTypeRep
     parseSomeTypeRep =
         Parser.takeWhile1P (Just "SomeTypeRep") (flip notElem [',', ' '])
-        >>= parseEntryType . Text.pack
+        >>= parseEntryType
 
 parseSeverity :: Parser Severity
 parseSeverity =

diff --git a/kore/src/Kore/Log/Registry.hs b/kore/src/Kore/Log/Registry.hs
@@ -235,7 +235,7 @@ lookupTextFromTypeWithError type' =
             <> show type'
             <> " It should be added to Kore.Log.Registry.registry."
 
-parseEntryType :: Ord e => Text -> Parser.Parsec e String SomeTypeRep
+parseEntryType :: Ord e => Text -> Parser.Parsec e Text SomeTypeRep
 parseEntryType entryText =
     maybe empty return
     $ Map.lookup entryText (textToType registry)

diff --git a/kore/src/Kore/Parser.hs b/kore/src/Kore/Parser.hs
@@ -34,6 +34,9 @@ module Kore.Parser
 
 import Prelude.Kore
 
+import Data.Text
+    ( Text
+    )
 import Text.Megaparsec
     ( eof
     )
@@ -58,7 +61,7 @@ else.
  -}
 parseKoreDefinition
     :: FilePath  -- ^ Filename used for error messages
-    -> String  -- ^ The concrete syntax of a valid Kore definition
+    -> Text  -- ^ The concrete syntax of a valid Kore definition
     -> Either String ParsedDefinition
 parseKoreDefinition = parseOnly (Lexer.space *> koreParser)
 
@@ -70,6 +73,6 @@ message otherwise. The input must contain a valid Kore pattern and nothing else.
  -}
 parseKorePattern
     :: FilePath  -- ^ Filename used for error messages
-    -> String  -- ^ The concrete syntax of a valid Kore pattern
+    -> Text  -- ^ The concrete syntax of a valid Kore pattern
     -> Either String ParsedPattern
 parseKorePattern = parseOnly (Lexer.space *> Parser.parsePattern)
diff --git a/kore/src/Kore/Parser/Lexer.hs b/kore/src/Kore/Parser/Lexer.hs
@@ -25,8 +25,6 @@ module Kore.Parser.Lexer
     , parseId
     , parseAnyId, parseSetId, isSymbolId
     , isElementVariableId, isSetVariableId
-    , elementVariableIdParser
-    , setVariableIdParser
     , parseSortId
     , parseSymbolId
     , parseModuleName
@@ -48,6 +46,9 @@ import Data.Map.Strict
     ( Map
     )
 import qualified Data.Map.Strict as Map
+import Data.Text
+    ( Text
+    )
 import qualified Data.Text as Text
 import Text.Megaparsec
     ( SourcePos (..)
@@ -84,6 +85,7 @@ space = L.space Parser.space1 lineComment blockComment
   where
     lineComment = L.skipLineComment "//"
     blockComment = L.skipBlockComment "/*" "*/"
+{-# INLINE space #-}
 
 {- | Parse the character, but skip its result.
  -}
@@ -97,7 +99,7 @@ skipChar = Monad.void . Parser.char
 See also: 'L.symbol', 'space'
 
  -}
-symbol :: String -> Parser ()
+symbol :: Text -> Parser ()
 symbol = Monad.void . L.symbol space
 
 colon :: Parser ()
@@ -163,7 +165,7 @@ consumes any trailing whitespace.
 See also: 'space'
 
  -}
-keyword :: String -> Parser ()
+keyword :: Text -> Parser ()
 keyword s = lexeme $ do
     _ <- Parser.chunk s
     -- Check that the next character cannot be part of an @id@, i.e.  check that
@@ -183,19 +185,16 @@ sourcePosToFileLocation
     , column   = unPos column'
     }
 
-{- Takes a parser for the string of the identifier
-   and returns an 'Id' annotated with position.
--}
-stringParserToIdParser :: Parser String -> Parser Id
-stringParserToIdParser stringRawParser = do
+{- | Annotate a 'Text' parser with an 'AstLocation'.
+ -}
+parseIntoId :: Parser Text -> Parser Id
+parseIntoId stringRawParser = do
     !pos <- sourcePosToFileLocation <$> getSourcePos
-    name <- lexeme stringRawParser
-    return Id
-        { getId = Text.pack name
-        , idLocation = AstLocationFile pos
-        }
+    getId <- lexeme stringRawParser
+    return Id { getId, idLocation = AstLocationFile pos }
+{-# INLINE parseIntoId #-}
 
-koreKeywordsSet :: HashSet String
+koreKeywordsSet :: HashSet Text
 koreKeywordsSet = HashSet.fromList keywords
   where
     keywords =
@@ -224,17 +223,17 @@ genericIdRawParser
     :: (Char -> Bool)  -- ^ contains the characters allowed for @⟨prefix-char⟩@.
     -> (Char -> Bool)  -- ^ contains the characters allowed for @⟨body-char⟩@.
     -> IdKeywordParsing
-    -> Parser String
+    -> Parser Text
 genericIdRawParser isFirstChar isBodyChar idKeywordParsing = do
-    c <- Parser.satisfy isFirstChar <?> "first identifier character"
-    cs <- Parser.takeWhileP (Just "identifier character") isBodyChar
-    let genericId = c : cs
-        keywordsForbidden = idKeywordParsing == KeywordsForbidden
+    (genericId, _) <- Parser.match
+        $ (Parser.satisfy isFirstChar <?> "first identifier character")
+        >> Parser.takeWhileP (Just "identifier character") isBodyChar
+    let keywordsForbidden = idKeywordParsing == KeywordsForbidden
         isKeyword = HashSet.member genericId koreKeywordsSet
     when (keywordsForbidden && isKeyword)
         $ fail
             (  "Identifiers should not be keywords: '"
-            ++ genericId
+            ++ Text.unpack genericId
             ++ "'."
             )
     return genericId
@@ -293,11 +292,14 @@ isIdChar c = isIdFirstChar c || isIdOtherChar c
 An identifier cannot be a keyword.
 -}
 parseId :: Parser Id
-parseId = stringParserToIdParser (parseIdRaw KeywordsForbidden)
+parseId = parseIntoId parseIdText
 
-parseIdRaw :: IdKeywordParsing -> Parser String
+parseIdRaw :: IdKeywordParsing -> Parser Text
 parseIdRaw = genericIdRawParser isIdFirstChar isIdChar
 
+parseIdText :: Parser Text
+parseIdText = parseIdRaw KeywordsForbidden
+
 {- | Parse a module name.
 
 @
@@ -309,7 +311,7 @@ parseModuleName = lexeme moduleNameRawParser
 
 moduleNameRawParser :: Parser ModuleName
 moduleNameRawParser =
-  ModuleName . Text.pack <$> parseIdRaw KeywordsForbidden
+  ModuleName <$> parseIdRaw KeywordsForbidden
 
 {- | Parses a 'Sort' 'Id'
 
@@ -321,7 +323,9 @@ parseSortId :: Parser Id
 parseSortId = parseId <?> "sort identifier"
 
 parseAnyId :: Parser Id
-parseAnyId = (parseSpecialId <|> parseSetId <|> parseId) <?> "identifier"
+parseAnyId = parseIntoId
+    (parseSpecialIdText <|> parseSetIdText <|> parseIdText)
+    <?> "identifier"
 
 isSymbolId :: Id -> Bool
 isSymbolId Id { getId } =
@@ -336,19 +340,16 @@ isElementVariableId Id { getId } =
 isSetVariableId :: Id -> Bool
 isSetVariableId Id { getId } = Text.head getId == '@'
 
-parseSpecialId :: Parser Id
-parseSpecialId =
-    stringParserToIdParser parseSpecialIdString
-  where
-    parseSpecialIdString =
-        (:) <$> Parser.char '\\' <*> parseIdRaw KeywordsPermitted
+parseSpecialIdText :: Parser Text
+parseSpecialIdText = fst <$> Parser.match
+    (Parser.char '\\' >> parseIdRaw KeywordsPermitted)
+
+parseSetIdText :: Parser Text
+parseSetIdText = fst <$> Parser.match
+    (Parser.char '@' >> parseIdRaw KeywordsPermitted)
 
 parseSetId :: Parser Id
-parseSetId =
-    stringParserToIdParser parseSetIdString
-  where
-    parseSetIdString =
-        (:) <$> Parser.char '@' <*> parseIdRaw KeywordsPermitted
+parseSetId = parseIntoId parseSetIdText
 
 {- | Parses a 'Symbol' 'Id'
 
@@ -357,42 +358,16 @@ parseSetId =
 @
 -}
 parseSymbolId :: Parser Id
-parseSymbolId =
-    stringParserToIdParser symbolIdRawParser <?> "symbol or alias identifier"
+parseSymbolId = parseIntoId symbolIdRawParser <?> "symbol or alias identifier"
 
-symbolIdRawParser :: Parser String
+symbolIdRawParser :: Parser Text
 symbolIdRawParser = do
     c <- peekChar'
     if c == '\\'
-    then do
-        skipChar '\\'
-        (c :) <$> parseIdRaw KeywordsPermitted
+    then fst <$> Parser.match
+        (Parser.char '\\' >> parseIdRaw KeywordsPermitted)
     else parseIdRaw KeywordsForbidden
 
-{-|Parses a @set-variable-id@, which always starts with @\@@.
-
-@
-<set-variable-id> ::= ['@'] <id>
-@
--}
-setVariableIdParser :: Parser Id
-setVariableIdParser = stringParserToIdParser setVariableIdRawParser
-
-setVariableIdRawParser :: Parser String
-setVariableIdRawParser = do
-    start <- Parser.char '@'
-    end <- parseIdRaw KeywordsPermitted
-    return (start:end)
-
-{-| Parses an @element-variable-id@
-
-@
-<element-variable-id> ::= <id>
-@
--}
-elementVariableIdParser :: Parser Id
-elementVariableIdParser = parseId
-
 {- | Parses a C-style string literal, unescaping it.
 
 @

diff --git a/kore/src/Kore/Parser/ParserUtils.hs b/kore/src/Kore/Parser/ParserUtils.hs
@@ -21,6 +21,9 @@ import Prelude.Kore hiding
     ( takeWhile
     )
 
+import Data.Text
+    ( Text
+    )
 import Data.Void
     ( Void
     )
@@ -34,7 +37,7 @@ import Text.Megaparsec.Error
     ( errorBundlePretty
     )
 
-type Parser = Parsec Void String
+type Parser = Parsec Void Text
 
 {-|'peekChar' is similar to Attoparsec's 'peekChar'. It returns the next
 available character in the input, without consuming it. Returns 'Nothing'
@@ -55,7 +58,7 @@ peekChar' = lookAhead anySingle
 a FilePath that is used for generating error messages and an input string
 and produces either a parsed object, or an error message.
 -}
-parseOnly :: Parser a -> FilePath -> String -> Either String a
+parseOnly :: Parser a -> FilePath -> Text -> Either String a
 parseOnly parser filePathForErrors input =
     case parse parser filePathForErrors input of
         Left err -> Left (errorBundlePretty err)

diff --git a/kore/src/Kore/Repl.hs b/kore/src/Kore/Repl.hs
@@ -45,6 +45,7 @@ import Data.List
     )
 import qualified Data.Map.Strict as Map
 import qualified Data.Sequence as Seq
+import qualified Data.Text as Text
 import Kore.Attribute.RuleIndex
     ( RuleIndex (..)
     )
@@ -176,7 +177,7 @@ runRepl
     repl0 = do
         str <- prompt
         let command =
-                fromMaybe ShowUsage $ parseMaybe commandParser str
+                fromMaybe ShowUsage $ parseMaybe commandParser (Text.pack str)
         when (shouldStore command) $ field @"commands" Lens.%= (Seq.|> str)
         void $ replInterpreter printIfNotEmpty command
 

diff --git a/kore/src/Kore/Repl/Interpreter.hs b/kore/src/Kore/Repl/Interpreter.hs
@@ -1251,7 +1251,7 @@ tryAlias replAlias@ReplAlias { name } printAux printKore = do
                 parsedCommand =
                     fromMaybe
                         ShowUsage
-                        $ parseMaybe commandParser command
+                        $ parseMaybe commandParser (Text.pack command)
             config <- ask
             (cont, st') <- get >>= runInterpreter parsedCommand config
             put st'
@@ -1500,7 +1500,7 @@ parseEvalScript file scriptModeOutput = do
     if exists
         then do
             contents <- lift . liftIO $ readFile file
-            let result = runParser scriptParser file contents
+            let result = runParser scriptParser file (Text.pack contents)
             either parseFailed executeScript result
         else lift . liftIO . putStrLn $ "Cannot find " <> file
 

diff --git a/kore/src/Kore/Repl/Parser.hs b/kore/src/Kore/Repl/Parser.hs
@@ -31,6 +31,9 @@ import qualified Data.Set as Set
 import Data.String
     ( IsString (..)
     )
+import Data.Text
+    ( Text
+    )
 import qualified Data.Text as Text
 import Text.Megaparsec
     ( Parsec
@@ -57,7 +60,7 @@ import qualified Kore.Log as Log
 import qualified Kore.Log.Registry as Log
 import Kore.Repl.Data
 
-type Parser = Parsec ReplParseError String
+type Parser = Parsec ReplParseError Text
 
 newtype ReplParseError = ReplParseError { unReplParseError :: String }
     deriving (Eq, Ord)
@@ -449,7 +452,7 @@ spaceNoNewline :: Parser ()
 spaceNoNewline =
     void . many $ oneOf [' ', '\t', '\r', '\f', '\v']
 
-literal :: String -> Parser ()
+literal :: Text -> Parser ()
 literal str = void $ Char.string str <* spaceNoNewline
 
 decimal :: Integral a => Parser a