mirror of https://github.com/voidlizard/hbs2
379 lines
9.2 KiB
Haskell
379 lines
9.2 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE QuasiQuotes #-}
|
|
{-# LANGUAGE ExtendedDefaultRules #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
{-# LANGUAGE ImportQualifiedPost #-}
|
|
{-# LANGUAGE DerivingStrategies #-}
|
|
{-# LANGUAGE PatternSynonyms #-}
|
|
{-# LANGUAGE ViewPatterns #-}
|
|
{-# LANGUAGE TemplateHaskell #-}
|
|
module Data.Text.Fuzzy.SExp where
|
|
|
|
import Data.Text (Text)
|
|
|
|
import Control.Applicative
|
|
import Control.Monad
|
|
import Data.Function
|
|
import Data.Functor
|
|
import Data.Text.Fuzzy.Tokenize
|
|
import Control.Monad.Reader
|
|
import Data.Typeable
|
|
import Control.Monad.Except
|
|
import Control.Monad.RWS
|
|
import Data.Maybe
|
|
import Data.Char (isSpace,digitToInt)
|
|
import Data.Generics.Uniplate.Data()
|
|
import Safe
|
|
import Data.Data
|
|
import GHC.Generics
|
|
import Lens.Micro.Platform
|
|
import Data.Text qualified as Text
|
|
import Data.Coerce
|
|
import Data.Scientific
|
|
|
|
import Data.HashMap.Strict (HashMap)
|
|
import Data.HashMap.Strict qualified as HM
|
|
|
|
import Prettyprinter hiding (braces,list)
|
|
|
|
|
|
import Streaming.Prelude qualified as S
|
|
|
|
data TTok = TChar Char
|
|
| TSChar Char
|
|
| TPunct Char
|
|
| TText Text
|
|
| TStrLit Text
|
|
| TKeyword Text
|
|
| TEmpty
|
|
| TIndent Int
|
|
deriving stock (Eq,Ord,Show,Data,Generic)
|
|
|
|
instance IsToken TTok where
|
|
mkChar = TChar
|
|
mkSChar = TSChar
|
|
mkPunct = TPunct
|
|
mkText = TText
|
|
mkStrLit = TStrLit
|
|
mkKeyword = TKeyword
|
|
mkEmpty = TEmpty
|
|
mkIndent = TIndent
|
|
|
|
newtype C0 = C0 (Maybe Int)
|
|
deriving stock (Eq,Ord,Show,Data,Typeable,Generic)
|
|
|
|
data SExpParseError =
|
|
ParensOver C0
|
|
| ParensUnder C0
|
|
| ParensUnmatched C0
|
|
| SyntaxError C0
|
|
deriving stock (Show,Typeable)
|
|
|
|
|
|
data NumType =
|
|
NumInteger Integer
|
|
| NumDouble Scientific
|
|
deriving stock (Eq,Ord,Show,Data,Generic)
|
|
|
|
class Monoid c => ForMicroSexp c where
|
|
|
|
instance Monoid C0 where
|
|
mempty = C0 Nothing
|
|
|
|
instance Semigroup C0 where
|
|
(<>) (C0 a) (C0 b) = C0 (b <|> a)
|
|
|
|
instance ForMicroSexp C0 where
|
|
|
|
|
|
instance ForMicroSexp () where
|
|
|
|
data MicroSexp c =
|
|
List_ c [MicroSexp c]
|
|
| Symbol_ c Text
|
|
| String_ c Text
|
|
| Number_ c NumType
|
|
| Boolean_ c Bool
|
|
deriving stock (Show,Data,Generic)
|
|
|
|
pattern List :: ForMicroSexp c => [MicroSexp c] -> MicroSexp c
|
|
pattern List xs <- List_ _ xs where
|
|
List xs = List_ mempty xs
|
|
|
|
pattern Symbol :: ForMicroSexp c => Text -> MicroSexp c
|
|
pattern Symbol xs <- Symbol_ _ xs where
|
|
Symbol xs = Symbol_ mempty xs
|
|
|
|
pattern String :: ForMicroSexp c => Text -> MicroSexp c
|
|
pattern String x <- String_ _ x where
|
|
String x = String_ mempty x
|
|
|
|
pattern Number :: ForMicroSexp c => NumType -> MicroSexp c
|
|
pattern Number n <- Number_ _ n where
|
|
Number n = Number_ mempty n
|
|
|
|
pattern Boolean :: ForMicroSexp c => Bool -> MicroSexp c
|
|
pattern Boolean b <- Boolean_ _ b where
|
|
Boolean b = Boolean_ mempty b
|
|
|
|
{-# COMPLETE List, Symbol, String, Number, Boolean #-}
|
|
|
|
|
|
contextOf :: Lens (MicroSexp c) (MicroSexp c) c c
|
|
contextOf = lens g s
|
|
where
|
|
s sexp c = case sexp of
|
|
List_ _ a -> List_ c a
|
|
Symbol_ _ a -> Symbol_ c a
|
|
String_ _ a -> String_ c a
|
|
Number_ _ a -> Number_ c a
|
|
Boolean_ _ a -> Boolean_ c a
|
|
|
|
g = \case
|
|
List_ c _ -> c
|
|
Symbol_ c _ -> c
|
|
String_ c _ -> c
|
|
Number_ c _ -> c
|
|
Boolean_ c _ -> c
|
|
|
|
nil :: forall c . ForMicroSexp c => MicroSexp c
|
|
nil = List []
|
|
|
|
symbol :: forall c . ForMicroSexp c => Text -> MicroSexp c
|
|
symbol = Symbol
|
|
|
|
str :: forall c . ForMicroSexp c => Text -> MicroSexp c
|
|
str = String
|
|
|
|
newtype SExpEnv =
|
|
SExpEnv
|
|
{ sexpTranslate :: Bool
|
|
}
|
|
|
|
data SExpState =
|
|
SExpState
|
|
{ _sexpLno :: Int
|
|
, _sexpBraces :: [Char]
|
|
}
|
|
|
|
makeLenses 'SExpState
|
|
|
|
defEnv :: SExpEnv
|
|
defEnv = SExpEnv True
|
|
|
|
newtype SExpM m a = SExpM { fromSexpM :: RWST SExpEnv () SExpState m a }
|
|
deriving newtype
|
|
( Applicative
|
|
, Functor
|
|
, Monad
|
|
, MonadState SExpState
|
|
, MonadReader SExpEnv
|
|
, MonadTrans
|
|
)
|
|
|
|
|
|
instance MonadError SExpParseError m => MonadError SExpParseError (SExpM m) where
|
|
throwError = lift . throwError
|
|
catchError w = catchError (coerce $ fromSexpM w)
|
|
|
|
tokenizeSexp :: Text -> [TTok]
|
|
tokenizeSexp txt = do
|
|
let spec = delims " \r\t" <> comment ";"
|
|
<> punct "'{}()[]\n"
|
|
<> sqq
|
|
<> uw
|
|
tokenize spec txt
|
|
|
|
runSexpM :: Monad m => SExpM m a -> m a
|
|
runSexpM f = evalRWST (fromSexpM f) defEnv (SExpState 0 []) <&> fst
|
|
|
|
|
|
parseSexp :: (ForMicroSexp c, MonadError SExpParseError m) => Text -> m (MicroSexp c)
|
|
parseSexp txt = do
|
|
(s, _) <- runSexpM do
|
|
(s,rest) <- sexp (tokenizeSexp txt)
|
|
checkBraces
|
|
pure (s,rest)
|
|
|
|
pure s
|
|
|
|
checkBraces :: (MonadError SExpParseError m) => SExpM m ()
|
|
checkBraces = do
|
|
braces <- gets (view sexpBraces)
|
|
unless (null braces) $ raiseWith ParensUnder
|
|
|
|
succLno :: (MonadError SExpParseError m) => SExpM m ()
|
|
succLno = modify (over sexpLno succ)
|
|
|
|
parseTop :: (ForMicroSexp c, MonadError SExpParseError m) => Text -> m [MicroSexp c]
|
|
parseTop txt = do
|
|
let tokens = tokenizeSexp txt
|
|
S.toList_ $ runSexpM do
|
|
flip fix (mempty,tokens) $ \next -> \case
|
|
(acc, []) -> do
|
|
emit acc
|
|
(acc, TPunct '\n' : rest) -> do
|
|
succLno
|
|
emit acc
|
|
next (mempty,rest)
|
|
(acc, rest) -> do
|
|
(s, xs) <- sexp rest
|
|
next (acc <> [s],xs)
|
|
|
|
where
|
|
|
|
emit [] = pure ()
|
|
emit wtf = case wtf of
|
|
[List one] -> lift $ S.yield (List one)
|
|
xs -> lift $ S.yield (List xs)
|
|
|
|
sexp :: (ForMicroSexp c, MonadError SExpParseError m) => [TTok] -> SExpM m (MicroSexp c, [TTok])
|
|
sexp s = case s of
|
|
[] -> do
|
|
checkBraces
|
|
pure (nil, mempty)
|
|
|
|
(TText l : w) -> (,w) <$> trNum (Symbol l)
|
|
|
|
(TStrLit l : w) -> pure (String l, w)
|
|
|
|
-- so far ignored
|
|
(TPunct '\'' : rest) -> sexp rest
|
|
|
|
(TPunct '\n' : rest) -> succLno >> sexp rest
|
|
|
|
(TPunct c : rest) | isSpace c -> sexp rest
|
|
|
|
(TPunct c : rest) | isBrace c ->
|
|
maybe (pure (nil, rest)) (`list` rest) (closing c)
|
|
| otherwise -> do
|
|
raiseWith ParensOver
|
|
|
|
( _ : _ ) -> raiseWith SyntaxError
|
|
|
|
where
|
|
|
|
setContext w = do
|
|
co <- getC0
|
|
pure $ over _2 (set contextOf co) w
|
|
|
|
isBrace :: Char -> Bool
|
|
isBrace c = HM.member c braces
|
|
|
|
closing :: Char -> Maybe Char
|
|
closing c = HM.lookup c braces
|
|
|
|
braces :: HashMap Char Char
|
|
braces = HM.fromList[ ('{', '}')
|
|
, ('(', ')')
|
|
, ('[', ']')
|
|
, ('<', '>')
|
|
]
|
|
|
|
cBraces :: [Char]
|
|
cBraces = HM.elems braces
|
|
|
|
trNum tok = do
|
|
|
|
trans <- asks sexpTranslate
|
|
|
|
case tok of
|
|
Symbol s | trans -> do
|
|
let s0 = Text.unpack s
|
|
|
|
let what = Number . NumInteger <$> readMay @Integer s0
|
|
<|>
|
|
Number . NumInteger <$> parseBinary s0
|
|
<|>
|
|
Number . NumDouble <$> readMay @Scientific s0
|
|
<|>
|
|
( case s of
|
|
"#t" -> Just (Boolean True)
|
|
"#f" -> Just (Boolean False)
|
|
_ -> Nothing
|
|
)
|
|
|
|
pure $ fromMaybe (Symbol s) what
|
|
|
|
|
|
x -> pure x
|
|
{-# INLINE trNum #-}
|
|
|
|
list :: (ForMicroSexp c, MonadError SExpParseError m)
|
|
=> Char
|
|
-> [TTok]
|
|
-> SExpM m (MicroSexp c, [TTok])
|
|
|
|
list _ [] = raiseWith ParensUnder
|
|
|
|
list cb tokens = do
|
|
modify $ over sexpBraces (cb:)
|
|
|
|
go cb mempty tokens
|
|
|
|
where
|
|
|
|
isClosingFor :: Char -> Bool
|
|
isClosingFor c = c `elem` cBraces
|
|
|
|
go _ _ [] = do
|
|
checkBraces
|
|
pure (List mempty, mempty)
|
|
|
|
go cl acc (TPunct c : rest) | isSpace c = do
|
|
go cl acc rest
|
|
|
|
go cl acc (TPunct c : rest)
|
|
| isClosingFor c && c == cl = do
|
|
modify $ over sexpBraces (drop 1)
|
|
pure (List (reverse acc), rest)
|
|
|
|
| isClosingFor c && c /= cl = do
|
|
raiseWith ParensUnmatched
|
|
-- throwError =<< ParensUnmatched <$> undefined
|
|
|
|
go cl acc rest = do
|
|
(e,r) <- sexp rest
|
|
go cl (e : acc) r
|
|
|
|
|
|
getC0 :: Monad m => SExpM m C0
|
|
getC0 = do
|
|
lno <- gets (view sexpLno)
|
|
pure (C0 (Just lno))
|
|
|
|
raiseWith :: (MonadError SExpParseError m)
|
|
=> (C0 -> SExpParseError) -> SExpM m b
|
|
|
|
raiseWith a = throwError =<< a <$> getC0
|
|
|
|
instance Pretty NumType where
|
|
pretty = \case
|
|
NumInteger n -> pretty n
|
|
NumDouble n -> viaShow n
|
|
|
|
instance ForMicroSexp c => Pretty (MicroSexp c) where
|
|
|
|
pretty = \case
|
|
List xs -> parens (hsep (fmap pretty xs))
|
|
String s -> dquotes (pretty s)
|
|
Symbol s -> pretty s
|
|
Number n -> pretty n
|
|
Boolean True -> pretty "#t"
|
|
Boolean False -> pretty "#f"
|
|
|
|
isBinaryDigit :: Char -> Bool
|
|
isBinaryDigit c = c == '0' || c == '1'
|
|
|
|
parseBinary :: String -> Maybe Integer
|
|
parseBinary str =
|
|
let
|
|
withoutPrefix = case str of
|
|
'0':'b':rest -> Just rest
|
|
'0':'B':rest -> Just rest
|
|
_ -> Nothing
|
|
in if isJust withoutPrefix && all isBinaryDigit (fromJust withoutPrefix)
|
|
then Just $ foldl (\acc x -> acc * 2 + toInteger (digitToInt x)) 0 (fromJust withoutPrefix)
|
|
else Nothing
|
|
|