Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Language/PureScript/CST/Convert.hs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ comments :: [Comment a] -> [C.Comment]
comments = mapMaybe comment

sourcePos :: SourcePos -> Pos.SourcePos
sourcePos (SourcePos line col) = Pos.SourcePos line col
sourcePos (SourcePos line (tabs, col)) = Pos.SourcePos line (tabs + col)

sourceSpan :: String -> SourceRange -> Pos.SourceSpan
sourceSpan name (SourceRange start end) = Pos.SourceSpan name (sourcePos start) (sourcePos end)
Expand Down
41 changes: 26 additions & 15 deletions src/Language/PureScript/CST/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import Data.Text.PureScript qualified as Text
import Language.PureScript.CST.Errors (ParserErrorInfo(..), ParserErrorType(..))
import Language.PureScript.CST.Monad (LexResult, LexState(..), ParserM(..), throw)
import Language.PureScript.CST.Layout (LayoutDelim(..), insertLayout, lytToken, unwindLayout)
import Language.PureScript.CST.Positions (advanceLeading, advanceToken, advanceTrailing, applyDelta, textDelta)
import Language.PureScript.CST.Positions (advanceLeading, advanceToken, advanceTrailing, applyDelta, textDelta, PosDelta (PosDelta))
import Language.PureScript.CST.Types (Comment(..), LineFeed(..), SourcePos(..), SourceRange(..), SourceStyle(..), SourceToken(..), Token(..), TokenAnn(..))

-- | Stops at the first lexing error and replaces it with TokEof. Otherwise,
Expand Down Expand Up @@ -50,24 +50,24 @@ lex' lexComments src = do
let (leading, src') = lexComments src

lexWithState $ LexState
{ lexPos = advanceLeading (SourcePos 1 1) leading
{ lexPos = advanceLeading (SourcePos 1 (0, 1)) leading
, lexLeading = leading
, lexSource = src'
, lexStack = [(SourcePos 0 0, LytRoot)]
, lexStack = [(SourcePos 0 (0, 0), LytRoot)]
}

-- | Lexes according to top-level declaration context rules.
lexTopLevel :: Text -> [LexResult]
lexTopLevel src = do
let
(leading, src') = comments src
lexPos = advanceLeading (SourcePos 1 1) leading
lexPos = advanceLeading (SourcePos 1 (0, 1)) leading
hd = Right $ lytToken lexPos TokLayoutStart
tl = lexWithState $ LexState
{ lexPos = lexPos
, lexLeading = leading
, lexSource = src'
, lexStack = [(lexPos, LytWhere), (SourcePos 0 0, LytRoot)]
, lexStack = [(lexPos, LytWhere), (SourcePos 0 (0, 0), LytRoot)]
}
hd : tl

Expand All @@ -90,7 +90,7 @@ lexWithState = go
pos = applyDelta lexPos chunkDelta
pure $ Left
( state { lexSource = lexSource' }
, ParserErrorInfo (SourceRange pos $ applyDelta pos (0, 1)) [] lexStack err
, ParserErrorInfo (SourceRange pos $ applyDelta pos (PosDelta 0 0 1)) [] lexStack err
)

onSuccess _ (TokEof, _) =
Expand Down Expand Up @@ -183,26 +183,30 @@ breakComments = k0 []
case mbComm of
Just comm -> k0 (comm : acc')
Nothing -> pure (reverse acc', [])
else
k1 acc' (goWs [] $ Text.unpack lines)
else do
tabs <- nextWhile (== '\t')
k1 acc' (goWs [] $ Text.unpack (lines <> tabs))

k1 trl acc = do
ws <- nextWhile (\c -> c == ' ' || isLineFeed c)
ws <- nextWhile (\c -> c == ' ' || c == '\t' || isLineFeed c)
let acc' = goWs acc $ Text.unpack ws
mbComm <- comment
case mbComm of
Just comm -> k1 trl (comm : acc')
Nothing -> pure (reverse trl, reverse acc')

goWs a ('\r' : '\n' : ls) = goWs (Line CRLF : a) ls
goWs a ('\r' : ls) = goWs (Line CRLF : a) ls
goWs a ('\n' : ls) = goWs (Line LF : a) ls
goWs a ('\r' : '\n' : ls) = goTab CRLF a 0 ls
goWs a ('\r' : ls) = goTab CRLF a 0 ls
goWs a ('\n' : ls) = goTab LF a 0 ls
goWs a (' ' : ls) = goSpace a 1 ls
goWs a _ = a

goSpace a !n (' ' : ls) = goSpace a (n + 1) ls
goSpace a n ls = goWs (Space n : a) ls

goTab ending a !n ('\t' : ls) = goTab ending a (n + 1) ls
goTab ending a n ls = goWs (Line ending n : a) ls

isBlockComment = Parser $ \inp _ ksucc ->
case Text.uncons inp of
Just ('-', inp2) ->
Expand Down Expand Up @@ -260,14 +264,21 @@ breakShebang = shebangComment >>= \case
Just ('\r', inp2) ->
case Text.uncons inp2 of
Just ('\n', inp3) ->
Just (Line CRLF, inp3)
Just (unconsTabs CRLF inp3)
_ ->
Just (Line CRLF, inp2)
Just (unconsTabs CRLF inp2)
Just ('\n', inp2) ->
Just (Line LF, inp2)
Just (unconsTabs LF inp2)
_ ->
Nothing

unconsTabs :: LineFeed -> Text -> (Comment LineFeed, Text)
unconsTabs l inp = case Text.takeWhile (== '\t') inp of
"" -> (Line l 0, inp)
str -> do
let tabs = Text.length str
(Line l tabs, Text.drop tabs str)

unconsShebang :: Text -> Maybe (Text, Text)
unconsShebang = fmap ("#!",) . Text.stripPrefix "#!"

Expand Down
2 changes: 1 addition & 1 deletion src/Language/PureScript/CST/Monad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ mkParserError stack toks ty =
}
where
range = case NE.nonEmpty toks of
Nothing -> SourceRange (SourcePos 0 0) (SourcePos 0 0)
Nothing -> SourceRange (SourcePos 0 (0, 0)) (SourcePos 0 (0, 0))
Just neToks -> widen
(tokRange . tokAnn $ NE.head neToks)
(tokRange . tokAnn $ NE.last neToks)
Expand Down
126 changes: 68 additions & 58 deletions src/Language/PureScript/CST/Positions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,88 +11,98 @@ import Data.Foldable (foldl')
import Data.List.NonEmpty qualified as NE
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Data.Void (Void)
import Data.Void (Void, absurd)
import Data.Text qualified as Text
import Language.PureScript.CST.Types

data PosDelta = PosDelta
{ posDeltaLines :: {-# UNPACK #-} !Int
, posDeltaTabs :: {-# UNPACK #-} !Int
, posDeltaColumns :: {-# UNPACK #-} !Int
}

advanceToken :: SourcePos -> Token -> SourcePos
advanceToken pos = applyDelta pos . tokenDelta

advanceLeading :: SourcePos -> [Comment LineFeed] -> SourcePos
advanceLeading = foldl' $ \a -> applyDelta a . commentDelta lineDelta

advanceTrailing :: SourcePos -> [Comment Void] -> SourcePos
advanceTrailing = foldl' $ \a -> applyDelta a . commentDelta (const (0, 0))
advanceTrailing = foldl' $ \a -> applyDelta a . commentDelta absurd

tokenDelta :: Token -> (Int, Int)
tokenDelta :: Token -> PosDelta
tokenDelta = \case
TokLeftParen -> (0, 1)
TokRightParen -> (0, 1)
TokLeftBrace -> (0, 1)
TokRightBrace -> (0, 1)
TokLeftSquare -> (0, 1)
TokRightSquare -> (0, 1)
TokLeftArrow ASCII -> (0, 2)
TokLeftArrow Unicode -> (0, 1)
TokRightArrow ASCII -> (0, 2)
TokRightArrow Unicode -> (0, 1)
TokRightFatArrow ASCII -> (0, 2)
TokRightFatArrow Unicode -> (0, 1)
TokDoubleColon ASCII -> (0, 2)
TokDoubleColon Unicode -> (0, 1)
TokForall ASCII -> (0, 6)
TokForall Unicode -> (0, 1)
TokEquals -> (0, 1)
TokPipe -> (0, 1)
TokTick -> (0, 1)
TokDot -> (0, 1)
TokComma -> (0, 1)
TokUnderscore -> (0, 1)
TokBackslash -> (0, 1)
TokLowerName qual name -> (0, qualDelta qual + Text.length name)
TokUpperName qual name -> (0, qualDelta qual + Text.length name)
TokOperator qual sym -> (0, qualDelta qual + Text.length sym)
TokSymbolName qual sym -> (0, qualDelta qual + Text.length sym + 2)
TokSymbolArr Unicode -> (0, 3)
TokSymbolArr ASCII -> (0, 4)
TokHole hole -> (0, Text.length hole + 1)
TokChar raw _ -> (0, Text.length raw + 2)
TokInt raw _ -> (0, Text.length raw)
TokNumber raw _ -> (0, Text.length raw)
TokLeftParen -> PosDelta 0 0 1
TokRightParen -> PosDelta 0 0 1
TokLeftBrace -> PosDelta 0 0 1
TokRightBrace -> PosDelta 0 0 1
TokLeftSquare -> PosDelta 0 0 1
TokRightSquare -> PosDelta 0 0 1
TokLeftArrow ASCII -> PosDelta 0 0 2
TokLeftArrow Unicode -> PosDelta 0 0 1
TokRightArrow ASCII -> PosDelta 0 0 2
TokRightArrow Unicode -> PosDelta 0 0 1
TokRightFatArrow ASCII -> PosDelta 0 0 2
TokRightFatArrow Unicode -> PosDelta 0 0 1
TokDoubleColon ASCII -> PosDelta 0 0 2
TokDoubleColon Unicode -> PosDelta 0 0 1
TokForall ASCII -> PosDelta 0 0 6
TokForall Unicode -> PosDelta 0 0 1
TokEquals -> PosDelta 0 0 1
TokPipe -> PosDelta 0 0 1
TokTick -> PosDelta 0 0 1
TokDot -> PosDelta 0 0 1
TokComma -> PosDelta 0 0 1
TokUnderscore -> PosDelta 0 0 1
TokBackslash -> PosDelta 0 0 1
TokLowerName qual name -> PosDelta 0 0 (qualDelta qual + Text.length name)
TokUpperName qual name -> PosDelta 0 0 (qualDelta qual + Text.length name)
TokOperator qual sym -> PosDelta 0 0 (qualDelta qual + Text.length sym)
TokSymbolName qual sym -> PosDelta 0 0 (qualDelta qual + Text.length sym + 2)
TokSymbolArr Unicode -> PosDelta 0 0 3
TokSymbolArr ASCII -> PosDelta 0 0 4
TokHole hole -> PosDelta 0 0 (Text.length hole + 1)
TokChar raw _ -> PosDelta 0 0 (Text.length raw + 2)
TokInt raw _ -> PosDelta 0 0 (Text.length raw)
TokNumber raw _ -> PosDelta 0 0 (Text.length raw)
TokString raw _ -> multiLine 1 $ textDelta raw
TokRawString raw -> multiLine 3 $ textDelta raw
TokLayoutStart -> (0, 0)
TokLayoutSep -> (0, 0)
TokLayoutEnd -> (0, 0)
TokEof -> (0, 0)
TokLayoutStart -> PosDelta 0 0 0
TokLayoutSep -> PosDelta 0 0 0
TokLayoutEnd -> PosDelta 0 0 0
TokEof -> PosDelta 0 0 0

qualDelta :: [Text] -> Int
qualDelta = foldr ((+) . (+ 1) . Text.length) 0

multiLine :: Int -> (Int, Int) -> (Int, Int)
multiLine n (0, c) = (0, c + n + n)
multiLine n (l, c) = (l, c + n)
multiLine :: Int -> PosDelta -> PosDelta
multiLine n (PosDelta 0 0 c) = PosDelta 0 0 (c + n + n)
multiLine n (PosDelta l i c) = PosDelta l i (c + n)

commentDelta :: (a -> (Int, Int)) -> Comment a -> (Int, Int)
commentDelta :: (a -> Int -> PosDelta) -> Comment a -> PosDelta
commentDelta k = \case
Comment raw -> textDelta raw
Space n -> (0, n)
Line a -> k a
Space n -> PosDelta 0 0 n
Line n i -> k n i

lineDelta :: LineFeed -> (Int, Int)
lineDelta _ = (1, 1)
lineDelta :: LineFeed -> Int -> PosDelta
lineDelta _ tabs = PosDelta 1 tabs 1

textDelta :: Text -> (Int, Int)
textDelta = Text.foldl' go (0, 0)
textDelta :: Text -> PosDelta
textDelta = Text.foldl' go (PosDelta 0 0 0)
where
go (!l, !c) = \case
'\n' -> (l + 1, 1)
_ -> (l, c + 1)

applyDelta :: SourcePos -> (Int, Int) -> SourcePos
applyDelta (SourcePos l c) = \case
(0, n) -> SourcePos l (c + n)
(k, d) -> SourcePos (l + k) d
go (PosDelta l i c) = \case
'\n' -> PosDelta (l + 1) 0 1
'\t' -> PosDelta l (i + 1) c
_ -> PosDelta l i (c + 1)

applyDelta :: SourcePos -> PosDelta -> SourcePos
applyDelta pos delta = case (pos, delta) of
(SourcePos l (i, c), PosDelta 0 0 n) -> SourcePos l (i, c + n)
(SourcePos l (i, 1), PosDelta 0 t n) -> SourcePos l (i + t, n)
-- If we encounter tabs mid-line, treat them as 1 character
(SourcePos l (i, c), PosDelta 0 t n) -> SourcePos l (i, c + t + n - 1)
(SourcePos l _, PosDelta k t n) -> SourcePos (l + k) (t, n)

sepLast :: Separated a -> a
sepLast (Separated hd []) = hd
Expand Down
8 changes: 4 additions & 4 deletions src/Language/PureScript/CST/Print.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Data.Text (Text)
import Data.Text qualified as Text
import Language.PureScript.CST.Types (Comment(..), LineFeed(..), Module, SourceStyle(..), SourceToken(..), Token(..), TokenAnn(..))
import Language.PureScript.CST.Flatten (flattenModule)
import Data.Void (Void)

printToken :: Token -> Text
printToken = printToken' True
Expand Down Expand Up @@ -86,11 +87,10 @@ printLeadingComment :: Comment LineFeed -> Text
printLeadingComment = \case
Comment raw -> raw
Space n -> Text.replicate n " "
Line LF -> "\n"
Line CRLF -> "\r\n"
Line LF n -> "\n" <> Text.replicate n "\t"
Line CRLF n -> "\r\n" <> Text.replicate n "\t"

printTrailingComment :: Comment void -> Text
printTrailingComment :: Comment Void -> Text
printTrailingComment = \case
Comment raw -> raw
Space n -> Text.replicate n " "
Line _ -> ""
4 changes: 2 additions & 2 deletions src/Language/PureScript/CST/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import Language.PureScript.PSString (PSString)

data SourcePos = SourcePos
{ srcLine :: {-# UNPACK #-} !Int
, srcColumn :: {-# UNPACK #-} !Int
, srcColumn :: {-# UNPACK #-} !(Int, Int)
} deriving (Show, Eq, Ord, Generic, NFData)

data SourceRange = SourceRange
Expand All @@ -32,7 +32,7 @@ data SourceRange = SourceRange
data Comment l
= Comment !Text
| Space {-# UNPACK #-} !Int
| Line !l
| Line !l !Int
deriving (Show, Eq, Ord, Generic, Functor, NFData)

data LineFeed = LF | CRLF
Expand Down
2 changes: 1 addition & 1 deletion src/Language/PureScript/CST/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ opName n = OpName (N.coerceOpName <$> n)

placeholder :: SourceToken
placeholder = SourceToken
{ tokAnn = TokenAnn (SourceRange (SourcePos 0 0) (SourcePos 0 0)) [] []
{ tokAnn = TokenAnn (SourceRange (SourcePos 0 (0, 0)) (SourcePos 0 (0, 0))) [] []
, tokValue = TokLowerName [] "<placeholder>"
}

Expand Down
10 changes: 0 additions & 10 deletions tests/purs/failing/Whitespace1.out

This file was deleted.

5 changes: 0 additions & 5 deletions tests/purs/failing/Whitespace1.purs

This file was deleted.

Loading