From c49e725ff1635ef369d18bf9ada41d0f1ebb1bfa Mon Sep 17 00:00:00 2001
From: Artemis System <theArtemisSystem@gmail.com>
Date: Fri, 13 Jun 2025 12:32:50 +0200
Subject: [PATCH 1/2] Implement tab support

---
 src/Language/PureScript/CST/Convert.hs   |   2 +-
 src/Language/PureScript/CST/Lexer.hs     |  41 +++++---
 src/Language/PureScript/CST/Monad.hs     |   2 +-
 src/Language/PureScript/CST/Positions.hs | 126 ++++++++++++-----------
 src/Language/PureScript/CST/Print.hs     |   8 +-
 src/Language/PureScript/CST/Types.hs     |   4 +-
 src/Language/PureScript/CST/Utils.hs     |   2 +-
 7 files changed, 103 insertions(+), 82 deletions(-)

diff --git a/src/Language/PureScript/CST/Convert.hs b/src/Language/PureScript/CST/Convert.hs
index 59b68adf1d..aab94a718d 100644
--- a/src/Language/PureScript/CST/Convert.hs
+++ b/src/Language/PureScript/CST/Convert.hs
@@ -50,7 +50,7 @@ comments :: [Comment a] -> [C.Comment]
 comments = mapMaybe comment
 
 sourcePos :: SourcePos -> Pos.SourcePos
-sourcePos (SourcePos line col) = Pos.SourcePos line col
+sourcePos (SourcePos line (tabs, col)) = Pos.SourcePos line (tabs + col)
 
 sourceSpan :: String -> SourceRange -> Pos.SourceSpan
 sourceSpan name (SourceRange start end) = Pos.SourceSpan name (sourcePos start) (sourcePos end)
diff --git a/src/Language/PureScript/CST/Lexer.hs b/src/Language/PureScript/CST/Lexer.hs
index 726a76f26a..15f459e652 100644
--- a/src/Language/PureScript/CST/Lexer.hs
+++ b/src/Language/PureScript/CST/Lexer.hs
@@ -22,7 +22,7 @@ import Data.Text.PureScript qualified as Text
 import Language.PureScript.CST.Errors (ParserErrorInfo(..), ParserErrorType(..))
 import Language.PureScript.CST.Monad (LexResult, LexState(..), ParserM(..), throw)
 import Language.PureScript.CST.Layout (LayoutDelim(..), insertLayout, lytToken, unwindLayout)
-import Language.PureScript.CST.Positions (advanceLeading, advanceToken, advanceTrailing, applyDelta, textDelta)
+import Language.PureScript.CST.Positions (advanceLeading, advanceToken, advanceTrailing, applyDelta, textDelta, PosDelta (PosDelta))
 import Language.PureScript.CST.Types (Comment(..), LineFeed(..), SourcePos(..), SourceRange(..), SourceStyle(..), SourceToken(..), Token(..), TokenAnn(..))
 
 -- | Stops at the first lexing error and replaces it with TokEof. Otherwise,
@@ -50,10 +50,10 @@ lex' lexComments src = do
   let (leading, src') = lexComments src
 
   lexWithState $ LexState
-    { lexPos = advanceLeading (SourcePos 1 1) leading
+    { lexPos = advanceLeading (SourcePos 1 (0, 1)) leading
     , lexLeading = leading
     , lexSource = src'
-    , lexStack = [(SourcePos 0 0, LytRoot)]
+    , lexStack = [(SourcePos 0 (0, 0), LytRoot)]
     }
 
 -- | Lexes according to top-level declaration context rules.
@@ -61,13 +61,13 @@ lexTopLevel :: Text -> [LexResult]
 lexTopLevel src = do
   let
     (leading, src') = comments src
-    lexPos = advanceLeading (SourcePos 1 1) leading
+    lexPos = advanceLeading (SourcePos 1 (0, 1)) leading
     hd = Right $ lytToken lexPos TokLayoutStart
     tl = lexWithState $ LexState
       { lexPos = lexPos
       , lexLeading = leading
       , lexSource = src'
-      , lexStack = [(lexPos, LytWhere), (SourcePos 0 0, LytRoot)]
+      , lexStack = [(lexPos, LytWhere), (SourcePos 0 (0, 0), LytRoot)]
       }
   hd : tl
 
@@ -90,7 +90,7 @@ lexWithState = go
         pos = applyDelta lexPos chunkDelta
       pure $ Left
         ( state { lexSource = lexSource' }
-        , ParserErrorInfo (SourceRange pos $ applyDelta pos (0, 1)) [] lexStack err
+        , ParserErrorInfo (SourceRange pos $ applyDelta pos (PosDelta 0 0 1)) [] lexStack err
         )
 
     onSuccess _ (TokEof, _) =
@@ -183,26 +183,30 @@ breakComments = k0 []
         case mbComm of
           Just comm -> k0 (comm : acc')
           Nothing   -> pure (reverse acc', [])
-      else
-        k1 acc' (goWs [] $ Text.unpack lines)
+      else do
+        tabs <- nextWhile (== '\t')
+        k1 acc' (goWs [] $ Text.unpack (lines <> tabs))
 
   k1 trl acc = do
-    ws <- nextWhile (\c -> c == ' ' || isLineFeed c)
+    ws <- nextWhile (\c -> c == ' ' || c == '\t' || isLineFeed c)
     let acc' = goWs acc $ Text.unpack ws
     mbComm <- comment
     case mbComm of
       Just comm -> k1 trl (comm : acc')
       Nothing   -> pure (reverse trl, reverse acc')
 
-  goWs a ('\r' : '\n' : ls) = goWs (Line CRLF : a) ls
-  goWs a ('\r' : ls) = goWs (Line CRLF : a) ls
-  goWs a ('\n' : ls) = goWs (Line LF : a) ls
+  goWs a ('\r' : '\n' : ls) = goTab CRLF a 0 ls
+  goWs a ('\r' : ls) = goTab CRLF a 0 ls
+  goWs a ('\n' : ls) = goTab LF a 0 ls
   goWs a (' ' : ls) = goSpace a 1 ls
   goWs a _ = a
 
   goSpace a !n (' ' : ls) = goSpace a (n + 1) ls
   goSpace a n ls = goWs (Space n : a) ls
 
+  goTab ending a !n ('\t' : ls) = goTab ending a (n + 1) ls
+  goTab ending a n ls = goWs (Line ending n : a) ls
+
   isBlockComment = Parser $ \inp _ ksucc ->
     case Text.uncons inp of
       Just ('-', inp2) ->
@@ -260,14 +264,21 @@ breakShebang = shebangComment >>= \case
       Just ('\r', inp2) ->
         case Text.uncons inp2 of
           Just ('\n', inp3) ->
-            Just (Line CRLF, inp3)
+            Just (unconsTabs CRLF inp3)
           _ ->
-            Just (Line CRLF, inp2)
+            Just (unconsTabs CRLF inp2)
       Just ('\n', inp2) ->
-        Just (Line LF, inp2)
+        Just (unconsTabs LF inp2)
       _ ->
         Nothing
 
+  unconsTabs :: LineFeed -> Text -> (Comment LineFeed, Text)
+  unconsTabs l inp = case Text.takeWhile (== '\t') inp of
+    "" -> (Line l 0, inp)
+    str -> do
+      let tabs = Text.length str
+      (Line l tabs, Text.drop tabs str)
+
   unconsShebang :: Text -> Maybe (Text, Text)
   unconsShebang = fmap ("#!",) . Text.stripPrefix "#!"
 
diff --git a/src/Language/PureScript/CST/Monad.hs b/src/Language/PureScript/CST/Monad.hs
index 2b79f1a9b3..de027e3962 100644
--- a/src/Language/PureScript/CST/Monad.hs
+++ b/src/Language/PureScript/CST/Monad.hs
@@ -103,7 +103,7 @@ mkParserError stack toks ty =
     }
   where
   range = case NE.nonEmpty toks of
-    Nothing -> SourceRange (SourcePos 0 0) (SourcePos 0 0)
+    Nothing -> SourceRange (SourcePos 0 (0, 0)) (SourcePos 0 (0, 0))
     Just neToks -> widen
      (tokRange . tokAnn $ NE.head neToks)
      (tokRange . tokAnn $ NE.last neToks)
diff --git a/src/Language/PureScript/CST/Positions.hs b/src/Language/PureScript/CST/Positions.hs
index 20d5724271..3f61cb647b 100644
--- a/src/Language/PureScript/CST/Positions.hs
+++ b/src/Language/PureScript/CST/Positions.hs
@@ -11,10 +11,16 @@ import Data.Foldable (foldl')
 import Data.List.NonEmpty qualified as NE
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
-import Data.Void (Void)
+import Data.Void (Void, absurd)
 import Data.Text qualified as Text
 import Language.PureScript.CST.Types
 
+data PosDelta = PosDelta
+  { posDeltaLines :: {-# UNPACK #-} !Int
+  , posDeltaTabs :: {-# UNPACK #-} !Int
+  , posDeltaColumns :: {-# UNPACK #-} !Int
+  }
+
 advanceToken :: SourcePos -> Token -> SourcePos
 advanceToken pos = applyDelta pos . tokenDelta
 
@@ -22,77 +28,81 @@ advanceLeading :: SourcePos -> [Comment LineFeed] -> SourcePos
 advanceLeading = foldl' $ \a -> applyDelta a . commentDelta lineDelta
 
 advanceTrailing :: SourcePos -> [Comment Void] -> SourcePos
-advanceTrailing = foldl' $ \a -> applyDelta a . commentDelta (const (0, 0))
+advanceTrailing = foldl' $ \a -> applyDelta a . commentDelta absurd
 
-tokenDelta :: Token -> (Int, Int)
+tokenDelta :: Token -> PosDelta
 tokenDelta = \case
-  TokLeftParen             -> (0, 1)
-  TokRightParen            -> (0, 1)
-  TokLeftBrace             -> (0, 1)
-  TokRightBrace            -> (0, 1)
-  TokLeftSquare            -> (0, 1)
-  TokRightSquare           -> (0, 1)
-  TokLeftArrow ASCII       -> (0, 2)
-  TokLeftArrow Unicode     -> (0, 1)
-  TokRightArrow ASCII      -> (0, 2)
-  TokRightArrow Unicode    -> (0, 1)
-  TokRightFatArrow ASCII   -> (0, 2)
-  TokRightFatArrow Unicode -> (0, 1)
-  TokDoubleColon ASCII     -> (0, 2)
-  TokDoubleColon Unicode   -> (0, 1)
-  TokForall ASCII          -> (0, 6)
-  TokForall Unicode        -> (0, 1)
-  TokEquals                -> (0, 1)
-  TokPipe                  -> (0, 1)
-  TokTick                  -> (0, 1)
-  TokDot                   -> (0, 1)
-  TokComma                 -> (0, 1)
-  TokUnderscore            -> (0, 1)
-  TokBackslash             -> (0, 1)
-  TokLowerName qual name   -> (0, qualDelta qual + Text.length name)
-  TokUpperName qual name   -> (0, qualDelta qual + Text.length name)
-  TokOperator qual sym     -> (0, qualDelta qual + Text.length sym)
-  TokSymbolName qual sym   -> (0, qualDelta qual + Text.length sym + 2)
-  TokSymbolArr Unicode     -> (0, 3)
-  TokSymbolArr ASCII       -> (0, 4)
-  TokHole hole             -> (0, Text.length hole + 1)
-  TokChar raw _            -> (0, Text.length raw + 2)
-  TokInt raw _             -> (0, Text.length raw)
-  TokNumber raw _          -> (0, Text.length raw)
+  TokLeftParen             -> PosDelta 0 0 1
+  TokRightParen            -> PosDelta 0 0 1
+  TokLeftBrace             -> PosDelta 0 0 1
+  TokRightBrace            -> PosDelta 0 0 1
+  TokLeftSquare            -> PosDelta 0 0 1
+  TokRightSquare           -> PosDelta 0 0 1
+  TokLeftArrow ASCII       -> PosDelta 0 0 2
+  TokLeftArrow Unicode     -> PosDelta 0 0 1
+  TokRightArrow ASCII      -> PosDelta 0 0 2
+  TokRightArrow Unicode    -> PosDelta 0 0 1
+  TokRightFatArrow ASCII   -> PosDelta 0 0 2
+  TokRightFatArrow Unicode -> PosDelta 0 0 1
+  TokDoubleColon ASCII     -> PosDelta 0 0 2
+  TokDoubleColon Unicode   -> PosDelta 0 0 1
+  TokForall ASCII          -> PosDelta 0 0 6
+  TokForall Unicode        -> PosDelta 0 0 1
+  TokEquals                -> PosDelta 0 0 1
+  TokPipe                  -> PosDelta 0 0 1
+  TokTick                  -> PosDelta 0 0 1
+  TokDot                   -> PosDelta 0 0 1
+  TokComma                 -> PosDelta 0 0 1
+  TokUnderscore            -> PosDelta 0 0 1
+  TokBackslash             -> PosDelta 0 0 1
+  TokLowerName qual name   -> PosDelta 0 0 (qualDelta qual + Text.length name)
+  TokUpperName qual name   -> PosDelta 0 0 (qualDelta qual + Text.length name)
+  TokOperator qual sym     -> PosDelta 0 0 (qualDelta qual + Text.length sym)
+  TokSymbolName qual sym   -> PosDelta 0 0 (qualDelta qual + Text.length sym + 2)
+  TokSymbolArr Unicode     -> PosDelta 0 0 3
+  TokSymbolArr ASCII       -> PosDelta 0 0 4
+  TokHole hole             -> PosDelta 0 0 (Text.length hole + 1)
+  TokChar raw _            -> PosDelta 0 0 (Text.length raw + 2)
+  TokInt raw _             -> PosDelta 0 0 (Text.length raw)
+  TokNumber raw _          -> PosDelta 0 0 (Text.length raw)
   TokString raw _          -> multiLine 1 $ textDelta raw
   TokRawString raw         -> multiLine 3 $ textDelta raw
-  TokLayoutStart           -> (0, 0)
-  TokLayoutSep             -> (0, 0)
-  TokLayoutEnd             -> (0, 0)
-  TokEof                   -> (0, 0)
+  TokLayoutStart           -> PosDelta 0 0 0
+  TokLayoutSep             -> PosDelta 0 0 0
+  TokLayoutEnd             -> PosDelta 0 0 0
+  TokEof                   -> PosDelta 0 0 0
 
 qualDelta :: [Text] -> Int
 qualDelta = foldr ((+) . (+ 1) . Text.length) 0
 
-multiLine :: Int -> (Int, Int) -> (Int, Int)
-multiLine n (0, c) = (0, c + n + n)
-multiLine n (l, c) = (l, c + n)
+multiLine :: Int -> PosDelta -> PosDelta
+multiLine n (PosDelta 0 0 c) = PosDelta 0 0 (c + n + n)
+multiLine n (PosDelta l i c) = PosDelta l i (c + n)
 
-commentDelta :: (a -> (Int, Int)) -> Comment a -> (Int, Int)
+commentDelta :: (a -> Int -> PosDelta) -> Comment a -> PosDelta
 commentDelta k = \case
   Comment raw -> textDelta raw
-  Space n -> (0, n)
-  Line a -> k a
+  Space n -> PosDelta 0 0 n
+  Line n i -> k n i
 
-lineDelta :: LineFeed -> (Int, Int)
-lineDelta _ = (1, 1)
+lineDelta :: LineFeed -> Int -> PosDelta
+lineDelta _ tabs = PosDelta 1 tabs 1
 
-textDelta :: Text -> (Int, Int)
-textDelta = Text.foldl' go (0, 0)
+textDelta :: Text -> PosDelta
+textDelta = Text.foldl' go (PosDelta 0 0 0)
   where
-  go (!l, !c) = \case
-    '\n' -> (l + 1, 1)
-    _    -> (l, c + 1)
-
-applyDelta :: SourcePos -> (Int, Int) -> SourcePos
-applyDelta (SourcePos l c) = \case
-  (0, n) -> SourcePos l (c + n)
-  (k, d) -> SourcePos (l + k) d
+  go (PosDelta l i c) = \case
+    '\n' -> PosDelta (l + 1) 0 1
+    '\t' -> PosDelta l (i + 1) c
+    _    -> PosDelta l i (c + 1)
+
+applyDelta :: SourcePos -> PosDelta -> SourcePos
+applyDelta pos delta = case (pos, delta) of
+  (SourcePos l (i, c), PosDelta 0 0 n) -> SourcePos l (i, c + n)
+  (SourcePos l (i, 1), PosDelta 0 t n) -> SourcePos l (i + t, n)
+  -- If we encounter tabs mid-line, treat them as 1 character
+  (SourcePos l (i, c), PosDelta 0 t n) -> SourcePos l (i, c + t + n - 1)
+  (SourcePos l _, PosDelta k t n) -> SourcePos (l + k) (t, n)
 
 sepLast :: Separated a -> a
 sepLast (Separated hd []) = hd
diff --git a/src/Language/PureScript/CST/Print.hs b/src/Language/PureScript/CST/Print.hs
index f6d300ab67..505909ed13 100644
--- a/src/Language/PureScript/CST/Print.hs
+++ b/src/Language/PureScript/CST/Print.hs
@@ -17,6 +17,7 @@ import Data.Text (Text)
 import Data.Text qualified as Text
 import Language.PureScript.CST.Types (Comment(..), LineFeed(..), Module, SourceStyle(..), SourceToken(..), Token(..), TokenAnn(..))
 import Language.PureScript.CST.Flatten (flattenModule)
+import Data.Void (Void)
 
 printToken :: Token -> Text
 printToken = printToken' True
@@ -86,11 +87,10 @@ printLeadingComment :: Comment LineFeed -> Text
 printLeadingComment = \case
   Comment raw -> raw
   Space n -> Text.replicate n " "
-  Line LF -> "\n"
-  Line CRLF -> "\r\n"
+  Line LF n -> "\n" <> Text.replicate n "\t"
+  Line CRLF n -> "\r\n" <> Text.replicate n "\t"
 
-printTrailingComment :: Comment void -> Text
+printTrailingComment :: Comment Void -> Text
 printTrailingComment = \case
   Comment raw -> raw
   Space n -> Text.replicate n " "
-  Line _ -> ""
diff --git a/src/Language/PureScript/CST/Types.hs b/src/Language/PureScript/CST/Types.hs
index ba90f7e95b..781b1d4700 100644
--- a/src/Language/PureScript/CST/Types.hs
+++ b/src/Language/PureScript/CST/Types.hs
@@ -21,7 +21,7 @@ import Language.PureScript.PSString (PSString)
 
 data SourcePos = SourcePos
   { srcLine :: {-# UNPACK #-} !Int
-  , srcColumn :: {-# UNPACK #-} !Int
+  , srcColumn :: {-# UNPACK #-} !(Int, Int)
   } deriving (Show, Eq, Ord, Generic, NFData)
 
 data SourceRange = SourceRange
@@ -32,7 +32,7 @@ data SourceRange = SourceRange
 data Comment l
   = Comment !Text
   | Space {-# UNPACK #-} !Int
-  | Line !l
+  | Line !l !Int
   deriving (Show, Eq, Ord, Generic, Functor, NFData)
 
 data LineFeed = LF | CRLF
diff --git a/src/Language/PureScript/CST/Utils.hs b/src/Language/PureScript/CST/Utils.hs
index 68dcf7d87c..fd4f04ae13 100644
--- a/src/Language/PureScript/CST/Utils.hs
+++ b/src/Language/PureScript/CST/Utils.hs
@@ -73,7 +73,7 @@ opName n = OpName (N.coerceOpName <$> n)
 
 placeholder :: SourceToken
 placeholder = SourceToken
-  { tokAnn = TokenAnn (SourceRange (SourcePos 0 0) (SourcePos 0 0)) [] []
+  { tokAnn = TokenAnn (SourceRange (SourcePos 0 (0, 0)) (SourcePos 0 (0, 0))) [] []
   , tokValue = TokLowerName [] "<placeholder>"
   }
 

From b0b29804f70153b7f4e8dec4905ea3cbff8a4a7d Mon Sep 17 00:00:00 2001
From: Artemis System <theArtemisSystem@gmail.com>
Date: Fri, 13 Jun 2025 12:34:32 +0200
Subject: [PATCH 2/2] Remove failing tab indent test

---
 tests/purs/failing/Whitespace1.out  | 10 ----------
 tests/purs/failing/Whitespace1.purs |  5 -----
 2 files changed, 15 deletions(-)
 delete mode 100644 tests/purs/failing/Whitespace1.out
 delete mode 100644 tests/purs/failing/Whitespace1.purs

diff --git a/tests/purs/failing/Whitespace1.out b/tests/purs/failing/Whitespace1.out
deleted file mode 100644
index 299c3ddb53..0000000000
--- a/tests/purs/failing/Whitespace1.out
+++ /dev/null
@@ -1,10 +0,0 @@
-Error found:
-at tests/purs/failing/Whitespace1.purs:5:1 - 5:2 (line 5, column 1 - line 5, column 2)
-
-  Unable to parse module:
-  Illegal whitespace character U+0009
-
-
-See https://github.com/purescript/documentation/blob/master/errors/ErrorParsingModule.md for more information,
-or to contribute content related to this error.
-
diff --git a/tests/purs/failing/Whitespace1.purs b/tests/purs/failing/Whitespace1.purs
deleted file mode 100644
index b73805a0c7..0000000000
--- a/tests/purs/failing/Whitespace1.purs
+++ /dev/null
@@ -1,5 +0,0 @@
--- @shouldFailWith ErrorParsingModule
-module Main where
-
-test = do
-	test