From cca7a01d0f83b61f12491db0923e502c5650b918 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Fri, 11 Feb 2022 11:50:36 +1100
Subject: init

---
 README      |   2 +
 default.nix |  18 ++++++
 flake.lock  |  42 +++++++++++++
 flake.nix   |  20 ++++++
 latexfmt.hs | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 286 insertions(+)
 create mode 100644 README
 create mode 100644 default.nix
 create mode 100644 flake.lock
 create mode 100644 flake.nix
 create mode 100644 latexfmt.hs

diff --git a/README b/README
new file mode 100644
index 0000000..cf1d4fd
--- /dev/null
+++ b/README
@@ -0,0 +1,2 @@
+This is an idempotent latex formatter. Sentences are broken on new lines to
+limit conflicts. Reads from STDIN and writes formatted output to STDOUT.
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..2afb73b
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,18 @@
+{ stdenv
+, ghc
+}:
+stdenv.mkDerivation {
+  name = "latexfmt";
+
+  src = ./.;
+
+  nativeBuildInputs = [ (ghc.withPackages (x: [ x.attoparsec ])) ];
+
+  buildPhase = ''
+    ghc -O -o latexfmt latexfmt.hs
+  '';
+
+  installPhase = ''
+    install -Dm 755 ./latexfmt $out/bin/latexfmt
+  '';
+}
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..28b70f1
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,42 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "locked": {
+        "lastModified": 1644229661,
+        "narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1644539890,
+        "narHash": "sha256-plaenLdLspbDinPM9ULYq15zyQybsYKDs1qiDiIzI8E=",
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "2c320fd36cb284ad5767de8550e1dcffa8cfdad8",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..951ec9e
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,20 @@
+{
+  inputs = {
+    nixpkgs.url = "github:nixos/nixpkgs";
+    flake-utils.url = "github:numtide/flake-utils";
+  };
+
+  outputs =
+    { nixpkgs
+    , flake-utils
+    , self
+    }:
+    flake-utils.lib.eachDefaultSystem (
+      system: let
+        pkgs = import nixpkgs { inherit system; };
+      in
+        {
+          defaultPackage = pkgs.callPackage ./. { };
+        }
+    );
+}
diff --git a/latexfmt.hs b/latexfmt.hs
new file mode 100644
index 0000000..c1ecfe1
--- /dev/null
+++ b/latexfmt.hs
@@ -0,0 +1,204 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+-- latexfmt: an idempotent LaTeX formatter.
+-- Reads a document from STDIN, tokenises it and writes it to STDOUT with
+-- each sentence starting on a new line and nested content indented.
+
+import Control.Applicative
+import Control.Monad (unless, when)
+import Control.Monad.State
+import Data.Attoparsec.Text hiding (take)
+import Data.Char (isAlpha, isDigit)
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+import Prelude hiding (print, takeWhile)
+import System.Exit (die)
+
+-- | Lexical elements of a LaTeX document.
+data Tokens
+  = Wrd {unwrd :: T.Text} -- ^ run of ordinary text
+  | Cmd T.Text -- ^ command name without its leading backslash
+  | BSq [Tokens] -- ^ contents of a @[...]@ group
+  | BCr [Tokens] -- ^ contents of a @{...}@ group
+  | NL -- ^ blank line, i.e. a paragraph break
+  | Cmt T.Text -- ^ comment text after the leading run of @%@
+  | Nobs -- ^ non-breaking space, @~@
+  deriving Show
+
+-- | True for any character that is neither a letter nor a digit.
+isCtrl :: Char -> Bool
+isCtrl x = not $ isAlpha x || isDigit x
+
+-- | Test the first character of a text; False when the text is empty.
+startsWith :: (Char -> Bool) -> T.Text -> Bool
+startsWith p t = case T.uncons t of
+  Just (c, _) -> p c
+  Nothing -> False
+
+-- tokens to strings
+toStr :: Tokens -> State Printer ()
+toStr (Wrd w) = do
+  -- Closing punctuation attaches to the previous text without a space.
+  if startsWith (`elem` (".,;:!?)" :: String)) w then print' w else print w
+  -- A word ending in a full stop ends the sentence: break the line.
+  when ("." `T.isSuffixOf` w) printnl
+toStr (Cmd c)
+  -- Control symbols such as \% or \\ stay glued to the preceding text.
+  | startsWith isCtrl c = print' $ "\\" `T.append` c
+  -- Named commands (and a lone backslash) start on a fresh line;
+  -- \begin and \end additionally drive the indentation level.
+  | otherwise = do
+      printnl'
+      when (c == "end") unindent
+      print $ "\\" `T.append` c
+      when (c == "begin") indent
+toStr (BSq ts) = bracketed "[" "]" ts
+toStr (BCr ts) = bracketed "{" "}" ts
+toStr NL = printnl' >> printnl
+toStr (Cmt c) = do
+  print "%"
+  print' c
+  -- A comment runs to the end of its line, so the line has to end here;
+  -- otherwise the tokens that follow would become part of the comment.
+  printnl
+toStr Nobs = print' "~" >> suppress
+
+-- | Print a bracketed group: nothing is inserted after the opening
+-- bracket and the contents sit one indentation level deeper.
+bracketed :: T.Text -> T.Text -> [Tokens] -> State Printer ()
+bracketed open close ts = do
+  print' open
+  suppress
+  indent
+  mapM_ toStr ts
+  unindent
+  print' close
+
+-- Tokeniser
+
+-- | Characters that separate tokens.
+ws :: Char -> Bool
+ws = inClass " \t\n"
+
+-- | Skip any whitespace, line breaks included.
+pad :: Parser T.Text
+pad = takeWhile ws
+
+-- | Skip spaces and tabs without leaving the current line.
+hpad :: Parser T.Text
+hpad = takeWhile (inClass " \t")
+
+-- | One token. The paragraph break is tried first because 'pad' would
+-- otherwise consume the blank line as ordinary whitespace.
+token :: Parser Tokens
+token = nl <|> pad *> (nobs <|> cmd <|> bsq <|> bcr <|> cmt <|> wrd)
+
+nobs :: Parser Tokens
+nobs = Nobs <$ string "~"
+
+-- | A blank line. Blanks trailing the previous line are skipped first so
+-- that they cannot hide the paragraph break from this parser.
+nl :: Parser Tokens
+nl = NL <$ (hpad *> string "\n" *> hpad *> string "\n")
+
+-- | The text of a word: anything up to whitespace, a bracket, a backslash
+-- or a tilde.
+wordText :: Parser T.Text
+wordText = takeWhile1 (notInClass " \t\n[]{}\\~")
+
+-- | A backslash and the command name after it. A double backslash is read
+-- as one control symbol so that a LaTeX line break is not torn into two
+-- lone backslashes; any other non-word character gives an empty name.
+cmd :: Parser Tokens
+cmd = Cmd <$> (string "\\" *> (wordText <|> string "\\" <|> pure ""))
+
+bsq :: Parser Tokens
+bsq = BSq <$> (string "[" *> many' token) <* pad <* string "]"
+
+bcr :: Parser Tokens
+bcr = BCr <$> (string "{" *> many' token) <* pad <* string "}"
+
+wrd :: Parser Tokens
+wrd = Wrd <$> wordText
+
+-- | A run of @%@ and the rest of the line; only the latter is kept.
+cmt :: Parser Tokens
+cmt = Cmt <$> (takeWhile1 (== '%') *> takeWhile (/= '\n'))
+
+-- indented printer
+type Beginning = Bool
+type Indent = Int
+type SuppressSpace = Bool
+
+-- | Printer state: whether output is at the start of a line, whether the
+-- next separator is suppressed, the indentation level, and the output so far.
+data Printer = Printer Beginning SuppressSpace Indent T.Text
+  deriving Show
+
+-- | Append text, preceded by the indentation (two spaces per level) at the
+-- start of a line and by the given separator anywhere else.
+emit :: T.Text -> T.Text -> State Printer ()
+emit sep str = modify $ \(Printer beg _ i s) ->
+  let lead = if beg then T.replicate (2 * i) " " else sep
+   in Printer False False i (T.concat [s, lead, str])
+
+-- | Print text separated from what precedes it by a space, unless the
+-- separator has been suppressed.
+print :: T.Text -> State Printer ()
+print str = do
+  Printer _ sup _ _ <- get
+  emit (if sup then "" else " ") str
+
+-- | Print text directly after what precedes it, without a space.
+print' :: T.Text -> State Printer ()
+print' = emit ""
+
+-- | Append a line break unconditionally.
+newline :: State Printer ()
+newline = modify $ \(Printer _ _ i s) -> Printer True False i (s `T.append` "\n")
+
+-- | End the current line, unless the separator has been suppressed.
+printnl :: State Printer ()
+printnl = do
+  Printer _ sup _ _ <- get
+  unless sup newline
+
+-- | Like 'printnl', but does nothing when already at the start of a line.
+printnl' :: State Printer ()
+printnl' = do
+  Printer beg sup _ _ <- get
+  unless (beg || sup) newline
+
+println :: T.Text -> State Printer ()
+println str = print str >> printnl
+
+println' :: T.Text -> State Printer ()
+println' str = print' str >> printnl
+
+-- | Go one indentation level deeper.
+indent :: State Printer ()
+indent = modify $ \(Printer b sup i s) -> Printer b sup (i + 1) s
+
+-- | Go one indentation level up, but never below zero, so that an
+-- unmatched \end cannot skew the indentation of what follows.
+unindent :: State Printer ()
+unindent = modify $ \(Printer b sup i s) -> Printer b sup (max 0 (i - 1)) s
+
+-- | Suppress the separator (space or line break) before the next output.
+suppress :: State Printer ()
+suppress = modify $ \(Printer b _ i s) -> Printer b True i s
+
+-- entry
+
+-- | Format STDIN to STDOUT. Input that cannot be tokenised to the end is
+-- rejected with an error instead of being silently truncated.
+main :: IO ()
+main = do
+  input <- T.getContents
+  case parseOnly (many' token <* pad <* endOfInput) input of
+    Left err -> die $ "latexfmt: cannot parse input: " ++ err
+    Right toks -> do
+      let Printer _ _ _ out =
+            execState (mapM_ toStr toks) (Printer True False 0 "")
+      T.putStr out
-- 
cgit v1.2.3