tzemanovic/plfa: standard-library/src/Test/Golden.agda

------------------------------------------------------------------------
-- The Agda standard library
--
-- Golden testing framework
------------------------------------------------------------------------

-- This is a port of the golden testing framework used by the Idris2
-- compiler and various Idris2 libraries.
-- It provides the core features required to perform golden file testing.
--
-- We provide the core functionality to run a *single* golden file test,
-- or a whole test tree. This allows the developer freedom to use as is
-- or design the rest of the test harness to their liking.

------------------------------------------------------------------------
-- Test Structure
--
-- This harness works from the assumption that each individual golden
-- test comprises of a directory with the following structure:
--
-- + `run` a *shell* script that runs the test. We expect it to:
--   * Use `$1` as the variable standing for the Agda executable to be tested
--   * Clean up after itself (e.g. by running `rm -rf build/`)
--
-- + `expected` a file containting the expected output of `run`
--
-- During testing, the test harness will generate an `output` file.
-- It will be compared to the `expected` golden file provided by the user.
-- In case there is a mismatch, the framework will:

-- + either display output & expected if the session is not interactive
-- + or use the following command line to produce a diff and ask the user
--   whether they want to overwrite the currently `expected` value:
--
--  git diff --no-index --exit-code --word-diff=color expected output
--
-- If `git` fails then the runner will simply present the expected and
-- 'given' files side-by-side.
--
-- Of note, it is helpful to add `output` to a local `.gitignore` instance
-- to ensure that it is not mistakenly versioned.


------------------------------------------------------------------------
-- Options
--
-- The test harness has several options that may be set:
--
-- + `exeUnderTest` The path of the executable we are testing (typically `agda`)
-- + `onlyNames`    The tests to run relative to the generated executable.
-- + `onlyFile`     The file listing the tests to run relative to the generated executable.
-- + `interactive`  Whether to offer to update the expected file or not.
-- + `timing`       Whether to display time taken for each test.
-- + `failureFile`  The file in which to write the list of failing tests.
-- + `colour`       The output should use ANSI escape codes
--
-- We provide an options parser (`options`) that takes the list of command line
-- arguments and constructs this for you.
--

------------------------------------------------------------------------
-- Usage
--
-- When compiled to an executable the expected usage is:
--
--```sh
--runtests <path to executable under test>
--   [--timing]
--   [--interactive]
--   [--only-file PATH]
--   [--failure-file PATH]
--   [--no-colour]
--   [--only [NAMES]]
--```
--
-- assuming that the test runner is compiled to an executable named `runtests`.

{-# OPTIONS --cubical-compatible --guardedness #-}

module Test.Golden where

open import Data.Bool.Base using (Bool; true; false; if_then_else_)
import Data.Char as Char
open import Data.Fin using (#_)
import Data.Integer.Base as ℤ
open import Data.List.Base as List using (List; []; _∷_; _++_; filter; partitionSums)
open import Data.List.Relation.Binary.Infix.Heterogeneous.Properties using (infix?)
open import Data.List.Relation.Unary.Any using (any?)
open import Data.Maybe.Base using (Maybe; just; nothing; fromMaybe)
open import Data.Nat.Base using (ℕ; _≡ᵇ_; _<ᵇ_; _+_; _∸_)
import Data.Nat.Show as ℕ using (show)
open import Data.Product.Base using (_×_; _,_)
open import Data.String.Base as String using (String; lines; unlines; unwords; concat)
open import Data.String.Properties as String using (_≟_)
open import Data.Sum.Base using (_⊎_; inj₁; inj₂)
open import Data.Unit.Base using (⊤)

open import Function.Base using (id; _$_; case_of_)

open import Relation.Nullary.Decidable.Core using (does)

open import Codata.Musical.Notation using (♯_)
open import IO

open import System.Clock as Clock using (time′; Time; seconds)
open import System.Console.ANSI
open import System.Directory using (doesFileExist; doesDirectoryExist)
open import System.Environment using (getArgs; lookupEnv)
open import System.Exit
open import System.FilePath.Posix using (mkFilePath)
open import System.Process using (callCommand; system)

record Options : Set where
  field
    -- What is the name of the Agda executable?
    exeUnderTest : String
    -- Should we only run some specific cases?
    onlyNames : List String
    -- Should we run the test suite interactively?
    interactive : Bool
    -- Should we time and display the tests?
    timing : Bool
    -- Should we write the list of failing cases to a file?
    failureFile : Maybe String
    -- Should we use ANSI escape codes to colour the output?
    colour : Bool
open Options

initOptions : String → Options
initOptions exe = record
  { exeUnderTest = exe
  ; onlyNames    = []
  ; interactive  = false
  ; timing       = false
  ; failureFile  = nothing
  ; colour       = true
  }

usage : String
usage = unwords
  $ "Usage:"
  ∷ "runtests <path>"
  ∷ "[--timing]"
  ∷ "[--interactive]"
  ∷ "[--failure-file PATH]"
  ∷ "[--only-file PATH]"
  ∷ "[--no-colour]"
  ∷ "[--only [NAMES]]"
  ∷ []

data Error : Set where
  MissingExecutable : Error
  InvalidArgument : String → Error

show : Error → String
show MissingExecutable = "Expected a path to Agda, got nothing"
show (InvalidArgument arg) = "Invalid argument: " String.++ arg

-- Process the command line options
options : List String → IO (Error ⊎ Options)
options(exe ∷ rest) = mkOptions exe rest where

  go : List String → Maybe String → Options → String ⊎ (Maybe String × Options)
  go []                             mfp opts = inj₂ (mfp , opts)
  go ("--timing"            ∷ args) mfp opts =
    go args mfp (record opts { timing = true })
  go ("--interactive"       ∷ args) mfp opts =
    go args mfp (record opts { interactive = true })
  go ("--failure-file" ∷ fp ∷ args) mfp opts =
    go args mfp (record opts { failureFile = just fp })
  go ("--only"              ∷ args) mfp opts =
    inj₂ (mfp , (record opts { onlyNames = args }))
  go ("--only-file" ∷ fp    ∷ args) mfp opts =
    go args (just fp) (record opts { onlyNames = args })
  go ("--no-colour"         ∷ args) mfp opts =
    go args mfp (record opts { colour = false })
  go (arg ∷ _) _ _ = inj₁ arg

  mkOptions : String → List String → IO (Error ⊎ Options)
  mkOptions exe rest = do
    inj₂ (mfp , opts) ← pure $ go rest nothing (initOptions exe)
      where inj₁ arg → pure (inj₁ (InvalidArgument arg))
    term ← fromMaybe "" <$> lookupEnv "TERM"
    let opts = if does (term ≟ "DUMB")
               then record opts { colour = false }
               else opts
    just fp ← pure mfp
      where _ → pure (inj₂ opts)
    only ← readFiniteFile fp
    pure $ inj₂ $ record opts { onlyNames = lines only ++ onlyNames opts }

options [] = pure (inj₁ MissingExecutable)


-- The result of a test run
-- `Left` corresponds to a failure and `Right` to a success
Result : Set
Result = String ⊎ String

-- Run the specified golden test with the supplied options.
runTest : Options → String → IO Result
runTest opts testPath = do

  true ← doesDirectoryExist (mkFilePath testPath)
    where false → fail directoryNotFound

  time ← time′ $ callCommand $ unwords
           $ "cd" ∷ testPath
           ∷ "&&" ∷ "sh ./run" ∷ opts .exeUnderTest
           ∷ "| tr -d '\\r' > output"
           ∷ []

  just out ← readLocalFile "output"
    where nothing → fail (fileNotFound "output")
  just exp ← readLocalFile "expected"
    where nothing → do if opts .interactive
                         then mayOverwrite nothing out
                         else putStrLn (fileNotFound "expected")
                       pure (inj₁ testPath)

  let result = does (out String.≟ exp)

  if result
    then printTiming (opts .timing) time
           $ if opts .colour
               then withCommand (setColour foreground classic green)
               else id
           $ "success"
    else do printTiming (opts .timing) time
             $ if opts .colour
               then withCommand (setColour foreground bright red)
               else id
             $ "FAILURE"
            if opts .interactive
              then mayOverwrite (just exp) out
              else putStrLn (unlines (expVsOut exp out))

  pure $ if result then inj₂ testPath else inj₁ testPath

  where

    directoryNotFound : String
    directoryNotFound = unwords
                      $ "Directory"
                      ∷ testPath
                      ∷ "does not exist" ∷ []

    fileNotFound : String → String
    fileNotFound name = unwords
                      $ "File"
                      ∷ (testPath String.++ "/output")
                      ∷ "does not exist" ∷ []

    fail : String → IO Result
    fail msg = do putStrLn msg
                  pure (inj₁ testPath)

    readLocalFile : String → IO (Maybe String)
    readLocalFile name = do
      let fp = concat (testPath ∷ "/" ∷ name ∷ [])
      true ← doesFileExist (mkFilePath fp)
        where false → pure nothing
      just <$> readFiniteFile fp

    getAnswer : IO Bool
    getAnswer = untilJust $ do
      str ← getLine
      case str of λ where
        "y" → pure $ just true
        "n" → pure $ just false
        ""  → pure $ just false -- default answer is no
        _   → do putStrLn "Invalid answer."
                 pure nothing

    expVsOut : String → String → List String
    expVsOut exp out = "Expected:" ∷ exp ∷ "Given:" ∷ out ∷ []

    hasFailed : ExitCode → Bool
    hasFailed ExitSuccess        = false
    hasFailed (ExitFailure code) = code ℤ.≤ᵇ ℤ.+ 0

    mayOverwrite : Maybe String → String → IO _
    mayOverwrite mexp out = do
      case mexp of λ where
        nothing → putStrLn $ unlines
          $ "Golden value missing. I computed the following result:"
          ∷ out
          ∷ "Accept new golden value? [y/N]"
          ∷ []
        (just exp) → do
          code ← system $ concat
            $ "git diff --no-index --exit-code --word-diff=color "
            ∷ testPath ∷ "/expected "
            ∷ testPath ∷ "/output"
            ∷ []
          putStrLn $ unlines
            $ "Golden value differs from actual value."
            ∷ (if hasFailed code then expVsOut exp out else [])
            ++ "Accept actual value as new golden value? [y/N]"
            ∷ []

      b ← getAnswer
      when b $ writeFile (testPath String.++ "/expected") out

    printTiming : Bool → Time → String → IO _
    printTiming false _    msg = putStrLn $ concat (testPath ∷ ": " ∷ msg ∷ [])
    printTiming true  time msg =
      let time  = ℕ.show (time .seconds) String.++ "s"
          spent = 9 + List.sum (List.map String.length (testPath ∷ time ∷ []))
               -- ^ hack: both "success" and "FAILURE" have the same length
               --   can't use `String.length msg` because the msg contains escape codes
          pad   = String.replicate (72 ∸ spent) ' '
      in putStrLn (concat (testPath ∷ ": " ∷ msg ∷ pad ∷ time ∷ []))

-- A test pool is characterised by
--  + a name
--  + and a list of directory paths
record TestPool : Set where
  constructor mkTestPool
  field
    poolName : String
    testCases : List String
open TestPool

module Summary where

  -- The summary of a test pool run
  record Summary : Set where
    constructor mkSummary
    field
      success : List String
      failure : List String
  open Summary public

  init : Summary
  init = mkSummary [] []

  merge : Summary → Summary → Summary
  merge (mkSummary ws1 ls1) (mkSummary ws2 ls2) =
   mkSummary (ws2 ++ ws1) (ls2 ++ ls1)

  update : List Result → Summary → Summary
  update res sum =
    let (ls2 , ws2) = partitionSums res in
    merge sum (mkSummary ws2 ls2)

open Summary using (Summary) hiding (module Summary)

-- Only keep the tests that have been asked for
filterTests : Options → List String → List String
filterTests opts = case onlyNames opts of λ where
  [] → id
  xs → let names = List.map String.toList xs in
       filter (λ n → any? (λ m → infix? Char._≟_ m (String.toList n)) names)

poolRunner : Options → TestPool → IO Summary
poolRunner opts pool = do
  -- check there is a non-empty list of tests we want to run
  let tests = filterTests opts (pool .testCases)
  (_ ∷ _) ← pure tests
    where [] → pure Summary.init

  -- announce the test pool and run them
  putStrLn banner
  loop Summary.init tests

  where

  separator : String
  separator = String.replicate 72 '-'

  banner : String
  banner = unlines
         $ "" ∷ separator
         ∷ pool .poolName
         ∷ separator ∷ ""
         ∷ []

  loop : Summary → List String → IO Summary
  loop acc [] = pure acc
  loop acc (test ∷ tests) = do
    res <- runTest opts test
    loop (Summary.update (res ∷ []) acc) tests

runner : List TestPool → IO ⊤
runner tests = do
  -- figure out the options
  args ← getArgs
  inj₂ opts ← options args
    where inj₁ err → die (unlines (show err ∷ usage ∷ []))

  -- run the tests
  ress ← List.mapM (poolRunner opts) tests
  let open Summary
  let res = List.foldl merge init ress

  -- report the result
  let nsucc = List.length (res .success)
  let nfail = List.length (res .failure)
  let ntotal = nsucc + nfail
  putStrLn $ concat $ ℕ.show nsucc ∷ "/" ∷ ℕ.show ntotal ∷ " tests successful" ∷ []

  -- deal with failures
  let list = unlines (res .failure)
  when (0 <ᵇ nfail) $ do
    putStrLn "Failing tests:"
    putStrLn list
  whenJust (opts .failureFile) $ λ fp → writeFile fp list

  -- exit
  if 0 ≡ᵇ nfail
   then exitSuccess
   else exitFailure