From 7d779e30e79dc3ff32d354a95e1c0e3ae7b2061f Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@gmail.com>
Date: Sun, 7 Jul 2024 00:04:55 +1200
Subject: [PATCH] Implement and document a new API

---
 .github/workflows/CI.yml |  17 ++-
 LICENSE                  |   2 +-
 Project.toml             |   3 +-
 README.md                | 112 ++++++++++++++++-
 src/CodeEvaluation.jl    |   7 +-
 src/codeblock.jl         |  16 +++
 src/namedsandboxes.jl    |  22 ----
 src/parseblock.jl        |  87 +++++++++++++
 src/replblock.jl         | 161 ++++++++++++++++++++++++
 src/sandbox.jl           | 264 ++++++++++++++++++++++++---------------
 test/codeblock.jl        | 115 +++++++++++++++++
 test/parseblock.jl       |  95 ++++++++++++++
 test/replblock.jl        | 121 ++++++++++++++++++
 test/runtests.jl         |  87 ++-----------
 test/sandbox.jl          | 171 +++++++++++++++++++++++++
 15 files changed, 1064 insertions(+), 216 deletions(-)
 create mode 100644 src/codeblock.jl
 delete mode 100644 src/namedsandboxes.jl
 create mode 100644 src/parseblock.jl
 create mode 100644 src/replblock.jl
 create mode 100644 test/codeblock.jl
 create mode 100644 test/parseblock.jl
 create mode 100644 test/replblock.jl
 create mode 100644 test/sandbox.jl

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 64a4f48..88643c9 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -23,13 +23,28 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.10'
           - '1.6'
+          - '1'
           - 'nightly'
         os:
           - ubuntu-latest
+          - macos-latest
+          - windows-latest
         arch:
           - x64
+        include:
+          - version: '1'
+            os: ubuntu-latest
+            arch: x86
+          - version: '1.7'
+            os: ubuntu-latest
+            arch: x64
+          - version: '1.8'
+            os: ubuntu-latest
+            arch: x64
+          - version: '1.9'
+            os: ubuntu-latest
+            arch: x64
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
diff --git a/LICENSE b/LICENSE
index fa86886..653e4ce 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,7 +1,7 @@
 MIT License
 
 Copyright (c) 2016-2021: Michael Hatherly, Morten Piibeleht, Fredrik Ekre, and Documenter.jl contributors
-Copyright (c) 2024 Morten Piibeleht <morten.piibeleht@gmail.com> and contributors
+Copyright (c) 2024: Morten Piibeleht <morten.piibeleht@gmail.com> and contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/Project.toml b/Project.toml
index f1b37c4..8c2d9df 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,10 +1,11 @@
 name = "CodeEvaluation"
 uuid = "5a076611-96cb-4f02-9d3a-9e309f06f8ff"
-authors = ["Morten Piibeleht <morten.piibeleht@juliahub.com> and contributors"]
+authors = ["Morten Piibeleht <morten.piibeleht@gmail.com> and contributors"]
 version = "0.0.1"
 
 [deps]
 IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 
 [compat]
 IOCapture = "0.2"
diff --git a/README.md b/README.md
index a25bbdd..644681f 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,114 @@
 [![Build Status](https://github.com/JuliaDocs/CodeEvaluation.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/JuliaDocs/CodeEvaluation.jl/actions/workflows/CI.yml?query=branch%3Amain)
 [![PkgEval](https://JuliaCI.github.io/NanosoldierReports/pkgeval_badges/C/CodeEvaluation.svg)](https://JuliaCI.github.io/NanosoldierReports/pkgeval_badges/C/CodeEvaluation.html)
 
+A small utility package to emulate executing Julia code, seemingly in a clean `Main` module.
+
 > [!NOTE]
-> This package is in active development, and not yet registered.
+> This package is in active development.
+
+## API overview
+
+There are two main parts to the API:
 
-A small utility package to emulate executing Julia code in a clean `Main` module.
+1. The `Sandbox` object: provides a clean, mock Julia Main module, and the related low-level `evaluate!` function to directly evaluate Julia expressions in the sandbox.
 
-The package uses [IOCapture.jl](https://github.com/JuliaDocs/IOCapture.jl) to perform output capture of the evaluated code.
+2. Higher level functions that can be used to run code in the sandbox in different modes (`codeblock!` and `replblock!`).
+
+> [!NOTE]
+> The functions that run code in a sandbox are marked with `!` because they mutate the sandbox.
 
 > [!WARNING]
-> The code evaluation is not thread-safe.
-> This is because for each evaluation, the code has to change the Julia processe's working directory with `cd`.
-> This global change will also affect any code running in parallel in other tasks or threads.
+> The code evaluation is not thread/async-safe.
+> For each evaluation, the code has to change the Julia process' working directory with `cd`.
+> This also affects any code running in parallel in other tasks or threads.
+
+> [!NOTE]
+> This is just a high-level overview.
+> See the docstrings for more details!
+
+### Sandbox
+
+Constructing a `Sandbox` object provides you with a clean, mock Julia `Main` module in which code can be evaluated.
+
+The `evaluate!` function can be used to evaluate Julia expressions within the context of the sandbox module.
+It returns a `Result` object that contains the captured return value and what was printed into the standard output and error streams.
+
+```julia-repl
+julia> sb = CodeEvaluation.Sandbox();
+
+julia> r = CodeEvaluation.evaluate!(sb, :(x = 40 + 2));
+
+julia> r.value, r.output
+(42, "")
+
+julia> r = CodeEvaluation.evaluate!(sb, :(println("x = " * string(x))));
+
+julia> r.value, r.output
+(nothing, "x = 42\n")
+```
+
+As an implementation detail, it uses the [IOCapture.jl](https://github.com/JuliaDocs/IOCapture.jl) package underneath to perform output capture of the evaluated code.
+
+As a caveat, since the sandboxes are implemented as anonymous Julia modules, all within the same Julia process, there are limitations to their independence (e.g. method definitions and other global state modifications can, of course, leak over).
+The goal is to be best-effort in terms of providing a seemingly independent Julia session to execute code in.
+
+> [!NOTE]
+> The notion of a sandbox can probably be abstracted.
+> While a module-based sandbox is very simple, it would be useful to have a way to execute Julia code in a clean process (e.g. to fully enforce the independence of the sandboxes, run code in a different package environment, or multi-threading settings).
+> However, ideally the high-level API would be the same, irrespective of how the sandbox is implemented.
+
+### Evaluating code
+
+Presently, there are two functions that offer a higher-level way to run code in a sandbox:
+
+1. `codeblock!` is meant to offer a simple way to execute a block of Julia code (provided as a simple string, not a parsed expression).
+   This is roughly meant to correspond to running a Julia script.
+
+   ```julia-repl
+   julia> sb = CodeEvaluation.Sandbox();
+
+   julia> code = """
+          x = 40
+          println("x = \$x")
+          x + 2
+          """
+   "x = 40\nprintln(\"x = \$x\")\nx + 2\n"
+
+   julia> r = CodeEvaluation.codeblock!(sb, code);
+
+   julia> r.value, r.output
+   (42, "x = 40\n")
+   ```
+
+2. `replblock!` emulates a REPL session.
+   The input code is split up and evaluated as if copy-pasted into the REPL line-by-line.
+   The outputs are then captured as if they would be shown in the REPL.
+
+   ```julia-repl
+   julia> sb = CodeEvaluation.Sandbox();
+
+   julia> code = """
+          x = 40
+          println("x = \$x")
+          x + 2
+          """
+   "x = 40\nprintln(\"x = \$x\")\nx + 2\n"
+
+   julia> r = CodeEvaluation.replblock!(sb, code);
+   ```
+
+   At this point, using the `CodeEvaluation.join_to_string(r)` function, the package is able to reconstruct what the corresponding REPL session would look like.
+
+   ```julia-repl
+   julia> x = 40
+   40
+
+   julia> println("x = $x")
+   x = 40
+
+   julia> x + 2
+   42
+   ```
+
+> [!NOTE]
+> Additional code evaluation "modes" could be added as new functions --- the precise requirements differ, so it would be useful to have a library of methods available.
diff --git a/src/CodeEvaluation.jl b/src/CodeEvaluation.jl
index decd2a9..9329c23 100644
--- a/src/CodeEvaluation.jl
+++ b/src/CodeEvaluation.jl
@@ -1,7 +1,10 @@
 module CodeEvaluation
-import IOCapture
+using IOCapture: IOCapture
+using REPL: REPL
 
+include("parseblock.jl")
 include("sandbox.jl")
-include("namedsandboxes.jl")
+include("codeblock.jl")
+include("replblock.jl")
 
 end
diff --git a/src/codeblock.jl b/src/codeblock.jl
new file mode 100644
index 0000000..558d3dd
--- /dev/null
+++ b/src/codeblock.jl
@@ -0,0 +1,16 @@
+"""
+    CodeEvaluation.codeblock!(sandbox::Sandbox, code::AbstractString; kwargs...) -> Result
+
+Evaluates a block of Julia code `code` in the `sandbox`, as if it is included as
+a script. Returns a [`Result`](@ref) object, containing the result of the evaluation.
+
+# Keywords
+
+- `color::Bool=true`: determines whether or not to capture colored output (i.e. controls
+  the IOContext).
+"""
+function codeblock!(sandbox::Sandbox, code::AbstractString; color::Bool=true)
+    exprs = CodeEvaluation.parseblock(code)
+    block_expr = Expr(:block, (expr.expr for expr in exprs)...)
+    return evaluate!(sandbox, block_expr; setans=true, color)
+end
diff --git a/src/namedsandboxes.jl b/src/namedsandboxes.jl
deleted file mode 100644
index 14c0a8a..0000000
--- a/src/namedsandboxes.jl
+++ /dev/null
@@ -1,22 +0,0 @@
-struct NamedSandboxes
-    _pwd::String
-    _prefix::String
-    _sandboxes::Dict{Symbol,Sandbox}
-
-    function NamedSandboxes(pwd::AbstractString, prefix::AbstractString="")
-        unique_prefix = _gensym_string()
-        prefix = isempty(prefix) ? unique_prefix : string(prefix, "_", unique_prefix)
-        return new(pwd, prefix, Dict{Symbol,Sandbox}())
-    end
-end
-
-function Base.get!(s::NamedSandboxes, name::Union{AbstractString,Nothing}=nothing)
-    sym = if isnothing(name) || isempty(name)
-        Symbol("__", s._prefix, "__", _gensym_string())
-    else
-        Symbol("__", s._prefix, "__named__", name)
-    end
-    # Either fetch and return an existing sandbox from the meta dictionary (based on the generated name),
-    # or initialize a new clean one, which gets stored in meta for future re-use.
-    return get!(() -> Sandbox(sym, s._pwd), s._sandboxes, sym)
-end
diff --git a/src/parseblock.jl b/src/parseblock.jl
new file mode 100644
index 0000000..303b3e2
--- /dev/null
+++ b/src/parseblock.jl
@@ -0,0 +1,87 @@
+struct ParsedExpression
+    expr::Any
+    code::SubString{String}
+end
+
+"""
+Returns a vector of parsed expressions and their corresponding raw strings.
+
+Returns a `Vector` of `ParsedExpression` objects, where the `expr` field is the parsed
+expression (e.g. an `Expr` or `Symbol` object) and the `code` field is the string of code
+the expression was parsed from.
+
+The keyword argument `skip = N` drops the leading `N` lines from the input string.
+
+If `raise=false` is passed, the `Meta.parse` does not raise an exception on parse errors,
+but instead returns an expression that will raise an error when evaluated. `parseblock`
+returns this expression normally and it must be handled appropriately by the caller.
+
+The `linenumbernode` can be passed as a `LineNumberNode` to give information about filename
+and starting line number of the block.
+"""
+function parseblock(
+    code::AbstractString;
+    skip=0,
+    keywords=true,
+    raise=true,
+    linenumbernode=nothing
+)
+    # Drop `skip` leading lines from the code block. Needed for deprecated `{docs}` syntax.
+    code = string(code, '\n')
+    code = last(split(code, '\n', limit=skip + 1))
+    endofstr = lastindex(code)
+    results = ParsedExpression[]
+    cursor = 1
+    while cursor < endofstr
+        # Check for keywords first since they will throw parse errors if we `parse` them.
+        line = match(r"^(.*)\r?\n"m, SubString(code, cursor)).match
+        keyword = Symbol(strip(line))
+        (ex, ncursor) = if keywords && haskey(Docs.keywords, keyword)
+            (QuoteNode(keyword), cursor + lastindex(line))
+        else
+            try
+                Meta.parse(code, cursor; raise=raise)
+            catch err
+                @error "parse error"
+                break
+            end
+        end
+        str = SubString(code, cursor, prevind(code, ncursor))
+        if !isempty(strip(str)) && ex !== nothing
+            push!(results, ParsedExpression(ex, str))
+        end
+        cursor = ncursor
+    end
+    if linenumbernode isa LineNumberNode
+        exs = Meta.parseall(code; filename=linenumbernode.file).args
+        @assert length(exs) == 2 * length(results) "Issue at $linenumbernode:\n$code"
+        for (i, ex) in enumerate(Iterators.partition(exs, 2))
+            @assert ex[1] isa LineNumberNode
+            expr = Expr(:toplevel, ex...) # LineNumberNode + expression
+            # in the REPL each evaluation is considered a new file, e.g.
+            # REPL[1], REPL[2], ..., so try to mimic that by incrementing
+            # the counter for each sub-expression in this code block
+            if linenumbernode.file === Symbol("REPL")
+                newfile = "REPL[$i]"
+                # to reset the line counter for each new "file"
+                lineshift = 1 - ex[1].line
+                _update_linenumbernodes!(expr, newfile, lineshift)
+            else
+                _update_linenumbernodes!(expr, linenumbernode.file, linenumbernode.line)
+            end
+            results[i] = ParsedExpression(expr, results[i].code)
+        end
+    end
+    results
+end
+
+function _update_linenumbernodes!(x::Expr, newfile, lineshift)
+    for i = 1:length(x.args)
+        x.args[i] = _update_linenumbernodes!(x.args[i], newfile, lineshift)
+    end
+    return x
+end
+_update_linenumbernodes!(x::Any, newfile, lineshift) = x
+function _update_linenumbernodes!(x::LineNumberNode, newfile, lineshift)
+    return LineNumberNode(x.line + lineshift, newfile)
+end
diff --git a/src/replblock.jl b/src/replblock.jl
new file mode 100644
index 0000000..1cf1f4c
--- /dev/null
+++ b/src/replblock.jl
@@ -0,0 +1,161 @@
+"""
+    struct CodeBlock
+
+Represents a single block in a sequence of REPL inputs and outputs in the [`REPLResult`](@ref) object.
+
+# Properties
+
+- `input::Bool`: whether this block represents a REPL input (i.e. an input `julia>`) or an output
+  from the REPL (either the plain string representation of the object, or the standard output or error
+  stream contents).
+
+- `code::String`: the contents of the block
+
+"""
+struct CodeBlock
+    input::Bool
+    code::String
+end
+
+"""
+    struct REPLResult
+
+The result from a [`replblock!`](@ref) evaluation. It contains a sequence of input and output blocks.
+The inputs and outputs are separated, so that it would be easy for the users to style them differently
+if that is needed.
+
+See also: [`CodeBlock`](@ref), [`join_to_string`](@ref).
+
+# Properties
+
+- `sandbox :: Sandbox`: The `Sandbox` object in which the code was evaluated.
+- `blocks :: Vector{CodeBlock}`: The sequence of input and output block text.
+"""
+struct REPLResult
+    sandbox::Sandbox
+    blocks::Vector{CodeBlock}
+    _code::AbstractString
+    _source_exprs::Vector{Any}
+end
+
+function join_to_string(result::REPLResult)
+    out = IOBuffer()
+    for block in result.blocks
+        println(out, block.code)
+    end
+    return String(take!(out))
+end
+
+"""
+    CodeEvaluation.replblock!(sandbox::Sandbox, code::AbstractString; kwargs...) -> REPLResult
+
+Evaluates the code in a special REPL-mode, where `code` gets split up into expressions,
+each of which gets evaluated one by one. The output is a string representing what this
+would look like if each expression had been evaluated in the REPL as separate commands.
+
+See also: [`REPLResult`](@ref), [`join_to_string`](@ref).
+
+# Keywords
+
+- `color::Bool=true`: determines whether or not to capture colored output (i.e. controls
+  the IOContext).
+
+- `post_process_inputs`: a function that can be used to post-process the input expressions.
+  It does not affect the code that is evaluated, just what gets included in the REPL input
+  blocks.
+"""
+function replblock!(
+    sandbox::Sandbox, code::AbstractString;
+    color::Bool=true,
+    post_process_inputs = identity,
+)
+    exprs = parseblock(
+        code;
+        keywords = false,
+        # line unused, set to 0
+        linenumbernode = LineNumberNode(0, "REPL"),
+    )
+    codeblocks = CodeBlock[]
+    source_exprs = map(exprs) do pex
+        input = post_process_inputs(pex.code)
+        result = evaluate!(sandbox, pex.expr; color, softscope=true, setans = true)
+        # Add the input and output to the codeblocks, if appropriate.
+        if !isempty(input)
+            push!(codeblocks, CodeBlock(true, _prepend_prompt(input)))
+        end
+        # Determine the output string and add to codeblocks
+        object_repl_repr = let buffer = IOContext(IOBuffer(), :color=>color)
+            if !result.error
+                hide = REPL.ends_with_semicolon(input)
+                _result_to_string(buffer, hide ? nothing : result.value)
+            else
+                _error_to_string(buffer, result.value, result.backtrace)
+            end
+        end
+        # Construct the full output. We have to prepend the stdout/-err to the
+        # output first, and then finally render the returned object.
+        out = IOBuffer()
+        print(out, result.output) # stdout and stderr from the evaluation
+        if !isempty(input) && !isempty(object_repl_repr)
+            print(out, object_repl_repr, "\n")
+        end
+        outstr = _remove_sandbox_from_output(sandbox, String(take!(out)))
+        push!(codeblocks, CodeBlock(false, outstr))
+        return (;
+            expr = pex,
+            result,
+            input,
+            outstr,
+        )
+    end
+    return REPLResult(sandbox, codeblocks, code, source_exprs)
+end
+
+# Replace references to gensym'd module with Main
+function _remove_sandbox_from_output(sandbox::Sandbox, str::AbstractString)
+    replace(str, Regex(("(Main\\.)?$(nameof(sandbox))")) => "Main")
+end
+
+function _prepend_prompt(input::AbstractString)
+    prompt  = "julia> "
+    padding = " "^length(prompt)
+    out = IOBuffer()
+    for (n, line) in enumerate(split(input, '\n'))
+        line = rstrip(line)
+        println(out, n == 1 ? prompt : padding, line)
+    end
+    rstrip(String(take!(out)))
+end
+
+function _result_to_string(buffer::IO, value::Any)
+    if value !== nothing
+        Base.invokelatest(
+            show,
+            IOContext(buffer, :limit => true),
+            MIME"text/plain"(),
+            value
+        )
+    end
+    return _sanitise(buffer)
+end
+
+function _error_to_string(buffer::IO, e::Any, bt)
+    # Remove unimportant backtrace info.
+    bt = _remove_common_backtrace(bt, backtrace())
+    # Remove everything below the last eval call (which should be the one in IOCapture.capture)
+    index = findlast(ptr -> Base.ip_matches_func(ptr, :eval), bt)
+    bt = (index === nothing) ? bt : bt[1:(index - 1)]
+    # Print a REPL-like error message.
+    print(buffer, "ERROR: ")
+    Base.invokelatest(showerror, buffer, e, bt)
+    return _sanitise(buffer)
+end
+
+# Strip trailing whitespace from each line and return resulting string
+function _sanitise(buffer::IO)
+    out = IOBuffer()
+    for line in eachline(seekstart(Base.unwrapcontext(buffer)[1]))
+        println(out, rstrip(line))
+    end
+    return rstrip(String(take!(out)), '\n')
+end
diff --git a/src/sandbox.jl b/src/sandbox.jl
index 570f3d1..b613f98 100644
--- a/src/sandbox.jl
+++ b/src/sandbox.jl
@@ -1,7 +1,7 @@
-# Constructs a new sandbox module, that emulates an emptry Julia Main module.
-function _sandbox_module(sym::Symbol)
+# Constructs a new sandbox module, that emulates an empty Julia Main module.
+function _sandbox_module(name::Symbol)
     # If the module does not exists already, we need to construct a new one.
-    m = Module(sym)
+    m = Module(name)
     # eval(expr) is available in the REPL (i.e. Main) so we emulate that for the sandbox
     Core.eval(m, :(eval(x) = Core.eval($m, x)))
     # modules created with Module() does not have include defined
@@ -9,133 +9,189 @@ function _sandbox_module(sym::Symbol)
     return m
 end
 
-# TODO: add a method to write to _codebuffer without evaluating the code
-# This is to enable the "ContinuedCode" use case, where you want to "prepare"
-# the input code over multiple writes.
+"""
+    mutable struct Sandbox
+
+Represents a fake Julia `Main` module, where code can be evaluated in isolation.
+
+Technically, it wraps a fresh Julia `Module` object (accessible via the `.m` property),
+and the code is evaluated within the context of that module.
+
+# Properties
+
+- `m :: Module`: The actual Julia module in which the code will be evaluated.
+- `pwd :: String`: The working directory where the code gets evaluated (irrespective of
+  the current working directory of the process).
+
+# Constructors
+
+```julia
+Sandbox([name::Symbol]; workingdirectory::AbstractString=pwd())
+```
+
+Creates a new `Sandbox` object. If `name` is not provided, a unique name is generated.
+`workingdirectory` can be used to set a working directory that is different from the current
+one.
+
+See also: [`evaluate!`](@ref).
+"""
 mutable struct Sandbox
     m::Module
     pwd::String
-    _codebuffer::IOBuffer
-    Sandbox(sym::Symbol, pwd::AbstractString) = new(_sandbox_module(sym), pwd, IOBuffer())
-end
 
+    function Sandbox(
+        name::Union{Symbol,Nothing}=nothing;
+        workingdirectory::AbstractString=pwd()
+    )
+        if isnothing(name)
+            name = Symbol("__CodeEvaluation__", _gensym_string())
+        end
+        return new(_sandbox_module(name), workingdirectory)
+    end
+end
 # TODO: by stripping the #-s, we're probably losing the uniqueness guarantee?
 _gensym_string() = lstrip(string(gensym()), '#')
 
-function evaluate!(sandbox::Sandbox; ansicolor::Bool=true)
-    code = String(take!(sandbox._codebuffer))
+"""
+    Core.eval(sandbox::Sandbox, expr) -> Any
 
-    # Evaluate the code block. We redirect stdout/stderr to `buffer`.
-    result, buffer = nothing, IOBuffer()
+Convenience function that evaluates the given Julia expression in the sandbox module.
+This is low-level and does not do any handling of evaluation (like enforcing the working
+directory, capturing outputs, or error handling).
+"""
+function Base.Core.eval(sandbox::Sandbox, expr)
+    return Core.eval(sandbox.m, expr)
+end
 
-    # TODO: use keywords, linenumbernode?
-    @show parseblock(code)
-    for (ex, str) in parseblock(code)
-        c = IOCapture.capture(rethrow=InterruptException, color=ansicolor) do
-            cd(sandbox.pwd) do
-                Core.eval(sandbox.m, ex)
-            end
-        end
-        Core.eval(sandbox.m, Expr(:global, Expr(:(=), :ans, QuoteNode(c.value))))
-        result = c.value
-        print(buffer, c.output)
-        if c.error
-            #bt = Documenter.remove_common_backtrace(c.backtrace)
-            bt = c.backtrace
-            @error """
-                Error executing code:
-                ```
-                $(code)
-                ```
-            """ exception = (c.value, bt)
-            return
+"""
+    Base.nameof(sandbox::Sandbox) -> Symbol
+
+Returns the name of the underlying module of the `Sandbox` object.
+"""
+Base.nameof(sandbox::Sandbox) = nameof(sandbox.m)
+
+# Will either be [`AnsValue`](@ref) if the code evaluated successfully,
+# or [`ExceptionValue`](@ref) if it did not.
+abstract type AbstractValue end
+
+struct AnsValue <: AbstractValue
+    object::Any
+end
+Base.getindex(v::AnsValue) = v.object
+
+struct ExceptionValue <: AbstractValue
+    exception::Any
+    backtrace::Any
+    full_backtrace::Any
+end
+Base.getindex(v::ExceptionValue) = v.exception
+
+"""
+    struct Result
+
+Contains the result of an evaluation (see [`evaluate!`](@ref)).
+
+# Properties
+
+- `sandbox :: Sandbox`: The `Sandbox` object in which the code was evaluated.
+- `value :: AbstractValue`: The result of the evaluation. Depending on the outcome
+  of the evaluation (success vs error etc), this will be of a different subtype of
+  [`AbstractValue`](@ref).
+- `output :: String`: The captured stdout and stderr output of the evaluation.
+"""
+struct Result
+    sandbox::Sandbox
+    _value::AbstractValue
+    output::String
+    _source_expr::Any
+end
+
+function Base.getproperty(r::Result, name::Symbol)
+    if name === :error
+        return getfield(r, :_value) isa ExceptionValue
+    elseif name === :value
+        return getfield(r, :_value)[]
+    elseif name === :backtrace
+        value = getfield(r, :_value)
+        if value isa ExceptionValue
+            return value.backtrace
+        else
+            return nothing
         end
+    else
+        return getfield(r, name)
     end
-
-    return (; result, output=String(take!(buffer)))
 end
 
-Base.write(sandbox::Sandbox, data) = write(sandbox._codebuffer, data)
+function Base.propertynames(::Result) # dispatch on the instance so `propertynames(r)` lists what `getproperty` serves
+    return (:sandbox, :value, :output, :error, :backtrace)
+end
 
 """
-Returns a vector of parsed expressions and their corresponding raw strings.
+    CodeEvaluation.evaluate!(sandbox::Sandbox, expr; kwargs...) -> Result
+
+Low-level function to evaluate Julia expressions in a sandbox. The keyword arguments can be
+used to control how exactly the code is evaluated.
+
+# Keyword arguments
 
-Returns a `Vector` of tuples `(expr, code)`, where `expr` is the corresponding expression
-(e.g. a `Expr` or `Symbol` object) and `code` is the string of code the expression was
-parsed from.
+- `setans :: Bool=false`: whether or not to set the result of the expression to `ans`, emulating
+  the behavior of the Julia REPL.
+- `softscope :: Bool=false`: evaluates the code in REPL softscope mode.
+- `color :: Bool=true`: whether or not to capture colored output (i.e. controls the IOContext
+  of the output stream; see the `IOCapture.capture` function for more details).
 
-The keyword argument `skip = N` drops the leading `N` lines from the input string.
+# REPL mode
 
-If `raise=false` is passed, the `Meta.parse` does not raise an exception on parse errors,
-but instead returns an expression that will raise an error when evaluated. `parseblock`
-returns this expression normally and it must be handled appropriately by the caller.
+When evaluating the code in "REPL mode" (`repl = true`), there are the following differences:
 
-The `linenumbernode` can be passed as a `LineNumberNode` to give information about filename
-and starting line number of the block (requires Julia 1.6 or higher).
+- The code is evaluated in a "soft scope" (i.e. `REPL.softscope` is applied to the code).
+- It honors the semicolon suppression (i.e. the result of the last expression is set to `nothing`
+  if the line ends with a semicolon).
 """
-function parseblock(
-    code::AbstractString;
-    skip=0,
-    keywords=true,
-    raise=true,
-    linenumbernode=nothing
+function evaluate!(
+    sandbox::Sandbox,
+    expr;
+    color::Bool=true,
+    softscope::Bool=false,
+    setans::Bool=false
 )
-    # Drop `skip` leading lines from the code block. Needed for deprecated `{docs}` syntax.
-    code = string(code, '\n')
-    code = last(split(code, '\n', limit=skip + 1))
-    endofstr = lastindex(code)
-    results = []
-    cursor = 1
-    while cursor < endofstr
-        # Check for keywords first since they will throw parse errors if we `parse` them.
-        line = match(r"^(.*)\r?\n"m, SubString(code, cursor)).match
-        keyword = Symbol(strip(line))
-        (ex, ncursor) = if keywords && haskey(Docs.keywords, keyword)
-            (QuoteNode(keyword), cursor + lastindex(line))
-        else
-            try
-                Meta.parse(code, cursor; raise=raise)
-            catch err
-                @error "parse error"
-                break
-            end
-        end
-        str = SubString(code, cursor, prevind(code, ncursor))
-        if !isempty(strip(str)) && ex !== nothing
-            push!(results, (ex, str))
+    if softscope
+        expr = REPL.softscope(expr)
+    end
+    c = IOCapture.capture(; rethrow=InterruptException, color) do
+        cd(sandbox.pwd) do
+            Core.eval(sandbox, expr)
         end
-        cursor = ncursor
     end
-    if linenumbernode isa LineNumberNode
-        exs = Meta.parseall(code; filename=linenumbernode.file).args
-        @assert length(exs) == 2 * length(results) "Issue at $linenumbernode:\n$code"
-        for (i, ex) in enumerate(Iterators.partition(exs, 2))
-            @assert ex[1] isa LineNumberNode
-            expr = Expr(:toplevel, ex...) # LineNumberNode + expression
-            # in the REPL each evaluation is considered a new file, e.g.
-            # REPL[1], REPL[2], ..., so try to mimic that by incrementing
-            # the counter for each sub-expression in this code block
-            if linenumbernode.file === Symbol("REPL")
-                newfile = "REPL[$i]"
-                # to reset the line counter for each new "file"
-                lineshift = 1 - ex[1].line
-                update_linenumbernodes!(expr, newfile, lineshift)
-            else
-                update_linenumbernodes!(expr, linenumbernode.file, linenumbernode.line)
-            end
-            results[i] = (expr, results[i][2])
+    value = if c.error
+        ExceptionValue(c.value, c.backtrace, c.backtrace)
+    else
+        if setans
+            Core.eval(sandbox.m, Expr(:global, Expr(:(=), :ans, QuoteNode(c.value))))
         end
+        AnsValue(c.value)
     end
-    results
+    return Result(
+        sandbox,
+        value,
+        c.output,
+        expr,
+    )
 end
 
-function update_linenumbernodes!(x::Expr, newfile, lineshift)
-    for i = 1:length(x.args)
-        x.args[i] = update_linenumbernodes!(x.args[i], newfile, lineshift)
+function _remove_common_backtrace(bt, reference_bt = backtrace())
+    cutoff = nothing
+    # We'll start from the top of the backtrace (end of the array) and go down, checking
+    # if the backtraces agree
+    for ridx in 1:length(bt)
+        # Cancel the search if we run out of the reference BT or find a non-matching frame:
+        if ridx > length(reference_bt) || bt[length(bt) - ridx + 1] != reference_bt[length(reference_bt) - ridx + 1]
+            cutoff = length(bt) - ridx + 1
+            break
+        end
     end
-    return x
-end
-update_linenumbernodes!(x::Any, newfile, lineshift) = x
-function update_linenumbernodes!(x::LineNumberNode, newfile, lineshift)
-    return LineNumberNode(x.line + lineshift, newfile)
+    # It's possible that the loop does not find anything, i.e. that all BT elements are in
+    # the reference_BT too. In that case we'll just return an empty BT.
+    bt[1:(cutoff === nothing ? 0 : cutoff)]
 end
diff --git a/test/codeblock.jl b/test/codeblock.jl
new file mode 100644
index 0000000..6e54e91
--- /dev/null
+++ b/test/codeblock.jl
@@ -0,0 +1,115 @@
+@testset "codeblock! - basic" begin
+    # Basic cases
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "2+2")
+        @test !r.error
+        @test r.value == 4
+        @test r.output == ""
+    end
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, ":foo")
+        @test !r.error
+        @test r.value === :foo
+        @test r.output == ""
+    end
+    # Output capture
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "print(\"123\")")
+        @test !r.error
+        @test r.value === nothing
+        @test r.output == "123"
+    end
+    # Multi-line evaluation
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "x=25\nx *= 2\nx - 8")
+        @test !r.error
+        @test r.value == 42
+        @test r.output == ""
+    end
+    # Complex session
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "x=25\nx *= 2\ny = x - 8")
+        @test !r.error
+        @test r.value == 42
+        @test r.output == ""
+
+        r = CodeEvaluation.codeblock!(sb, "print(string(y))")
+        @test !r.error
+        @test r.value === nothing
+        @test r.output == "42"
+
+        r = CodeEvaluation.codeblock!(sb, "s = string(y); println(s); length(s)")
+        @test !r.error
+        @test r.value == 2
+        @test r.output == "42\n"
+    end
+end
+
+@testset "codeblock! - errors" begin
+    # Error handling
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "error(\"x\")")
+        @test r.error
+        @test r.value isa ErrorException
+        @test r.value.msg == "x"
+        @test r.output == ""
+    end
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "print(\"x\"); error(\"x\"); print(\"y\")")
+        @test r.error
+        @test r.value isa ErrorException
+        @test r.value.msg == "x"
+        @test r.output == "x"
+    end
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.codeblock!(sb, "print(\"x\")\nerror(\"x\")\nprint(\"y\")")
+        @test r.error
+        @test r.value isa ErrorException
+        @test r.value.msg == "x"
+        @test r.output == "x"
+    end
+end
+
+@testset "codeblock! - working directory" begin
+    # Working directory
+    mktempdir() do path
+        let sb = CodeEvaluation.Sandbox(; workingdirectory=path)
+            let r = CodeEvaluation.codeblock!(sb, "pwd()")
+                @test !r.error
+                # Apparently on macOS, pwd() and the temporary directory do
+                # not exactly match. But their realpath() versions do.
+                @test realpath(r.value) == realpath(path)
+                @test r.output == ""
+            end
+
+            write(joinpath(path, "test.txt"), "123")
+            let r = CodeEvaluation.codeblock!(sb, """
+                isfile("test.txt"), read("test.txt", String)
+                """)
+                @test !r.error
+                @test r.value === (true, "123")
+            end
+            let r = CodeEvaluation.codeblock!(sb, """
+                isfile("does-not-exist.txt")
+                """)
+                @test !r.error
+                @test r.value === false
+            end
+            let r = CodeEvaluation.codeblock!(sb, """
+                read("does-not-exist.txt", String)
+                """)
+                @test r.error
+                @test r.value isa SystemError
+            end
+        end
+    end
+end
+
+@testset "codeblock! - parse errors" begin
+    sb = CodeEvaluation.Sandbox()
+    let r = CodeEvaluation.codeblock!(sb, "...")
+        @test_broken r.error
+        @test_broken r.value isa ParseError
+        @test r.output == ""
+    end
+end
diff --git a/test/parseblock.jl b/test/parseblock.jl
new file mode 100644
index 0000000..c741fc4
--- /dev/null
+++ b/test/parseblock.jl
@@ -0,0 +1,95 @@
+@testset "basic" begin
+    let exprs = CodeEvaluation.parseblock("")
+        @test isa(exprs, Vector{CodeEvaluation.ParsedExpression})
+        @test isempty(exprs)
+    end
+    let exprs = CodeEvaluation.parseblock("0")
+        @test isa(exprs, Vector{CodeEvaluation.ParsedExpression})
+        @test length(exprs) == 1
+        let expr = exprs[1]
+            @test expr.expr == 0
+            @test expr.code == "0\n" # TODO: trailing newline?
+        end
+    end
+    let exprs = CodeEvaluation.parseblock("40  + 2")
+        @test isa(exprs, Vector{CodeEvaluation.ParsedExpression})
+        @test length(exprs) == 1
+        let expr = exprs[1]
+            @test expr.expr == :(40 + 2)
+            @test expr.code == "40  + 2\n" # TODO: trailing newline?
+        end
+    end
+end
+
+@testset "complex" begin
+    exprs = CodeEvaluation.parseblock(
+        """
+        x += 3
+        γγγ_γγγ
+        γγγ
+        """
+    )
+    @test isa(exprs, Vector{CodeEvaluation.ParsedExpression})
+    @test length(exprs) == 3
+
+    let expr = exprs[1]
+        @test expr.expr isa Expr
+        @test expr.expr.head === :(+=)
+        @test expr.code == "x += 3\n"
+    end
+
+    let expr = exprs[2]
+        @test expr.expr === :γγγ_γγγ
+        @test expr.code == "γγγ_γγγ\n"
+    end
+
+    let expr = exprs[3]
+        @test expr.expr === :γγγ
+        if VERSION >= v"1.10.0-DEV.1520" # JuliaSyntax merge
+            @test expr.code == "γγγ\n\n"
+        else
+            @test expr.code == "γγγ\n"
+        end
+    end
+end
+
+# These tests were covering cases reported in
+# https://github.com/JuliaDocs/Documenter.jl/issues/749
+# https://github.com/JuliaDocs/Documenter.jl/issues/790
+# https://github.com/JuliaDocs/Documenter.jl/issues/823
+@testset "line endings" begin
+    parse(s) = CodeEvaluation.parseblock(s)
+    for LE in ("\r\n", "\n")
+        l1, l2 = parse("x = Int[]$(LE)$(LE)push!(x, 1)$(LE)")
+        @test l1.expr == :(x = Int[])
+        @test l2.expr == :(push!(x, 1))
+        if VERSION >= v"1.10.0-DEV.1520" # JuliaSyntax merge
+            @test l1.code == "x = Int[]$(LE)$(LE)"
+            @test l2.code == "push!(x, 1)$(LE)\n"
+        else
+            @test l1.code == "x = Int[]$(LE)"
+            @test l2.code == "push!(x, 1)$(LE)"
+        end
+    end
+end
+
+@testset "multi-expr" begin
+    let exprs = CodeEvaluation.parseblock("x; y; z")
+        @test length(exprs) == 1
+        @test exprs[1].expr == Expr(:toplevel, :x, :y, :z)
+        @test exprs[1].code == "x; y; z\n"
+    end
+
+    let exprs = CodeEvaluation.parseblock("x; y; z\nq\n\n")
+        @test length(exprs) == 2
+        @test exprs[1].expr == Expr(:toplevel, :x, :y, :z)
+        @test exprs[1].code == "x; y; z\n"
+        @test exprs[2].expr == :q
+        # TODO: There is a parsing difference here, probably due to the JuliaSyntax change.
+        if VERSION < v"1.10"
+            @test exprs[2].code == "q\n"
+        else
+            @test exprs[2].code == "q\n\n\n"
+        end
+    end
+end
diff --git a/test/replblock.jl b/test/replblock.jl
new file mode 100644
index 0000000..51f9678
--- /dev/null
+++ b/test/replblock.jl
@@ -0,0 +1,121 @@
+@testset "replblock! - basic" begin
+    sb = CodeEvaluation.Sandbox()
+    let r = CodeEvaluation.replblock!(sb, "nothing")
+        @test r.sandbox === sb
+        @test length(r.blocks) == 2
+        let b = r.blocks[1]
+            @test b.input
+            @test b.code == "julia> nothing"
+        end
+        let b = r.blocks[2]
+            @test !b.input
+            @test b.code == ""
+        end
+
+        @test CodeEvaluation.join_to_string(r) == """
+        julia> nothing
+
+        """
+    end
+    let r = CodeEvaluation.replblock!(sb, "40 +  2")
+        @test r.sandbox === sb
+        @test length(r.blocks) == 2
+        let b = r.blocks[1]
+            @test b.input
+            @test b.code == "julia> 40 +  2"
+        end
+        let b = r.blocks[2]
+            @test !b.input
+            @test b.code == "42\n"
+        end
+        @test CodeEvaluation.join_to_string(r) == """
+        julia> 40 +  2
+        42
+
+        """
+    end
+    let r = CodeEvaluation.replblock!(sb, "println(\"...\")")
+        @test r.sandbox === sb
+        @test length(r.blocks) == 2
+        let b = r.blocks[1]
+            @test b.input
+            @test b.code == "julia> println(\"...\")"
+        end
+        let b = r.blocks[2]
+            @test !b.input
+            @test b.code == "...\n"
+        end
+        @test CodeEvaluation.join_to_string(r) == """
+        julia> println("...")
+        ...
+
+        """
+    end
+end
+
+@testset "replblock! - multiple expressions" begin
+    sb = CodeEvaluation.Sandbox()
+    r = CodeEvaluation.replblock!(sb, """
+    x = 2
+    x += 2
+    x ^ 2
+    """)
+    @test length(r.blocks) == 6
+    let b = r.blocks[1]
+        @test b.input
+        @test b.code == "julia> x = 2"
+    end
+    let b = r.blocks[2]
+        @test !b.input
+        @test b.code == "2\n"
+    end
+    let b = r.blocks[3]
+        @test b.input
+        @test b.code == "julia> x += 2"
+    end
+    let b = r.blocks[4]
+        @test !b.input
+        @test b.code == "4\n"
+    end
+    let b = r.blocks[5]
+        @test b.input
+        @test b.code == "julia> x ^ 2"
+    end
+    let b = r.blocks[6]
+        @test !b.input
+        @test b.code == "16\n"
+    end
+
+    @test CodeEvaluation.join_to_string(r) == """
+    julia> x = 2
+    2
+
+    julia> x += 2
+    4
+
+    julia> x ^ 2
+    16
+
+    """
+end
+
+@testset "replblock! - output & results" begin
+    sb = CodeEvaluation.Sandbox()
+    r = CodeEvaluation.replblock!(sb, """
+    print(stdout, "out"); print(stderr, "err"); 42
+    """)
+    @test length(r.blocks) == 2
+    let b = r.blocks[1]
+        @test b.input
+        @test b.code == "julia> print(stdout, \"out\"); print(stderr, \"err\"); 42"
+    end
+    let b = r.blocks[2]
+        @test !b.input
+        @test b.code == "outerr42\n"
+    end
+    @test CodeEvaluation.join_to_string(r) == """
+    julia> print(stdout, "out"); print(stderr, "err"); 42
+    outerr42
+
+    """
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 42c11d7..53c5250 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,87 +2,16 @@ using CodeEvaluation
 using Test
 
 @testset "CodeEvaluation.jl" begin
-    @testset "parseblock" begin
-        code = """
-        x += 3
-        γγγ_γγγ
-        γγγ
-        """
-        exprs = CodeEvaluation.parseblock(code)
-
-        @test isa(exprs, Vector)
-        @test length(exprs) === 3
-
-        @test isa(exprs[1][1], Expr)
-        @test exprs[1][1].head === :+=
-        @test exprs[1][2] == "x += 3\n"
-
-        @test exprs[2][2] == "γγγ_γγγ\n"
-
-        @test exprs[3][1] === :γγγ
-        if VERSION >= v"1.10.0-DEV.1520" # JuliaSyntax merge
-            @test exprs[3][2] == "γγγ\n\n"
-        else
-            @test exprs[3][2] == "γγγ\n"
-        end
+    @testset "parseblock()" begin
+        include("parseblock.jl")
     end
-
-    # These tests were covering cases reported in
-    # https://github.com/JuliaDocs/Documenter.jl/issues/749
-    # https://github.com/JuliaDocs/Documenter.jl/issues/790
-    # https://github.com/JuliaDocs/Documenter.jl/issues/823
-    let parse(x) = CodeEvaluation.parseblock(x)
-        for LE in ("\r\n", "\n")
-            l1, l2 = parse("x = Int[]$(LE)$(LE)push!(x, 1)$(LE)")
-            @test l1[1] == :(x = Int[])
-            @test l2[1] == :(push!(x, 1))
-            if VERSION >= v"1.10.0-DEV.1520" # JuliaSyntax merge
-                @test l1[2] == "x = Int[]$(LE)$(LE)"
-                @test l2[2] == "push!(x, 1)$(LE)\n"
-            else
-                @test l1[2] == "x = Int[]$(LE)"
-                @test l2[2] == "push!(x, 1)$(LE)"
-            end
-        end
+    @testset "Sandbox" begin
+        include("sandbox.jl")
     end
-
-    @testset "NamedSandboxes" begin
-        sandboxes = CodeEvaluation.NamedSandboxes(@__DIR__, "testsandbox")
-        sb1 = get!(sandboxes, "foo")
-        sb2 = get!(sandboxes, "bar")
-        sb3 = get!(sandboxes, "foo")
-        @test sb1.m !== sb2.m
-        @test sb1.m === sb3.m
-        @test sb2.m !== sb3.m
+    @testset "codeblock!" begin
+        include("codeblock.jl")
     end
-
-    @testset "evaluate!" begin
-        let sb = CodeEvaluation.Sandbox(:foo, @__DIR__)
-            write(sb, "2 + 2")
-            (result, output) = CodeEvaluation.evaluate!(sb)
-            @test result === 4
-            @test output === ""
-        end
-
-        let sb = CodeEvaluation.Sandbox(:foo, @__DIR__)
-            write(sb, "print(\"123\")")
-            (result, output) = CodeEvaluation.evaluate!(sb)
-            @test result === nothing
-            @test output === "123"
-        end
-
-        let sb = CodeEvaluation.Sandbox(:foo, @__DIR__)
-            write(
-                sb,
-                """
-                x = 2 + 2
-                print(x)
-                x + 1
-                """
-            )
-            (result, output) = CodeEvaluation.evaluate!(sb)
-            @test result === 5
-            @test output === "4"
-        end
+    @testset "replblock!" begin
+        include("replblock.jl")
     end
 end
diff --git a/test/sandbox.jl b/test/sandbox.jl
new file mode 100644
index 0000000..15820bf
--- /dev/null
+++ b/test/sandbox.jl
@@ -0,0 +1,171 @@
+@testset "Sandbox" begin
+    sb = CodeEvaluation.Sandbox(:foo)
+    @test isa(sb, CodeEvaluation.Sandbox)
+    @test nameof(sb) == :foo
+    @test sb.pwd == pwd()
+
+    sb = CodeEvaluation.Sandbox(; workingdirectory=@__DIR__)
+    @test isa(sb, CodeEvaluation.Sandbox)
+    @test nameof(sb) isa Symbol
+    @test sb.pwd == @__DIR__
+end
+
+@testset "Core.eval" begin
+    sb = CodeEvaluation.Sandbox()
+    @test Core.eval(sb, :(x = 2 + 2)) == 4
+    @test Core.eval(sb, :x) == 4
+    @test_throws UndefVarError Core.eval(sb, :y)
+end
+
+@testset "evaluate! - basic" begin
+    sb = CodeEvaluation.Sandbox()
+
+    r = CodeEvaluation.evaluate!(sb, :(2 + 2))
+    @test !r.error
+    @test r.value === 4
+    @test r.output === ""
+
+    r = CodeEvaluation.evaluate!(sb, :x)
+    @test r.error
+    @test r.value isa UndefVarError
+    @test r.output === ""
+
+    r = CodeEvaluation.evaluate!(sb, :(x = 2; nothing))
+    @test !r.error
+    @test r.value === nothing
+    @test r.output === ""
+
+    r = CodeEvaluation.evaluate!(sb, :x)
+    @test !r.error
+    @test r.value === 2
+    @test r.output === ""
+end
+
+@testset "evaluate! - ans" begin
+    # Setting the 'ans' variable is opt-in, so by default
+    # it does not get set.
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.evaluate!(sb, :(2 + 2))
+        @test !r.error
+        @test r.value === 4
+        @test r.output === ""
+        r = CodeEvaluation.evaluate!(sb, :ans)
+        @test r.error
+        @test r.value isa UndefVarError
+        @test r.output === ""
+    end
+    # If we set the 'setans' flag to true, then it does.
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.evaluate!(sb, :(2 + 2); setans=true)
+        @test !r.error
+        @test r.value === 4
+        @test r.output === ""
+        r = CodeEvaluation.evaluate!(sb, :ans)
+        @test !r.error
+        @test r.value === 4
+        @test r.output === ""
+        # Not setting it again, so it stays '4'
+        r = CodeEvaluation.evaluate!(sb, :(3 * 3); setans=false)
+        @test !r.error
+        @test r.value === 9
+        @test r.output === ""
+        r = CodeEvaluation.evaluate!(sb, :ans)
+        @test !r.error
+        @test r.value === 4
+        @test r.output === ""
+    end
+end
+
+@testset "evaluate! - pwd" begin
+    mktempdir() do path
+        # By default, the sandbox picks up the current working directory when the sandbox
+        # gets constructed.
+        let sb = CodeEvaluation.Sandbox()
+            r = CodeEvaluation.evaluate!(sb, :(pwd()))
+            @test !r.error
+            @test r.value != path
+            @test r.value == pwd()
+            @test r.output === ""
+        end
+        # But we can override that
+        let sb = CodeEvaluation.Sandbox(; workingdirectory=path)
+            r = CodeEvaluation.evaluate!(sb, :(pwd()))
+            @test !r.error
+            # Apparently on macOS, pwd() and the temporary directory do
+            # not exactly match. But their realpath() versions do.
+            @test realpath(r.value) == realpath(path)
+            @test r.output === ""
+        end
+    end
+end
+
+@testset "evaluate! - output capture" begin
+    sb = CodeEvaluation.Sandbox()
+
+    r = CodeEvaluation.evaluate!(sb, :(print("123")))
+    @test !r.error
+    @test r.value === nothing
+    @test r.output === "123"
+
+    # stdout and stderr get concatenated
+    r = CodeEvaluation.evaluate!(sb, quote
+        println(stdout, "123")
+        println(stderr, "456")
+    end)
+    @test !r.error
+    @test r.value === nothing
+    @test r.output === "123\n456\n"
+
+    # We can also capture the output in color
+    r = CodeEvaluation.evaluate!(sb, quote
+        printstyled("123"; color=:red)
+    end)
+    @test !r.error
+    @test r.value === nothing
+    @test r.output === "\e[31m123\e[39m"
+    # But this can be disabled with color=false
+    r = CodeEvaluation.evaluate!(sb, quote
+        printstyled("123"; color=:red)
+    end; color=false)
+    @test !r.error
+    @test r.value === nothing
+    @test r.output === "123"
+
+    # Capturing output of logging macros
+    r = CodeEvaluation.evaluate!(sb, quote
+        @info "12345"
+        42
+    end; color=false)
+    @test !r.error
+    @test r.value === 42
+    @test r.output == "[ Info: 12345\n"
+end
+
+@testset "evaluate! - scoping" begin
+    expr = quote
+        s = 0
+        for i = 1:10
+            s = i
+        end
+        s
+    end
+
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.evaluate!(sb, expr; color=false)
+        @test !r.error
+        @test r.value === 0
+        # The evaluation prints a warning that should look something like this:
+        #
+        # ┌ Warning: Assignment to `s` in soft scope is ambiguous because a global variable by the same name exists: `s` will be treated as a new local. Disambiguate by using `local s` to suppress this warning or `global s` to assign to the existing global variable.
+        # └ @ ~/.../CodeEvaluation/test/sandbox.jl:146
+        @test contains(r.output, "┌ Warning:")
+    end
+    # However, if we set softscope=true, it follows the REPL soft scoping rules
+    # https://docs.julialang.org/en/v1/manual/variables-and-scoping/#on-soft-scope
+    let sb = CodeEvaluation.Sandbox()
+        r = CodeEvaluation.evaluate!(sb, expr; softscope=true, color=false)
+        @test !r.error
+        @test r.value === 10
+        @test r.output == ""
+    end
+end