diff --git a/swebench/harness/reporting.py b/swebench/harness/reporting.py
new file mode 100644
index 00000000..3920e8b8
--- /dev/null
+++ b/swebench/harness/reporting.py
@@ -0,0 +1,136 @@
+import docker
+import json
+from pathlib import Path
+
+from swebench.harness.constants import (
+    KEY_INSTANCE_ID,
+    KEY_MODEL,
+    KEY_PREDICTION,
+    RUN_EVALUATION_LOG_DIR,
+    LOG_REPORT,
+)
+from swebench.harness.docker_utils import list_images
+from swebench.harness.test_spec import make_test_spec
+
+
+def make_run_report(
+        predictions: dict,
+        full_dataset: list,
+        run_id: str,
+        client: docker.DockerClient | None = None,
+    ) -> Path:
+    """
+    Make a final evaluation and run report of the instances that have been run.
+    Also reports on images and containers that may still be running if a client is provided.
+
+    Args:
+        predictions (dict): Predictions dict generated by the model
+        full_dataset (list): List of all instances
+        run_id (str): Run ID
+        client (docker.DockerClient): Docker client (optional)
+
+    Returns:
+        Path to report file
+    """
+    # instantiate sets to store IDs of different outcomes
+    completed_ids = set()
+    resolved_ids = set()
+    error_ids = set()
+    unstopped_containers = set()
+    unremoved_images = set()
+    unresolved_ids = set()
+    incomplete_ids = set()
+    # get instances with empty patches
+    empty_patch_ids = set()
+
+    # iterate through dataset and check if the instance has been run
+    for instance in full_dataset:
+        instance_id = instance[KEY_INSTANCE_ID]
+        if instance_id not in predictions:
+            # skip instances without predictions
+            incomplete_ids.add(instance_id)
+            continue
+        prediction = predictions[instance_id]
+        if prediction.get(KEY_PREDICTION, None) in ["", None]:
+            empty_patch_ids.add(instance_id)
+            continue
+        report_file = (
+            RUN_EVALUATION_LOG_DIR
+            / run_id
+            / prediction[KEY_MODEL].replace("/", "__")
+            / prediction[KEY_INSTANCE_ID]
+            / LOG_REPORT
+        )
+        if report_file.exists():
+            # If report file exists, then the instance has been run
+            completed_ids.add(instance_id)
+            report = json.loads(report_file.read_text())
+            if report[instance_id]["resolved"]:
+                # Record if the instance was resolved
+                resolved_ids.add(instance_id)
+            else:
+                unresolved_ids.add(instance_id)
+        else:
+            # Otherwise, the instance was not run successfully
+            error_ids.add(instance_id)
+
+    if client:
+        # get remaining images and containers
+        images = list_images(client)
+        test_specs = list(map(make_test_spec, full_dataset))
+        for spec in test_specs:
+            image_name = spec.instance_image_key
+            if image_name in images:
+                unremoved_images.add(image_name)
+        containers = client.containers.list(all=True)
+        for container in containers:
+            if run_id in container.name:
+                unstopped_containers.add(container.name)
+
+    # print final report
+    dataset_ids = {i[KEY_INSTANCE_ID] for i in full_dataset}
+    print(f"Total instances: {len(full_dataset)}")
+    print(f"Instances submitted: {len(set(predictions.keys()) & dataset_ids)}")
+    print(f"Instances completed: {len(completed_ids)}")
+    print(f"Instances incomplete: {len(incomplete_ids)}")
+    print(f"Instances resolved: {len(resolved_ids)}")
+    print(f"Instances unresolved: {len(unresolved_ids)}")
+    print(f"Instances with empty patches: {len(empty_patch_ids)}")
+    print(f"Instances with errors: {len(error_ids)}")
+    if client:
+        print(f"Unstopped containers: {len(unstopped_containers)}")
+        print(f"Unremoved images: {len(unremoved_images)}")
+
+    # write report to file
+    report = {
+        "total_instances": len(full_dataset),
+        "submitted_instances": len(predictions),
+        "completed_instances": len(completed_ids),
+        "resolved_instances": len(resolved_ids),
+        "unresolved_instances": len(unresolved_ids),
+        "empty_patch_instances": len(empty_patch_ids),
+        "error_instances": len(error_ids),
+        "completed_ids": list(sorted(completed_ids)),
+        "incomplete_ids": list(sorted(incomplete_ids)),
+        "empty_patch_ids": list(sorted(empty_patch_ids)),
+        "submitted_ids": list(sorted(predictions.keys())),
+        "resolved_ids": list(sorted(resolved_ids)),
+        "unresolved_ids": list(sorted(unresolved_ids)),
+        "error_ids": list(sorted(error_ids)),
+        "schema_version": 2,
+    }
+    if client:
+        report.update({
+            "unstopped_instances": len(unstopped_containers),
+            "unstopped_containers": list(sorted(unstopped_containers)),
+            "unremoved_images": list(sorted(unremoved_images)),
+        })
+    report_file = Path(
+        list(predictions.values())[0][KEY_MODEL].replace("/", "__")
+        + f".{run_id}"
+        + ".json"
+    )
+    with open(report_file, "w") as f:
+        print(json.dumps(report, indent=4), file=f)
+    print(f"Report written to {report_file}")
+    return report_file
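Not part of the patch: a minimal usage sketch of the relocated helper. It assumes KEY_INSTANCE_ID, KEY_MODEL, and KEY_PREDICTION resolve to "instance_id", "model_name_or_path", and "model_patch" (the literal keys the Modal code below reads); the dataset name, model name, and run ID are illustrative.

    import docker
    from swebench.harness.reporting import make_run_report
    from swebench.harness.utils import load_swebench_dataset

    # Illustrative inputs: one prediction keyed by its instance_id.
    full_dataset = load_swebench_dataset("princeton-nlp/SWE-bench_Lite", "test")
    predictions = {
        "astropy__astropy-12907": {
            "instance_id": "astropy__astropy-12907",
            "model_name_or_path": "my-model",
            "model_patch": "<unified diff produced by the model>",
        },
    }

    # Docker-based runs pass a client so leftover containers/images are reported.
    make_run_report(predictions, full_dataset, "my_run", docker.from_env())
    # Callers without Docker (e.g. the Modal path) can now omit the client.
    make_run_report(predictions, full_dataset, "my_run")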
diff --git a/swebench/harness/run_evaluation.py b/swebench/harness/run_evaluation.py
index 61d2885d..b3ab7e0c 100644
--- a/swebench/harness/run_evaluation.py
+++ b/swebench/harness/run_evaluation.py
@@ -48,10 +48,10 @@
 from swebench.harness.grading import get_eval_report
 from swebench.harness.test_spec import make_test_spec, TestSpec
 from swebench.harness.utils import load_swebench_dataset, str2bool, EvaluationError
+from swebench.harness.reporting import make_run_report
 from swebench.harness.run_evaluation_modal import run_instances_modal
 
 
-
 def run_instance(
     test_spec: TestSpec,
     pred: dict,
@@ -349,124 +349,6 @@ def get_dataset_from_preds(
     return dataset
 
 
-def make_run_report(
-        predictions: dict,
-        full_dataset: list,
-        client: docker.DockerClient,
-        run_id: str
-    ) -> Path:
-    """
-    Make a final evaluation and run report of the instances that have been run.
-    Also reports on images and containers that may still running!
-
-    Args:
-        predictions (dict): Predictions dict generated by the model
-        full_dataset (list): List of all instances
-        client (docker.DockerClient): Docker client
-        run_id (str): Run ID
-
-    Returns:
-        Path to report file
-    """
-    # instantiate sets to store IDs of different outcomes
-    completed_ids = set()
-    resolved_ids = set()
-    error_ids = set()
-    unstopped_containers = set()
-    unremoved_images = set()
-    unresolved_ids = set()
-    incomplete_ids = set()
-    # get instances with empty patches
-    empty_patch_ids = set()
-
-    # iterate through dataset and check if the instance has been run
-    for instance in full_dataset:
-        instance_id = instance[KEY_INSTANCE_ID]
-        if instance_id not in predictions:
-            # skip instances without
-            incomplete_ids.add(instance_id)
-            continue
-        prediction = predictions[instance_id]
-        if prediction.get(KEY_PREDICTION, None) in ["", None]:
-            empty_patch_ids.add(instance_id)
-            continue
-        report_file = (
-            RUN_EVALUATION_LOG_DIR
-            / run_id
-            / prediction[KEY_MODEL].replace("/", "__")
-            / prediction[KEY_INSTANCE_ID]
-            / LOG_REPORT
-        )
-        if report_file.exists():
-            # If report file exists, then the instance has been run
-            completed_ids.add(instance_id)
-            report = json.loads(report_file.read_text())
-            if report[instance_id]["resolved"]:
-                # Record if the instance was resolved
-                resolved_ids.add(instance_id)
-            else:
-                unresolved_ids.add(instance_id)
-        else:
-            # Otherwise, the instance was not run successfully
-            error_ids.add(instance_id)
-
-    # get remaining images and containers
-    images = list_images(client)
-    test_specs = list(map(make_test_spec, full_dataset))
-    for spec in test_specs:
-        image_name = spec.instance_image_key
-        if image_name in images:
-            unremoved_images.add(image_name)
-    containers = client.containers.list(all=True)
-    for container in containers:
-        if run_id in container.name:
-            unstopped_containers.add(container.name)
-
-    # print final report
-    dataset_ids = {i[KEY_INSTANCE_ID] for i in full_dataset}
-    print(f"Total instances: {len(full_dataset)}")
-    print(f"Instances submitted: {len(set(predictions.keys()) & dataset_ids)}")
-    print(f"Instances completed: {len(completed_ids)}")
-    print(f"Instances incomplete: {len(incomplete_ids)}")
-    print(f"Instances resolved: {len(resolved_ids)}")
-    print(f"Instances unresolved: {len(unresolved_ids)}")
-    print(f"Instances with empty patches: {len(empty_patch_ids)}")
-    print(f"Instances with errors: {len(error_ids)}")
-    print(f"Unstopped containers: {len(unstopped_containers)}")
-    print(f"Unremoved images: {len(unremoved_images)}")
-
-    # write report to file
-    report = {
-        "total_instances": len(full_dataset),
-        "submitted_instances": len(predictions),
-        "completed_instances": len(completed_ids),
-        "resolved_instances": len(resolved_ids),
-        "unresolved_instances": len(unresolved_ids),
-        "empty_patch_instances": len(empty_patch_ids),
-        "error_instances": len(error_ids),
-        "unstopped_instances": len(unstopped_containers),
-        "completed_ids": list(sorted(completed_ids)),
-        "incomplete_ids": list(sorted(incomplete_ids)),
-        "empty_patch_ids": list(sorted(empty_patch_ids)),
-        "submitted_ids": list(sorted(predictions.keys())),
-        "resolved_ids": list(sorted(resolved_ids)),
-        "unresolved_ids": list(sorted(unresolved_ids)),
-        "error_ids": list(sorted(error_ids)),
-        "unstopped_containers": list(sorted(unstopped_containers)),
-        "unremoved_images": list(sorted(unremoved_images)),
-        "schema_version": 2,
-    }
-    report_file = Path(
-        list(predictions.values())[0][KEY_MODEL].replace("/", "__")
-        + f".{run_id}"
-        + ".json"
-    )
-    with open(report_file, "w") as f:
-        print(json.dumps(report, indent=4), file=f)
-    print(f"Report written to {report_file}")
-    return report_file
-
-
 def get_gold_predictions(dataset_name: str, split: str):
     """
     Get gold predictions for the given dataset and split.
@@ -559,7 +441,7 @@ def main(
         # clean images + make final report
         clean_images(client, existing_images, cache_level, clean)
-        make_run_report(predictions, full_dataset, client, run_id)
+        make_run_report(predictions, full_dataset, run_id, client)
 
 
 if __name__ == "__main__":
     parser = ArgumentParser()
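Note (not part of the patch): the helper's argument order also changes, with run_id now third and the Docker client optional and last, so a hypothetical positional caller outside this repository would need the same reordering as main() above and the test update below.

    # before: make_run_report(predictions, full_dataset, client, run_id)
    # after:  the client is optional and comes last
    make_run_report(predictions, full_dataset, run_id, client)
    make_run_report(predictions, full_dataset, run_id)  # e.g. when no Docker client is available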
diff --git a/swebench/harness/run_evaluation_modal.py b/swebench/harness/run_evaluation_modal.py
index 787f3e91..5976f1ef 100644
--- a/swebench/harness/run_evaluation_modal.py
+++ b/swebench/harness/run_evaluation_modal.py
@@ -16,11 +16,10 @@
 from typing import cast
 
-from logging import Logger
-
 from swebench.harness.docker_build import setup_logger
-from swebench.harness.constants import KEY_INSTANCE_ID
 from swebench.harness.utils import EvaluationError
+from swebench.harness.reporting import make_run_report
+
 
 SANDBOX_ENTRYPOINT = "run_evaluation_modal_entrypoint"
 LOCAL_SANDBOX_ENTRYPOINT_PATH = (Path(__file__).parent / f"{SANDBOX_ENTRYPOINT}.py").resolve()
 
@@ -205,110 +204,13 @@ def get_instance_image(test_spec: TestSpec) -> modal.Image:
         )
         .workdir("/testbed/")
     )
-
-def make_run_report(
-        predictions: dict,
-        full_dataset: list,
-        run_id: str
-    ) -> Path:
-    """
-    Make a final evaluation and run report of the instances that have been run.
-    Args:
-        predictions (dict): Predictions dict generated by the model
-        full_dataset (list): List of all instances
-        run_id (str): Run ID
-
-    Returns:
-        Path to report file
-    """
-    # Sets to store IDs of different outcomes
-    completed_ids = set()
-    resolved_ids = set()
-    error_ids = set()
-    unresolved_ids = set()
-    incomplete_ids = set()
-    empty_patch_ids = set()
-
-    for instance in full_dataset:
-        instance_id = instance[KEY_INSTANCE_ID]
-
-        # Instances that were not submitted
-        if instance_id not in predictions:
-            incomplete_ids.add(instance_id)
-            continue
-
-        # Instances with empty patches
-        prediction = predictions[instance_id]
-        if prediction.get("model_patch", None) in ["", None]:
-            empty_patch_ids.add(instance_id)
-            continue
-
-        # Instances that errored
-        log_dir = get_log_dir(predictions[instance_id], run_id, instance_id)
-        report_file = log_dir / "report.json"
-        if not report_file.exists():
-            error_ids.add(instance_id)
-            continue
-
-        # Instance completed successfully
-        completed_ids.add(instance_id)
-        try:
-            report = json.loads(report_file.read_text())
-            if report[instance_id]["resolved"]:
-                resolved_ids.add(instance_id)
-            else:
-                unresolved_ids.add(instance_id)
-        except Exception as e:
-            print(f"{instance_id}: error loading report.json: {e}")
-            error_ids.add(instance_id)
-
-    # Print final report
-    dataset_ids = {i[KEY_INSTANCE_ID] for i in full_dataset}
-    print(f"Total instances: {len(full_dataset)}")
-    print(f"Instances submitted: {len(set(predictions.keys()) & dataset_ids)}")
-    print(f"Instances completed: {len(completed_ids)}")
-    print(f"Instances incomplete: {len(incomplete_ids)}")
-    print(f"Instances resolved: {len(resolved_ids)}")
-    print(f"Instances unresolved: {len(unresolved_ids)}")
-    print(f"Instances with empty patches: {len(empty_patch_ids)}")
-    print(f"Instances with errors: {len(error_ids)}")
-
-    # Write report to file
-    report = {
-        "total_instances": len(full_dataset),
-        "submitted_instances": len(predictions),
-        "completed_instances": len(completed_ids),
-        "resolved_instances": len(resolved_ids),
-        "unresolved_instances": len(unresolved_ids),
-        "empty_patch_instances": len(empty_patch_ids),
-        "error_instances": len(error_ids),
-        "completed_ids": list(sorted(completed_ids)),
-        "incomplete_ids": list(sorted(incomplete_ids)),
-        "empty_patch_ids": list(sorted(empty_patch_ids)),
-        "submitted_ids": list(sorted(predictions.keys())),
-        "resolved_ids": list(sorted(resolved_ids)),
-        "unresolved_ids": list(sorted(unresolved_ids)),
-        "error_ids": list(sorted(error_ids)),
-        "schema_version": 2,
-    }
-
-    report_file = Path(
-        list(predictions.values())[0]["model_name_or_path"].replace("/", "__")
-        + f".{run_id}"
-        + ".json"
-    )
-
-    with open(report_file, "w") as f:
-        print(json.dumps(report, indent=4), file=f)
-
-    print(f"Report written to {report_file}")
-    return report_file
 
 
 def get_log_dir(pred: dict, run_id: str, instance_id: str) -> Path:
     model_name_or_path = cast(str, pred.get("model_name_or_path", "None").replace("/", "__"))
     return RUN_EVALUATION_LOG_DIR / run_id / model_name_or_path / instance_id
 
+
 @app.function(
     image=swebench_image,
     mounts=[
@@ -478,6 +380,7 @@ def run_instance_modal(
             errored=True,
         )
 
+
 def run_instances_modal(
     predictions: dict,
     instances: list,
diff --git a/swebench/harness/utils.py b/swebench/harness/utils.py
index 3188b686..6c47ccd2 100644
--- a/swebench/harness/utils.py
+++ b/swebench/harness/utils.py
@@ -36,6 +36,7 @@ def __str__(self):
             f"Check ({self.log_file}) for more information."
         )
 
+
 HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
 
 
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
index 7dff357e..b00cffa6 100644
--- a/tests/test_evaluation.py
+++ b/tests/test_evaluation.py
@@ -27,8 +27,8 @@ def test_make_run_report(tmpdir) -> None:
             }
         },
         [TEST_INSTANCE],
+        "test",
         client,
-        "test"
     )
     assert output_path.is_file()
     report = json.loads(output_path.read_text())
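Not part of the patch: a small sketch of reading the report that make_run_report writes. The file name below is illustrative; the helper derives it from the prediction's model_name_or_path (with "/" replaced by "__") plus the run_id, and the keys come from the report dict above.

    import json
    from pathlib import Path

    report = json.loads(Path("my-model.my_run.json").read_text())
    print(f'{report["resolved_instances"]}/{report["total_instances"]} resolved')
    print("unresolved:", report["unresolved_ids"])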