Merge pull request #308 from swe-bench/fix/load-json-dict-preds

fix: handle dict-format JSON correctly when loading predictions
swe-bench · Feb 1, 2025 · a80694d
2 parents a0536ee + 0368b60
commit a80694d
Showing 1 changed file with 15 additions and 2 deletions.
diff --git a/swebench/harness/utils.py b/swebench/harness/utils.py
@@ -50,12 +50,25 @@ def get_predictions_from_file(predictions_path: str, dataset_name: str, split: s
         ]
     if predictions_path.endswith(".json"):
         with open(predictions_path, "r") as f:
-            return json.load(f)
+            predictions = json.load(f)
+            if isinstance(predictions, dict):
+                predictions = list(predictions.values())  # compatible with SWE-agent predictions
+            if not isinstance(predictions, list):
+                raise ValueError("Predictions must be a list[prediction] or a dictionary[instance_id: prediction]")
     elif predictions_path.endswith(".jsonl"):
         with open(predictions_path, "r") as f:
-            return [json.loads(line) for line in f]
+            predictions = [json.loads(line) for line in f]
     else:
         raise ValueError("Predictions path must be .json or .jsonl")
+
+    # Validate that each prediction has an instance_id
+    for pred in predictions:
+        if not isinstance(pred, dict):
+            raise ValueError(f"Each prediction must be a dictionary, got {type(pred)}")
+        if KEY_INSTANCE_ID not in pred:
+            raise ValueError(f"Each prediction must contain '{KEY_INSTANCE_ID}'")
+
+    return predictions
 
 
 def run_threadpool(func, payloads, max_workers):