Skip to content

Commit

Permalink
Merge pull request #308 from swe-bench/fix/load-json-dict-preds
Browse files (browse the repository at this point in the history)
fix: handle dict-format JSON correctly when loading predictions
  • Loading branch information
carlosejimenez authored Feb 1, 2025
2 parents a0536ee + 0368b60 commit a80694d
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions swebench/harness/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,25 @@ def get_predictions_from_file(predictions_path: str, dataset_name: str, split: s
]
     if predictions_path.endswith(".json"):
         with open(predictions_path, "r") as f:
-            return json.load(f)
+            predictions = json.load(f)
+            if isinstance(predictions, dict):
+                predictions = list(predictions.values()) # compatible with SWE-agent predictions
+            if not isinstance(predictions, list):
+                raise ValueError("Predictions must be a list[prediction] or a dictionary[instance_id: prediction]")
     elif predictions_path.endswith(".jsonl"):
         with open(predictions_path, "r") as f:
-            return [json.loads(line) for line in f]
+            predictions = [json.loads(line) for line in f]
     else:
         raise ValueError("Predictions path must be .json or .jsonl")
+
+    # Validate that each prediction has an instance_id
+    for pred in predictions:
+        if not isinstance(pred, dict):
+            raise ValueError(f"Each prediction must be a dictionary, got {type(pred)}")
+        if KEY_INSTANCE_ID not in pred:
+            raise ValueError(f"Each prediction must contain '{KEY_INSTANCE_ID}'")
+
+    return predictions


def run_threadpool(func, payloads, max_workers):
Expand Down

0 comments on commit a80694d

Please sign in to comment.