unipept · bmesuere · Feb 2, 2026 · ninewise · Feb 2, 2026 · ninewise
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ prodigal
 *.faa
 *.gff
 *.csv
+*.out
+/baseline
diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+set -euo pipefail
+
+# Benchmark script for FragGeneScanRs using hyperfine
+# Runs benchmarks on all example files with at least 10 iterations
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"
+
+# Check for hyperfine
+if ! command -v hyperfine &> /dev/null; then
+    echo "Error: hyperfine is not installed."
+    echo "Install with: brew install hyperfine or similar"
+    exit 1
+fi
+
+# Build release binary
+echo "Building release binary..."
+cargo build --release --manifest-path "$PROJECT_ROOT/Cargo.toml"
+
+# Create temp directory for outputs
+TEMP_DIR=$(mktemp -d)
+trap 'rm -rf "$TEMP_DIR"' EXIT
+
+echo ""
+echo "Running benchmarks (minimum 10 runs, 3 warmup runs each)..."
+echo "============================================================"
+
+# Benchmark 1: Short reads (NC_000913-454.fna with 454_10 training)
+echo ""
+echo "Benchmark: Short reads (NC_000913-454.fna)"
+hyperfine \
+    --warmup 3 \
+    --min-runs 20 \
+    "$BINARY -s $PROJECT_ROOT/example/NC_000913-454.fna -t 454_10 -w 0 -o $TEMP_DIR/NC_000913-454"
+
+# Benchmark 2: Complete genome (NC_000913.fna with complete training)
+echo ""
+echo "Benchmark: Complete genome (NC_000913.fna)"
+hyperfine \
+    --warmup 3 \
+    --min-runs 20 \
+    "$BINARY -s $PROJECT_ROOT/example/NC_000913.fna -t complete -w 1 -o $TEMP_DIR/NC_000913"
+
+# Benchmark 3: Long reads (contigs.fna with complete training)
+echo ""
+echo "Benchmark: Long reads (contigs.fna)"
+hyperfine \
+    --warmup 3 \
+    --min-runs 10 \
+    "$BINARY -s $PROJECT_ROOT/example/contigs.fna -t complete -w 1 -o $TEMP_DIR/contigs"
+
+echo ""
+echo "Benchmarks complete!"
diff --git a/scripts/validate.sh b/scripts/validate.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+set -euo pipefail
+
+# Validation script for FragGeneScanRs: records the output of the example
+# runs as a baseline, or compares a fresh run against that baseline.
+#
+# Usage:
+#   ./scripts/validate.sh --baseline   Generate baseline output files
+#   ./scripts/validate.sh --check      Compare current output against baseline (default)
+
+# Locations, all derived from the directory this script lives in.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"
+BINARY="${PROJECT_ROOT}/target/release/FragGeneScanRs"
+BASELINE_DIR="${PROJECT_ROOT}/baseline"
+
+# One entry per example run. Fields are colon-separated:
+#   input_file:training_file:whole_genome_flag:output_name
+EXAMPLES=(
+    "example/NC_000913-454.fna:454_10:0:NC_000913-454"
+    "example/NC_000913.fna:complete:1:NC_000913"
+    "example/contigs.fna:complete:1:contigs"
+)
+
+# Print the command-line synopsis and terminate the script with status 1.
+# This is an error path, so the text is written to stderr rather than stdout.
+usage() {
+    {
+        echo "Usage: $0 [--baseline|--check]"
+        echo "  --baseline  Generate baseline output files"
+        echo "  --check     Compare current output against baseline (default)"
+    } >&2
+    exit 1
+}
+
+# Compile the project in release mode, using the Cargo manifest at the
+# project root.
+build_release() {
+    local manifest="$PROJECT_ROOT/Cargo.toml"
+
+    printf '%s\n' "Building release binary..."
+    cargo build --release --manifest-path "$manifest"
+}
+
+run_example() {
+    local input="$1"
+    local train="$2"
+    local whole="$3"
+    local output_prefix="$4"
+
+    "$BINARY" \
+        -s "$PROJECT_ROOT/$input" \
+        -t "$train" \
+        -w "$whole" \
+        -o "$output_prefix"
+}
+
+generate_baseline() {
+    echo "Generating baseline outputs..."
+    mkdir -p "$BASELINE_DIR"
+
+    for example in "${EXAMPLES[@]}"; do
+        IFS=':' read -r input train whole name <<< "$example"
+        echo "  Processing $name..."
+        run_example "$input" "$train" "$whole" "$BASELINE_DIR/$name"
+    done
+
+    echo "Baseline generated in $BASELINE_DIR"
+}
+
+# Re-run every entry of EXAMPLES into a scratch directory and compare the
+# resulting .out, .faa and .ffn files with the ones stored in BASELINE_DIR.
+#
+# Always terminates the script: status 0 when every compared file matches,
+# status 1 when the baseline directory is missing or any file differs.
+# A baseline file that does not exist only produces a warning and is skipped.
+check_against_baseline() {
+    local temp_dir example input train whole name ext
+    local baseline_file current_file
+    local failed=0
+
+    if [[ ! -d "$BASELINE_DIR" ]]; then
+        echo "Error: Baseline directory not found. Run with --baseline first." >&2
+        exit 1
+    fi
+
+    temp_dir=$(mktemp -d)
+    # Bind the path into the trap text now instead of expanding a
+    # function-local variable when the trap fires; %q keeps it shell-safe.
+    # shellcheck disable=SC2064
+    trap "rm -rf -- $(printf '%q' "$temp_dir")" EXIT
+
+    echo "Running current version and comparing against baseline..."
+
+    for example in "${EXAMPLES[@]}"; do
+        # Split "input:train:whole:name" into its four fields.
+        IFS=':' read -r input train whole name <<< "$example"
+        echo "  Processing $name..."
+        run_example "$input" "$train" "$whole" "$temp_dir/$name"
+
+        for ext in out faa ffn; do
+            baseline_file="$BASELINE_DIR/$name.$ext"
+            current_file="$temp_dir/$name.$ext"
+
+            if [[ ! -f "$baseline_file" ]]; then
+                echo "    Warning: Baseline file $baseline_file not found"
+                continue
+            fi
+
+            # cmp -s is silent and also fails when the current run did not
+            # produce the file, so that case is reported as a difference.
+            if cmp -s "$baseline_file" "$current_file"; then
+                echo "    ✓ $name.$ext matches"
+            else
+                echo "    ✗ $name.$ext DIFFERS"
+                failed=1
+            fi
+        done
+    done
+
+    if (( failed )); then
+        echo "Some outputs differ from baseline!"
+        exit 1
+    fi
+
+    echo "All outputs match baseline!"
+    exit 0
+}
+
+# Parse arguments
+MODE="check"
+if [[ $# -gt 0 ]]; then
+    case "$1" in
+        --baseline)
+            MODE="baseline"
+            ;;
+        --check)
+            MODE="check"
+            ;;
+        *)
+            usage
+            ;;
+    esac
+fi
+
+# Main
+build_release
+
+case "$MODE" in
+    baseline)
+        generate_baseline
+        ;;
+    check)
+        check_against_baseline
+        ;;
+esac
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,5 @@ prodigal @@
     *.faa
     *.gff
     *.csv
+    *.out
+    /baseline