Skip to content

Commit 67ae292

Browse files
committed
add benchmark and validate script
1 parent cccb999 commit 67ae292

File tree

3 files changed

+188
-0
lines changed

3 files changed

+188
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ prodigal
66
*.faa
77
*.gff
88
*.csv
9+
*.out
10+
/baseline

scripts/benchmark.sh

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/bin/bash
set -euo pipefail

# Benchmark script for FragGeneScanRs using hyperfine.
#
# Builds the release binary, then times it on every example input with
# 3 warmup runs and a per-benchmark minimum number of timed runs
# (20 for the two NC_000913 inputs, 10 for contigs.fna).
# Output files are written to a temporary directory that is removed on exit.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"
WARMUP_RUNS=3

# Check for hyperfine
if ! command -v hyperfine &> /dev/null; then
  echo "Error: hyperfine is not installed." >&2
  echo "Install with: brew install hyperfine or similar" >&2
  exit 1
fi

# Build release binary
echo "Building release binary..."
cargo build --release --manifest-path "$PROJECT_ROOT/Cargo.toml"

# Create temp directory for outputs
TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT

#######################################
# Time one FragGeneScanRs invocation with hyperfine.
# Globals:   BINARY, PROJECT_ROOT, TEMP_DIR, WARMUP_RUNS (read)
# Arguments: $1 - human-readable label for the banner
#            $2 - input file name inside $PROJECT_ROOT/example
#            $3 - value passed to -t
#            $4 - value passed to -w
#            $5 - minimum number of timed runs
#            $6 - output prefix name inside TEMP_DIR
# Outputs:   banner and hyperfine report on stdout
#######################################
run_benchmark() {
  local label="$1" input="$2" train="$3" whole="$4" min_runs="$5" name="$6"
  local cmd

  # hyperfine receives the command as ONE string and runs it through a shell,
  # so every word is shell-escaped here; otherwise a checkout or temp path
  # containing spaces would be split into several arguments.
  printf -v cmd '%q ' \
    "$BINARY" \
    -s "$PROJECT_ROOT/example/$input" \
    -t "$train" \
    -w "$whole" \
    -o "$TEMP_DIR/$name"

  echo ""
  echo "Benchmark: $label ($input)"
  hyperfine \
    --warmup "$WARMUP_RUNS" \
    --min-runs "$min_runs" \
    "$cmd"
}

echo ""
echo "Running benchmarks (minimum 10-20 runs, $WARMUP_RUNS warmup runs each)..."
echo "============================================================"

# Benchmark 1: Short reads (NC_000913-454.fna with 454_10 training)
run_benchmark "Short reads" "NC_000913-454.fna" "454_10" 0 20 "NC_000913-454"

# Benchmark 2: Complete genome (NC_000913.fna with complete training)
run_benchmark "Complete genome" "NC_000913.fna" "complete" 1 20 "NC_000913"

# Benchmark 3: Long reads (contigs.fna with complete training)
run_benchmark "Long reads" "contigs.fna" "complete" 1 10 "contigs"

echo ""
echo "Benchmarks complete!"

scripts/validate.sh

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/bin/bash
2+
# Strict mode: abort on command failure (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail
3+
4+
# Validation script for FragGeneScanRs
5+
# Usage:
6+
# ./scripts/validate.sh --baseline Generate baseline output files
7+
# ./scripts/validate.sh --check Compare current output against baseline (default)
8+
9+
# Directory that contains this script, resolved by cd-ing into it and asking pwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10+
# Parent of the script directory; Cargo.toml and example/ are addressed relative to it.
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
11+
# Where --baseline writes reference outputs and --check reads them from.
BASELINE_DIR="$PROJECT_ROOT/baseline"
12+
# Release build of the tool that build_release compiles and run_example executes.
BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"
13+
14+
# Example files and their configurations
15+
# Format: "input_file:training_file:whole_genome_flag:output_name"
# The four colon-separated fields are split with IFS=':' and handed to
# run_example, which passes them to the binary as -s, -t, -w and the name part
# of the -o prefix respectively.
16+
EXAMPLES=(
17+
"example/NC_000913-454.fna:454_10:0:NC_000913-454"
18+
"example/NC_000913.fna:complete:1:NC_000913"
19+
"example/contigs.fna:complete:1:contigs"
20+
)
21+
22+
# Print the command-line synopsis on stdout and terminate the script with
# exit status 1. Never returns to the caller.
usage() {
  printf '%s\n' \
    "Usage: $0 [--baseline|--check]" \
    " --baseline Generate baseline output files" \
    " --check Compare current output against baseline (default)"
  exit 1
}
28+
29+
# Compile the project in release mode using the Cargo manifest at the
# project root.
# Globals: PROJECT_ROOT (read)
# Outputs: a progress line on stdout, followed by whatever cargo prints
build_release() {
  local manifest="$PROJECT_ROOT/Cargo.toml"

  echo "Building release binary..."
  cargo build --release --manifest-path "$manifest"
}
33+
34+
# Run the release binary on one example input.
# Globals:   BINARY, PROJECT_ROOT (read)
# Arguments: $1 - input path relative to PROJECT_ROOT (passed as -s)
#            $2 - value passed as -t
#            $3 - value passed as -w
#            $4 - output prefix (passed as -o)
# Returns:   the exit status of the binary
run_example() {
  local src="$1" model="$2" genome_flag="$3" out_prefix="$4"

  # Collect the arguments in an array so each one stays a single word.
  local -a argv=(
    -s "$PROJECT_ROOT/$src"
    -t "$model"
    -w "$genome_flag"
    -o "$out_prefix"
  )

  "$BINARY" "${argv[@]}"
}
46+
47+
# Regenerate the reference outputs: run every entry of EXAMPLES and write its
# results under BASELINE_DIR, creating that directory if it does not exist.
# Globals: EXAMPLES, BASELINE_DIR (read)
# Outputs: progress messages on stdout
# Returns: non-zero if mkdir or any run_example call fails (the script's
#          `set -e` then aborts)
generate_baseline() {
  # Loop state is local so that splitting an EXAMPLES entry cannot clobber
  # same-named variables in the caller's (global) scope.
  local example input train whole name

  echo "Generating baseline outputs..."
  mkdir -p "$BASELINE_DIR"

  for example in "${EXAMPLES[@]}"; do
    # Each entry is "input:training:whole_genome_flag:output_name".
    IFS=':' read -r input train whole name <<< "$example"
    echo " Processing $name..."
    run_example "$input" "$train" "$whole" "$BASELINE_DIR/$name"
  done

  echo "Baseline generated in $BASELINE_DIR"
}
59+
60+
# Re-run every entry of EXAMPLES into a scratch directory and compare the
# .out/.faa/.ffn files against the stored baseline.
# Globals: EXAMPLES, BASELINE_DIR (read)
# Outputs: progress and per-file results on stdout; errors and warnings on
#          stderr
# Exits:   0 when every compared file matches, 1 when the baseline directory
#          is missing or at least one file differs. Never returns.
check_against_baseline() {
  if [[ ! -d "$BASELINE_DIR" ]]; then
    echo "Error: Baseline directory not found. Run with --baseline first." >&2
    exit 1
  fi

  local temp_dir
  temp_dir=$(mktemp -d)
  # Expand the path now rather than when the trap fires: temp_dir is local,
  # so a trap that looked it up by name would hit an unbound variable (under
  # `set -u`) if it ever ran after this function's scope was gone.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$temp_dir")" EXIT

  echo "Running current version and comparing against baseline..."
  local failed=0
  # Keep all loop state local so nothing leaks into the global scope.
  local example input train whole name ext baseline_file current_file

  for example in "${EXAMPLES[@]}"; do
    # Each entry is "input:training:whole_genome_flag:output_name".
    IFS=':' read -r input train whole name <<< "$example"
    echo " Processing $name..."
    run_example "$input" "$train" "$whole" "$temp_dir/$name"

    for ext in out faa ffn; do
      baseline_file="$BASELINE_DIR/$name.$ext"
      current_file="$temp_dir/$name.$ext"

      # A missing baseline file is reported but does not fail the check.
      if [[ ! -f "$baseline_file" ]]; then
        echo " Warning: Baseline file $baseline_file not found" >&2
        continue
      fi

      # diff also exits non-zero when the current file is missing, so that
      # case is reported as a difference.
      if diff -q "$baseline_file" "$current_file" > /dev/null 2>&1; then
        echo "$name.$ext matches"
      else
        echo "$name.$ext DIFFERS"
        failed=1
      fi
    done
  done

  if [[ $failed -eq 0 ]]; then
    echo "All outputs match baseline!"
    exit 0
  else
    echo "Some outputs differ from baseline!"
    exit 1
  fi
}
104+
105+
# Select the mode from the first argument. "${1---check}" substitutes
# --check only when no argument was given at all, so an explicitly empty
# argument still falls through to usage. Further arguments are ignored.
case "${1---check}" in
  --check)
    MODE="check"
    ;;
  --baseline)
    MODE="baseline"
    ;;
  *)
    usage
    ;;
esac

# Always build first so both modes run the current sources.
build_release

# Dispatch on the selected mode; MODE can only be "baseline" or "check" here.
if [[ "$MODE" == "baseline" ]]; then
  generate_baseline
else
  check_against_baseline
fi

0 commit comments

Comments
 (0)