-
Notifications
You must be signed in to change notification settings - Fork 1
Add benchmark and validate script #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,3 +6,5 @@ prodigal | |
| *.faa | ||
| *.gff | ||
| *.csv | ||
| *.out | ||
| /baseline | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
#!/bin/bash
set -euo pipefail

# Benchmark script for FragGeneScanRs using hyperfine
# Runs benchmarks on all example files with at least 10 iterations

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"

# Check for hyperfine; report the missing dependency on stderr.
if ! command -v hyperfine &> /dev/null; then
  echo "Error: hyperfine is not installed." >&2
  echo "Install with: brew install hyperfine or similar" >&2
  exit 1
fi

# Build release binary
echo "Building release binary..."
cargo build --release --manifest-path "$PROJECT_ROOT/Cargo.toml"

# Create temp directory for outputs; removed again on any exit path.
TEMP_DIR=$(mktemp -d)
trap 'rm -rf -- "$TEMP_DIR"' EXIT

#######################################
# Benchmark one FragGeneScanRs invocation with hyperfine.
# Globals:   BINARY, PROJECT_ROOT, TEMP_DIR (all read)
# Arguments: $1 - label printed before the benchmark
#            $2 - value passed to hyperfine --min-runs
#            $3 - input file name inside $PROJECT_ROOT/example
#            $4 - value passed to -t
#            $5 - value passed to -w
#            $6 - output name, placed under TEMP_DIR and passed to -o
#######################################
run_benchmark() {
  local label="$1"
  local min_runs="$2"
  local input="$3"
  local train="$4"
  local whole="$5"
  local name="$6"
  local command_line

  # hyperfine receives the whole command as ONE string and lets a shell parse
  # it, so each word is shell-quoted here. Without this, a space in the
  # checkout path or in the temp directory would split the arguments.
  printf -v command_line '%q ' \
    "$BINARY" \
    -s "$PROJECT_ROOT/example/$input" \
    -t "$train" \
    -w "$whole" \
    -o "$TEMP_DIR/$name"

  echo ""
  echo "Benchmark: $label"
  hyperfine \
    --warmup 3 \
    --min-runs "$min_runs" \
    "${command_line% }"
}

echo ""
echo "Running benchmarks (minimum 10 runs, 3 warmup runs each)..."
echo "============================================================"

# Benchmark 1: Short reads (NC_000913-454.fna with 454_10 training)
run_benchmark "Short reads (NC_000913-454.fna)" 20 NC_000913-454.fna 454_10 0 NC_000913-454

# Benchmark 2: Complete genome (NC_000913.fna with complete training)
run_benchmark "Complete genome (NC_000913.fna)" 20 NC_000913.fna complete 1 NC_000913

# Benchmark 3: Long reads (contigs.fna with complete training)
run_benchmark "Long reads (contigs.fna)" 10 contigs.fna complete 1 contigs

echo ""
echo "Benchmarks complete!"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| #!/bin/bash | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use |
||
set -euo pipefail

# Validation script for FragGeneScanRs
# Usage:
#   ./scripts/validate.sh --baseline   Generate baseline output files
#   ./scripts/validate.sh --check      Compare current output against baseline (default)

# Paths are derived from this script's own location. Assignment is kept
# separate from `readonly` so a failing command substitution is not masked.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
BASELINE_DIR="$PROJECT_ROOT/baseline"
BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"
readonly SCRIPT_DIR PROJECT_ROOT BASELINE_DIR BINARY

# Example files and their configurations
# Format: "input_file:training_file:whole_genome_flag:output_name"
EXAMPLES=(
  "example/NC_000913-454.fna:454_10:0:NC_000913-454"
  "example/NC_000913.fna:complete:1:NC_000913"
  "example/contigs.fna:complete:1:contigs"
)
# Constant table: guard against accidental modification later in the script.
readonly EXAMPLES
|
|
||
#######################################
# Print the command-line synopsis and abort.
# Outputs: usage text on stderr (it is only shown for an invalid invocation)
# Returns: never returns; exits the shell with status 1
#######################################
usage() {
  {
    echo "Usage: $0 [--baseline|--check]"
    echo " --baseline Generate baseline output files"
    echo " --check Compare current output against baseline (default)"
  } >&2
  exit 1
}
|
|
||
#######################################
# Announce and run an optimized cargo build of the project.
# Globals: PROJECT_ROOT (read) - directory containing Cargo.toml
# Outputs: one progress line on stdout, followed by cargo's own output
# Returns: cargo's exit status
#######################################
build_release() {
  local manifest="$PROJECT_ROOT/Cargo.toml"

  echo "Building release binary..."
  cargo build --release --manifest-path "$manifest"
}
|
|
||
#######################################
# Run the FragGeneScanRs binary on a single example input.
# Globals:   BINARY (read), PROJECT_ROOT (read)
# Arguments: $1 - input file, relative to PROJECT_ROOT (passed to -s)
#            $2 - training file name (passed to -t)
#            $3 - whole-genome flag (passed to -w)
#            $4 - output prefix (passed to -o)
# Returns:   the binary's exit status
#######################################
run_example() {
  local -a cli_args=(
    -s "$PROJECT_ROOT/$1"
    -t "$2"
    -w "$3"
    -o "$4"
  )

  "$BINARY" "${cli_args[@]}"
}
|
|
||
#######################################
# Produce baseline output for every entry in EXAMPLES.
# Globals: BASELINE_DIR (read; created if missing), EXAMPLES (read)
# Outputs: progress messages on stdout
# Returns: 0 when every run_example call succeeds
#######################################
generate_baseline() {
  # Declared local so the fields parsed below do not leak into global scope.
  local example input train whole name

  echo "Generating baseline outputs..."
  mkdir -p "$BASELINE_DIR"

  # NOTE(review): a reviewer would rather have one function per example and
  # loop over those functions, instead of packing the settings into
  # colon-separated strings that are split here and named again in run_example.
  for example in "${EXAMPLES[@]}"; do
    IFS=':' read -r input train whole name <<< "$example"
    echo " Processing $name..."
    run_example "$input" "$train" "$whole" "$BASELINE_DIR/$name"
  done

  echo "Baseline generated in $BASELINE_DIR"
}
|
|
||
#######################################
# Re-run every entry in EXAMPLES into a temporary directory and compare the
# .out, .faa and .ffn results with the files stored in BASELINE_DIR.
# Globals: BASELINE_DIR (read), EXAMPLES (read)
# Outputs: per-file results on stdout; errors on stderr
# Returns: never returns; exits 0 when every file matches, and 1 when the
#          baseline directory or any baseline file is missing or a file differs
#######################################
check_against_baseline() {
  # Declared local so the loop and parsed fields do not leak into global scope.
  local example input train whole name ext
  local baseline_file current_file
  local temp_dir
  local failed=0

  if [[ ! -d "$BASELINE_DIR" ]]; then
    echo "Error: Baseline directory not found. Run with --baseline first." >&2
    exit 1
  fi

  temp_dir=$(mktemp -d)
  # This function always leaves through `exit`, so temp_dir is still in scope
  # when the EXIT trap fires.
  trap 'rm -rf -- "$temp_dir"' EXIT

  echo "Running current version and comparing against baseline..."

  for example in "${EXAMPLES[@]}"; do
    IFS=':' read -r input train whole name <<< "$example"
    echo " Processing $name..."
    run_example "$input" "$train" "$whole" "$temp_dir/$name"

    for ext in out faa ffn; do
      baseline_file="$BASELINE_DIR/$name.$ext"
      current_file="$temp_dir/$name.$ext"

      # NOTE(review): the reviewer asked for this to fail defensively. A
      # missing baseline file means nothing was compared, so it counts as a
      # failure rather than a warning that still lets the check pass.
      if [[ ! -f "$baseline_file" ]]; then
        echo " Error: Baseline file $baseline_file not found" >&2
        failed=1
        continue
      fi

      if diff -q "$baseline_file" "$current_file" > /dev/null 2>&1; then
        echo " ✓ $name.$ext matches"
      else
        echo " ✗ $name.$ext DIFFERS"
        failed=1
      fi
    done
  done

  if [[ $failed -eq 0 ]]; then
    echo "All outputs match baseline!"
    exit 0
  else
    echo "Some outputs differ from baseline!"
    exit 1
  fi
}
|
|
||
# Parse arguments
# With no argument the selector falls back to --check, which is the default
# mode. Any first argument other than the two known flags prints the usage
# text (usage does not return).
MODE="check"
case "${1---check}" in
  --baseline) MODE="baseline" ;;
  --check) MODE="check" ;;
  *) usage ;;
esac
|
Comment on lines
+105
to
+119
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Feels weird to combine the check and validate in here to deduplicate the 3 example calls, and not do the same for the benchmark. I'd merge all three. |
||
|
|
||
# Main
# Always rebuild first so the selected mode runs against the current sources,
# then hand over to the function for that mode.
build_release

if [[ "$MODE" == "baseline" ]]; then
  generate_baseline
elif [[ "$MODE" == "check" ]]; then
  check_against_baseline
fi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just use `! hyperfine -V`, no need for `command`.