Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions benchmarks/bench_suite.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import std/[times, strutils, tables, json, os]
import npeg

type
  ## Aggregate timing statistics for one named benchmark run.
  BenchmarkResult = object
    name: string        # human-readable benchmark name
    iterations: int     # number of timed invocations of the body
    totalTime: float    # sum of all per-iteration wall times, seconds
    averageTime: float  # totalTime / iterations, seconds
    minTime: float      # fastest single iteration, seconds
    maxTime: float      # slowest single iteration, seconds

proc runBenchmark(name: string, iterations: int, body: proc()): BenchmarkResult =
  ## Runs `body` `iterations` times, timing each call with `cpuTime()`,
  ## and returns aggregate statistics (total / average / min / max).
  ##
  ## Fix over the original: with `iterations <= 0` the original divided by
  ## zero (NaN average) and left `minTime` at `float.high`; we now return a
  ## zeroed result instead. Behavior for `iterations > 0` is unchanged.
  result.name = name
  result.iterations = iterations
  if iterations <= 0:
    return  # all float fields stay zero-initialized

  result.minTime = float.high
  result.maxTime = 0.0

  for i in 0..<iterations:
    let start = cpuTime()
    body()
    let elapsed = cpuTime() - start
    result.totalTime += elapsed
    result.minTime = min(result.minTime, elapsed)
    result.maxTime = max(result.maxTime, elapsed)

  result.averageTime = result.totalTime / float(iterations)

proc formatTime(t: float): string =
  ## Renders a duration given in seconds as a human-readable string with two
  ## decimals, scaled to μs (< 1 ms), ms (< 1 s) or plain seconds.
  let (scaled, unit) =
    if t < 0.001:
      (t * 1_000_000, " μs")
    elif t < 1.0:
      (t * 1_000, " ms")
    else:
      (t, " s")
  formatFloat(scaled, ffDecimal, 2) & unit

proc printResults(results: seq[BenchmarkResult]) =
  ## Prints a human-readable report for a batch of benchmark results.
  echo "\nBenchmark Results:"
  echo "=================="
  for res in results:
    echo "\n", res.name
    for (label, rendered) in [
        (" Iterations: ", $res.iterations),
        (" Average: ", formatTime(res.averageTime)),
        (" Min: ", formatTime(res.minTime)),
        (" Max: ", formatTime(res.maxTime)),
        (" Total: ", formatTime(res.totalTime))]:
      echo label, rendered

# Benchmark 1: Simple string matching
proc benchSimpleMatch() =
  ## Repeatedly matches a single literal-string pattern against a fixed
  ## input (1001 matches per call).
  let helloPat = patt("hello")
  var n = 0
  while n <= 1000:
    discard helloPat.match("hello world")
    inc n

# Benchmark 2: Complex grammar
proc benchComplexGrammar() =
  ## Benchmarks a recursive arithmetic-expression grammar (operator
  ## precedence encoded via the expr/term/factor rule ladder) against a
  ## fixed input, 101 matches per call.
  let parser = peg "doc":
    doc <- expr * !1                          # !1: input must be fully consumed
    expr <- term * *(('+' | '-') * term)      # lowest precedence: add/subtract
    term <- factor * *(('*' | '/') * factor)  # higher precedence: mul/div
    factor <- +Digit | ('(' * expr * ')')     # integer literal or parenthesised expr

  for i in 0..100:
    discard parser.match("1+2*3+(4/5)*6")

# Benchmark 3: JSON parsing
proc benchJsonParsing() =
  ## Benchmarks a minimal JSON grammar against a small fixed document,
  ## 101 matches per call.
  ##
  ## Fixes over the original draft:
  ## * the rule previously named `object` collided with the Nim keyword
  ##   `object` and could not parse inside the `peg` macro; renamed to `obj`.
  ## * the grammar now skips whitespace between tokens (rule `s`), so the
  ##   test document — which contains spaces — actually matches instead of
  ##   benchmarking only the failure path.
  let jsonParser = peg "json":
    json <- s * value * !1
    value <- (obj | array | string | number | boolean | null) * s
    obj <- '{' * s * ?(pair * *(',' * s * pair)) * '}'
    pair <- string * s * ':' * s * value
    array <- '[' * s * ?(value * *(',' * s * value)) * ']'
    # NOTE(review): no escape handling (\" etc.) — adequate for this fixed input
    string <- '"' * *(!'"' * 1) * '"'
    number <- ?'-' * +Digit * ?('.' * +Digit)
    boolean <- "true" | "false"
    null <- "null"
    s <- *Space  # npeg built-in whitespace character class

  let testJson = """{"name": "test", "value": 42, "items": [1, 2, 3]}"""
  for i in 0..100:
    discard jsonParser.match(testJson)

# Benchmark 4: Character set performance
proc benchCharSets() =
  ## Exercises repeated matching of a large identifier-character set
  ## against a fixed sample string, 1001 matches per call.
  const sample =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" & " invalid"
  let identChars = patt(+{'a'..'z', 'A'..'Z', '0'..'9', '_'})
  for _ in 0 .. 1000:
    discard identChars.match(sample)

# Benchmark 5: Backreference performance
proc benchBackreferences() =
  ## Matches a single XML-style element whose closing tag must repeat the
  ## opening tag name via a named back-reference; 101 matches per call.
  let parser = peg "tags":
    tags <- tag * !1  # exactly one element spanning the whole input
    # R("name", ident) captures the matched identifier under "name";
    # the later R("name") must match the identical text again.
    # NOTE(review): assumes npeg's R() capture/back-reference semantics —
    # confirm against the npeg documentation.
    tag <- '<' * R("name", ident) * '>' * content * "</" * R("name") * '>'
    content <- *(1 - '<')  # element body: anything up to the next '<'
    ident <- +Alpha

  for i in 0..100:
    discard parser.match("<div>Hello World</div>")

# Main benchmark runner
when isMainModule:
  ## Runs every benchmark, prints a formatted report, and writes the raw
  ## numbers to benchmark_results.json.
  echo "Running NPeg Benchmark Suite..."

  # One aggregate result per benchmark, in execution order.
  var results: seq[BenchmarkResult]

  results.add runBenchmark("Simple Match", 10000) do:
    benchSimpleMatch()
  results.add runBenchmark("Complex Grammar", 1000) do:
    benchComplexGrammar()
  results.add runBenchmark("JSON Parsing", 1000) do:
    benchJsonParsing()
  results.add runBenchmark("Character Sets", 10000) do:
    benchCharSets()
  results.add runBenchmark("Backreferences", 1000) do:
    benchBackreferences()

  printResults(results)

  # Save results to JSON
  var report = newJObject()
  report["timestamp"] = %($now())
  report["hostname"] = %(getEnv("HOSTNAME", "unknown"))

  var benchmarkArray = newJArray()
  for res in results:
    # Times are stored in milliseconds for easier downstream comparison.
    benchmarkArray.add %*{
      "name": res.name,
      "iterations": res.iterations,
      "average_ms": res.averageTime * 1000,
      "min_ms": res.minTime * 1000,
      "max_ms": res.maxTime * 1000
    }
  report["benchmarks"] = benchmarkArray

  const outputFile = "benchmark_results.json"
  writeFile(outputFile, $report)
  echo "\nResults saved to ", outputFile
185 changes: 185 additions & 0 deletions doc/optimization.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# NPeg Optimization Guide

This guide explains the various optimization flags and techniques available in NPeg to improve parser performance.

## Compile-time Optimization Flags

NPeg provides several compile-time flags to control optimizations. These are controlled by the `-d:npegOptimize=N` flag, where N is a bitmask of optimization features.

### Available Optimizations

1. **Character Set Optimization** (`-d:npegOptimize=1`)
- Combines adjacent character sets for efficiency
- Optimizes character ranges
- Default: Enabled

2. **Head Fail Optimization** (`-d:npegOptimize=2`)
- Fails fast when patterns cannot possibly match
- Reduces unnecessary backtracking
- Default: Enabled

3. **Capture Shift Optimization** (`-d:npegOptimize=4`)
- Optimizes capture stack operations
- Reduces memory movement for captures
- Default: Enabled

4. **Choice Commit Optimization** (`-d:npegOptimize=8`)
- Optimizes ordered choice operations
- Commits to choices earlier when possible
- Default: Enabled

### Using Optimization Flags

To enable all optimizations (default):
```bash
nim c -d:npegOptimize=255 myparser.nim
```

To disable all optimizations (useful for debugging):
```bash
nim c -d:npegOptimize=0 myparser.nim
```

To enable only specific optimizations:
```bash
# Enable only character set and head fail optimizations
nim c -d:npegOptimize=3 myparser.nim
```

## Grammar Optimization Techniques

### 1. Rule Ordering

The order of rules affects inlining and performance:

```nim
# Good: Frequently used rules first, allows inlining
let parser = peg "doc":
space <- ' ' | '\t'
word <- +Alpha
doc <- word * *(space * word)

# Less optimal: Complex rules first may prevent inlining
let parser = peg "doc":
doc <- word * *(space * word)
word <- +Alpha
space <- ' ' | '\t'
```

### 2. Character Set Optimization

Combine character sets for better performance:

```nim
# Good: Single character set
identifier <- +{'a'..'z', 'A'..'Z', '0'..'9', '_'}

# Less optimal: Multiple checks
identifier <- +({'a'..'z'} | {'A'..'Z'} | {'0'..'9'} | '_')
```

### 3. Avoiding Excessive Backtracking

Design grammars to fail fast:

```nim
# Good: Fails quickly on non-matches
number <- ?'-' * digit * *digit * ?('.' * +digit)
digit <- {'0'..'9'}

# Less optimal: More backtracking
number <- ?'-' * +{'0'..'9'} * ?('.' * +{'0'..'9'})
```

### 4. Using Lookahead Effectively

Use lookahead to avoid unnecessary parsing:

```nim
# Good: Check before parsing
statement <- &keyword * (ifStmt | whileStmt | assign)

# Less optimal: Parse then backtrack
statement <- ifStmt | whileStmt | assign
```

## Performance Tips

### 1. Inline Frequently Used Rules

Keep frequently used rules small for inlining:

```nim
# Will be inlined
ws <- *' '

# Too large for inlining
complexRule <- very * long * pattern * with * many * parts
```

### 2. Use Character Spans

Use span operator for repeated character matches:

```nim
# Good: Uses span optimization
identifier <- Alpha * *Alnum

# Less optimal: Individual character matching
identifier <- Alpha * *(Alpha | Digit)
```

### 3. Minimize Capture Overhead

Only capture what you need:

```nim
# Good: Capture only needed parts
keyValue <- >key * '=' * >value

# Less optimal: Capture everything
keyValue <- >(key * '=' * value)
```

## Debugging Performance

### Enable Tracing

Use `-d:npegTrace` to see parser execution:

```bash
nim c -d:npegTrace myparser.nim
```

### Generate Parser Graphs

Use `-d:npegDotDir=/tmp` to visualize grammar:

```bash
nim c -d:npegDotDir=/tmp myparser.nim
dot -Tpng /tmp/mygrammar.dot -o grammar.png
```

### Profile Your Parser

Use the benchmark suite to measure performance:

```nim
import times

let start = cpuTime()
for i in 0..1000:
discard parser.match(input)
echo "Time: ", cpuTime() - start
```

## Common Pitfalls

1. **Over-inlining**: Very large grammars may hit the `npegPattMaxLen` limit
2. **Deep recursion**: May hit stack limits with complex grammars
3. **Excessive captures**: Can slow down parsing significantly
4. **Poor rule ordering**: Can prevent optimization opportunities

## Conclusion

NPeg provides powerful optimization capabilities, but the best performance comes from well-designed grammars. Profile your specific use case and apply optimizations where they provide measurable benefits.
Loading
Loading