Skip to content

Commit 13fe4af

Browse files
authored
Merge pull request #1 from jasagiri/for_Gods_and_Golem_Inc
modenize for "Gods & Golem, Inc." s work
2 parents 409f679 + cb7ca91 commit 13fe4af

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+17383
-1
lines changed

benchmarks/bench_suite.nim

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import std/[times, strutils, tables, json, os]
2+
import npeg
3+
4+
type
  BenchmarkResult = object
    ## Summary of one named benchmark run: how many times the body was
    ## executed and the aggregate timings recorded for those executions.
    name: string
    iterations: int
    totalTime, averageTime, minTime, maxTime: float
12+
13+
proc runBenchmark(name: string, iterations: int,
                  body: proc()): BenchmarkResult =
  ## Calls `body` `iterations` times, timing every call with `cpuTime`, and
  ## returns the total, average, minimum and maximum per-call duration.
  ##
  ## When `iterations` is zero or negative `body` is never called and every
  ## timing field is left at 0.0, rather than reporting a NaN average
  ## (0.0 / 0.0) and a `float.high` minimum.
  result.name = name
  result.iterations = iterations
  if iterations <= 0:
    return

  # Start the minimum at the largest float so the first sample replaces it;
  # `maxTime` and `totalTime` start at the zero-initialised 0.0.
  result.minTime = float.high
  for _ in 0..<iterations:
    let startedAt = cpuTime()
    body()
    let elapsed = cpuTime() - startedAt
    result.totalTime += elapsed
    result.minTime = min(result.minTime, elapsed)
    result.maxTime = max(result.maxTime, elapsed)

  result.averageTime = result.totalTime / float(iterations)
28+
29+
proc formatTime(t: float): string =
  ## Renders a duration given in seconds with two decimals, choosing the
  ## unit by magnitude: microseconds below 0.001, milliseconds below 1.0,
  ## seconds otherwise.
  let (scaled, unit) =
    if t < 0.001: (t * 1_000_000, " μs")
    elif t < 1.0: (t * 1_000, " ms")
    else: (t, " s")
  result = formatFloat(scaled, ffDecimal, 2) & unit
36+
37+
proc printResults(results: seq[BenchmarkResult]) =
  ## Prints a report to stdout: a header, then for each entry its name,
  ## iteration count and the average, min, max and total durations rendered
  ## through `formatTime`.
  echo "\nBenchmark Results:"
  echo "=================="
  for entry in results:
    let timings = [
      (" Average: ", entry.averageTime),
      (" Min: ", entry.minTime),
      (" Max: ", entry.maxTime),
      (" Total: ", entry.totalTime)]
    echo "\n", entry.name
    echo " Iterations: ", entry.iterations
    for (label, seconds) in timings:
      echo label, formatTime(seconds)
47+
48+
# Benchmark 1: Simple string matching
49+
proc benchSimpleMatch() =
  ## Matches the literal pattern "hello" against "hello world" 1001 times
  ## (the range `0..1000` is inclusive) and discards every result.
  let subject = "hello world"
  let helloPattern = patt("hello")
  for _ in 0..1000:
    discard helloPattern.match(subject)
53+
54+
# Benchmark 2: Complex grammar
55+
proc benchComplexGrammar() =
  ## Parses the arithmetic expression "1+2*3+(4/5)*6" 101 times (`0..100`
  ## is inclusive) with a recursive expr/term/factor grammar whose start
  ## rule requires the whole input to be consumed.
  let arithmetic = peg "doc":
    doc <- expr * !1
    expr <- term * *(('+' | '-') * term)
    term <- factor * *(('*' | '/') * factor)
    factor <- +Digit | ('(' * expr * ')')

  let subject = "1+2*3+(4/5)*6"
  for _ in 0..100:
    discard arithmetic.match(subject)
64+
65+
# Benchmark 3: JSON parsing
66+
proc benchJsonParsing() =
  ## Parses a small JSON document 101 times (`0..100` is inclusive) with a
  ## simplified JSON grammar (no string escapes, no exponents).
  ##
  ## Rule names carry a `j` prefix because `object` is a reserved Nim
  ## keyword and is not usable as a rule identifier. The `ws` rule skips
  ## optional whitespace around tokens: the test document contains spaces
  ## after ':' and ',', and without it the match fails after the first key,
  ## so the benchmark would time an early failure instead of a full parse.
  let jsonParser = peg "json":
    ws <- *Space
    json <- ws * value * ws * !1
    value <- jObject | jArray | jString | jNumber | jBoolean | jNull
    jObject <- '{' * ws * ?(pair * *(',' * ws * pair)) * '}'
    pair <- jString * ws * ':' * ws * value * ws
    jArray <- '[' * ws * ?(value * ws * *(',' * ws * value * ws)) * ']'
    jString <- '"' * *(!'"' * 1) * '"'
    jNumber <- ?'-' * +Digit * ?('.' * +Digit)
    jBoolean <- "true" | "false"
    jNull <- "null"

  let testJson = """{"name": "test", "value": 42, "items": [1, 2, 3]}"""
  for _ in 0..100:
    discard jsonParser.match(testJson)
81+
82+
# Benchmark 4: Character set performance
83+
proc benchCharSets() =
  ## Runs a one-or-more character-set pattern (ASCII letters, digits and
  ## '_') 1001 times over a subject consisting of every accepted character
  ## followed by " invalid".
  let identChars = patt(+{'a'..'z', 'A'..'Z', '0'..'9', '_'})
  let subject =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" &
    " invalid"

  for _ in 0..1000:
    discard identChars.match(subject)
89+
90+
# Benchmark 5: Backreference performance
91+
proc benchBackreferences() =
  ## Matches "<div>Hello World</div>" 101 times (`0..100` is inclusive)
  ## with a grammar whose closing tag has to repeat the opening tag's name
  ## through the named backreference "name".
  let tagParser = peg "tags":
    tags <- tag * !1
    tag <- '<' * R("name", ident) * '>' * content * "</" * R("name") * '>'
    content <- *(1 - '<')
    ident <- +Alpha

  let subject = "<div>Hello World</div>"
  for _ in 0..100:
    discard tagParser.match(subject)
100+
101+
# Main benchmark runner
102+
when isMainModule:
  # Runs every benchmark, prints a console report and writes the same
  # figures to benchmark_results.json in the current directory.
  var results: seq[BenchmarkResult]

  echo "Running NPeg Benchmark Suite..."

  # The bench procs are handed over directly as the benchmark body. This
  # avoids a wrapper closure per benchmark and the ambiguity of combining
  # command-call syntax (`results.add f(...)`) with a trailing `do:` block.
  results.add(runBenchmark("Simple Match", 10000, benchSimpleMatch))
  results.add(runBenchmark("Complex Grammar", 1000, benchComplexGrammar))
  results.add(runBenchmark("JSON Parsing", 1000, benchJsonParsing))
  results.add(runBenchmark("Character Sets", 10000, benchCharSets))
  results.add(runBenchmark("Backreferences", 1000, benchBackreferences))

  printResults(results)

  # Save results to JSON
  var jsonResults = newJObject()
  jsonResults["timestamp"] = %($now())
  jsonResults["hostname"] = %(getEnv("HOSTNAME", "unknown"))
  var benchmarks = newJArray()

  for r in results:
    var bench = newJObject()
    bench["name"] = %r.name
    bench["iterations"] = %r.iterations
    # Timings are stored in seconds; the report uses milliseconds.
    bench["average_ms"] = %(r.averageTime * 1000)
    bench["min_ms"] = %(r.minTime * 1000)
    bench["max_ms"] = %(r.maxTime * 1000)
    # Total time is printed on the console, so record it in the file too.
    bench["total_ms"] = %(r.totalTime * 1000)
    benchmarks.add(bench)

  jsonResults["benchmarks"] = benchmarks

  let outputFile = "benchmark_results.json"
  writeFile(outputFile, $jsonResults)
  echo "\nResults saved to ", outputFile

doc/optimization.md

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# NPeg Optimization Guide
2+
3+
This guide explains the various optimization flags and techniques available in NPeg to improve parser performance.
4+
5+
## Compile-time Optimization Flags
6+
7+
NPeg provides several compile-time flags to control optimizations. These are controlled by the `-d:npegOptimize=N` flag, where N is a bitmask of optimization features.
8+
9+
### Available Optimizations
10+
11+
1. **Character Set Optimization** (`-d:npegOptimize=1`)
12+
- Combines adjacent character sets for efficiency
13+
- Optimizes character ranges
14+
- Default: Enabled
15+
16+
2. **Head Fail Optimization** (`-d:npegOptimize=2`)
17+
- Fails fast when patterns cannot possibly match
18+
- Reduces unnecessary backtracking
19+
- Default: Enabled
20+
21+
3. **Capture Shift Optimization** (`-d:npegOptimize=4`)
22+
- Optimizes capture stack operations
23+
- Reduces memory movement for captures
24+
- Default: Enabled
25+
26+
4. **Choice Commit Optimization** (`-d:npegOptimize=8`)
27+
- Optimizes ordered choice operations
28+
- Commits to choices earlier when possible
29+
- Default: Enabled
30+
31+
### Using Optimization Flags
32+
33+
To enable all optimizations (default):
34+
```bash
35+
nim c -d:npegOptimize=255 myparser.nim
36+
```
37+
38+
To disable all optimizations (useful for debugging):
39+
```bash
40+
nim c -d:npegOptimize=0 myparser.nim
41+
```
42+
43+
To enable only specific optimizations:
44+
```bash
45+
# Enable only character set and head fail optimizations
46+
nim c -d:npegOptimize=3 myparser.nim
47+
```
48+
49+
## Grammar Optimization Techniques
50+
51+
### 1. Rule Ordering
52+
53+
The order of rules affects inlining and performance:
54+
55+
```nim
56+
# Good: Frequently used rules first, allows inlining
57+
let parser = peg "doc":
58+
space <- ' ' | '\t'
59+
word <- +Alpha
60+
doc <- word * *(space * word)
61+
62+
# Less optimal: Complex rules first may prevent inlining
63+
let parser = peg "doc":
64+
doc <- word * *(space * word)
65+
word <- +Alpha
66+
space <- ' ' | '\t'
67+
```
68+
69+
### 2. Character Set Optimization
70+
71+
Combine character sets for better performance:
72+
73+
```nim
74+
# Good: Single character set
75+
identifier <- +{'a'..'z', 'A'..'Z', '0'..'9', '_'}
76+
77+
# Less optimal: Multiple checks
78+
identifier <- +('a'..'z' | 'A'..'Z' | '0'..'9' | '_')
79+
```
80+
81+
### 3. Avoiding Excessive Backtracking
82+
83+
Design grammars to fail fast:
84+
85+
```nim
86+
# Good: Fails quickly on non-matches
87+
number <- ?'-' * digit * *digit * ?('.' * +digit)
88+
digit <- {'0'..'9'}
89+
90+
# Less optimal: More backtracking
91+
number <- ?'-' * +{'0'..'9'} * ?('.' * +{'0'..'9'})
92+
```
93+
94+
### 4. Using Lookahead Effectively
95+
96+
Use lookahead to avoid unnecessary parsing:
97+
98+
```nim
99+
# Good: Check before parsing
100+
statement <- &keyword * (ifStmt | whileStmt | assign)
101+
102+
# Less optimal: Parse then backtrack
103+
statement <- ifStmt | whileStmt | assign
104+
```
105+
106+
## Performance Tips
107+
108+
### 1. Inline Frequently Used Rules
109+
110+
Keep frequently used rules small for inlining:
111+
112+
```nim
113+
# Will be inlined
114+
ws <- *' '
115+
116+
# Too large for inlining
117+
complexRule <- very * long * pattern * with * many * parts
118+
```
119+
120+
### 2. Use Character Spans
121+
122+
Use the span operator for repeated character matches:
123+
124+
```nim
125+
# Good: Uses span optimization
126+
identifier <- Alpha * *Alnum
127+
128+
# Less optimal: Individual character matching
129+
identifier <- Alpha * *(Alpha | Digit)
130+
```
131+
132+
### 3. Minimize Capture Overhead
133+
134+
Only capture what you need:
135+
136+
```nim
137+
# Good: Capture only needed parts
138+
keyValue <- >key * '=' * >value
139+
140+
# Less optimal: Capture everything
141+
keyValue <- >(key * '=' * value)
142+
```
143+
144+
## Debugging Performance
145+
146+
### Enable Tracing
147+
148+
Use `-d:npegTrace` to see parser execution:
149+
150+
```bash
151+
nim c -d:npegTrace myparser.nim
152+
```
153+
154+
### Generate Parser Graphs
155+
156+
Use `-d:npegDotDir=/tmp` to visualize the grammar:
157+
158+
```bash
159+
nim c -d:npegDotDir=/tmp myparser.nim
160+
dot -Tpng /tmp/mygrammar.dot -o grammar.png
161+
```
162+
163+
### Profile Your Parser
164+
165+
Use the benchmark suite (`benchmarks/bench_suite.nim`) to measure performance, or time your own parser directly with a minimal loop such as:
166+
167+
```nim
168+
import times
169+
170+
let start = cpuTime()
171+
for i in 0..1000:
172+
discard parser.match(input)
173+
echo "Time: ", cpuTime() - start
174+
```
175+
176+
## Common Pitfalls
177+
178+
1. **Over-inlining**: Very large grammars may hit the `npegPattMaxLen` limit
179+
2. **Deep recursion**: May hit stack limits with complex grammars
180+
3. **Excessive captures**: Can slow down parsing significantly
181+
4. **Poor rule ordering**: Can prevent optimization opportunities
182+
183+
## Conclusion
184+
185+
NPeg provides powerful optimization capabilities, but the best performance comes from well-designed grammars. Profile your specific use case and apply optimizations where they provide measurable benefits.

0 commit comments

Comments
 (0)