Skip to content

Commit 05fc887

Browse files
committed
test(wasm): Basic CST walking in Go
1 parent 5b6b281 commit 05fc887

File tree

4 files changed

+175
-10
lines changed

4 files changed

+175
-10
lines changed

packages/wasm/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,9 @@ print-runtime:
1515
go-test-es5: test/go/testmain
1616
cd test/go && ./testmain -wasm ../data/_es5.wasm -file ../data/_html5shiv-3.7.3.js
1717

18+
.PHONY: go-test-es5-lite
# Quick smoke test: match a short inline ES5 snippet (passed via -input)
# instead of reading a whole file from disk.
go-test-es5-lite: test/go/testmain
	cd test/go && ./testmain -wasm ../data/_es5.wasm -input "var x = 3; function foo() {}"

# Build the Go test driver. Every Go source in test/go that is part of the
# binary must be listed here, otherwise make will not rebuild after an edit
# (cst.go was previously missing).
test/go/testmain: test/go/testmain.go test/go/matcher.go test/go/cst.go
	cd test/go && go mod tidy && go build -o testmain

packages/wasm/test/go/cst.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"unsafe"
6+
)
7+
8+
// Code for walking a CST by hand, by accessing the raw memory.
9+
// Ultimately, we will want a higher-level API for this, but for now,
10+
// this is useful for testing/debugging.
11+
12+
// Sizes used when decoding CST records from Wasm linear memory.
const (
	// BYTES_PER_CST_REC is the size of the fixed node header that
	// GetRawCstNode reads: three 32-bit fields (count, match length, type).
	BYTES_PER_CST_REC = 12

	// SIZEOF_UINT32 is the size in bytes of one child reference.
	SIZEOF_UINT32 = 4
)

// Node type constants, as stored in the type field of a CST record.
const (
	NODE_TYPE_NONTERMINAL = 0
	NODE_TYPE_TERMINAL    = -1
	NODE_TYPE_ITER        = -2
)
21+
22+
// CstNode is the decoded form of one CST record read from Wasm memory.
type CstNode struct {
	// Count is the number of child nodes.
	Count uint32
	// MatchLen is the length of the matched text.
	MatchLen uint32
	// Type is the node kind: nonterminal 0, terminal -1, iter -2.
	Type int32
	// ChildRefs holds the addresses of the child nodes.
	ChildRefs []uint32
}
28+
29+
type CstWalker struct {
30+
matcher *WasmMatcher
31+
}
32+
33+
func NewCstWalker(matcher *WasmMatcher) *CstWalker {
34+
return &CstWalker{
35+
matcher: matcher,
36+
}
37+
}
38+
39+
// Read a CST node from memory at the given address
40+
func (p *CstWalker) GetRawCstNode(addr uint32) (*CstNode, error) {
41+
memory := p.matcher.GetModule().Memory()
42+
if memory == nil {
43+
return nil, fmt.Errorf("WebAssembly module has no memory")
44+
}
45+
46+
// Read the node data
47+
data, ok := memory.Read(addr, BYTES_PER_CST_REC)
48+
if !ok {
49+
return nil, fmt.Errorf("failed to read CST node data at address %d", addr)
50+
}
51+
52+
node := &CstNode{
53+
Count: readUint32(data, 0),
54+
MatchLen: readUint32(data, 4),
55+
Type: readInt32(data, 8),
56+
}
57+
58+
// Read child references if any
59+
if node.Count > 0 {
60+
childData, ok := memory.Read(addr+12, SIZEOF_UINT32*node.Count)
61+
if !ok {
62+
return nil, fmt.Errorf("failed to read CST child references at address %d", addr+12)
63+
}
64+
65+
node.ChildRefs = make([]uint32, node.Count)
66+
for i := uint32(0); i < node.Count; i++ {
67+
node.ChildRefs[i] = readUint32(childData, i*SIZEOF_UINT32)
68+
}
69+
}
70+
71+
return node, nil
72+
}
73+
74+
// readUint32 returns the 32-bit unsigned integer stored in the four bytes of
// data starting at offset.
//
// The bytes are loaded in the host's native byte order through an unsafe
// pointer cast. WebAssembly memory is little-endian, so the result is only
// correct on little-endian hosts.
//
// It panics if fewer than four bytes are available at offset. Without this
// check only the first byte would be bounds-checked and the load could read
// past the end of the slice.
func readUint32(data []byte, offset uint32) uint32 {
	if uint64(offset)+4 > uint64(len(data)) {
		panic(fmt.Sprintf("readUint32: offset %d out of range for %d-byte buffer", offset, len(data)))
	}
	return *(*uint32)(unsafe.Pointer(&data[offset]))
}
77+
78+
// readInt32 returns the 32-bit signed integer stored in the four bytes of
// data starting at offset.
//
// The bytes are loaded in the host's native byte order through an unsafe
// pointer cast. WebAssembly memory is little-endian, so the result is only
// correct on little-endian hosts.
//
// It panics if fewer than four bytes are available at offset. Without this
// check only the first byte would be bounds-checked and the load could read
// past the end of the slice.
func readInt32(data []byte, offset uint32) int32 {
	if uint64(offset)+4 > uint64(len(data)) {
		panic(fmt.Sprintf("readInt32: offset %d out of range for %d-byte buffer", offset, len(data)))
	}
	return *(*int32)(unsafe.Pointer(&data[offset]))
}

packages/wasm/test/go/matcher.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,26 @@ const (
1919

2020
// WasmMatcher is a Go implementation of the JavaScript WasmMatcher class for Ohm
2121
type WasmMatcher struct {
22-
runtime wazero.Runtime
23-
module api.Module
24-
input string
25-
pos int
26-
ctx context.Context
27-
ruleIds map[string]int
22+
runtime wazero.Runtime
23+
module api.Module
24+
input string
25+
pos int
26+
ctx context.Context
27+
ruleIds map[string]int
2828
defaultStartRule string
2929
}
3030

31+
// GetModule returns the WebAssembly module
32+
func (m *WasmMatcher) GetModule() api.Module {
33+
return m.module
34+
}
35+
3136
func NewWasmMatcher(ctx context.Context) *WasmMatcher {
3237
return &WasmMatcher{
33-
runtime: wazero.NewRuntime(ctx),
34-
ctx: ctx,
35-
ruleIds: make(map[string]int),
36-
pos: 0,
38+
runtime: wazero.NewRuntime(ctx),
39+
ctx: ctx,
40+
ruleIds: make(map[string]int),
41+
pos: 0,
3742
}
3843
}
3944

packages/wasm/test/go/testmain.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@ import (
66
"fmt"
77
"os"
88
"path/filepath"
9+
"strings"
910
)
1011

1112
func main() {
13+
fmt.Println("Ohm WebAssembly Matcher - Go Implementation")
1214
// Parse command line arguments
1315
wasmFile := flag.String("wasm", "test/data/_add.wasm", "Path to WebAssembly file")
1416
inputText := flag.String("input", "", "Input text to match against the grammar")
1517
inputFile := flag.String("file", "", "Path to file containing input text to match")
1618
startRule := flag.String("rule", "", "Start rule for the grammar (defaults to grammar's start rule)")
19+
verbose := flag.Bool("verbose", false, "Display verbose information about CST nodes")
1720
flag.Parse()
1821

1922
// Create a context
@@ -76,8 +79,81 @@ func main() {
7679
fmt.Printf("Error getting CST root: %v\n", err)
7780
} else {
7881
fmt.Printf("CST root node ID: %d\n", cstRoot)
82+
83+
// Create a CST walker with the matcher
84+
cstWalker := NewCstWalker(matcher)
85+
86+
// Read the CST node
87+
node, err := cstWalker.GetRawCstNode(cstRoot)
88+
if err != nil {
89+
fmt.Printf("Error reading CST node: %v\n", err)
90+
} else {
91+
fmt.Printf("CST Node - Count: %d, MatchLen: %d, Type: %d\n",
92+
node.Count, node.MatchLen, node.Type)
93+
94+
if len(node.ChildRefs) > 0 {
95+
fmt.Printf("Child references: %v\n", node.ChildRefs)
96+
}
97+
98+
// Unparse the CST to get the original text
99+
fmt.Println("\nUnparsing the CST to reconstruct the input:")
100+
unparsedText := unparse(cstWalker, cstRoot, matcher.GetInput())
101+
fmt.Printf("Unparsed text: %q\n", unparsedText)
102+
fmt.Printf("Original input: %q\n", matcher.GetInput())
103+
fmt.Printf("Match: %v\n", unparsedText == matcher.GetInput())
104+
}
105+
106+
// Display more verbose information if requested
107+
if *verbose {
108+
fmt.Println("\nCST node details have been displayed above.")
109+
fmt.Println("Node Types:")
110+
fmt.Printf(" - Terminal nodes (type %d): Leaf nodes that consume input\n", NODE_TYPE_TERMINAL)
111+
fmt.Printf(" - Iteration nodes (type %d): Used for repetition operations\n", NODE_TYPE_ITER)
112+
fmt.Printf(" - Non-terminal nodes (type %d): Internal nodes with children\n", NODE_TYPE_NONTERMINAL)
113+
fmt.Println("\nThe unparse function reconstructs the original input by collecting text from all terminal nodes in order.")
114+
}
79115
}
80116
} else {
81117
fmt.Println("Match failed")
82118
}
83119
}
120+
121+
// unparse walks the CST starting from the given node and reconstructs the original text
122+
// It returns the reconstructed text from the terminal nodes
123+
func unparse(walker *CstWalker, nodeAddr uint32, input string) string {
124+
var result strings.Builder
125+
pos := uint32(0)
126+
unparseNode(walker, nodeAddr, &pos, input, &result)
127+
return result.String()
128+
}
129+
130+
// unparseNode is a helper function that recursively processes nodes and builds the result
131+
func unparseNode(walker *CstWalker, nodeAddr uint32, pos *uint32, input string, result *strings.Builder) {
132+
// Read the current node
133+
node, err := walker.GetRawCstNode(nodeAddr)
134+
if err != nil {
135+
fmt.Printf("Error reading CST node: %v\n", err)
136+
return
137+
}
138+
139+
// Handle terminal nodes - append the consumed text to the result
140+
if node.Type == NODE_TYPE_TERMINAL {
141+
if *pos < uint32(len(input)) && node.MatchLen > 0 {
142+
end := *pos + node.MatchLen
143+
if end > uint32(len(input)) {
144+
end = uint32(len(input))
145+
}
146+
matchedText := input[*pos:end]
147+
result.WriteString(matchedText)
148+
149+
// Update position only after processing terminal nodes
150+
*pos += node.MatchLen
151+
}
152+
return
153+
}
154+
155+
// For all other node types (nonterminal, iteration, etc.), process children recursively
156+
for _, childAddr := range node.ChildRefs {
157+
unparseNode(walker, childAddr, pos, input, result)
158+
}
159+
}

0 commit comments

Comments
 (0)