Skip to content

Commit c107f7f

Browse files
committed
Allow using non-terminals as right-hand side of simple rules
1 parent 4b7110e commit c107f7f

File tree

9 files changed

+112
-112
lines changed

9 files changed

+112
-112
lines changed

src/algo_setting/preprocessor_setting.py

Lines changed: 2 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from src.algo_setting.algo_setting import AlgoSetting
66
from src.grammar.cnf_grammar_template import CnfGrammarTemplate, Symbol
77
from src.graph.label_decomposed_graph import LabelDecomposedGraph
8+
from src.problems.Base.template_cfg.utils import explode_indices
89

910

1011
class PreProcessorSetting(AlgoSetting, ABC):
@@ -67,60 +68,4 @@ def preprocess(
6768
if not self.is_enabled:
6869
return graph, grammar
6970

70-
block_matrix_space = graph.block_matrix_space
71-
block_count = block_matrix_space.block_count
72-
73-
matrices = dict()
74-
for symbol, matrix in graph.matrices.items():
75-
if block_matrix_space.is_single_cell(matrix.shape):
76-
matrices[symbol] = matrix
77-
else:
78-
for i, block in enumerate(block_matrix_space.get_hyper_vector_blocks(matrix)):
79-
matrices[_index_symbol(symbol, i)] = block
80-
81-
epsilon_rules = []
82-
for non_terminal in grammar.epsilon_rules:
83-
if non_terminal.is_indexed:
84-
for i in range(block_count):
85-
epsilon_rules.append(_index_symbol(non_terminal, i))
86-
else:
87-
epsilon_rules.append(non_terminal)
88-
89-
simple_rules = []
90-
for (non_terminal, terminal) in grammar.simple_rules:
91-
if non_terminal.is_indexed or terminal.is_indexed:
92-
for i in range(block_count):
93-
simple_rules.append((_index_symbol(non_terminal, i), _index_symbol(terminal, i)))
94-
else:
95-
simple_rules.append((non_terminal, terminal))
96-
97-
complex_rules = []
98-
for (non_terminal, symbol1, symbol2) in grammar.complex_rules:
99-
if non_terminal.is_indexed or symbol1.is_indexed or symbol2.is_indexed:
100-
for i in range(block_count):
101-
complex_rules.append((
102-
_index_symbol(non_terminal, i),
103-
_index_symbol(symbol1, i),
104-
_index_symbol(symbol2, i),
105-
))
106-
else:
107-
complex_rules.append((non_terminal, symbol1, symbol2))
108-
109-
return (
110-
LabelDecomposedGraph(
111-
vertex_count=graph.vertex_count,
112-
block_matrix_space=block_matrix_space,
113-
dtype=graph.dtype,
114-
matrices=matrices
115-
),
116-
CnfGrammarTemplate(
117-
start_nonterm=grammar.start_nonterm,
118-
epsilon_rules=epsilon_rules,
119-
simple_rules=simple_rules,
120-
complex_rules=complex_rules
121-
)
122-
)
123-
124-
125-
def _index_symbol(symbol: Symbol, index: int) -> Symbol:
126-
return Symbol(f"{symbol.label}_{index}") if symbol.is_indexed else symbol
71+
return explode_indices(graph, grammar)

src/grammar/cnf_grammar_template.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import itertools
12
from pathlib import Path
2-
from typing import List, Tuple, Union
3+
from typing import List, Tuple, Union, Set
34

45

56
class Symbol:
@@ -11,7 +12,7 @@ def __repr__(self):
1112
return self.label
1213

1314
def __eq__(self, other):
14-
return self.label == other.label
15+
return isinstance(other, Symbol) and self.label == other.label
1516

1617
def __hash__(self) -> int:
1718
return self.label.__hash__()
@@ -30,11 +31,6 @@ def __init__(
3031
self.simple_rules = simple_rules
3132
self.complex_rules = complex_rules
3233

33-
for (non_terminal, terminal) in simple_rules:
34-
if terminal in self.non_terminals:
35-
raise ValueError(f"Invalid rule '{non_terminal} {terminal}'. "
36-
f"Right hand side of a simple rule should be a terminal symbol.")
37-
3834
@property
3935
def non_terminals(self):
4036
return set.union(
@@ -51,7 +47,7 @@ def read_from_pocr_cnf_file(path: Union[Path, str]) -> "CnfGrammarTemplate":
5147
The file format is expected to be as follows:
5248
- Each non-empty line represents a rule, except the last two lines.
5349
- Complex rules are in the format: `<NON_TERMINAL> <SYMBOL_1> <SYMBOL_2>`
54-
- Simple rules are in the format: `<NON_TERMINAL> <TERMINAL>`
50+
- Simple rules are in the format: `<NON_TERMINAL> <SYMBOL_1>`
5551
- Epsilon rules are in the format: `<NON_TERMINAL>`
5652
- Indexed symbols names must end with suffix `_i`.
5753
- Whitespace characters are used to separate values on one line
@@ -93,7 +89,7 @@ def read_from_pocr_cnf_file(path: Union[Path, str]) -> "CnfGrammarTemplate":
9389
raise ValueError(
9490
f"Invalid rule format: `{line}` in file `{path}`. "
9591
f"Expected formats are `<NON_TERMINAL> <SYMBOL_1> <SYMBOL_2>` for complex rules, "
96-
f"`<NON_TERMINAL> <TERMINAL>` for simple rules, and `<NON_TERMINAL>` for epsilon rules."
92+
f"`<NON_TERMINAL> <SYMBOL_1>` for simple rules, and `<NON_TERMINAL>` for epsilon rules."
9793
)
9894

9995
return CnfGrammarTemplate(start_nonterm, epsilon_rules, simple_rules, complex_rules)

src/matrix/optimized_matrix.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from typing import Optional, Tuple
44

55
import graphblas
6-
from graphblas import Matrix
76
from graphblas.core.dtypes import DataType
7+
from graphblas.core.matrix import Matrix
88
from graphblas.core.operator import Semiring, Monoid
99

1010
from src.utils.subtractable_semiring import SubOp

src/matrix/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
from typing import Any
2+
13
import graphblas
4+
from graphblas.core.dtypes import DataType
25
from graphblas.core.matrix import Matrix
6+
from graphblas.core.vector import Vector
37

48

59
def complimentary_mask(matrix: Matrix, mask: Matrix) -> Matrix:
@@ -9,3 +13,11 @@ def complimentary_mask(matrix: Matrix, mask: Matrix) -> Matrix:
913
res.ss.config["format"] = matrix.ss.config["format"]
1014
res(~mask.S) << zero.ewise_add(matrix, op=graphblas.monoid.any)
1115
return res
16+
17+
18+
def identity_matrix(one: Any, dtype: DataType, size: int) -> Matrix:
19+
return Vector.from_scalar(
20+
value=one,
21+
size=size,
22+
dtype=dtype
23+
).diag()

src/problems/Base/template_cfg/matrix/abstract_all_pairs_cfl_reachability.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33

44
from graphblas.core.matrix import Matrix
55
from graphblas.core.operator import Semiring, Monoid
6-
from graphblas.core.vector import Vector
76
from graphblas.semiring import any_pair
87

98
from src.algo_setting.algo_setting import AlgoSetting
109
from src.grammar.cnf_grammar_template import CnfGrammarTemplate
1110
from src.graph.label_decomposed_graph import OptimizedLabelDecomposedGraph, LabelDecomposedGraph
1211
from src.matrix.matrix_optimizer_setting import get_matrix_optimizer_settings
13-
from src.matrix.utils import complimentary_mask
12+
from src.matrix.utils import complimentary_mask, identity_matrix
1413
from src.problems.Base.template_cfg.template_cfg_all_pairs_reachability import AllPairsCflReachabilityAlgoInstance
1514
from src.utils.subtractable_semiring import SubtractableSemiring
1615

@@ -43,7 +42,7 @@ def monoid(self) -> Monoid:
4342

4443
def solve(self) -> Matrix:
4544
self.add_epsilon_edges()
46-
self.add_edges_for_simple_rules()
45+
self.add_edges_for_simple_terminal_rules()
4746
self.compute_transitive_closure()
4847
return self.graph[self.grammar.start_nonterm]
4948

@@ -54,14 +53,14 @@ def compute_transitive_closure(self):
5453
def add_epsilon_edges(self):
5554
if len(self.grammar.epsilon_rules) == 0:
5655
return
57-
identity_matrix = Vector.from_scalar(
58-
self.algebraic_structure.one,
56+
id_matrix = identity_matrix(
57+
one=self.algebraic_structure.one,
5958
size=self.graph.vertex_count,
6059
dtype=self.graph.dtype
61-
).diag()
60+
)
6261
for non_terminal in self.grammar.epsilon_rules:
63-
self.graph.iadd_by_symbol(non_terminal, identity_matrix, op=self.monoid)
62+
self.graph.iadd_by_symbol(non_terminal, id_matrix, op=self.monoid)
6463

65-
def add_edges_for_simple_rules(self):
66-
for (non_terminal, terminal) in self.grammar.simple_rules:
67-
self.graph.iadd_by_symbol(non_terminal, self.graph[terminal], op=self.monoid)
64+
def add_edges_for_simple_terminal_rules(self):
65+
for (lhs, rhs) in self.grammar.simple_rules:
66+
self.graph.iadd_by_symbol(lhs, self.graph[rhs], op=self.monoid)

src/problems/Base/template_cfg/matrix/incremental_all_pairs_cfl_reachability_algo.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ def compute_transitive_closure(self):
2525
accum=new_front,
2626
op=self.semiring
2727
)
28+
for (lhs, rhs) in self.grammar.simple_rules:
29+
if rhs in self.grammar.non_terminals:
30+
new_front.iadd_by_symbol(lhs, front[rhs], op=self.monoid)
2831
front = new_front.to_unoptimized()
2932
front = self.graph.rsub(front, op=self.algebraic_structure.sub_op)
3033

src/problems/Base/template_cfg/matrix/non_incremental_all_pairs_cfl_reachability_algo.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ def __init__(self, *args, **kwargs):
1616
def compute_transitive_closure(self) -> OptimizedLabelDecomposedGraph:
1717
old_nvals = self.graph.nvals
1818
while True:
19+
for (lhs, rhs) in self.grammar.simple_rules:
20+
if rhs in self.grammar.non_terminals:
21+
self.graph.iadd_by_symbol(lhs, self.graph[rhs], op=self.monoid)
1922
self.graph.mxm(
2023
self.graph.to_unoptimized(),
2124
self.grammar,
src/problems/Base/template_cfg/utils.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from typing import Tuple

from src.grammar.cnf_grammar_template import CnfGrammarTemplate, Symbol
from src.graph.label_decomposed_graph import LabelDecomposedGraph
3+
4+
5+
def explode_indices(
6+
graph: LabelDecomposedGraph,
7+
grammar: CnfGrammarTemplate
8+
) -> (LabelDecomposedGraph, CnfGrammarTemplate):
9+
block_matrix_space = graph.block_matrix_space
10+
block_count = block_matrix_space.block_count
11+
12+
matrices = dict()
13+
for symbol, matrix in graph.matrices.items():
14+
if block_matrix_space.is_single_cell(matrix.shape):
15+
matrices[symbol] = matrix
16+
else:
17+
for i, block in enumerate(block_matrix_space.get_hyper_vector_blocks(matrix)):
18+
matrices[_index_symbol(symbol, i)] = block
19+
20+
epsilon_rules = []
21+
for non_terminal in grammar.epsilon_rules:
22+
if non_terminal.is_indexed:
23+
for i in range(block_count):
24+
epsilon_rules.append(_index_symbol(non_terminal, i))
25+
else:
26+
epsilon_rules.append(non_terminal)
27+
28+
simple_rules = []
29+
for (non_terminal, terminal) in grammar.simple_rules:
30+
if non_terminal.is_indexed or terminal.is_indexed:
31+
for i in range(block_count):
32+
simple_rules.append((_index_symbol(non_terminal, i), _index_symbol(terminal, i)))
33+
else:
34+
simple_rules.append((non_terminal, terminal))
35+
36+
complex_rules = []
37+
for (non_terminal, symbol1, symbol2) in grammar.complex_rules:
38+
if non_terminal.is_indexed or symbol1.is_indexed or symbol2.is_indexed:
39+
for i in range(block_count):
40+
complex_rules.append((
41+
_index_symbol(non_terminal, i),
42+
_index_symbol(symbol1, i),
43+
_index_symbol(symbol2, i),
44+
))
45+
else:
46+
complex_rules.append((non_terminal, symbol1, symbol2))
47+
48+
return (
49+
LabelDecomposedGraph(
50+
vertex_count=graph.vertex_count,
51+
block_matrix_space=block_matrix_space,
52+
dtype=graph.dtype,
53+
matrices=matrices
54+
),
55+
CnfGrammarTemplate(
56+
start_nonterm=grammar.start_nonterm,
57+
epsilon_rules=epsilon_rules,
58+
simple_rules=simple_rules,
59+
complex_rules=complex_rules
60+
)
61+
)
62+
63+
64+
def _index_symbol(symbol: Symbol, index: int) -> Symbol:
65+
return Symbol(f"{symbol.label}_{index}") if symbol.is_indexed else symbol

test/pocr_data/c_alias/c_alias.cnf

Lines changed: 12 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,14 @@
1-
S H1 H0
2-
S H2 H0
3-
V H3 V3
4-
V V2 V3
5-
V V1 V3
6-
V V1 V2
7-
V H4 V3
8-
V H5 V3
9-
V H5 V2
10-
V a
11-
V H1 H0
12-
V H2 H0
13-
V H6 V1
14-
V V2 H7
15-
V H7 V1
16-
V a_r
17-
V1 H6 V1
18-
V1 V2 H7
19-
V1 H7 V1
20-
V1 a_r
21-
V2 H1 H0
22-
V2 H2 H0
23-
V3 H4 V3
24-
V3 H5 V3
25-
V3 H5 V2
26-
V3 a
27-
H0 d
28-
H1 H2 V
29-
H2 d_r
30-
H3 V1 V2
31-
H4 H5 V2
32-
H5 a
33-
H6 V2 H7
34-
H7 a_r
1+
S d_r V_d
2+
V1
3+
V3 a V2_V3
4+
V2_V3 V2 V3
5+
V2 d_r V_d
6+
a_r_V1 a_r V1
7+
V V1 V2_V3
8+
V3
9+
V2
10+
V_d V d
11+
V1 V2 a_r_V1
3512

3613
Count:
37-
S
14+
S

0 commit comments

Comments
 (0)