
Commit 6133a67

Add evaluation script, supporting pocr, pearl, gigascale, graspan
1 parent 6e1b833 commit 6133a67

File tree: 6 files changed (+411, −2 lines)

cli/all_pairs_cflr_command_manager.py

Lines changed: 195 additions & 0 deletions
@@ -0,0 +1,195 @@
import os
import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Tuple

import psutil

from src.grammar.cnf_grammar_template import CnfGrammarTemplate
from src.graph.label_decomposed_graph import LabelDecomposedGraph
from src.problems.Base.template_cfg.utils import explode_indices


def get_all_pairs_cflr_command_manager(
    algo_settings: str,
    graph_path: Path,
    grammar_path: Path
) -> "AllPairsCflrCommandManager":
    # Any unrecognized algo_settings value falls back to the in-repo Python implementation
    return {
        "pocr": PocrAllPairsCflrCommandManager,
        "pearl": PearlAllPairsCflrCommandManager,
        "gigascale": GigascaleAllPairsCflrCommandManager,
        "graspan": GraspanAllPairsCflrCommandManager
    }.get(algo_settings, PyAlgoAllPairsCflrCommandManager)(
        algo_settings, graph_path, grammar_path
    )


class AllPairsCflrCommandManager(ABC):
    def __init__(
        self,
        algo_settings: str,
        graph_path: Path,
        grammar_path: Path
    ):
        self.algo_settings = algo_settings
        self.graph_path = graph_path
        self.grammar_path = grammar_path

    @abstractmethod
    def create_command(self) -> Optional[str]:
        pass

    # noinspection PyMethodMayBeStatic
    def discard_stderr(self) -> bool:
        return False

    @property
    def work_dir(self) -> Optional[Path]:
        return None

    # noinspection PyMethodMayBeStatic
    def get_analysis_time(self, output: str) -> float:
        return float(re.search(r"AnalysisTime\s+([\d.]+|NaN)", output).group(1))

    # noinspection PyMethodMayBeStatic
    def get_edge_count(self, output: str) -> int:
        return re.search(r"#(SEdges|CountEdges)\s+([\d.]+|NaN)", output).group(2)


class PyAlgoAllPairsCflrCommandManager(AllPairsCflrCommandManager):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def create_command(self) -> Optional[str]:
        return f"python3 -m cli.run_all_pairs_cflr {self.algo_settings} {self.graph_path} {self.grammar_path}"


class PocrAllPairsCflrCommandManager(AllPairsCflrCommandManager):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def create_command(self) -> Optional[str]:
        return (
            f'{self.grammar_path.stem} -pocr {self.graph_path}'
            if self.grammar_path.stem in {"aa", "vf"}
            else f'cfl -pocr {self.grammar_path} {self.graph_path}'
        )


class PearlAllPairsCflrCommandManager(AllPairsCflrCommandManager):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def create_command(self) -> Optional[str]:
        return (
            f'./{self.grammar_path.stem} {self.graph_path} -pearl -scc=false -gf=false'
            if self.grammar_path.stem in {"aa", "vf"}
            else None
        )

    @property
    def work_dir(self) -> Optional[Path]:
        return Path(os.environ['PEARL_DIR'])

    def get_edge_count(self, output: str) -> int:
        vedges_search = re.search(r"#VEdges\s+(\d+)", output)
        if vedges_search:
            return vedges_search.group(1)
        return re.search(r"#AEdges\s+(\d+)", output).group(1)


class GigascaleAllPairsCflrCommandManager(AllPairsCflrCommandManager):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def create_command(self) -> Optional[str]:
        return (
            f'./run.sh -wdlrb -i datasets/dacapo9/{self.graph_path.stem}'
            if self.grammar_path.stem in {"java_points_to"}
            else None
        )

    @property
    def work_dir(self) -> Optional[Path]:
        return Path(os.environ['GIGASCALE_DIR'])

    # Gigascale sends [INFO] logs to stderr
    def discard_stderr(self) -> bool:
        return True

    def get_analysis_time(self, output: str) -> float:
        return self._get_analysis_time_and_edge_count(output)[0]

    def get_edge_count(self, output: str) -> int:
        return self._get_analysis_time_and_edge_count(output)[1]

    @staticmethod
    def _get_analysis_time_and_edge_count(output: str) -> Tuple[float, int]:
        pattern = (r"benchmark\s+TC-time\s+TC-mem\s+v\s+e\s+vpt\s+avg\s+max\s+load/f\s+store/f\s*\n"
                   r"\w+\s+"
                   r"(\d+\.\d+)\s+"
                   r"\d+(?:\.\d+)?\s+"
                   r"\d+\s+"
                   r"\d+\s+"
                   r"(\d+)\s+"
                   r"\d+(?:\.\d+)?\s+"
                   r"\d+\s+"
                   r"\d+\s+"
                   r"\d+")

        match = re.search(pattern, output)

        tc_time, vpt = match.groups()
        return float(tc_time), int(vpt)


class GraspanAllPairsCflrCommandManager(AllPairsCflrCommandManager):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def create_command(self) -> Optional[str]:
        grammar = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path)
        graph = LabelDecomposedGraph.read_from_pocr_graph_file(self.graph_path)

        # Graspan doesn't support indexed symbols, so labels and indices need to be concatenated
        if graph.block_matrix_space.block_count > 1:
            graph, grammar = explode_indices(graph, grammar)
            graph_path = self.graph_path.parent / "graspan" / self.graph_path.name
            os.makedirs(graph_path.parent, exist_ok=True)
            graph.write_to_pocr_graph_file(graph_path)
        else:
            graph_path = self.graph_path

        # Graspan doesn't support grammars with more than 255 symbols, because each
        # symbol is encoded in a single byte and one value is reserved for epsilon
        if len(grammar.symbols) > 255:
            return None

        grammar_path = self.grammar_path.parent / "graspan" / self.grammar_path.name
        os.makedirs(grammar_path.parent, exist_ok=True)
        grammar.write_to_pocr_cnf_file(grammar_path, include_starting=False)

        return (
            f'./run {graph_path} {grammar_path} 1 '
            f'{int(psutil.virtual_memory().total / 10**9 * 0.9)} '
            f'{os.cpu_count() * 2}'
        )

    @property
    def work_dir(self) -> Optional[Path]:
        return Path(os.environ['GRASPAN_DIR']) / "src"

    def get_analysis_time(self, output: str) -> float:
        return float(re.search(r"COMP TIME:\s*([\d.]+|NaN)", output).group(1))

    def get_edge_count(self, output: str) -> int:
        final_file = re.search(r"finalFile:\s*(.*)", output).group(1)
        start_nonterm = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path).start_nonterm
        with open(final_file, "r") as file:
            edges = set()
            for line in file:
                if line.split()[-1] == start_nonterm.label:
                    edges.add((line.split()[0], line.split()[1]))
            return len(edges)
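
Note (not part of the commit): a minimal usage sketch of the factory above, with placeholder graph and grammar paths. For a grammar whose stem is "aa" or "vf", the POCR manager builds the dedicated-binary command shown in the comment.

from pathlib import Path

from cli.all_pairs_cflr_command_manager import get_all_pairs_cflr_command_manager

# Placeholder inputs; real runs point at POCR-format graph and grammar files.
manager = get_all_pairs_cflr_command_manager(
    algo_settings="pocr",
    graph_path=Path("data/graphs/example.g"),
    grammar_path=Path("data/grammars/aa.cnf"),
)

# Prints "aa -pocr data/graphs/example.g"; any unrecognized algo_settings value
# would instead dispatch to PyAlgoAllPairsCflrCommandManager.
print(manager.create_command())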

cli/eval_all_pairs_cflr.py

Lines changed: 176 additions & 0 deletions
@@ -0,0 +1,176 @@
import argparse
import csv
import os
import shlex
import signal
import subprocess
import sys
from pathlib import Path
from typing import Optional, List

from cli.all_pairs_cflr_command_manager import get_all_pairs_cflr_command_manager

# see `man timeout`
TIMEOUT_EXIT_CODE = 124


def check_file_for_completion(result_file_path, rounds):
    try:
        with open(result_file_path, 'r') as file:
            reader = list(csv.reader(file))
            if len(reader) - 1 >= rounds or any("OOT" in row or "OOM" in row for row in reader):
                return True
    except FileNotFoundError:
        pass
    return False


def run_experiment(
    algo_settings: str,
    algo_name: str,
    graph_path: Path,
    grammar_path: Path,
    rounds: int,
    timeout: Optional[int],
    result_file_path: Path
):
    graph_base_name = graph_path.stem
    grammar_base_name = grammar_path.stem

    if not os.path.exists(result_file_path):
        with open(result_file_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["algo", "graph", "grammar", "edge_count", "ram_kb", "time_sec"])

    if "--rewrite-grammar" in algo_settings:
        algo_settings = algo_settings.replace("--rewrite-grammar", "")
        rewritten_grammar_path = grammar_path.with_stem(grammar_path.stem + "_rewritten")
        if os.path.exists(rewritten_grammar_path):
            grammar_path = rewritten_grammar_path

    for _ in range(rounds):
        if check_file_for_completion(result_file_path, rounds):
            return

        command_manager = get_all_pairs_cflr_command_manager(algo_settings, graph_path, grammar_path)

        temp_ram_file = Path("temp_ram_usage.txt").absolute()

        base_command = command_manager.create_command()

        if base_command is None:
            edge_count, ram_kb, time_sec = "-", "-", "-"
        else:
            command = (f"/usr/bin/time -o {temp_ram_file} -f %M "
                       + ("" if timeout is None else f"timeout {timeout}s ")
                       + base_command)

            process = subprocess.Popen(
                shlex.split(command),
                cwd=command_manager.work_dir,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL if command_manager.discard_stderr() else None
            )
            try:
                output, _ = process.communicate()
            except KeyboardInterrupt:
                process.send_signal(signal.SIGINT)
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    process.kill()
                raise
            if process.returncode == 0:
                output = output.decode()
                time_sec = command_manager.get_analysis_time(output)
                edge_count = command_manager.get_edge_count(output)
                with open(temp_ram_file, 'r') as f:
                    ram_kb = f.read().strip()
            elif process.returncode == TIMEOUT_EXIT_CODE:
                print(" Runner process timed out")
                edge_count, ram_kb, time_sec = "OOT", "OOT", "OOT"
            else:
                print(
                    f" Runner process terminated with return code {process.returncode}\n"
                    f" (interpreting as out of memory error)"
                )
                edge_count, ram_kb, time_sec = "OOM", "OOM", "OOM"

        with open(result_file_path, 'a', newline='') as csvfile:
            print(f" {edge_count} {ram_kb} {time_sec}")
            writer = csv.writer(csvfile)
            writer.writerow([
                algo_name,
                graph_base_name,
                grammar_base_name,
                edge_count,
                ram_kb,
                time_sec
            ])


def eval_all_pairs_cflr(
    algo_config: Path,
    data_config: Path,
    result_path: Path,
    rounds: Optional[int],
    timeout: Optional[int],
):
    with open(algo_config, mode='r') as algo_file:
        algo_reader = csv.DictReader(algo_file)
        for algo_row in algo_reader:
            algo_name = algo_row['algo_name']
            print(f"Running algorithm: {algo_name}")
            algo_settings = algo_row['algo_settings']
            algo_result_path = os.path.join(result_path, algo_name)
            if not os.path.exists(algo_result_path):
                os.makedirs(algo_result_path)

            with open(data_config, mode='r') as data_file:
                data_reader = csv.DictReader(data_file)
                for data_row in data_reader:
                    graph_path = Path(data_row['graph_path']).absolute()
                    grammar_path = Path(data_row['grammar_path']).absolute()
                    print(f" Processing data: {graph_path.stem}, {grammar_path.stem}")
                    result_file_name = f"{graph_path.stem}_{grammar_path.stem}.csv"
                    result_file_path = Path(os.path.join(algo_result_path, result_file_name))

                    run_experiment(
                        algo_settings=algo_settings,
                        algo_name=algo_name,
                        graph_path=graph_path,
                        grammar_path=grammar_path,
                        rounds=rounds,
                        timeout=timeout,
                        result_file_path=result_file_path
                    )


def main(raw_args: List[str]):
    parser = argparse.ArgumentParser(
        description='Evaluates all-pairs Context-Free Language Reachability (CFL-R) algorithms.'
    )

    parser.add_argument('algo_config', type=str,
                        help='Path to the algo-config csv file.')
    parser.add_argument('data_config', type=str,
                        help='Path to the data-config csv file.')
    parser.add_argument('result_path', type=str,
                        help='Path to save the results.')
    parser.add_argument('--rounds', type=int, default=1,
                        help='Number of rounds to run each configuration.')
    parser.add_argument('--timeout', type=int, default=None,
                        help='Timeout for each run in seconds.')

    args = parser.parse_args(raw_args)
    eval_all_pairs_cflr(
        algo_config=Path(args.algo_config),
        data_config=Path(args.data_config),
        result_path=Path(args.result_path),
        rounds=args.rounds,
        timeout=args.timeout
    )


if __name__ == "__main__":  # pragma: no cover
    main(raw_args=sys.argv[1:])  # pragma: no cover
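
Similarly illustrative (not part of the commit): driving the evaluation entry point from Python with hypothetical config paths. The algo config needs algo_name and algo_settings columns and the data config needs graph_path and grammar_path columns, matching what eval_all_pairs_cflr reads.

from cli.eval_all_pairs_cflr import main

# Hypothetical paths; equivalent to invoking the script from the command line.
main([
    "configs/algo_config.csv",   # columns: algo_name, algo_settings
    "configs/data_config.csv",   # columns: graph_path, grammar_path
    "results",                   # per-algorithm CSVs are written under results/<algo_name>/
    "--rounds", "3",
    "--timeout", "3600",
])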

cli/run_all_pairs_cflr.py

Lines changed: 2 additions & 2 deletions
@@ -4,15 +4,15 @@
 from time import time
 from typing import Optional, List

-from src.algo_setting.preprocessor_setting import preprocess_graph_and_grammar
-from src.utils.time_limit import time_limit, TimeoutException
 from src.algo_setting.algo_setting import AlgoSetting
 from src.algo_setting.algo_settings_manager import AlgoSettingsManager
+from src.algo_setting.preprocessor_setting import preprocess_graph_and_grammar
 from src.grammar.cnf_grammar_template import CnfGrammarTemplate
 from src.graph.label_decomposed_graph import LabelDecomposedGraph
 from src.problems.Base.template_cfg.template_cfg_all_pairs_reachability_impls import \
     ALL_PAIRS_CFL_REACHABILITY_ALGO_NAMES, \
     get_all_pairs_cfl_reachability_algo
+from src.utils.time_limit import time_limit, TimeoutException


 def run_all_pairs_cflr(

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -7,3 +7,4 @@ suitesparse-graphblas==7.4.4.0
 python-graphblas==2023.7.0
 pandas==2.0.3
 numpy==1.23.5
+psutil==5.9.8
