This repository was archived by the owner on Sep 9, 2022. It is now read-only.

Commit 619866d

Refine invoke summary output and add invoke-all command (#15)

* Print max running time in invoke subtask summary
* Add invoke-all command
* Update documentation
* Align invoke-all summary

Co-authored-by: prabowo02 <[email protected]>

Parent: fc32040

9 files changed (+511, -95 lines)

docs/README.md (+13, -2)

@@ -190,7 +190,7 @@ It contains all solutions that are prepared and used in development of the task,
 
 ## solutions.json
 
-This file specifies the verdict of each solution. It is used by the web-interface to check if the behavior of each solution is expected on the test data. The verdicts can be `correct`, `time_limit`, `memory_limit`, `incorrect`, `runtime_error`, `failed`, `time_limit_and_runtime_error`, `partially_correct`.
+This file specifies the verdict of each solution. It is used by the web-interface and `invoke` to check if the behavior of each solution is expected on the test data. The verdicts can be `correct`, `time_limit`, `memory_limit`, `incorrect`, `runtime_error`, `failed`, `time_limit_and_runtime_error`, `partially_correct`.
 There is also a special verdict `model_solution` which should be used exactly once.
 The model solution is used to generate the correct outputs for test data.
 Below is an example:
@@ -558,7 +558,8 @@ Here are some notes/features on this command:
 ## invoke
 
 This command is used to compile a solution and the checker,
-run the solution over the test data (with the problem constraints, e.g. time limit) and check its output.
+run the solution over the test data (with the problem constraints, e.g. time limit) and check its output.
+If the filename exists in `solutions.json`, it will also compare the invocation verdict with the expected verdict.
 Here is the usage:
 
 ```
@@ -622,6 +623,16 @@ Here are some notes/features on this command:
 The score is usually zero or one, unless the verdict is `Partially Correct`.
 
 
+## invoke-all
+
+This command runs `invoke` for all solutions specified in `solutions.json`.
+
+All of the `invoke` command options are supported, except the following commands:
+* `-r, --show-reason`
+* `--no-check`
+* `--no-sol-compile`
+
+
 ## make-public
 
 This command updates the `public` directory and provides the package that is given to the contestants.
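
The `solutions.json` handling documented above pairs each solution with an expected verdict; the scripts further down also honor a per-subtask override under an `except` key. A minimal sketch of that structure, written as the Python dict the scripts would get back from `load_json` (the file names and subtask names here are hypothetical):

```python
# Sketch of a solutions.json structure as read by the invoke scripts
# (hypothetical file and subtask names; the "verdict"/"except" lookups
# mirror solution_data.get("verdict") and solution_data["except"] in the diffs below).
solutions = {
    "sol.cpp": {"verdict": "model_solution"},
    "sol_quadratic.cpp": {
        "verdict": "time_limit",
        "except": {"small": "correct"},  # per-subtask override of the expected verdict
    },
}

for filename, data in solutions.items():
    base_verdict = data.get("verdict", None)
    for subtask in ["small", "full"]:
        expected = data.get("except", {}).get(subtask, base_verdict)
        print(filename, subtask, expected)
```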

scripts/internal/invoke.py (+70, -45)

@@ -4,6 +4,7 @@
 
 from util import get_bool_environ, load_json, simple_usage_message, wait_process_success
 from color_util import cprint, cprinterr, colors
+from invoke_util import get_short_verdict, is_verdict_expected
 import tests_util as tu
 
 
@@ -13,27 +14,7 @@
 SOLUTIONS_JSON = os.environ.get('SOLUTIONS_JSON')
 SPECIFIC_TESTS = get_bool_environ('SPECIFIC_TESTS')
 SPECIFIED_TESTS_PATTERN = os.environ.get('SPECIFIED_TESTS_PATTERN')
-
-
-def is_verdict_expected(score, verdict, expected_verdict):
-    if expected_verdict in ["correct", "model_solution"]:
-        return verdict == "Correct" and score == 1
-    elif expected_verdict == "time_limit":
-        return verdict == "Time Limit Exceeded"
-    elif expected_verdict == "memory_limit":
-        return verdict == "Runtime Error"
-    elif expected_verdict == "incorrect":
-        return verdict == "Wrong Answer"
-    elif expected_verdict == "runtime_error":
-        return verdict == "Runtime Error"
-    elif expected_verdict == "failed":
-        return verdict != "Correct" or score == 0
-    elif expected_verdict == "time_limit_and_runtime_error":
-        return verdict in ["Time Limit Exceeded", "Runtime Error"]
-    elif expected_verdict == "partially_correct":
-        return 0 < score < 1
-    else:
-        raise ValueError("Invalid verdict")
+SKIP_CHECK = get_bool_environ('SKIP_CHECK')
 
 
 if __name__ == '__main__':
@@ -69,74 +50,118 @@ def is_verdict_expected(score, verdict, expected_verdict):
         ]
         wait_process_success(subprocess.Popen(command))
 
-    print("\nSubtask summary")
+    print()
+    print("Subtask summary")
+
+    if solution_data is None:
+        cprint(colors.WARN, "Solution does not exist in solutions.json. Skipped checking verdict")
 
     subtasks_data = dict(load_json(SUBTASKS_JSON))['subtasks']
     total_points = total_full_points = 0
-    for subtask, tests in tu.get_subtasks_tests_dict_from_tests_dir(tests_dir).items():
+    unmatched_verdicts = []
+    for subtask_index, (subtask, tests) in enumerate(tu.get_subtasks_tests_dict_from_tests_dir(tests_dir).items()):
         subtask_result = None
+        max_execution_time = None
         testcases_run = 0
 
         for test in tests:
-            score = verdict = None
+            score = verdict = execution_time = None
+            if not SKIP_CHECK:
+                try:
+                    with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
+                        score = float(sf.readlines()[0].strip('\n'))
+                    with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
+                        verdict = vf.readlines()[0].strip('\n')
+                except FileNotFoundError:
+                    pass
+                else:
+                    if subtask_result is None or score < subtask_result[0]:
+                        subtask_result = (score, verdict, test)
             try:
-                with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
-                    score = float(sf.readlines()[0].strip('\n'))
-                with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
-                    verdict = vf.readlines()[0].strip('\n')
+                with open(os.path.join(LOGS_DIR, "{}.time".format(test)), 'r') as tf:
+                    execution_time = float(tf.readlines()[0].strip('\n'))
             except FileNotFoundError:
                 pass
             else:
-                if subtask_result is None or score < subtask_result[0]:
-                    subtask_result = (score, verdict, test)
+                if max_execution_time is None or max_execution_time < execution_time:
+                    max_execution_time = execution_time
             testcases_run += 1
 
-        if subtask_result is None:
+        if max_execution_time is None:
             command = [
                 'bash',
                 os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
                 subtask,
                 str(len(tests))
             ]
             wait_process_success(subprocess.Popen(command))
+        elif subtask_result is None:
+            command = [
+                'bash',
+                os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
+                subtask,
+                str(len(tests)),
+                str(testcases_run),
+                str(max_execution_time)
+            ]
+            wait_process_success(subprocess.Popen(command))
         else:
             subtask_score = subtask_result[0] * subtasks_data[subtask]['score']
 
-            expected_verdict = None
+            short_verdict_color = "warn"
             if solution_data is not None:
                 expected_verdict = solution_data.get("verdict", None)
                 if "except" in solution_data:
                     expected_verdict = solution_data["except"].get(subtask, expected_verdict)
-
-            expected_verdict_args = []
-            if expected_verdict is not None:
                 if is_verdict_expected(subtask_result[0], subtask_result[1], expected_verdict):
-                    expected_verdict_args = ["match with expected"]
+                    short_verdict_color = "ok"
                 else:
-                    expected_verdict_args = ["expected: {}".format(expected_verdict)]
+                    short_verdict_color = "fail"
+                    unmatched_verdicts.append((subtask, subtask_result[1], expected_verdict))
+
+            subtask_score_color = "ok"
+            if subtask_result[0] == 0:
+                subtask_score_color = "fail"
+            elif subtask_result[0] < 1:
+                subtask_score_color = "warn"
 
             command = [
                 'bash',
                 os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
                 subtask,
                 str(len(tests)),
                 str(testcases_run),
+                str(max_execution_time),
+                get_short_verdict(subtask_result[1]),
+                short_verdict_color,
                 '{:g}'.format(round(subtask_score, 2)),
+                subtask_score_color,
                 str(subtasks_data[subtask]['score']),
-                subtask_result[1],
                 subtask_result[2]
-            ] + expected_verdict_args
+            ]
             wait_process_success(subprocess.Popen(command))
 
             total_points += subtask_score
             total_full_points += subtasks_data[subtask]['score']
 
-    color = colors.OK
-    if total_points == 0:
-        color = colors.ERROR
-    elif total_points < total_full_points:
-        color = colors.WARN
-    cprint(color, "{:g}/{} pts".format(round(total_points, 2), total_full_points))
+    if not SKIP_CHECK:
+        color = colors.OK
+        if total_points == 0:
+            color = colors.ERROR
+        elif total_points < total_full_points:
+            color = colors.WARN
+        cprint(color, "{:g}/{} pts".format(round(total_points, 2), total_full_points))
+
+        if solution_data is not None:
+            if len(unmatched_verdicts) == 0:
+                cprint(colors.OK, "All verdict matches with solutions.json")
+            else:
+                cprint(colors.FAIL, "Found one or more subtasks mismatch with solutions.json")
+                for subtask, verdict, expected_verdict in unmatched_verdicts:
+                    print("[{}] got '{}', expected '{}'".format(subtask, verdict, expected_verdict))
 
     if missing_tests:
         cprinterr(colors.WARN, "Missing {} {}!".format(len(missing_tests), "tests" if len(missing_tests) != 1 else "test"))
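
The `is_verdict_expected` helper deleted above is now imported from `invoke_util`, presumably one of the changed files not shown in this excerpt, together with the new `get_short_verdict`. A sketch of what that module plausibly contains: `is_verdict_expected` is copied from the removed lines, while the short labels returned by `get_short_verdict` are an assumption made for illustration.

```python
# invoke_util.py -- sketch only.
# is_verdict_expected mirrors the code removed from invoke.py above;
# get_short_verdict and its abbreviations are hypothetical.


def get_short_verdict(verdict):
    # Hypothetical mapping from full verdict strings to short labels
    # used in the aligned summary tables.
    short = {
        "Correct": "AC",
        "Partially Correct": "PC",
        "Wrong Answer": "WA",
        "Time Limit Exceeded": "TLE",
        "Runtime Error": "RE",
    }
    return short.get(verdict, verdict)


def is_verdict_expected(score, verdict, expected_verdict):
    if expected_verdict in ["correct", "model_solution"]:
        return verdict == "Correct" and score == 1
    elif expected_verdict == "time_limit":
        return verdict == "Time Limit Exceeded"
    elif expected_verdict == "memory_limit":
        return verdict == "Runtime Error"
    elif expected_verdict == "incorrect":
        return verdict == "Wrong Answer"
    elif expected_verdict == "runtime_error":
        return verdict == "Runtime Error"
    elif expected_verdict == "failed":
        return verdict != "Correct" or score == 0
    elif expected_verdict == "time_limit_and_runtime_error":
        return verdict in ["Time Limit Exceeded", "Runtime Error"]
    elif expected_verdict == "partially_correct":
        return 0 < score < 1
    else:
        raise ValueError("Invalid verdict")
```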

scripts/internal/invoke_all.py (new file, +144)

@@ -0,0 +1,144 @@
+import sys
+import os
+import subprocess
+
+from util import get_bool_environ, load_json, simple_usage_message, wait_process_success
+from color_util import cprint, cprinterr, colors
+from invoke_util import get_short_verdict, is_verdict_expected
+import tests_util as tu
+
+
+INTERNALS_DIR = os.environ.get('INTERNALS')
+LOGS_DIR = os.environ.get('LOGS_DIR')
+SUBTASKS_JSON = os.environ.get('SUBTASKS_JSON')
+SOLUTIONS_JSON = os.environ.get('SOLUTIONS_JSON')
+SPECIFIC_TESTS = get_bool_environ('SPECIFIC_TESTS')
+SPECIFIED_TESTS_PATTERN = os.environ.get('SPECIFIED_TESTS_PATTERN')
+SOLUTION_DIR = os.environ.get('SOLUTION_DIR')
+SKIP_CHECK = False
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        simple_usage_message("<tests-dir>")
+    tests_dir = sys.argv[1]
+
+    try:
+        test_name_list = tu.get_test_names_from_tests_dir(tests_dir)
+    except tu.MalformedTestsException as e:
+        cprinterr(colors.ERROR, "Error:")
+        sys.stderr.write("{}\n".format(e))
+        sys.exit(4)
+
+    if SPECIFIC_TESTS:
+        tu.check_pattern_exists_in_test_names(SPECIFIED_TESTS_PATTERN, test_name_list)
+        test_name_list = tu.filter_test_names_by_pattern(test_name_list, SPECIFIED_TESTS_PATTERN)
+
+    available_tests, missing_tests = tu.divide_tests_by_availability(test_name_list, tests_dir)
+    if missing_tests:
+        cprinterr(colors.WARN, "Missing tests: "+(", ".join(missing_tests)))
+
+    subtasks_tests_dict = tu.get_subtasks_tests_dict_from_tests_dir(tests_dir)
+
+    print("Subtask summary")
+    header_line = "%-30s %-5s" % ("Filename", "Pts")
+    for subtask_index, (subtask, tests) in enumerate(subtasks_tests_dict.items()):
+        num_available_tests = len(set(tests).intersection(set(available_tests)))
+        command = [
+            'bash',
+            os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+            str(subtask_index),
+            subtask,
+            str(len(tests)),
+            str(num_available_tests)
+        ]
+        wait_process_success(subprocess.Popen(command))
+
+        if num_available_tests > 0:
+            header_line += " %-11s" % "[{}]".format(subtask_index)
+
+    print()
+    print("Run result")
+    print(header_line)
+
+    subtasks_data = dict(load_json(SUBTASKS_JSON))['subtasks']
+    solutions_data = dict(load_json(SOLUTIONS_JSON))
+    unmatched_verdicts = []
+    for solution_filename, solution_data in solutions_data.items():
+        command = [
+            'bash',
+            os.path.join(INTERNALS_DIR, 'compile_solution.sh'),
+            os.path.join(SOLUTION_DIR, solution_filename)
+        ]
+        ret = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).wait()
+        if ret != 0:
+            cprint(colors.FAIL, "{} does not compile".format(solution_filename))
+
+        for test_name in available_tests:
+            command = [
+                'bash',
+                os.path.join(INTERNALS_DIR, 'invoke_test.sh'),
+                tests_dir,
+                test_name,
+            ]
+            wait_process_success(subprocess.Popen(command))
+
+        total_points = 0
+        solution_summary_data = []
+        for subtask_index, (subtask, tests) in enumerate(subtasks_tests_dict.items()):
+            subtask_result = None
+            max_execution_time = None
+
+            for test in tests:
+                score = verdict = execution_time = None
+                try:
+                    with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
+                        score = float(sf.readlines()[0].strip('\n'))
+                    with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
+                        verdict = vf.readlines()[0].strip('\n')
+                    with open(os.path.join(LOGS_DIR, "{}.time".format(test)), 'r') as tf:
+                        execution_time = float(tf.readlines()[0].strip('\n'))
+                except FileNotFoundError:
+                    pass
+                else:
+                    if subtask_result is None or score < subtask_result[0]:
+                        subtask_result = (score, verdict, test)
+                    if max_execution_time is None or max_execution_time < execution_time:
+                        max_execution_time = execution_time
+
+            if subtask_result is not None:
+                subtask_score = subtask_result[0] * subtasks_data[subtask]['score']
+
+                short_verdict_color = "ok"
+                expected_verdict = solution_data.get("verdict", None)
+                if "except" in solution_data:
+                    expected_verdict = solution_data["except"].get(subtask, expected_verdict)
+                if is_verdict_expected(subtask_result[0], subtask_result[1], expected_verdict):
+                    short_verdict_color = "ok"
+                else:
+                    short_verdict_color = "fail"
+                    unmatched_verdicts.append((solution_filename, subtask, subtask_result[1], expected_verdict))
+
+                solution_summary_data.append(get_short_verdict(subtask_result[1]))
+                solution_summary_data.append(short_verdict_color)
+                solution_summary_data.append(str(max_execution_time))
+
+                total_points += subtask_score
+
+        command = [
+            'bash',
+            os.path.join(INTERNALS_DIR, 'solution_summary.sh'),
+            solution_filename,
+            '{:g}'.format(round(total_points, 2)),
+        ] + solution_summary_data
+        wait_process_success(subprocess.Popen(command))
+
+    if len(unmatched_verdicts) == 0:
+        cprint(colors.OK, "All verdict matches with solutions.json")
+    else:
+        cprint(colors.FAIL, "Found one or more subtasks mismatch with solutions.json")
+        for solution_filename, subtask, verdict, expected_verdict in unmatched_verdicts:
+            print("{:40}: got {:20}, expected '{}'".format("[{}] subtask '{}'".format(solution_filename, subtask), "'{}'".format(verdict), expected_verdict))
+
+    if missing_tests:
+        cprinterr(colors.WARN, "Missing {} {}!".format(len(missing_tests), "tests" if len(missing_tests) != 1 else "test"))
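
Both `invoke` and `invoke-all` reduce per-test results to a per-subtask summary in the same way: the subtask keeps the worst (minimum) test score, scaled by the subtask's full score, and reports the maximum running time over its tests. A standalone sketch of that reduction, with made-up scores and times standing in for the `<test>.score`, `<test>.verdict`, and `<test>.time` files read from `LOGS_DIR`:

```python
# Standalone illustration of the per-subtask reduction used above
# (hypothetical test names, scores, and times).
results = {
    "1-01": (1.0, "Correct", 0.12),
    "1-02": (0.0, "Wrong Answer", 0.34),
    "1-03": (1.0, "Correct", 0.05),
}
subtask_full_score = 30

# Keep the test with the lowest score; track the slowest test separately.
worst_score, worst_verdict, _ = min(results.values(), key=lambda r: r[0])
max_execution_time = max(time for _, _, time in results.values())

subtask_score = worst_score * subtask_full_score
print("verdict={}, score={:g}/{}, max time={:.2f}s".format(
    worst_verdict, subtask_score, subtask_full_score, max_execution_time))
```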
