@@ -4,6 +4,7 @@
 
 from util import get_bool_environ, load_json, simple_usage_message, wait_process_success
 from color_util import cprint, cprinterr, colors
+from invoke_util import get_short_verdict, is_verdict_expected
 import tests_util as tu
 
 
@@ -13,27 +14,7 @@
 SOLUTIONS_JSON = os.environ.get('SOLUTIONS_JSON')
 SPECIFIC_TESTS = get_bool_environ('SPECIFIC_TESTS')
 SPECIFIED_TESTS_PATTERN = os.environ.get('SPECIFIED_TESTS_PATTERN')
-
-
-def is_verdict_expected(score, verdict, expected_verdict):
-    if expected_verdict in ["correct", "model_solution"]:
-        return verdict == "Correct" and score == 1
-    elif expected_verdict == "time_limit":
-        return verdict == "Time Limit Exceeded"
-    elif expected_verdict == "memory_limit":
-        return verdict == "Runtime Error"
-    elif expected_verdict == "incorrect":
-        return verdict == "Wrong Answer"
-    elif expected_verdict == "runtime_error":
-        return verdict == "Runtime Error"
-    elif expected_verdict == "failed":
-        return verdict != "Correct" or score == 0
-    elif expected_verdict == "time_limit_and_runtime_error":
-        return verdict in ["Time Limit Exceeded", "Runtime Error"]
-    elif expected_verdict == "partially_correct":
-        return 0 < score < 1
-    else:
-        raise ValueError("Invalid verdict")
+SKIP_CHECK = get_bool_environ('SKIP_CHECK')
 
 
 if __name__ == '__main__':
@@ -69,74 +50,118 @@ def is_verdict_expected(score, verdict, expected_verdict):
     ]
     wait_process_success(subprocess.Popen(command))
 
-    print("\nSubtask summary")
+    print()
+    print("Subtask summary")
+
+    if solution_data is None:
+        cprint(colors.WARN, "Solution does not exist in solutions.json. Skipped checking verdict")
 
     subtasks_data = dict(load_json(SUBTASKS_JSON))['subtasks']
     total_points = total_full_points = 0
-    for subtask, tests in tu.get_subtasks_tests_dict_from_tests_dir(tests_dir).items():
+    unmatched_verdicts = []
+    for subtask_index, (subtask, tests) in enumerate(tu.get_subtasks_tests_dict_from_tests_dir(tests_dir).items()):
         subtask_result = None
+        max_execution_time = None
         testcases_run = 0
 
         for test in tests:
-            score = verdict = None
+            score = verdict = execution_time = None
+            if not SKIP_CHECK:
+                try:
+                    with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
+                        score = float(sf.readlines()[0].strip('\n'))
+                    with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
+                        verdict = vf.readlines()[0].strip('\n')
+                except FileNotFoundError:
+                    pass
+                else:
+                    if subtask_result is None or score < subtask_result[0]:
+                        subtask_result = (score, verdict, test)
             try:
-                with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
-                    score = float(sf.readlines()[0].strip('\n'))
-                with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
-                    verdict = vf.readlines()[0].strip('\n')
+                with open(os.path.join(LOGS_DIR, "{}.time".format(test)), 'r') as tf:
+                    execution_time = float(tf.readlines()[0].strip('\n'))
             except FileNotFoundError:
                 pass
             else:
-                if subtask_result is None or score < subtask_result[0]:
-                    subtask_result = (score, verdict, test)
+                if max_execution_time is None or max_execution_time < execution_time:
+                    max_execution_time = execution_time
                 testcases_run += 1
 
-        if subtask_result is None:
+        if max_execution_time is None:
             command = [
                 'bash',
                 os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
                 subtask,
                 str(len(tests))
             ]
             wait_process_success(subprocess.Popen(command))
+        elif subtask_result is None:
+            command = [
+                'bash',
+                os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
+                subtask,
+                str(len(tests)),
+                str(testcases_run),
+                str(max_execution_time)
+            ]
+            wait_process_success(subprocess.Popen(command))
         else:
             subtask_score = subtask_result[0] * subtasks_data[subtask]['score']
 
-            expected_verdict = None
+            short_verdict_color = "warn"
             if solution_data is not None:
                 expected_verdict = solution_data.get("verdict", None)
                 if "except" in solution_data:
                     expected_verdict = solution_data["except"].get(subtask, expected_verdict)
-
-            expected_verdict_args = []
-            if expected_verdict is not None:
                 if is_verdict_expected(subtask_result[0], subtask_result[1], expected_verdict):
-                    expected_verdict_args = ["match with expected"]
+                    short_verdict_color = "ok"
                 else:
-                    expected_verdict_args = ["expected: {}".format(expected_verdict)]
+                    short_verdict_color = "fail"
+                    unmatched_verdicts.append((subtask, subtask_result[1], expected_verdict))
+
+            subtask_score_color = "ok"
+            if subtask_result[0] == 0:
+                subtask_score_color = "fail"
+            elif subtask_result[0] < 1:
+                subtask_score_color = "warn"
 
             command = [
                 'bash',
                 os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
                 subtask,
                 str(len(tests)),
                 str(testcases_run),
+                str(max_execution_time),
+                get_short_verdict(subtask_result[1]),
+                short_verdict_color,
                 '{:g}'.format(round(subtask_score, 2)),
+                subtask_score_color,
                 str(subtasks_data[subtask]['score']),
-                subtask_result[1],
                 subtask_result[2]
-            ] + expected_verdict_args
+            ]
             wait_process_success(subprocess.Popen(command))
 
             total_points += subtask_score
             total_full_points += subtasks_data[subtask]['score']
 
-    color = colors.OK
-    if total_points == 0:
-        color = colors.ERROR
-    elif total_points < total_full_points:
-        color = colors.WARN
-    cprint(color, "{:g}/{} pts".format(round(total_points, 2), total_full_points))
+    if not SKIP_CHECK:
+        color = colors.OK
+        if total_points == 0:
+            color = colors.ERROR
+        elif total_points < total_full_points:
+            color = colors.WARN
+        cprint(color, "{:g}/{} pts".format(round(total_points, 2), total_full_points))
+
+    if solution_data is not None:
+        if len(unmatched_verdicts) == 0:
+            cprint(colors.OK, "All verdict matches with solutions.json")
+        else:
+            cprint(colors.FAIL, "Found one or more subtasks mismatch with solutions.json")
+            for subtask, verdict, expected_verdict in unmatched_verdicts:
+                print("[{}] got '{}', expected '{}'".format(subtask, verdict, expected_verdict))
 
     if missing_tests:
         cprinterr(colors.WARN, "Missing {} {}!".format(len(missing_tests), "tests" if len(missing_tests) != 1 else "test"))
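Note: this diff deletes is_verdict_expected from the script and instead imports it, together with a new get_short_verdict helper, from invoke_util. invoke_util.py itself is not part of the hunks shown here, so the sketch below is only an assumption about its contents: is_verdict_expected is reproduced from the lines removed in the first hunk (presumably moved verbatim), and the get_short_verdict mapping is hypothetical, covering just the verdict strings that appear in this file.

# invoke_util.py -- assumed contents, not shown in this diff

def get_short_verdict(verdict):
    # Hypothetical abbreviations for the verdict strings used in this file;
    # the real short forms consumed by subtask_summary.sh may differ.
    short_forms = {
        "Correct": "OK",
        "Wrong Answer": "WA",
        "Time Limit Exceeded": "TLE",
        "Runtime Error": "RE",
    }
    return short_forms.get(verdict, verdict)


def is_verdict_expected(score, verdict, expected_verdict):
    # Same logic as the function removed from this script in the first hunk.
    if expected_verdict in ["correct", "model_solution"]:
        return verdict == "Correct" and score == 1
    elif expected_verdict == "time_limit":
        return verdict == "Time Limit Exceeded"
    elif expected_verdict == "memory_limit":
        # An expected memory_limit is satisfied by a "Runtime Error" verdict here.
        return verdict == "Runtime Error"
    elif expected_verdict == "incorrect":
        return verdict == "Wrong Answer"
    elif expected_verdict == "runtime_error":
        return verdict == "Runtime Error"
    elif expected_verdict == "failed":
        return verdict != "Correct" or score == 0
    elif expected_verdict == "time_limit_and_runtime_error":
        return verdict in ["Time Limit Exceeded", "Runtime Error"]
    elif expected_verdict == "partially_correct":
        return 0 < score < 1
    else:
        raise ValueError("Invalid verdict")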