This repository was archived by the owner on Sep 9, 2022. It is now read-only.

Commit 619866d

Refine invoke summary output and add invoke-all command (#15)

* Print max running time in invoke subtask summary
* Add invoke-all command
* Update documentation
* Align invoke-all summary

Co-authored-by: prabowo02 <[email protected]>

Parent: fc32040

9 files changed (+511, -95 lines)

docs/README.md (+13, -2)

@@ -190,7 +190,7 @@ It contains all solutions that are prepared and used in development of the task,
 
 ## solutions.json
 
-This file specifies the verdict of each solution. It is used by the web-interface to check if the behavior of each solution is expected on the test data. The verdicts can be `correct`, `time_limit`, `memory_limit`, `incorrect`, `runtime_error`, `failed`, `time_limit_and_runtime_error`, `partially_correct`.
+This file specifies the verdict of each solution. It is used by the web-interface and `invoke` to check if the behavior of each solution is expected on the test data. The verdicts can be `correct`, `time_limit`, `memory_limit`, `incorrect`, `runtime_error`, `failed`, `time_limit_and_runtime_error`, `partially_correct`.
 There is also a special verdict `model_solution` which should be used exactly once.
 The model solution is used to generate the correct outputs for test data.
 Below is an example:
@@ -558,7 +558,8 @@ Here are some notes/features on this command:
 ## invoke
 
 This command is used to compile a solution and the checker,
-run the solution over the test data (with the problem constraints, e.g. time limit) and check its output.
+run the solution over the test data (with the problem constraints, e.g. time limit) and check its output.
+If the filename exists in `solutions.json`, it will also compare the invocation verdict with the expected verdict.
 Here is the usage:
 
 ```
@@ -622,6 +623,16 @@ Here are some notes/features on this command:
 The score is usually zero or one, unless the verdict is `Partially Correct`.
 
 
+## invoke-all
+
+This command runs `invoke` for all solutions specified in `solutions.json`.
+
+All of the `invoke` command options are supported, except the following commands:
+* `-r, --show-reason`
+* `--no-check`
+* `--no-sol-compile`
+
+
 ## make-public
 
 This command updates the `public` directory and provides the package that is given to the contestants.
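
The `solutions.json` handling documented above pairs each solution with an expected verdict; the scripts further down also honor a per-subtask override under an `except` key. A minimal sketch of that structure, written as the Python dict the scripts would get back from `load_json` (the file names and subtask names here are hypothetical):

```python
# Sketch of a solutions.json structure as read by the invoke scripts
# (hypothetical file and subtask names; the "verdict"/"except" lookups
# mirror solution_data.get("verdict") and solution_data["except"] in the diffs below).
solutions = {
    "sol.cpp": {"verdict": "model_solution"},
    "sol_quadratic.cpp": {
        "verdict": "time_limit",
        "except": {"small": "correct"},  # per-subtask override of the expected verdict
    },
}

for filename, data in solutions.items():
    base_verdict = data.get("verdict", None)
    for subtask in ["small", "full"]:
        expected = data.get("except", {}).get(subtask, base_verdict)
        print(filename, subtask, expected)
```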

scripts/internal/invoke.py (+70, -45)

@@ -4,6 +4,7 @@
 
 from util import get_bool_environ, load_json, simple_usage_message, wait_process_success
 from color_util import cprint, cprinterr, colors
+from invoke_util import get_short_verdict, is_verdict_expected
 import tests_util as tu
 
 
@@ -13,27 +14,7 @@
 SOLUTIONS_JSON = os.environ.get('SOLUTIONS_JSON')
 SPECIFIC_TESTS = get_bool_environ('SPECIFIC_TESTS')
 SPECIFIED_TESTS_PATTERN = os.environ.get('SPECIFIED_TESTS_PATTERN')
-
-
-def is_verdict_expected(score, verdict, expected_verdict):
-    if expected_verdict in ["correct", "model_solution"]:
-        return verdict == "Correct" and score == 1
-    elif expected_verdict == "time_limit":
-        return verdict == "Time Limit Exceeded"
-    elif expected_verdict == "memory_limit":
-        return verdict == "Runtime Error"
-    elif expected_verdict == "incorrect":
-        return verdict == "Wrong Answer"
-    elif expected_verdict == "runtime_error":
-        return verdict == "Runtime Error"
-    elif expected_verdict == "failed":
-        return verdict != "Correct" or score == 0
-    elif expected_verdict == "time_limit_and_runtime_error":
-        return verdict in ["Time Limit Exceeded", "Runtime Error"]
-    elif expected_verdict == "partially_correct":
-        return 0 < score < 1
-    else:
-        raise ValueError("Invalid verdict")
+SKIP_CHECK = get_bool_environ('SKIP_CHECK')
 
 
 if __name__ == '__main__':
@@ -69,74 +50,118 @@ def is_verdict_expected(score, verdict, expected_verdict):
         ]
         wait_process_success(subprocess.Popen(command))
 
-    print("\nSubtask summary")
+    print()
+    print("Subtask summary")
+
+    if solution_data is None:
+        cprint(colors.WARN, "Solution does not exist in solutions.json. Skipped checking verdict")
 
     subtasks_data = dict(load_json(SUBTASKS_JSON))['subtasks']
     total_points = total_full_points = 0
-    for subtask, tests in tu.get_subtasks_tests_dict_from_tests_dir(tests_dir).items():
+    unmatched_verdicts = []
+    for subtask_index, (subtask, tests) in enumerate(tu.get_subtasks_tests_dict_from_tests_dir(tests_dir).items()):
         subtask_result = None
+        max_execution_time = None
         testcases_run = 0
 
         for test in tests:
-            score = verdict = None
+            score = verdict = execution_time = None
+            if not SKIP_CHECK:
+                try:
+                    with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
+                        score = float(sf.readlines()[0].strip('\n'))
+                    with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
+                        verdict = vf.readlines()[0].strip('\n')
+                except FileNotFoundError:
+                    pass
+                else:
+                    if subtask_result is None or score < subtask_result[0]:
+                        subtask_result = (score, verdict, test)
             try:
-                with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
-                    score = float(sf.readlines()[0].strip('\n'))
-                with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
-                    verdict = vf.readlines()[0].strip('\n')
+                with open(os.path.join(LOGS_DIR, "{}.time".format(test)), 'r') as tf:
+                    execution_time = float(tf.readlines()[0].strip('\n'))
             except FileNotFoundError:
                 pass
             else:
-                if subtask_result is None or score < subtask_result[0]:
-                    subtask_result = (score, verdict, test)
+                if max_execution_time is None or max_execution_time < execution_time:
+                    max_execution_time = execution_time
             testcases_run += 1
 
-        if subtask_result is None:
+        if max_execution_time is None:
             command = [
                 'bash',
                 os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
                 subtask,
                 str(len(tests))
             ]
             wait_process_success(subprocess.Popen(command))
+        elif subtask_result is None:
+            command = [
+                'bash',
+                os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
+                subtask,
+                str(len(tests)),
+                str(testcases_run),
+                str(max_execution_time)
+            ]
+            wait_process_success(subprocess.Popen(command))
         else:
             subtask_score = subtask_result[0] * subtasks_data[subtask]['score']
 
-            expected_verdict = None
+            short_verdict_color = "warn"
             if solution_data is not None:
                 expected_verdict = solution_data.get("verdict", None)
                 if "except" in solution_data:
                     expected_verdict = solution_data["except"].get(subtask, expected_verdict)
-
-            expected_verdict_args = []
-            if expected_verdict is not None:
                 if is_verdict_expected(subtask_result[0], subtask_result[1], expected_verdict):
-                    expected_verdict_args = ["match with expected"]
+                    short_verdict_color = "ok"
                 else:
-                    expected_verdict_args = ["expected: {}".format(expected_verdict)]
+                    short_verdict_color = "fail"
+                    unmatched_verdicts.append((subtask, subtask_result[1], expected_verdict))
+
+            subtask_score_color = "ok"
+            if subtask_result[0] == 0:
+                subtask_score_color = "fail"
+            elif subtask_result[0] < 1:
+                subtask_score_color = "warn"
 
             command = [
                 'bash',
                 os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+                str(subtask_index),
                 subtask,
                 str(len(tests)),
                 str(testcases_run),
+                str(max_execution_time),
+                get_short_verdict(subtask_result[1]),
+                short_verdict_color,
                 '{:g}'.format(round(subtask_score, 2)),
+                subtask_score_color,
                 str(subtasks_data[subtask]['score']),
-                subtask_result[1],
                 subtask_result[2]
-            ] + expected_verdict_args
+            ]
             wait_process_success(subprocess.Popen(command))
 
             total_points += subtask_score
             total_full_points += subtasks_data[subtask]['score']
 
-    color = colors.OK
-    if total_points == 0:
-        color = colors.ERROR
-    elif total_points < total_full_points:
-        color = colors.WARN
-    cprint(color, "{:g}/{} pts".format(round(total_points, 2), total_full_points))
+    if not SKIP_CHECK:
+        color = colors.OK
+        if total_points == 0:
+            color = colors.ERROR
+        elif total_points < total_full_points:
+            color = colors.WARN
+        cprint(color, "{:g}/{} pts".format(round(total_points, 2), total_full_points))
+
+        if solution_data is not None:
+            if len(unmatched_verdicts) == 0:
+                cprint(colors.OK, "All verdict matches with solutions.json")
+            else:
+                cprint(colors.FAIL, "Found one or more subtasks mismatch with solutions.json")
+                for subtask, verdict, expected_verdict in unmatched_verdicts:
+                    print("[{}] got '{}', expected '{}'".format(subtask, verdict, expected_verdict))
 
     if missing_tests:
         cprinterr(colors.WARN, "Missing {} {}!".format(len(missing_tests), "tests" if len(missing_tests) != 1 else "test"))
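
The `is_verdict_expected` helper deleted above is now imported from `invoke_util`, presumably one of the changed files not shown in this excerpt, together with the new `get_short_verdict`. A sketch of what that module plausibly contains: `is_verdict_expected` is copied from the removed lines, while the short labels returned by `get_short_verdict` are an assumption made for illustration.

```python
# invoke_util.py -- sketch only.
# is_verdict_expected mirrors the code removed from invoke.py above;
# get_short_verdict and its abbreviations are hypothetical.


def get_short_verdict(verdict):
    # Hypothetical mapping from full verdict strings to short labels
    # used in the aligned summary tables.
    short = {
        "Correct": "AC",
        "Partially Correct": "PC",
        "Wrong Answer": "WA",
        "Time Limit Exceeded": "TLE",
        "Runtime Error": "RE",
    }
    return short.get(verdict, verdict)


def is_verdict_expected(score, verdict, expected_verdict):
    if expected_verdict in ["correct", "model_solution"]:
        return verdict == "Correct" and score == 1
    elif expected_verdict == "time_limit":
        return verdict == "Time Limit Exceeded"
    elif expected_verdict == "memory_limit":
        return verdict == "Runtime Error"
    elif expected_verdict == "incorrect":
        return verdict == "Wrong Answer"
    elif expected_verdict == "runtime_error":
        return verdict == "Runtime Error"
    elif expected_verdict == "failed":
        return verdict != "Correct" or score == 0
    elif expected_verdict == "time_limit_and_runtime_error":
        return verdict in ["Time Limit Exceeded", "Runtime Error"]
    elif expected_verdict == "partially_correct":
        return 0 < score < 1
    else:
        raise ValueError("Invalid verdict")
```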

scripts/internal/invoke_all.py (new file, +144)

@@ -0,0 +1,144 @@
+import sys
+import os
+import subprocess
+
+from util import get_bool_environ, load_json, simple_usage_message, wait_process_success
+from color_util import cprint, cprinterr, colors
+from invoke_util import get_short_verdict, is_verdict_expected
+import tests_util as tu
+
+
+INTERNALS_DIR = os.environ.get('INTERNALS')
+LOGS_DIR = os.environ.get('LOGS_DIR')
+SUBTASKS_JSON = os.environ.get('SUBTASKS_JSON')
+SOLUTIONS_JSON = os.environ.get('SOLUTIONS_JSON')
+SPECIFIC_TESTS = get_bool_environ('SPECIFIC_TESTS')
+SPECIFIED_TESTS_PATTERN = os.environ.get('SPECIFIED_TESTS_PATTERN')
+SOLUTION_DIR = os.environ.get('SOLUTION_DIR')
+SKIP_CHECK = False
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        simple_usage_message("<tests-dir>")
+    tests_dir = sys.argv[1]
+
+    try:
+        test_name_list = tu.get_test_names_from_tests_dir(tests_dir)
+    except tu.MalformedTestsException as e:
+        cprinterr(colors.ERROR, "Error:")
+        sys.stderr.write("{}\n".format(e))
+        sys.exit(4)
+
+    if SPECIFIC_TESTS:
+        tu.check_pattern_exists_in_test_names(SPECIFIED_TESTS_PATTERN, test_name_list)
+        test_name_list = tu.filter_test_names_by_pattern(test_name_list, SPECIFIED_TESTS_PATTERN)
+
+    available_tests, missing_tests = tu.divide_tests_by_availability(test_name_list, tests_dir)
+    if missing_tests:
+        cprinterr(colors.WARN, "Missing tests: "+(", ".join(missing_tests)))
+
+    subtasks_tests_dict = tu.get_subtasks_tests_dict_from_tests_dir(tests_dir)
+
+    print("Subtask summary")
+    header_line = "%-30s %-5s" % ("Filename", "Pts")
+    for subtask_index, (subtask, tests) in enumerate(subtasks_tests_dict.items()):
+        num_available_tests = len(set(tests).intersection(set(available_tests)))
+        command = [
+            'bash',
+            os.path.join(INTERNALS_DIR, 'subtask_summary.sh'),
+            str(subtask_index),
+            subtask,
+            str(len(tests)),
+            str(num_available_tests)
+        ]
+        wait_process_success(subprocess.Popen(command))
+
+        if num_available_tests > 0:
+            header_line += " %-11s" % "[{}]".format(subtask_index)
+
+    print()
+    print("Run result")
+    print(header_line)
+
+    subtasks_data = dict(load_json(SUBTASKS_JSON))['subtasks']
+    solutions_data = dict(load_json(SOLUTIONS_JSON))
+    unmatched_verdicts = []
+    for solution_filename, solution_data in solutions_data.items():
+        command = [
+            'bash',
+            os.path.join(INTERNALS_DIR, 'compile_solution.sh'),
+            os.path.join(SOLUTION_DIR, solution_filename)
+        ]
+        ret = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).wait()
+        if ret != 0:
+            cprint(colors.FAIL, "{} does not compile".format(solution_filename))
+
+        for test_name in available_tests:
+            command = [
+                'bash',
+                os.path.join(INTERNALS_DIR, 'invoke_test.sh'),
+                tests_dir,
+                test_name,
+            ]
+            wait_process_success(subprocess.Popen(command))
+
+        total_points = 0
+        solution_summary_data = []
+        for subtask_index, (subtask, tests) in enumerate(subtasks_tests_dict.items()):
+            subtask_result = None
+            max_execution_time = None
+
+            for test in tests:
+                score = verdict = execution_time = None
+                try:
+                    with open(os.path.join(LOGS_DIR, "{}.score".format(test)), 'r') as sf:
+                        score = float(sf.readlines()[0].strip('\n'))
+                    with open(os.path.join(LOGS_DIR, "{}.verdict".format(test)), 'r') as vf:
+                        verdict = vf.readlines()[0].strip('\n')
+                    with open(os.path.join(LOGS_DIR, "{}.time".format(test)), 'r') as tf:
+                        execution_time = float(tf.readlines()[0].strip('\n'))
+                except FileNotFoundError:
+                    pass
+                else:
+                    if subtask_result is None or score < subtask_result[0]:
+                        subtask_result = (score, verdict, test)
+                    if max_execution_time is None or max_execution_time < execution_time:
+                        max_execution_time = execution_time
+
+            if subtask_result is not None:
+                subtask_score = subtask_result[0] * subtasks_data[subtask]['score']
+
+                short_verdict_color = "ok"
+                expected_verdict = solution_data.get("verdict", None)
+                if "except" in solution_data:
+                    expected_verdict = solution_data["except"].get(subtask, expected_verdict)
+                if is_verdict_expected(subtask_result[0], subtask_result[1], expected_verdict):
+                    short_verdict_color = "ok"
+                else:
+                    short_verdict_color = "fail"
+                    unmatched_verdicts.append((solution_filename, subtask, subtask_result[1], expected_verdict))
+
+                solution_summary_data.append(get_short_verdict(subtask_result[1]))
+                solution_summary_data.append(short_verdict_color)
+                solution_summary_data.append(str(max_execution_time))
+
+                total_points += subtask_score
+
+        command = [
+            'bash',
+            os.path.join(INTERNALS_DIR, 'solution_summary.sh'),
+            solution_filename,
+            '{:g}'.format(round(total_points, 2)),
+        ] + solution_summary_data
+        wait_process_success(subprocess.Popen(command))
+
+    if len(unmatched_verdicts) == 0:
+        cprint(colors.OK, "All verdict matches with solutions.json")
+    else:
+        cprint(colors.FAIL, "Found one or more subtasks mismatch with solutions.json")
+        for solution_filename, subtask, verdict, expected_verdict in unmatched_verdicts:
+            print("{:40}: got {:20}, expected '{}'".format("[{}] subtask '{}'".format(solution_filename, subtask), "'{}'".format(verdict), expected_verdict))
+
+    if missing_tests:
+        cprinterr(colors.WARN, "Missing {} {}!".format(len(missing_tests), "tests" if len(missing_tests) != 1 else "test"))
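
Both `invoke` and `invoke-all` reduce per-test results to a per-subtask summary in the same way: the subtask keeps the worst (minimum) test score, scaled by the subtask's full score, and reports the maximum running time over its tests. A standalone sketch of that reduction, with made-up scores and times standing in for the `<test>.score`, `<test>.verdict`, and `<test>.time` files read from `LOGS_DIR`:

```python
# Standalone illustration of the per-subtask reduction used above
# (hypothetical test names, scores, and times).
results = {
    "1-01": (1.0, "Correct", 0.12),
    "1-02": (0.0, "Wrong Answer", 0.34),
    "1-03": (1.0, "Correct", 0.05),
}
subtask_full_score = 30

# Keep the test with the lowest score; track the slowest test separately.
worst_score, worst_verdict, _ = min(results.values(), key=lambda r: r[0])
max_execution_time = max(time for _, _, time in results.values())

subtask_score = worst_score * subtask_full_score
print("verdict={}, score={:g}/{}, max time={:.2f}s".format(
    worst_verdict, subtask_score, subtask_full_score, max_execution_time))
```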
