diff --git a/.github/workflows/format-check.yml b/.github/workflows/format-check.yml index 4c4be1d5..eeff996c 100644 --- a/.github/workflows/format-check.yml +++ b/.github/workflows/format-check.yml @@ -23,10 +23,12 @@ jobs: - name: Install dependencies needed for checking run: | - sudo apt update && sudo apt install flake8 + sudo apt update + sudo apt install flake8 + pip install black pip install isort - name: Check format, compilation, and imports working-directory: ${{github.workspace}}/qlever-control run: | - for PY in $(find src test -name "*.py"); do printf "$PY ... "; flake8 $PY && python3 -m py_compile $PY && isort -c $PY && echo "OK"; done + for PY in $(find src test -name "*.py"); do printf "$PY ... "; black --check $PY && flake8 $PY && python3 -m py_compile $PY && isort -c $PY && echo "OK"; done diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..3994605a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.7.0 + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..613bb750 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,2 @@ +[lint] +select = ["E2", "E3", "E4", "E5", "E7", "E9", "F", "I"] diff --git a/src/qlever/__init__.py b/src/qlever/__init__.py index 17b17535..5c27be4b 100644 --- a/src/qlever/__init__.py +++ b/src/qlever/__init__.py @@ -13,8 +13,9 @@ def snake_to_camel(str): # Each module in `qlever/commands` corresponds to a command. The name # of the command is the base name of the module file. 
package_path = Path(__file__).parent -command_names = [Path(p).stem for p in package_path.glob("commands/*.py") - if p.name != "__init__.py"] +command_names = [ + Path(p).stem for p in package_path.glob("commands/*.py") if p.name != "__init__.py" +] # Dynamically load all the command classes and create an object for each. command_objects = {} @@ -24,8 +25,10 @@ def snake_to_camel(str): try: module = __import__(module_path, fromlist=[class_name]) except ImportError as e: - raise Exception(f"Could not import class {class_name} from module " - f"{module_path} for command {command_name}: {e}") + raise Exception( + f"Could not import class {class_name} from module " + f"{module_path} for command {command_name}: {e}" + ) # Create an object of the class and store it in the dictionary. For the # commands, take - instead of _. command_class = getattr(module, class_name) diff --git a/src/qlever/command.py b/src/qlever/command.py index 7ff73f78..580f4206 100644 --- a/src/qlever/command.py +++ b/src/qlever/command.py @@ -33,7 +33,6 @@ def description(self) -> str: @abstractmethod def should_have_qleverfile(self) -> bool: - """ Return `True` if the command should have a Qleverfile, `False` otherwise. If a command should have a Qleverfile, but none is @@ -43,7 +42,7 @@ def should_have_qleverfile(self) -> bool: pass @abstractmethod - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: """ Retun the arguments relevant for this command. This must be a subset of the names of `all_arguments` defined in `QleverConfig`. Only these @@ -81,6 +80,8 @@ def show(command_description: str, only_show: bool = False): log.info(colored(command_description, "blue")) log.info("") if only_show: - log.info("You called \"qlever ... --show\", therefore the command " - "is only shown, but not executed (omit the \"--show\" to " - "execute it)") + log.info( + 'You called "qlever ... 
--show", therefore the command ' + 'is only shown, but not executed (omit the "--show" to ' + "execute it)" + ) diff --git a/src/qlever/commands/add_text_index.py b/src/qlever/commands/add_text_index.py index 15fa647b..30560995 100644 --- a/src/qlever/commands/add_text_index.py +++ b/src/qlever/commands/add_text_index.py @@ -17,22 +17,29 @@ def __init__(self): pass def description(self) -> str: - return ("Add text index to an index built with `qlever index`") + return "Add text index to an index built with `qlever index`" def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name"], - "index": ["index_binary", "text_index", - "text_words_file", "text_docs_file"], - "runtime": ["system", "image", "index_container"]} + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "index": [ + "index_binary", + "text_index", + "text_words_file", + "text_docs_file", + ], + "runtime": ["system", "image", "index_container"], + } def additional_arguments(self, subparser) -> None: subparser.add_argument( - "--overwrite-existing", - action="store_true", - help="Overwrite existing text index files") + "--overwrite-existing", + action="store_true", + help="Overwrite existing text index files", + ) def execute(self, args) -> bool: # Check that there is actually something to add. @@ -42,24 +49,25 @@ def execute(self, args) -> bool: # Construct the command line. 
add_text_index_cmd = f"{args.index_binary} -A -i {args.name}" - if args.text_index in \ - ["from_text_records", "from_text_records_and_literals"]: - add_text_index_cmd += (f" -w {args.text_words_file}" - f" -d {args.text_docs_file}") - if args.text_index in \ - ["from_literals", "from_text_records_and_literals"]: + if args.text_index in ["from_text_records", "from_text_records_and_literals"]: + add_text_index_cmd += ( + f" -w {args.text_words_file}" f" -d {args.text_docs_file}" + ) + if args.text_index in ["from_literals", "from_text_records_and_literals"]: add_text_index_cmd += " --text-words-from-literals" add_text_index_cmd += f" | tee {args.name}.text-index-log.txt" # Run the command in a container (if so desired). if args.system in Containerize.supported_systems(): add_text_index_cmd = Containerize().containerize_command( - add_text_index_cmd, - args.system, "run --rm", - args.image, - args.index_container, - volumes=[("$(pwd)", "/index")], - working_directory="/index") + add_text_index_cmd, + args.system, + "run --rm", + args.image, + args.index_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + ) # Show the command line. self.show(add_text_index_cmd, only_show=args.show) @@ -71,16 +79,19 @@ def execute(self, args) -> bool: try: run_command(f"{args.index_binary} --help") except Exception as e: - log.error(f"Running \"{args.index_binary}\" failed ({e}), " - f"set `--index-binary` to a different binary or " - f"use `--container_system`") + log.error( + f'Running "{args.index_binary}" failed ({e}), ' + f"set `--index-binary` to a different binary or " + f"use `--container_system`" + ) # Check if text index files already exist. 
- existing_text_index_files = get_existing_index_files( - f"{args.name}.text.*") + existing_text_index_files = get_existing_index_files(f"{args.name}.text.*") if len(existing_text_index_files) > 0 and not args.overwrite_existing: - log.error("Text index files found, if you want to overwrite them, " - "use --overwrite-existing") + log.error( + "Text index files found, if you want to overwrite them, " + "use --overwrite-existing" + ) log.info("") log.info(f"Index files found: {existing_text_index_files}") return False @@ -89,7 +100,7 @@ def execute(self, args) -> bool: try: subprocess.run(add_text_index_cmd, shell=True, check=True) except Exception as e: - log.error(f"Running \"{add_text_index_cmd}\" failed ({e})") + log.error(f'Running "{add_text_index_cmd}" failed ({e})') return False return True diff --git a/src/qlever/commands/cache_stats.py b/src/qlever/commands/cache_stats.py index 28eb6e58..be26055a 100644 --- a/src/qlever/commands/cache_stats.py +++ b/src/qlever/commands/cache_stats.py @@ -17,43 +17,43 @@ def __init__(self): pass def description(self) -> str: - return ("Show how much of the cache is currently being used") + return "Show how much of the cache is currently being used" def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"server": ["host_name", "port"]} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--server-url", - help="URL of the QLever server, default is " - "localhost:{port}") - subparser.add_argument("--detailed", - action="store_true", - default=False, - help="Show detailed statistics and settings") + subparser.add_argument( + "--server-url", + help="URL of the QLever server, default is " "localhost:{port}", + ) + subparser.add_argument( + "--detailed", + action="store_true", + default=False, + help="Show detailed statistics and settings", + ) def execute(self, args) 
-> bool: # Construct the two curl commands. - server_url = (args.server_url if args.server_url - else f"localhost:{args.port}") - cache_stats_cmd = (f"curl -s {server_url} " - f"--data-urlencode \"cmd=cache-stats\"") - cache_settings_cmd = (f"curl -s {server_url} " - f"--data-urlencode \"cmd=get-settings\"") + server_url = args.server_url if args.server_url else f"localhost:{args.port}" + cache_stats_cmd = f"curl -s {server_url} " f'--data-urlencode "cmd=cache-stats"' + cache_settings_cmd = ( + f"curl -s {server_url} " f'--data-urlencode "cmd=get-settings"' + ) # Show them. - self.show("\n".join([cache_stats_cmd, cache_settings_cmd]), - only_show=args.show) + self.show("\n".join([cache_stats_cmd, cache_settings_cmd]), only_show=args.show) if args.show: return False # Execute them. try: cache_stats = subprocess.check_output(cache_stats_cmd, shell=True) - cache_settings = subprocess.check_output(cache_settings_cmd, - shell=True) + cache_settings = subprocess.check_output(cache_settings_cmd, shell=True) cache_stats_dict = json.loads(cache_stats) cache_settings_dict = json.loads(cache_settings) except Exception as e: @@ -64,8 +64,10 @@ def execute(self, args) -> bool: if not args.detailed: cache_size = cache_settings_dict["cache-max-size"] if not cache_size.endswith(" GB"): - log.error(f"Cache size {cache_size} is not in GB, " - f"QLever should return bytes instead") + log.error( + f"Cache size {cache_size} is not in GB, " + f"QLever should return bytes instead" + ) return False else: cache_size = float(cache_size[:-3]) @@ -76,15 +78,21 @@ def execute(self, args) -> bool: if cached_size == 0: log.info(f"Cache is empty, all {cache_size:.1f} GB available") else: - log.info(f"Pinned queries : " - f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB" - f" [{pinned_size / cache_size:5.1%}]") - log.info(f"Non-pinned queries : " - f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB" - f" [{non_pinned_size / cache_size:5.1%}]") - log.info(f"FREE : " - f"{free_size:5.1f} GB of 
{cache_size:5.1f} GB" - f" [{1 - cached_size / cache_size:5.1%}]") + log.info( + f"Pinned queries : " + f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB" + f" [{pinned_size / cache_size:5.1%}]" + ) + log.info( + f"Non-pinned queries : " + f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB" + f" [{non_pinned_size / cache_size:5.1%}]" + ) + log.info( + f"FREE : " + f"{free_size:5.1f} GB of {cache_size:5.1f} GB" + f" [{1 - cached_size / cache_size:5.1%}]" + ) return True # Complete version. @@ -96,6 +104,7 @@ def show_dict_as_table(key_value_pairs): if re.match(r"^\d+\.\d+$", value): value = "{:.2f}".format(float(value)) log.info(f"{key.ljust(max_key_len)} : {value}") + show_dict_as_table(cache_stats_dict.items()) log.info("") show_dict_as_table(cache_settings_dict.items()) diff --git a/src/qlever/commands/clear_cache.py b/src/qlever/commands/clear_cache.py index 448aeb39..a6110614 100644 --- a/src/qlever/commands/clear_cache.py +++ b/src/qlever/commands/clear_cache.py @@ -17,22 +17,25 @@ def __init__(self): pass def description(self) -> str: - return ("Clear the query processing cache") + return "Clear the query processing cache" def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"server": ["port", "access_token"]} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--server-url", - help="URL of the QLever server, default is " - "localhost:{port}") - subparser.add_argument("--complete", action="store_true", - default=False, - help="Clear the cache completely, including " - "the pinned queries") + subparser.add_argument( + "--server-url", + help="URL of the QLever server, default is " "localhost:{port}", + ) + subparser.add_argument( + "--complete", + action="store_true", + default=False, + help="Clear the cache completely, including " "the pinned queries", + ) def execute(self, args) -> bool: # 
Construct command line and show it. @@ -42,20 +45,21 @@ def execute(self, args) -> bool: else: clear_cache_cmd += f" localhost:{args.port}" cmd_val = "clear-cache-complete" if args.complete else "clear-cache" - clear_cache_cmd += f" --data-urlencode \"cmd={cmd_val}\"" + clear_cache_cmd += f' --data-urlencode "cmd={cmd_val}"' if args.complete: - clear_cache_cmd += (f" --data-urlencode access-token=" - f"\"{args.access_token}\"") + clear_cache_cmd += ( + f" --data-urlencode access-token=" f'"{args.access_token}"' + ) self.show(clear_cache_cmd, only_show=args.show) if args.show: return False # Execute the command. try: - clear_cache_cmd += " -w \" %{http_code}\"" - result = subprocess.run(clear_cache_cmd, shell=True, - capture_output=True, text=True, - check=True).stdout + clear_cache_cmd += ' -w " %{http_code}"' + result = subprocess.run( + clear_cache_cmd, shell=True, capture_output=True, text=True, check=True + ).stdout match = re.match(r"^(.*) (\d+)$", result, re.DOTALL) if not match: raise Exception(f"Unexpected output:\n{result}") diff --git a/src/qlever/commands/get_data.py b/src/qlever/commands/get_data.py index 4ae2bb7d..5131d857 100644 --- a/src/qlever/commands/get_data.py +++ b/src/qlever/commands/get_data.py @@ -21,7 +21,7 @@ def description(self) -> str: def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"data": ["name", "get_data_cmd"], "index": ["input_files"]} def additional_arguments(self, subparser) -> None: @@ -37,12 +37,11 @@ def execute(self, args) -> bool: try: run_command(args.get_data_cmd, show_output=True) except Exception as e: - log.error(f"Problem executing \"{args.get_data_cmd}\": {e}") + log.error(f'Problem executing "{args.get_data_cmd}": {e}') return False # Show the total file size in GB and return. 
patterns = shlex.split(args.input_files) total_file_size = get_total_file_size(patterns) - print(f"Download successful, total file size: " - f"{total_file_size:,} bytes") + print(f"Download successful, total file size: " f"{total_file_size:,} bytes") return True diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py index 33a76f34..2f7d44b2 100644 --- a/src/qlever/commands/index.py +++ b/src/qlever/commands/index.py @@ -6,8 +6,7 @@ from qlever.command import QleverCommand from qlever.containerize import Containerize from qlever.log import log -from qlever.util import (get_existing_index_files, get_total_file_size, - run_command) +from qlever.util import get_existing_index_files, get_total_file_size, run_command class IndexCommand(QleverCommand): @@ -19,41 +18,52 @@ def __init__(self): pass def description(self) -> str: - return ("Build the index for a given RDF dataset") + return "Build the index for a given RDF dataset" def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name", "format"], - "index": ["input_files", "cat_input_files", "settings_json", - "index_binary", - "only_pso_and_pos_permutations", "use_patterns", - "text_index", "stxxl_memory"], - "runtime": ["system", "image", "index_container"]} + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "format"], + "index": [ + "input_files", + "cat_input_files", + "settings_json", + "index_binary", + "only_pso_and_pos_permutations", + "use_patterns", + "text_index", + "stxxl_memory", + ], + "runtime": ["system", "image", "index_container"], + } def additional_arguments(self, subparser) -> None: subparser.add_argument( - "--overwrite-existing", action="store_true", - default=False, - help="Overwrite an existing index, think twice before using.") + "--overwrite-existing", + action="store_true", + default=False, + help="Overwrite an existing index, think twice 
before using.", + ) def execute(self, args) -> bool: # Construct the command line. - index_cmd = (f"{args.cat_input_files} | {args.index_binary}" - f" -F {args.format} -f -" - f" -i {args.name}" - f" -s {args.name}.settings.json") + index_cmd = ( + f"{args.cat_input_files} | {args.index_binary}" + f" -F {args.format} -f -" + f" -i {args.name}" + f" -s {args.name}.settings.json" + ) if args.only_pso_and_pos_permutations: index_cmd += " --only-pso-and-pos-permutations --no-patterns" if not args.use_patterns: index_cmd += " --no-patterns" - if args.text_index in \ - ["from_text_records", "from_text_records_and_literals"]: - index_cmd += (f" -w {args.name}.wordsfile.tsv" - f" -d {args.name}.docsfile.tsv") - if args.text_index in \ - ["from_literals", "from_text_records_and_literals"]: + if args.text_index in ["from_text_records", "from_text_records_and_literals"]: + index_cmd += ( + f" -w {args.name}.wordsfile.tsv" f" -d {args.name}.docsfile.tsv" + ) + if args.text_index in ["from_literals", "from_text_records_and_literals"]: index_cmd += " --text-words-from-literals" if args.stxxl_memory: index_cmd += f" --stxxl-memory {args.stxxl_memory}" @@ -61,24 +71,26 @@ def execute(self, args) -> bool: # If the total file size is larger than 10 GB, set ulimit (such that a # large number of open files is allowed). - total_file_size = get_total_file_size( - shlex.split(args.input_files)) + total_file_size = get_total_file_size(shlex.split(args.input_files)) if total_file_size > 1e10: index_cmd = f"ulimit -Sn 1048576; {index_cmd}" # Run the command in a container (if so desired). 
if args.system in Containerize.supported_systems(): index_cmd = Containerize().containerize_command( - index_cmd, - args.system, "run --rm", - args.image, - args.index_container, - volumes=[("$(pwd)", "/index")], - working_directory="/index") + index_cmd, + args.system, + "run --rm", + args.image, + args.index_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + ) # Command for writing the settings JSON to a file. - settings_json_cmd = (f"echo {shlex.quote(args.settings_json)} " - f"> {args.name}.settings.json") + settings_json_cmd = ( + f"echo {shlex.quote(args.settings_json)} " f"> {args.name}.settings.json" + ) # Show the command line. self.show(f"{settings_json_cmd}\n{index_cmd}", only_show=args.show) @@ -90,9 +102,11 @@ def execute(self, args) -> bool: try: run_command(f"{args.index_binary} --help") except Exception as e: - log.error(f"Running \"{args.index_binary}\" failed, " - f"set `--index-binary` to a different binary or " - f"set `--system to a container system`") + log.error( + f'Running "{args.index_binary}" failed, ' + f"set `--index-binary` to a different binary or " + f"set `--system to a container system`" + ) log.info("") log.info(f"The error message was: {e}") return False @@ -100,25 +114,27 @@ def execute(self, args) -> bool: # Check if all of the input files exist. for pattern in shlex.split(args.input_files): if len(glob.glob(pattern)) == 0: - log.error(f"No file matching \"{pattern}\" found") + log.error(f'No file matching "{pattern}" found') log.info("") - log.info("Did you call `qlever get-data`? If you did, check " - "GET_DATA_CMD and INPUT_FILES in the QLeverfile") + log.info( + "Did you call `qlever get-data`? If you did, check " + "GET_DATA_CMD and INPUT_FILES in the QLeverfile" + ) return False # Check if index files (name.index.*) already exist. 
existing_index_files = get_existing_index_files(args.name) if len(existing_index_files) > 0 and not args.overwrite_existing: log.error( - f"Index files for basename \"{args.name}\" found, if you " - f"want to overwrite them, use --overwrite-existing") + f'Index files for basename "{args.name}" found, if you ' + f"want to overwrite them, use --overwrite-existing" + ) log.info("") log.info(f"Index files found: {existing_index_files}") return False # Remove already existing container. - if args.system in Containerize.supported_systems() \ - and args.overwrite_existing: + if args.system in Containerize.supported_systems() and args.overwrite_existing: if Containerize.is_running(args.system, args.index_container): log.info("Another index process is running, trying to stop it ...") log.info("") diff --git a/src/qlever/commands/index_stats.py b/src/qlever/commands/index_stats.py index 975576ac..2b795cf0 100644 --- a/src/qlever/commands/index_stats.py +++ b/src/qlever/commands/index_stats.py @@ -18,32 +18,45 @@ def __init__(self): pass def description(self) -> str: - return ("Breakdown of the time and space used for the index build") + return "Breakdown of the time and space used for the index build" def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"data": ["name"]} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--only-time", action="store_true", - default=False, - help="Show only the time used") - subparser.add_argument("--only-space", action="store_true", - default=False, - help="Show only the space used") - subparser.add_argument("--ignore-text-index", action="store_true", - default=False, - help="Ignore the text index") - subparser.add_argument("--time-unit", - choices=["s", "min", "h", "auto"], - default="auto", - help="The time unit") - subparser.add_argument("--size-unit", - choices=["B", 
"MB", "GB", "TB", "auto"], - default="auto", - help="The size unit") + subparser.add_argument( + "--only-time", + action="store_true", + default=False, + help="Show only the time used", + ) + subparser.add_argument( + "--only-space", + action="store_true", + default=False, + help="Show only the space used", + ) + subparser.add_argument( + "--ignore-text-index", + action="store_true", + default=False, + help="Ignore the text index", + ) + subparser.add_argument( + "--time-unit", + choices=["s", "min", "h", "auto"], + default="auto", + help="The time unit", + ) + subparser.add_argument( + "--size-unit", + choices=["B", "MB", "GB", "TB", "auto"], + default="auto", + help="The size unit", + ) def execute_time(self, args, log_file_name) -> bool: """ @@ -65,8 +78,9 @@ def execute_time(self, args, log_file_name) -> bool: with open(text_log_file_name, "r") as text_log_file: lines.extend(text_log_file.readlines()) except Exception as e: - log.error(f"Problem reading text index log file " - f"{text_log_file_name}: {e}") + log.error( + f"Problem reading text index log file " f"{text_log_file_name}: {e}" + ) return False # Helper function that finds the next line matching the given `regex`, @@ -94,13 +108,19 @@ def find_next_line(regex, update_current_line=True): regex_match = re.search(regex, line) if regex_match: try: - return datetime.strptime( + return ( + datetime.strptime( re.match(timestamp_regex, line).group(), - timestamp_format), regex_match + timestamp_format, + ), + regex_match, + ) except Exception as e: - log.error(f"Could not parse timestamp of form " - f"\"{timestamp_regex}\" from line " - f" \"{line.rstrip()}\" ({e})") + log.error( + f"Could not parse timestamp of form " + f'"{timestamp_regex}" from line ' + f' "{line.rstrip()}" ({e})' + ) # If we get here, we did not find a matching line. 
if not update_current_line: current_line = current_line_backup @@ -119,26 +139,32 @@ def find_next_line(regex, update_current_line=True): # file (old format: "Creating a pair" + names of permutations in # line "Writing meta data for ..."; new format: name of # permutations already in line "Creating permutations ..."). - perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", - update_current_line=False) + perm_begin, _ = find_next_line( + r"INFO:\s*Creating a pair", update_current_line=False + ) if perm_begin is None: perm_begin, perm_info = find_next_line( r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)", - update_current_line=False) + update_current_line=False, + ) else: _, perm_info = find_next_line( r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)", - update_current_line=False) + update_current_line=False, + ) if perm_info is None: break perm_begin_and_info.append((perm_begin, perm_info)) - convert_end = (perm_begin_and_info[0][0] if - len(perm_begin_and_info) > 0 else None) + convert_end = ( + perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None + ) normal_end, _ = find_next_line(r"INFO:\s*Index build completed") - text_begin, _ = find_next_line(r"INFO:\s*Adding text index", - update_current_line=False) - text_end, _ = find_next_line(r"INFO:\s*Text index build comp", - update_current_line=False) + text_begin, _ = find_next_line( + r"INFO:\s*Adding text index", update_current_line=False + ) + text_end, _ = find_next_line( + r"INFO:\s*Text index build comp", update_current_line=False + ) if args.ignore_text_index: text_begin = text_end = None @@ -147,9 +173,11 @@ def find_next_line(regex, update_current_line=True): log.error("Missing line that index build has started") return False if overall_begin and not merge_begin: - log.error("According to the log file, the index build " - "has started, but is still in its first " - "phase (parsing the input)") + log.error( + "According to the log file, the index build " + "has started, but is still 
in its first " + "phase (parsing the input)" + ) return False # Helper function that shows the duration for a phase (if the start and @@ -187,22 +215,24 @@ def show_duration(heading, start_end_pairs): show_duration("Convert to global IDs", [(convert_begin, convert_end)]) for i in range(len(perm_begin_and_info)): perm_begin, perm_info = perm_begin_and_info[i] - perm_end = perm_begin_and_info[i + 1][0] if i + 1 < len( - perm_begin_and_info) else normal_end - perm_info_text = (perm_info.group(1).replace(" and ", " & ") - if perm_info else f"#{i + 1}") - show_duration(f"Permutation {perm_info_text}", - [(perm_begin, perm_end)]) + perm_end = ( + perm_begin_and_info[i + 1][0] + if i + 1 < len(perm_begin_and_info) + else normal_end + ) + perm_info_text = ( + perm_info.group(1).replace(" and ", " & ") if perm_info else f"#{i + 1}" + ) + show_duration(f"Permutation {perm_info_text}", [(perm_begin, perm_end)]) show_duration("Text index", [(text_begin, text_end)]) if text_begin and text_end: log.info("") - show_duration("TOTAL time", - [(overall_begin, normal_end), - (text_begin, text_end)]) + show_duration( + "TOTAL time", [(overall_begin, normal_end), (text_begin, text_end)] + ) elif normal_end: log.info("") - show_duration("TOTAL time", - [(overall_begin, normal_end)]) + show_duration("TOTAL time", [(overall_begin, normal_end)]) return True def execute_space(self, args) -> bool: @@ -257,9 +287,12 @@ def execute(self, args) -> bool: # The "time" part of the command. 
if not args.only_space: log_file_name = f"{args.name}.index-log.txt" - self.show(f"Breakdown of the time used for " - f"building the index, based on the timestamps for key " - f"lines in \"{log_file_name}\"", only_show=args.show) + self.show( + f"Breakdown of the time used for " + f"building the index, based on the timestamps for key " + f'lines in "{log_file_name}"', + only_show=args.show, + ) if not args.show: ret_value &= self.execute_time(args, log_file_name) if not args.only_time: @@ -267,8 +300,10 @@ def execute(self, args) -> bool: # The "space" part of the command. if not args.only_time: - self.show("Breakdown of the space used for building the index", - only_show=args.show) + self.show( + "Breakdown of the space used for building the index", + only_show=args.show, + ) if not args.show: ret_value &= self.execute_space(args) diff --git a/src/qlever/commands/log.py b/src/qlever/commands/log.py index 3b2599b4..f484f96c 100644 --- a/src/qlever/commands/log.py +++ b/src/qlever/commands/log.py @@ -15,24 +15,33 @@ def __init__(self): pass def description(self) -> str: - return ("Show the last lines of the server log file and follow it") + return "Show the last lines of the server log file and follow it" def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"data": ["name"]} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--tail-num-lines", type=int, default=20, - help="Show this many of the last lines of the " - "log file") - subparser.add_argument("--from-beginning", action="store_true", - default=False, - help="Show all lines of the log file") - subparser.add_argument("--no-follow", action="store_true", - default=False, - help="Don't follow the log file") + subparser.add_argument( + "--tail-num-lines", + type=int, + default=20, + help="Show this many of the last lines of the " "log file", + ) 
+ subparser.add_argument( + "--from-beginning", + action="store_true", + default=False, + help="Show all lines of the log file", + ) + subparser.add_argument( + "--no-follow", + action="store_true", + default=False, + help="Don't follow the log file", + ) def execute(self, args) -> bool: # Construct the command and show it. @@ -50,7 +59,9 @@ def execute(self, args) -> bool: return False # Execute the command. - log.info(f"Follow log file {log_file}, press Ctrl-C to stop" - f" following (will not stop the server)") + log.info( + f"Follow log file {log_file}, press Ctrl-C to stop" + f" following (will not stop the server)" + ) log.info("") subprocess.run(log_cmd, shell=True) diff --git a/src/qlever/commands/query.py b/src/qlever/commands/query.py index 163c0c8a..65fd79da 100644 --- a/src/qlever/commands/query.py +++ b/src/qlever/commands/query.py @@ -18,39 +18,54 @@ def __init__(self): pass def description(self) -> str: - return ("Send a query to a SPARQL endpoint") + return "Send a query to a SPARQL endpoint" def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"server": ["port"]} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--query", type=str, - default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", - help="SPARQL query to send") - subparser.add_argument("--sparql-endpoint", type=str, - help="URL of the SPARQL endpoint") - subparser.add_argument("--accept", type=str, - choices=["text/tab-separated-values", - "text/csv", - "application/sparql-results+json", - "application/sparql-results+xml", - "application/qlever-results+json"], - default="text/tab-separated-values", - help="Accept header for the SPARQL query") - subparser.add_argument("--no-time", action="store_true", - default=False, - help="Do not print the (end-to-end) time taken") + subparser.add_argument( + "--query", + type=str, + 
default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", + help="SPARQL query to send", + ) + subparser.add_argument( + "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint" + ) + subparser.add_argument( + "--accept", + type=str, + choices=[ + "text/tab-separated-values", + "text/csv", + "application/sparql-results+json", + "application/sparql-results+xml", + "application/qlever-results+json", + ], + default="text/tab-separated-values", + help="Accept header for the SPARQL query", + ) + subparser.add_argument( + "--no-time", + action="store_true", + default=False, + help="Do not print the (end-to-end) time taken", + ) def execute(self, args) -> bool: # Show what the command will do. - sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint - else f"localhost:{args.port}") - curl_cmd = (f"curl -s {sparql_endpoint}" - f" -H \"Accept: {args.accept}\"" - f" --data-urlencode query={shlex.quote(args.query)}") + sparql_endpoint = ( + args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}" + ) + curl_cmd = ( + f"curl -s {sparql_endpoint}" + f' -H "Accept: {args.accept}"' + f" --data-urlencode query={shlex.quote(args.query)}" + ) self.show(curl_cmd, only_show=args.show) if args.show: return False @@ -62,8 +77,7 @@ def execute(self, args) -> bool: time_msecs = round(1000 * (time.time() - start_time)) if not args.no_time and args.log_level != "NO_LOG": log.info("") - log.info(f"Query processing time (end-to-end):" - f" {time_msecs:,d} ms") + log.info(f"Query processing time (end-to-end):" f" {time_msecs:,d} ms") except Exception as e: if args.log_level == "DEBUG": traceback.print_exc() diff --git a/src/qlever/commands/setup_config.py b/src/qlever/commands/setup_config.py index c1140c1b..dabd6546 100644 --- a/src/qlever/commands/setup_config.py +++ b/src/qlever/commands/setup_config.py @@ -15,9 +15,9 @@ class SetupConfigCommand(QleverCommand): def __init__(self): self.qleverfiles_path = Path(__file__).parent.parent / "Qleverfiles" - 
self.qleverfile_names = \ - [p.name.split(".")[1] - for p in self.qleverfiles_path.glob("Qleverfile.*")] + self.qleverfile_names = [ + p.name.split(".")[1] for p in self.qleverfiles_path.glob("Qleverfile.*") + ] def description(self) -> str: return "Get a pre-configured Qleverfile" @@ -25,23 +25,25 @@ def description(self) -> str: def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {} def additional_arguments(self, subparser) -> None: subparser.add_argument( - "config_name", type=str, - choices=self.qleverfile_names, - help="The name of the pre-configured Qleverfile to create") + "config_name", + type=str, + choices=self.qleverfile_names, + help="The name of the pre-configured Qleverfile to create", + ) def execute(self, args) -> bool: # Construct the command line and show it. - qleverfile_path = (self.qleverfiles_path - / f"Qleverfile.{args.config_name}") + qleverfile_path = self.qleverfiles_path / f"Qleverfile.{args.config_name}" setup_config_cmd = ( - f"cat {qleverfile_path}" - f" | sed -E 's/(^ACCESS_TOKEN.*)/\\1_{get_random_string(12)}/'" - f"> Qleverfile") + f"cat {qleverfile_path}" + f" | sed -E 's/(^ACCESS_TOKEN.*)/\\1_{get_random_string(12)}/'" + f"> Qleverfile" + ) self.show(setup_config_cmd, only_show=args.show) if args.show: return False @@ -51,23 +53,33 @@ def execute(self, args) -> bool: if qleverfile_path.exists(): log.error("`Qleverfile` already exists in current directory") log.info("") - log.info("If you want to create a new Qleverfile using " - "`qlever setup-config`, delete the existing Qleverfile " - "first") + log.info( + "If you want to create a new Qleverfile using " + "`qlever setup-config`, delete the existing Qleverfile " + "first" + ) return False # Copy the Qleverfile to the current directory. 
try: - subprocess.run(setup_config_cmd, shell=True, check=True, - stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL) + subprocess.run( + setup_config_cmd, + shell=True, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) except Exception as e: - log.error(f"Could not copy \"{qleverfile_path}\"" - f" to current directory: {e}") + log.error( + f'Could not copy "{qleverfile_path}"' f" to current directory: {e}" + ) return False # If we get here, everything went well. - log.info(f"Created Qleverfile for config \"{args.config_name}\"" - f" in current directory") + log.info( + f'Created Qleverfile for config "{args.config_name}"' + f" in current directory" + ) return True # if config_name == "default": diff --git a/src/qlever/commands/start.py b/src/qlever/commands/start.py index e51717b5..7f7cf11e 100644 --- a/src/qlever/commands/start.py +++ b/src/qlever/commands/start.py @@ -22,22 +22,35 @@ def __init__(self): pass def description(self) -> str: - return ("Start the QLever server (requires that you have built " - "an index with `qlever index` before)") + return ( + "Start the QLever server (requires that you have built " + "an index with `qlever index` before)" + ) def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name", "description", "text_description"], - "server": ["server_binary", "host_name", "port", - "access_token", "memory_for_queries", - "cache_max_size", "cache_max_size_single_entry", - "cache_max_num_entries", "num_threads", - "timeout", "only_pso_and_pos_permutations", - "use_patterns", "use_text_index", - "warmup_cmd"], - "runtime": ["system", "image", "server_container"]} + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name", "description", "text_description"], + "server": [ + "server_binary", + "host_name", + "port", + "access_token", + "memory_for_queries", + "cache_max_size", + 
"cache_max_size_single_entry", + "cache_max_num_entries", + "num_threads", + "timeout", + "only_pso_and_pos_permutations", + "use_patterns", + "use_text_index", + "warmup_cmd", + ], + "runtime": ["system", "image", "server_container"], + } def additional_arguments(self, subparser) -> None: # subparser.add_argument("--kill-existing-with-same-name", @@ -46,16 +59,20 @@ def additional_arguments(self, subparser) -> None: # help="If a QLever server is already running " # "with the same name, kill it before " # "starting a new server") - subparser.add_argument("--kill-existing-with-same-port", - action="store_true", - default=False, - help="If a QLever server is already running " - "on the same port, kill it before " - "starting a new server") - subparser.add_argument("--no-warmup", - action="store_true", - default=False, - help="Do not execute the warmup command") + subparser.add_argument( + "--kill-existing-with-same-port", + action="store_true", + default=False, + help="If a QLever server is already running " + "on the same port, kill it before " + "starting a new server", + ) + subparser.add_argument( + "--no-warmup", + action="store_true", + default=False, + help="Do not execute the warmup command", + ) def execute(self, args) -> bool: # Kill existing server with the same name if so desired. @@ -76,14 +93,16 @@ def execute(self, args) -> bool: log.info("") # Construct the command line based on the config file. 
- start_cmd = (f"{args.server_binary}" - f" -i {args.name}" - f" -j {args.num_threads}" - f" -p {args.port}" - f" -m {args.memory_for_queries}" - f" -c {args.cache_max_size}" - f" -e {args.cache_max_size_single_entry}" - f" -k {args.cache_max_num_entries}") + start_cmd = ( + f"{args.server_binary}" + f" -i {args.name}" + f" -j {args.num_threads}" + f" -p {args.port}" + f" -m {args.memory_for_queries}" + f" -c {args.cache_max_size}" + f" -e {args.cache_max_size_single_entry}" + f" -k {args.cache_max_num_entries}" + ) if args.timeout: start_cmd += f" -s {args.timeout}" if args.access_token: @@ -102,13 +121,15 @@ def execute(self, args) -> bool: if not args.server_container: args.server_container = f"qlever.server.{args.name}" start_cmd = Containerize().containerize_command( - start_cmd, - args.system, "run -d --restart=unless-stopped", - args.image, - args.server_container, - volumes=[("$(pwd)", "/index")], - ports=[(args.port, args.port)], - working_directory="/index") + start_cmd, + args.system, + "run -d --restart=unless-stopped", + args.image, + args.server_container, + volumes=[("$(pwd)", "/index")], + ports=[(args.port, args.port)], + working_directory="/index", + ) else: start_cmd = f"nohup {start_cmd} &" @@ -122,9 +143,11 @@ def execute(self, args) -> bool: try: run_command(f"{args.server_binary} --help") except Exception as e: - log.error(f"Running \"{args.server_binary}\" failed, " - f"set `--server-binary` to a different binary or " - f"set `--system to a container system`") + log.error( + f'Running "{args.server_binary}" failed, ' + f"set `--server-binary` to a different binary or " + f"set `--system to a container system`" + ) log.info("") log.info(f"The error message was: {e}") return False @@ -134,9 +157,11 @@ def execute(self, args) -> bool: if is_qlever_server_alive(port): log.error(f"QLever server already running on port {port}") log.info("") - log.info("To kill the existing server, use `qlever stop` " - "or `qlever start` with option " - 
"--kill-existing-with-same-port`") + log.info( + "To kill the existing server, use `qlever stop` " + "or `qlever start` with option " + "--kill-existing-with-same-port`" + ) # Show output of status command. args.cmdline_regex = f"^ServerMain.* -p *{port}" @@ -146,8 +171,10 @@ def execute(self, args) -> bool: return False # Remove already existing container. - if args.system in Containerize.supported_systems() \ - and args.kill_existing_with_same_port: + if ( + args.system in Containerize.supported_systems() + and args.kill_existing_with_same_port + ): try: run_command(f"{args.system} rm -f {args.server_container}") except Exception as e: @@ -172,8 +199,10 @@ def execute(self, args) -> bool: # Tail the server log until the server is ready (note that the `exec` # is important to make sure that the tail process is killed and not # just the bash process). - log.info(f"Follow {args.name}.server-log.txt until the server is ready" - f" (Ctrl-C stops following the log, but not the server)") + log.info( + f"Follow {args.name}.server-log.txt until the server is ready" + f" (Ctrl-C stops following the log, but not the server)" + ) log.info("") tail_cmd = f"exec tail -f {args.name}.server-log.txt" tail_proc = subprocess.Popen(tail_cmd, shell=True) @@ -181,12 +210,14 @@ def execute(self, args) -> bool: time.sleep(1) # Set the access token if specified. 
- access_arg = f"--data-urlencode \"access-token={args.access_token}\"" + access_arg = f'--data-urlencode "access-token={args.access_token}"' if args.description: desc = args.description - curl_cmd = (f"curl -Gs http://localhost:{port}/api" - f" --data-urlencode \"index-description={desc}\"" - f" {access_arg} > /dev/null") + curl_cmd = ( + f"curl -Gs http://localhost:{port}/api" + f' --data-urlencode "index-description={desc}"' + f" {access_arg} > /dev/null" + ) log.debug(curl_cmd) try: run_command(curl_cmd) @@ -194,9 +225,11 @@ def execute(self, args) -> bool: log.error(f"Setting the index description failed ({e})") if args.text_description: text_desc = args.text_description - curl_cmd = (f"curl -Gs http://localhost:{port}/api" - f" --data-urlencode \"text-description={text_desc}\"" - f" {access_arg} > /dev/null") + curl_cmd = ( + f"curl -Gs http://localhost:{port}/api" + f' --data-urlencode "text-description={text_desc}"' + f" {access_arg} > /dev/null" + ) log.debug(curl_cmd) try: run_command(curl_cmd) diff --git a/src/qlever/commands/status.py b/src/qlever/commands/status.py index 5d066660..f329610a 100644 --- a/src/qlever/commands/status.py +++ b/src/qlever/commands/status.py @@ -15,25 +15,29 @@ def __init__(self): pass def description(self) -> str: - return ("Show QLever processes running on this machine") + return "Show QLever processes running on this machine" def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--cmdline-regex", - default="^(ServerMain|IndexBuilderMain)", - help="Show only processes where the command " - "line matches this regex") + subparser.add_argument( + "--cmdline-regex", + default="^(ServerMain|IndexBuilderMain)", + help="Show only processes where the command " "line matches this regex", + ) def execute(self, args) 
-> bool:
         # Show action description.
-        self.show(f"Show all processes on this machine where "
-                  f"the command line matches {args.cmdline_regex}"
-                  f" using Python's psutil library", only_show=args.show)
+        self.show(
+            f"Show all processes on this machine where "
+            f"the command line matches {args.cmdline_regex}"
+            f" using Python's psutil library",
+            only_show=args.show,
+        )
         if args.show:
             return False

@@ -41,8 +45,9 @@ def execute(self, args) -> bool:
         num_processes_found = 0
         for proc in psutil.process_iter():
             show_heading = num_processes_found == 0
-            process_shown = show_process_info(proc, args.cmdline_regex,
-                                              show_heading=show_heading)
+            process_shown = show_process_info(
+                proc, args.cmdline_regex, show_heading=show_heading
+            )
             if process_shown:
                 num_processes_found += 1
         if num_processes_found == 0:
diff --git a/src/qlever/commands/stop.py b/src/qlever/commands/stop.py
index f2f8e80a..6f032c29 100644
--- a/src/qlever/commands/stop.py
+++ b/src/qlever/commands/stop.py
@@ -20,33 +20,35 @@ def __init__(self):
         pass

     def description(self) -> str:
-        return ("Stop QLever server for a given datasedataset or port")
+        return "Stop QLever server for a given dataset or port"

     def should_have_qleverfile(self) -> bool:
         return True

-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
-        return {"data": ["name"],
-                "server": ["port"],
-                "runtime": ["server_container"]}
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {"data": ["name"], "server": ["port"], "runtime": ["server_container"]}

     def additional_arguments(self, subparser) -> None:
-        subparser.add_argument("--cmdline-regex",
-                               default="ServerMain.* -i [^ ]*%%NAME%%",
-                               help="Show only processes where the command "
-                                    "line matches this regex")
-        subparser.add_argument("--no-containers", action="store_true",
-                               default=False,
-                               help="Do not look for containers, only for "
-                                    "native processes")
+        subparser.add_argument(
+            "--cmdline-regex",
+            default="ServerMain.* -i [^ ]*%%NAME%%",
+ help="Show only processes where the command " "line matches this regex", + ) + subparser.add_argument( + "--no-containers", + action="store_true", + default=False, + help="Do not look for containers, only for " "native processes", + ) def execute(self, args) -> bool: # Show action description. cmdline_regex = args.cmdline_regex.replace("%%NAME%%", args.name) - description = f"Checking for processes matching \"{cmdline_regex}\"" + description = f'Checking for processes matching "{cmdline_regex}"' if not args.no_containers: - description += (f" and for Docker container with name " - f"\"{args.server_container}\"") + description += ( + f" and for Docker container with name " f'"{args.server_container}"' + ) self.show(description, only_show=args.show) if args.show: return False @@ -56,10 +58,13 @@ def execute(self, args) -> bool: if not args.no_containers: for container_system in Containerize.supported_systems(): if Containerize.stop_and_remove_container( - container_system, args.server_container): - log.info(f"{container_system.capitalize()} container with " - f"name \"{args.server_container}\" stopped " - f" and removed") + container_system, args.server_container + ): + log.info( + f"{container_system.capitalize()} container with " + f'name "{args.server_container}" stopped ' + f" and removed" + ) return True # Check if there is a process running on the server port using psutil. 
@@ -69,30 +74,37 @@ def execute(self, args) -> bool: for proc in psutil.process_iter(): try: pinfo = proc.as_dict( - attrs=['pid', 'username', 'create_time', - 'memory_info', 'cmdline']) - cmdline = " ".join(pinfo['cmdline']) + attrs=["pid", "username", "create_time", "memory_info", "cmdline"] + ) + cmdline = " ".join(pinfo["cmdline"]) except Exception as e: log.debug(f"Error getting process info: {e}") if re.search(cmdline_regex, cmdline): - log.info(f"Found process {pinfo['pid']} from user " - f"{pinfo['username']} with command line: {cmdline}") + log.info( + f"Found process {pinfo['pid']} from user " + f"{pinfo['username']} with command line: {cmdline}" + ) log.info("") try: proc.kill() log.info(f"Killed process {pinfo['pid']}") except Exception as e: - log.error(f"Could not kill process with PID " - f"{pinfo['pid']} ({e}) ... try to kill it " - f"manually") + log.error( + f"Could not kill process with PID " + f"{pinfo['pid']} ({e}) ... try to kill it " + f"manually" + ) log.info("") show_process_info(proc, "", show_heading=True) return False return True # No matching process found. - message = "No matching process found" if args.no_containers else \ - "No matching process or container found" + message = ( + "No matching process found" + if args.no_containers + else "No matching process or container found" + ) log.error(message) # Show output of status command. 
diff --git a/src/qlever/commands/ui.py b/src/qlever/commands/ui.py index 3de7c177..02e61be7 100644 --- a/src/qlever/commands/ui.py +++ b/src/qlever/commands/ui.py @@ -17,16 +17,17 @@ def __init__(self): pass def description(self) -> str: - return ("Launch the QLever UI web application") + return "Launch the QLever UI web application" def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"data": ["name"], - "server": ["host_name", "port"], - "ui": ["ui_port", "ui_config", - "ui_system", "ui_image", "ui_container"]} + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port"], + "ui": ["ui_port", "ui_config", "ui_system", "ui_image", "ui_container"], + } def additional_arguments(self, subparser) -> None: pass @@ -36,27 +37,34 @@ def execute(self, args) -> bool: server_url = f"http://{args.host_name}:{args.port}" ui_url = f"http://{args.host_name}:{args.ui_port}" pull_cmd = f"{args.ui_system} pull -q {args.ui_image}" - run_cmd = f"{args.ui_system} run -d " \ - f"--publish {args.ui_port}:7000 " \ - f"--name {args.ui_container} " \ - f"{args.ui_image}" - exec_cmd = f"{args.ui_system} exec -it " \ - f"{args.ui_container} " \ - f"bash -c \"python manage.py configure " \ - f"{args.ui_config} {server_url}\"" - self.show("\n".join(["Stop running containers", - pull_cmd, run_cmd, exec_cmd]), only_show=args.show) + run_cmd = ( + f"{args.ui_system} run -d " + f"--publish {args.ui_port}:7000 " + f"--name {args.ui_container} " + f"{args.ui_image}" + ) + exec_cmd = ( + f"{args.ui_system} exec -it " + f"{args.ui_container} " + f'bash -c "python manage.py configure ' + f'{args.ui_config} {server_url}"' + ) + self.show( + "\n".join(["Stop running containers", pull_cmd, run_cmd, exec_cmd]), + only_show=args.show, + ) if args.show: return False # Stop running containers. 
for container_system in Containerize.supported_systems(): - Containerize.stop_and_remove_container( - container_system, args.ui_container) + Containerize.stop_and_remove_container(container_system, args.ui_container) # Check if the UI port is already being used. if is_port_used(args.ui_port): - log.warning(f"It looks like the specified port for the UI ({args.ui_port}) is already in use. You can set another port in the Qleverfile in the [ui] section with the UI_PORT variable.") + log.warning( + f"It looks like the specified port for the UI ({args.ui_port}) is already in use. You can set another port in the Qleverfile in the [ui] section with the UI_PORT variable." + ) # Try to start the QLever UI. try: @@ -68,7 +76,9 @@ def execute(self, args) -> bool: return False # Success. - log.info(f"The QLever UI should now be up at {ui_url} ..." - f"You can log in as QLever UI admin with username and " - f"password \"demo\"") + log.info( + f"The QLever UI should now be up at {ui_url} ..." + f"You can log in as QLever UI admin with username and " + f'password "demo"' + ) return True diff --git a/src/qlever/commands/warmup.py b/src/qlever/commands/warmup.py index 7a7041fb..1ec71302 100644 --- a/src/qlever/commands/warmup.py +++ b/src/qlever/commands/warmup.py @@ -15,12 +15,12 @@ def __init__(self): pass def description(self) -> str: - return ("Execute WARMUP_CMD") + return "Execute WARMUP_CMD" def should_have_qleverfile(self) -> bool: return True - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: return {"server": ["port", "warmup_cmd"]} def additional_arguments(self, subparser) -> None: diff --git a/src/qlever/config.py b/src/qlever/config.py index 35e0bb94..2266f2aa 100644 --- a/src/qlever/config.py +++ b/src/qlever/config.py @@ -38,9 +38,14 @@ class QleverConfig: autocompletion. 
""" - def add_subparser_for_command(self, subparsers, command_name, - command_object, all_qleverfile_args, - qleverfile_config=None): + def add_subparser_for_command( + self, + subparsers, + command_name, + command_object, + all_qleverfile_args, + qleverfile_config=None, + ): """ Add subparser for the given command. Take the arguments from `command_object.relevant_qleverfile_arguments()` and report an error if @@ -55,11 +60,15 @@ def add_subparser_for_command(self, subparsers, command_name, # `all_qleverfile_args`. def argument_error(prefix): log.info("") - log.error(f"{prefix} in `Qleverfile.all_arguments()` for command " - f"`{command_name}`") + log.error( + f"{prefix} in `Qleverfile.all_arguments()` for command " + f"`{command_name}`" + ) log.info("") - log.info(f"Value of `relevant_qleverfile_arguments` for " - f"command `{command_name}`:") + log.info( + f"Value of `relevant_qleverfile_arguments` for " + f"command `{command_name}`:" + ) log.info("") log.info(f"{arg_names}") log.info("") @@ -67,9 +76,9 @@ def argument_error(prefix): # Add the subparser. description = command_object.description() - subparser = subparsers.add_parser(command_name, - description=description, - help=description) + subparser = subparsers.add_parser( + command_name, description=description, help=description + ) # Add the arguments relevant for the command. 
for section in arg_names: @@ -77,8 +86,9 @@ def argument_error(prefix): argument_error(f"Section `{section}` not found") for arg_name in arg_names[section]: if arg_name not in all_qleverfile_args[section]: - argument_error(f"Argument `{arg_name}` of section " - f"`{section}` not found") + argument_error( + f"Argument `{arg_name}` of section " f"`{section}` not found" + ) args, kwargs = all_qleverfile_args[section][arg_name] kwargs_copy = kwargs.copy() # If `qleverfile_config` is given, add info about default @@ -86,26 +96,32 @@ def argument_error(prefix): if qleverfile_config is not None: default_value = kwargs.get("default", None) qleverfile_value = qleverfile_config.get( - section, arg_name, fallback=None) + section, arg_name, fallback=None + ) if qleverfile_value is not None: kwargs_copy["default"] = qleverfile_value kwargs_copy["required"] = False - kwargs_copy["help"] += (f" [default, from Qleverfile:" - f" {qleverfile_value}]") + kwargs_copy["help"] += ( + f" [default, from Qleverfile:" f" {qleverfile_value}]" + ) else: kwargs_copy["help"] += f" [default: {default_value}]" subparser.add_argument(*args, **kwargs_copy) # Additional arguments that are shared by all commands. command_object.additional_arguments(subparser) - subparser.add_argument("--show", action="store_true", - default=False, - help="Only show what would be executed" - ", but don't execute it") - subparser.add_argument("--log-level", - choices=log_levels.keys(), - default="INFO", - help="Set the log level") + subparser.add_argument( + "--show", + action="store_true", + default=False, + help="Only show what would be executed" ", but don't execute it", + ) + subparser.add_argument( + "--log-level", + choices=log_levels.keys(), + default="INFO", + help="Set the log level", + ) def parse_args(self): # Determine whether we are in autocomplete mode or not. 
@@ -116,11 +132,13 @@ def parse_args(self): argcomplete_enabled = os.environ.get("QLEVER_ARGCOMPLETE_ENABLED") if not argcomplete_enabled and not argcomplete_check_off: log.info("") - log.warn(f"To enable autocompletion, run the following command, " - f"and consider adding it to your `.bashrc` or `.zshrc`:" - f"\n\n" - f"eval \"$(register-python-argcomplete {script_name})\"" - f" && export QLEVER_ARGCOMPLETE_ENABLED=1") + log.warn( + f"To enable autocompletion, run the following command, " + f"and consider adding it to your `.bashrc` or `.zshrc`:" + f"\n\n" + f'eval "$(register-python-argcomplete {script_name})"' + f" && export QLEVER_ARGCOMPLETE_ENABLED=1" + ) log.info("") # Create a temporary parser only to parse the `--qleverfile` option, in @@ -129,8 +147,8 @@ def parse_args(self): # want the values from the Qleverfile to be shown in the help strings, # but only if this is actually necessary. def add_qleverfile_option(parser): - parser.add_argument("--qleverfile", "-q", type=str, - default="Qleverfile") + parser.add_argument("--qleverfile", "-q", type=str, default="Qleverfile") + qleverfile_parser = argparse.ArgumentParser(add_help=False) add_qleverfile_option(qleverfile_parser) qleverfile_parser.add_argument("command", type=str, nargs="?") @@ -144,14 +162,17 @@ def add_qleverfile_option(parser): # We need this again further down in the code, so remember it. qleverfile_path = Path(qleverfile_path_name) qleverfile_exists = qleverfile_path.is_file() - qleverfile_is_default = qleverfile_path_name \ - == qleverfile_parser.get_default("qleverfile") + qleverfile_is_default = qleverfile_path_name == qleverfile_parser.get_default( + "qleverfile" + ) # If a Qleverfile with a non-default name was specified, but it does # not exist, that's an error. 
if not qleverfile_exists and not qleverfile_is_default: - raise ConfigException(f"Qleverfile with non-default name " - f"`{qleverfile_path_name}` specified, " - f"but it does not exist") + raise ConfigException( + f"Qleverfile with non-default name " + f"`{qleverfile_path_name}` specified, " + f"but it does not exist" + ) # If it exists and we are not in the autocompletion mode, parse it. # # IMPORTANT: No need to parse the Qleverfile in autocompletion mode and @@ -164,8 +185,7 @@ def add_qleverfile_option(parser): qleverfile_config = Qleverfile.read(qleverfile_path) except Exception as e: log.info("") - log.error(f"Error parsing Qleverfile `{qleverfile_path}`" - f": {e}") + log.error(f"Error parsing Qleverfile `{qleverfile_path}`" f": {e}") log.info("") exit(1) else: @@ -176,19 +196,23 @@ def add_qleverfile_option(parser): # are defined in the modules in `qlever/commands`. In `__init__.py` # an object of each class is created and stored in `command_objects`. parser = argparse.ArgumentParser( - description=colored("This is the qlever command line tool, " - "it's all you need to work with QLever", - attrs=["bold"])) - parser.add_argument("--version", action="version", - version=f"%(prog)s {version('qlever')}") + description=colored( + "This is the qlever command line tool, " + "it's all you need to work with QLever", + attrs=["bold"], + ) + ) + parser.add_argument( + "--version", action="version", version=f"%(prog)s {version('qlever')}" + ) add_qleverfile_option(parser) - subparsers = parser.add_subparsers(dest='command') + subparsers = parser.add_subparsers(dest="command") subparsers.required = True all_args = Qleverfile.all_arguments() for command_name, command_object in command_objects.items(): self.add_subparser_for_command( - subparsers, command_name, command_object, - all_args, qleverfile_config) + subparsers, command_name, command_object, all_args, qleverfile_config + ) # Enable autocompletion for the commands and their options. 
# @@ -208,9 +232,11 @@ def add_qleverfile_option(parser): # issue a warning. if command_objects[args.command].should_have_qleverfile(): if not qleverfile_exists: - log.warning(f"Invoking command `{args.command}` without a " - "Qleverfile. You have to specify all required " - "arguments on the command line. This is possible, " - "but not recommended.") + log.warning( + f"Invoking command `{args.command}` without a " + "Qleverfile. You have to specify all required " + "arguments on the command line. This is possible, " + "but not recommended." + ) return args diff --git a/src/qlever/containerize.py b/src/qlever/containerize.py index 9441ee99..2cf18ade 100644 --- a/src/qlever/containerize.py +++ b/src/qlever/containerize.py @@ -31,12 +31,16 @@ def supported_systems() -> list[str]: return ["docker", "podman"] @staticmethod - def containerize_command(cmd: str, container_system: str, - run_subcommand: str, - image_name: str, container_name: str, - volumes: list[tuple[str, str]] = [], - ports: list[tuple[int, int]] = [], - working_directory: Optional[str] = None) -> str: + def containerize_command( + cmd: str, + container_system: str, + run_subcommand: str, + image_name: str, + container_name: str, + volumes: list[tuple[str, str]] = [], + ports: list[tuple[int, int]] = [], + working_directory: Optional[str] = None, + ) -> str: """ Get the command to run `cmd` with the given `container_system` and the given options. @@ -45,8 +49,9 @@ def containerize_command(cmd: str, container_system: str, # Check that `container_system` is supported. if container_system not in Containerize.supported_systems(): return ContainerizeException( - f"Invalid container system \"{container_system}\"" - f" (must be one of {Containerize.supported_systems()})") + f'Invalid container system "{container_system}"' + f" (must be one of {Containerize.supported_systems()})" + ) # Set user and group ids. 
This is important so that the files created # by the containerized command are owned by the user running the @@ -62,37 +67,40 @@ def containerize_command(cmd: str, container_system: str, # dir. volume_options = "".join([f" -v {v1}:{v2}" for v1, v2 in volumes]) port_options = "".join([f" -p {p1}:{p2}" for p1, p2 in ports]) - working_directory_option = (f" -w {working_directory}" - if working_directory is not None else "") + working_directory_option = ( + f" -w {working_directory}" if working_directory is not None else "" + ) # Construct the command that runs `cmd` with the given container # system. - containerized_cmd = (f"{container_system} {run_subcommand}" - f"{user_option}" - f" -v /etc/localtime:/etc/localtime:ro" - f"{volume_options}" - f"{port_options}" - f"{working_directory_option}" - f" --init" - f" --entrypoint bash" - f" --name {container_name} {image_name}" - f" -c {shlex.quote(cmd)}") + containerized_cmd = ( + f"{container_system} {run_subcommand}" + f"{user_option}" + f" -v /etc/localtime:/etc/localtime:ro" + f"{volume_options}" + f"{port_options}" + f"{working_directory_option}" + f" --init" + f" --entrypoint bash" + f" --name {container_name} {image_name}" + f" -c {shlex.quote(cmd)}" + ) return containerized_cmd @staticmethod def is_running(container_system: str, container_name: str) -> bool: # Note: the `{{{{` and `}}}}` result in `{{` and `}}`, respectively. containers = ( - run_command(f"{container_system} ps --format=\"{{{{.Names}}}}\"", - return_output=True) + run_command( + f'{container_system} ps --format="{{{{.Names}}}}"', return_output=True + ) .strip() .splitlines() ) return container_name in containers @staticmethod - def stop_and_remove_container(container_system: str, - container_name: str) -> bool: + def stop_and_remove_container(container_system: str, container_name: str) -> bool: """ Stop the container with the given name using the given system. 
Return `True` if a container with that name was found and stopped, `False` @@ -102,19 +110,26 @@ def stop_and_remove_container(container_system: str, # Check that `container_system` is supported. if container_system not in Containerize.supported_systems(): return ContainerizeException( - f"Invalid container system \"{container_system}\"" - f" (must be one of {Containerize.supported_systems()})") + f'Invalid container system "{container_system}"' + f" (must be one of {Containerize.supported_systems()})" + ) # Construct the command that stops the container. - stop_cmd = f"{container_system} stop {container_name} && " \ - f"{container_system} rm {container_name}" + stop_cmd = ( + f"{container_system} stop {container_name} && " + f"{container_system} rm {container_name}" + ) # Run the command. try: - subprocess.run(stop_cmd, shell=True, check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + subprocess.run( + stop_cmd, + shell=True, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) return True except Exception as e: - log.debug(f"Error running \"{stop_cmd}\": {e}") + log.debug(f'Error running "{stop_cmd}": {e}') return False diff --git a/src/qlever/log.py b/src/qlever/log.py index c0e2b58f..a92f9272 100644 --- a/src/qlever/log.py +++ b/src/qlever/log.py @@ -10,6 +10,7 @@ class QleverLogFormatter(logging.Formatter): """ Custom formatter for logging. """ + def format(self, record): message = record.getMessage() if record.levelno == logging.DEBUG: @@ -34,7 +35,7 @@ def format(self, record): "WARNING": logging.WARNING, "ERROR": logging.ERROR, "CRITICAL": logging.CRITICAL, - "NO_LOG": logging.CRITICAL + 1 + "NO_LOG": logging.CRITICAL + 1, } diff --git a/src/qlever/qlever_main.py b/src/qlever/qlever_main.py index a936abd5..959f7a8c 100644 --- a/src/qlever/qlever_main.py +++ b/src/qlever/qlever_main.py @@ -46,20 +46,25 @@ def main(): # Check if it's a certain kind of `AttributeError` and give a hint in # that case. 
match_error = re.search(r"object has no attribute '(.+)'", str(e)) - match_trace = re.search(r"(qlever/commands/.+\.py)\", line (\d+)", - traceback.format_exc()) + match_trace = re.search( + r"(qlever/commands/.+\.py)\", line (\d+)", traceback.format_exc() + ) if isinstance(e, AttributeError) and match_error and match_trace: attribute = match_error.group(1) trace_command = match_trace.group(1) trace_line = match_trace.group(2) log.error(f"{e} in `{trace_command}` at line {trace_line}") log.info("") - log.info(f"Likely cause: you used `args.{attribute}`, but it was " - f"neither defined in `relevant_qleverfile_arguments` " - f"nor in `additional_arguments`") + log.info( + f"Likely cause: you used `args.{attribute}`, but it was " + f"neither defined in `relevant_qleverfile_arguments` " + f"nor in `additional_arguments`" + ) log.info("") - log.info(f"If you did not implement `{trace_command}` yourself, " - f"please report this issue") + log.info( + f"If you did not implement `{trace_command}` yourself, " + f"please report this issue" + ) log.info("") else: log.error(f"An unexpected error occurred: {e}") diff --git a/src/qlever/qlever_old.py b/src/qlever/qlever_old.py index 7618928f..1a84b9c9 100755 --- a/src/qlever/qlever_old.py +++ b/src/qlever/qlever_old.py @@ -58,6 +58,8 @@ def track_action_rank(method): method.rank = track_action_rank.counter track_action_rank.counter += 1 return method + + track_action_rank.counter = 0 # noqa: E305 @@ -74,18 +76,22 @@ def show_available_config_names(): script_dir = os.path.dirname(__file__) try: qleverfiles_dir = os.path.join(script_dir, "Qleverfiles") - config_names = [qleverfile_name.split(".")[1] for - qleverfile_name in os.listdir(qleverfiles_dir)] + config_names = [ + qleverfile_name.split(".")[1] + for qleverfile_name in os.listdir(qleverfiles_dir) + ] if not config_names: - raise Exception(f"Directory \"{qleverfiles_dir}\" exists, but " - f"contains no Qleverfiles") + raise Exception( + f'Directory "{qleverfiles_dir}" 
exists, but ' f"contains no Qleverfiles" + ) except Exception as e: - log.error(f"Could not find any Qleverfiles in \"{qleverfiles_dir}\" " - f"({e})") + log.error(f'Could not find any Qleverfiles in "{qleverfiles_dir}" ' f"({e})") log.info("") - log.info("Check that you have fully downloaded or cloned " - "https://github.com/ad-freiburg/qlever-control, and " - "not just the script itself") + log.info( + "Check that you have fully downloaded or cloned " + "https://github.com/ad-freiburg/qlever-control, and " + "not just the script itself" + ) abort_script() # Show available config names. log.info(f"Available config names are: {', '.join(sorted(config_names))}") @@ -99,10 +105,12 @@ def show_available_action_names(): log.info("") log.info(f"Available action names are: {', '.join(action_names)}") log.info("") - log.info("To get autocompletion for these, run the following or " - "add it to your `.bashrc`:") + log.info( + "To get autocompletion for these, run the following or " + "add it to your `.bashrc`:" + ) log.info("") - log.info(f"{BLUE}eval \"$(qlever-old setup-autocompletion)\"{NORMAL}") + log.info(f'{BLUE}eval "$(qlever-old setup-autocompletion)"{NORMAL}') # We want to distinguish between exception that we throw intentionally and all @@ -120,37 +128,44 @@ def __init__(self): if not os.path.isfile("Qleverfile"): log.setLevel(logging.INFO) log.info("") - log.error("The qlever script needs a \"Qleverfile\" " - "in the current directory, but I could not find it") + log.error( + 'The qlever script needs a "Qleverfile" ' + "in the current directory, but I could not find it" + ) log.info("") - log.info("Run `qlever-old setup-config ` to create a " - "pre-filled Qleverfile") + log.info( + "Run `qlever-old setup-config ` to create a " + "pre-filled Qleverfile" + ) log.info("") show_available_config_names() abort_script() files_read = self.config.read("Qleverfile") if not files_read: - log.error("ConfigParser could not read \"Qleverfile\"") + log.error('ConfigParser 
could not read "Qleverfile"') abort_script() - self.name = self.config['data']['name'] + self.name = self.config["data"]["name"] self.yes_values = ["1", "true", "yes"] # Defaults for [server] that carry over from [index]. - for option in ["with_text_index", "only_pso_and_pos_permutations", - "use_patterns"]: - if option in self.config['index'] and \ - option not in self.config['server']: - self.config['server'][option] = \ - self.config['index'][option] + for option in [ + "with_text_index", + "only_pso_and_pos_permutations", + "use_patterns", + ]: + if option in self.config["index"] and option not in self.config["server"]: + self.config["server"][option] = self.config["index"][option] # Default values for options that are not mandatory in the Qleverfile. defaults = { "general": { "log_level": "info", "pid": "0", - "example_queries_url": (f"https://qlever.cs.uni-freiburg.de/" - f"api/examples/" - f"{self.config['ui']['config']}"), + "example_queries_url": ( + f"https://qlever.cs.uni-freiburg.de/" + f"api/examples/" + f"{self.config['ui']['config']}" + ), "example_queries_limit": "10", "example_queries_send": "0", }, @@ -183,8 +198,7 @@ def __init__(self): "image": "adfreiburg/qlever-ui", "container": "qlever-ui", "url": "https://qlever.cs.uni-freiburg.de/api", - - } + }, } for section in defaults: # If the section does not exist, create it. @@ -198,16 +212,17 @@ def __init__(self): # If the log level was not explicitly set by the first command-line # argument (see below), set it according to the Qleverfile. if log.level == logging.NOTSET: - log_level = self.config['general']['log_level'].upper() + log_level = self.config["general"]["log_level"].upper() try: log.setLevel(getattr(logging, log_level)) except AttributeError: - log.error(f"Invalid log level: \"{log_level}\"") + log.error(f'Invalid log level: "{log_level}"') abort_script() # Show some information (for testing purposes only). 
- log.debug(f"Parsed Qleverfile, sections are: " - f"{', '.join(self.config.sections())}") + log.debug( + f"Parsed Qleverfile, sections are: " f"{', '.join(self.config.sections())}" + ) # Check specifics of the installation. self.check_installation() @@ -225,23 +240,30 @@ def check_installation(self): self.net_connections_enabled = True except Exception as e: self.net_connections_enabled = False - log.debug(f"Note: psutil.net_connections() failed ({e})," - f" will not scan network connections for action" - f" \"start\"") + log.debug( + f"Note: psutil.net_connections() failed ({e})," + f" will not scan network connections for action" + f' "start"' + ) # Check whether docker is installed and works (on MacOS 12, docker # hangs when installed without GUI, hence the timeout). try: completed_process = subprocess.run( - ["docker", "info"], timeout=0.5, - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + ["docker", "info"], + timeout=0.5, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) if completed_process.returncode != 0: raise Exception("docker info failed") self.docker_enabled = True except Exception: self.docker_enabled = False - print("Note: `docker info` failed, therefore" - " docker.USE_DOCKER=true not supported") + print( + "Note: `docker info` failed, therefore" + " docker.USE_DOCKER=true not supported" + ) def set_config(self, section, option, value): """ @@ -253,8 +275,10 @@ def set_config(self, section, option, value): log.error(f"Section [{section}] does not exist in Qleverfile") abort_script() if not self.config.has_option(section, option): - log.error(f"Option {option.upper()} does not exist in section " - f"[{section}] in Qleverfile") + log.error( + f"Option {option.upper()} does not exist in section " + f"[{section}] in Qleverfile" + ) abort_script() self.config[section][option] = value @@ -278,13 +302,12 @@ def alive_check(self, port): message = "from the qlever script".replace(" ", "%20") curl_cmd = f"curl -s 
http://localhost:{port}/ping?msg={message}" - exit_code = subprocess.call(curl_cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + exit_code = subprocess.call( + curl_cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) return exit_code == 0 - def show_process_info(self, psutil_process, - cmdline_regex, show_heading=True): + def show_process_info(self, psutil_process, cmdline_regex, show_heading=True): """ Helper function that shows information about a process if information about the process can be retrieved and the command line matches the @@ -294,16 +317,17 @@ def show_process_info(self, psutil_process, def show_table_line(pid, user, start_time, rss, cmdline): log.info(f"{pid:<8} {user:<8} {start_time:>5} {rss:>5} {cmdline}") + try: pinfo = psutil_process.as_dict( - attrs=['pid', 'username', 'create_time', - 'memory_info', 'cmdline']) - cmdline = " ".join(pinfo['cmdline']) + attrs=["pid", "username", "create_time", "memory_info", "cmdline"] + ) + cmdline = " ".join(pinfo["cmdline"]) if not re.search(cmdline_regex, cmdline): return False - pid = pinfo['pid'] - user = pinfo['username'] if pinfo['username'] else "" - start_time = datetime.fromtimestamp(pinfo['create_time']) + pid = pinfo["pid"] + user = pinfo["username"] if pinfo["username"] else "" + start_time = datetime.fromtimestamp(pinfo["create_time"]) if start_time.date() == date.today(): start_time = start_time.strftime("%H:%M") else: @@ -326,9 +350,11 @@ def show(self, action_description, only_show): log.info(f"{BLUE}{action_description}{NORMAL}") log.info("") if only_show: - log.info("You called \"qlever-old ... show\", therefore the " - "action is only shown, but not executed (omit the " - "\"show\" to execute it)") + log.info( + 'You called "qlever-old ... 
show", therefore the ' + "action is only shown, but not executed (omit the " + '"show" to execute it)' + ) @staticmethod @track_action_rank @@ -344,35 +370,44 @@ def action_setup_config(config_name="default"): if os.path.isfile("Qleverfile"): log.error("Qleverfile already exists in current directory") log.info("") - log.info("If you want to create a new Qleverfile using " - "`qlever-old setup-config`, delete the existing " - "Qleverfile first") + log.info( + "If you want to create a new Qleverfile using " + "`qlever-old setup-config`, delete the existing " + "Qleverfile first" + ) abort_script() # Get the directory of this script and copy the Qleverfile for `config` # to the current directory. script_dir = os.path.dirname(os.path.realpath(__file__)) - qleverfile_path = os.path.join(script_dir, - f"Qleverfiles/Qleverfile.{config_name}") + qleverfile_path = os.path.join( + script_dir, f"Qleverfiles/Qleverfile.{config_name}" + ) if not os.path.isfile(qleverfile_path): - log.error(f"File \"{qleverfile_path}\" does not exist") + log.error(f'File "{qleverfile_path}" does not exist') log.info("") abort_script() try: shutil.copy(qleverfile_path, "Qleverfile") except Exception as e: - log.error(f"Could not copy \"{qleverfile_path}\"" - f" to current directory: {e}") + log.error( + f'Could not copy "{qleverfile_path}"' f" to current directory: {e}" + ) abort_script() - log.info(f"Created Qleverfile for config \"{config_name}\"" - f" in current directory") + log.info( + f'Created Qleverfile for config "{config_name}"' f" in current directory" + ) log.info("") if config_name == "default": - log.info("Since this is the default Qleverfile, you need to " - "edit it before you can continue") + log.info( + "Since this is the default Qleverfile, you need to " + "edit it before you can continue" + ) log.info("") - log.info("Afterwards, run `qlever` without arguments to see " - "which actions are available") + log.info( + "Afterwards, run `qlever` without arguments to see " + "which 
actions are available" + ) else: show_available_action_names() log.info("") @@ -384,17 +419,20 @@ def action_show_config(self, only_show=False): values for options that are not set explicitly in the Qleverfile. """ - print(f"{BLUE}Showing the current configuration, including default" - f" values for options that are not set explicitly in the" - f" Qleverfile{NORMAL}") + print( + f"{BLUE}Showing the current configuration, including default" + f" values for options that are not set explicitly in the" + f" Qleverfile{NORMAL}" + ) for section in self.config.sections(): print() print(f"[{section}]") - max_option_length = max([len(option) for option in - self.config[section]]) + max_option_length = max([len(option) for option in self.config[section]]) for option in self.config[section]: - print(f"{option.upper().ljust(max_option_length)} = " - f"{self.config[section][option]}") + print( + f"{option.upper().ljust(max_option_length)} = " + f"{self.config[section][option]}" + ) print() @@ -405,10 +443,10 @@ def action_get_data(self, only_show=False): """ # Construct the command line. - if not self.config['data']['get_data_cmd']: + if not self.config["data"]["get_data_cmd"]: log.error(f"{RED}No GET_DATA_CMD specified in Qleverfile") return - get_data_cmd = self.config['data']['get_data_cmd'] + get_data_cmd = self.config["data"]["get_data_cmd"] # Show it. self.show(get_data_cmd, only_show) @@ -418,7 +456,8 @@ def action_get_data(self, only_show=False): # Execute the command line. subprocess.run(get_data_cmd, shell=True) total_file_size = self.get_total_file_size( - self.config['index']['file_names'].split()) + self.config["index"]["file_names"].split() + ) print(f"Total file size: {total_file_size:.1f} GB") @track_action_rank @@ -429,75 +468,93 @@ def action_index(self, only_show=False): """ # Construct the command line based on the config file. 
- index_config = self.config['index'] - cmdline = (f"{index_config['cat_files']} | {index_config['binary']}" - f" -F ttl -f -" - f" -i {self.name}" - f" -s {self.name}.settings.json") - if index_config['only_pso_and_pos_permutations'] in self.yes_values: + index_config = self.config["index"] + cmdline = ( + f"{index_config['cat_files']} | {index_config['binary']}" + f" -F ttl -f -" + f" -i {self.name}" + f" -s {self.name}.settings.json" + ) + if index_config["only_pso_and_pos_permutations"] in self.yes_values: cmdline += " --only-pso-and-pos-permutations --no-patterns" - if not index_config['use_patterns'] in self.yes_values: + if not index_config["use_patterns"] in self.yes_values: cmdline += " --no-patterns" - if index_config['with_text_index'] in \ - ["from_text_records", "from_text_records_and_literals"]: - cmdline += (f" -w {self.name}.wordsfile.tsv" - f" -d {self.name}.docsfile.tsv") - if index_config['with_text_index'] in \ - ["from_literals", "from_text_records_and_literals"]: + if index_config["with_text_index"] in [ + "from_text_records", + "from_text_records_and_literals", + ]: + cmdline += f" -w {self.name}.wordsfile.tsv" f" -d {self.name}.docsfile.tsv" + if index_config["with_text_index"] in [ + "from_literals", + "from_text_records_and_literals", + ]: cmdline += " --text-words-from-literals" - if 'stxxl_memory' in index_config: + if "stxxl_memory" in index_config: cmdline += f" --stxxl-memory {index_config['stxxl_memory']}" cmdline += f" | tee {self.name}.index-log.txt" # If the total file size is larger than 10 GB, set ulimit (such that a # large number of open files is allowed). total_file_size = self.get_total_file_size( - self.config['index']['file_names'].split()) + self.config["index"]["file_names"].split() + ) if total_file_size > 10: cmdline = f"ulimit -Sn 1048576; {cmdline}" # If we are using Docker, run the command in a Docker container. 
# Here is how the shell script does it: - if self.config['docker']['use_docker'] in self.yes_values: - docker_config = self.config['docker'] - cmdline = (f"docker run -it --rm -u $(id -u):$(id -g)" - f" -v /etc/localtime:/etc/localtime:ro" - f" -v $(pwd):/index -w /index" - f" --entrypoint bash" - f" --name {docker_config['container_indexer']}" - f" {docker_config['image']}" - f" -c {shlex.quote(cmdline)}") + if self.config["docker"]["use_docker"] in self.yes_values: + docker_config = self.config["docker"] + cmdline = ( + f"docker run -it --rm -u $(id -u):$(id -g)" + f" -v /etc/localtime:/etc/localtime:ro" + f" -v $(pwd):/index -w /index" + f" --entrypoint bash" + f" --name {docker_config['container_indexer']}" + f" {docker_config['image']}" + f" -c {shlex.quote(cmdline)}" + ) # Show the command line. - self.show(f"Write value of config variable index.SETTINGS_JSON to " - f"file {self.name}.settings.json\n" - f"{cmdline}", only_show) + self.show( + f"Write value of config variable index.SETTINGS_JSON to " + f"file {self.name}.settings.json\n" + f"{cmdline}", + only_show, + ) if only_show: return # When docker.USE_DOCKER=false, check if the binary for building the # index exists and works. 
- if self.config['docker']['use_docker'] not in self.yes_values: + if self.config["docker"]["use_docker"] not in self.yes_values: try: check_binary_cmd = f"{self.config['index']['binary']} --help" - subprocess.run(check_binary_cmd, shell=True, check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + subprocess.run( + check_binary_cmd, + shell=True, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) except subprocess.CalledProcessError as e: - log.error(f"Running \"{check_binary_cmd}\" failed ({e}), " - f"set index.BINARY to a different binary or " - f"set docker.USE_DOCKER=true") + log.error( + f'Running "{check_binary_cmd}" failed ({e}), ' + f"set index.BINARY to a different binary or " + f"set docker.USE_DOCKER=true" + ) abort_script() # Check if index files (name.index.*) already exist. if glob.glob(f"{self.name}.index.*"): raise ActionException( - f"Index files \"{self.name}.index.*\" already exist, " - f"please delete them if you want to rebuild the index") + f'Index files "{self.name}.index.*" already exist, ' + f"please delete them if you want to rebuild the index" + ) # Write settings.json file and run the command. with open(f"{self.name}.settings.json", "w") as f: - f.write(self.config['index']['settings_json']) + f.write(self.config["index"]["settings_json"]) subprocess.run(cmdline, shell=True) @track_action_rank @@ -507,15 +564,16 @@ def action_remove_index(self, only_show=False): """ # List of all the index files (not all of them need to be there). - index_fileglobs = (f"{self.name}.index.*", - f"{self.name}.patterns.*", - f"{self.name}.prefixes", - f"{self.name}.meta-data.json", - f"{self.name}.vocabulary.*") + index_fileglobs = ( + f"{self.name}.index.*", + f"{self.name}.patterns.*", + f"{self.name}.prefixes", + f"{self.name}.meta-data.json", + f"{self.name}.vocabulary.*", + ) # Show the command line. 
- self.show(f"Remove index files {', '.join(index_fileglobs)}", - only_show) + self.show(f"Remove index files {', '.join(index_fileglobs)}", only_show) if only_show: return @@ -529,8 +587,10 @@ def action_remove_index(self, only_show=False): os.remove(filename) files_removed.append(filename) if files_removed: - log.info(f"Removed the following index files of total size " - f"{total_file_size / 1e9:.1f} GB:") + log.info( + f"Removed the following index files of total size " + f"{total_file_size / 1e9:.1f} GB:" + ) log.info("") log.info(", ".join(files_removed)) else: @@ -545,44 +605,49 @@ def action_start(self, only_show=False): """ # Construct the command line based on the config file. - server_config = self.config['server'] - cmdline = (f"{self.config['server']['binary']}" - f" -i {self.name}" - f" -j {server_config['num_threads']}" - f" -p {server_config['port']}" - f" -m {server_config['memory_for_queries']}" - f" -c {server_config['cache_max_size']}" - f" -e {server_config['cache_max_size_single_entry']}" - f" -k {server_config['cache_max_num_entries']}") - if server_config['timeout']: + server_config = self.config["server"] + cmdline = ( + f"{self.config['server']['binary']}" + f" -i {self.name}" + f" -j {server_config['num_threads']}" + f" -p {server_config['port']}" + f" -m {server_config['memory_for_queries']}" + f" -c {server_config['cache_max_size']}" + f" -e {server_config['cache_max_size_single_entry']}" + f" -k {server_config['cache_max_num_entries']}" + ) + if server_config["timeout"]: cmdline += f" -s {server_config['timeout']}" - if server_config['access_token']: + if server_config["access_token"]: cmdline += f" -a {server_config['access_token']}" - if server_config['only_pso_and_pos_permutations'] in self.yes_values: + if server_config["only_pso_and_pos_permutations"] in self.yes_values: cmdline += " --only-pso-and-pos-permutations" - if not server_config['use_patterns'] in self.yes_values: + if not server_config["use_patterns"] in self.yes_values: 
cmdline += " --no-patterns" - if server_config['with_text_index'] in \ - ["from_text_records", - "from_literals", - "from_text_records_and_literals"]: + if server_config["with_text_index"] in [ + "from_text_records", + "from_literals", + "from_text_records_and_literals", + ]: cmdline += " -t" cmdline += f" > {self.name}.server-log.txt 2>&1" # If we are using Docker, run the command in a docker container. - if self.config['docker']['use_docker'] in self.yes_values: - docker_config = self.config['docker'] - cmdline = (f"docker run -d --restart=unless-stopped" - f" -u $(id -u):$(id -g)" - f" -it -v /etc/localtime:/etc/localtime:ro" - f" -v $(pwd):/index" - f" -p {server_config['port']}:{server_config['port']}" - f" -w /index" - f" --entrypoint bash" - f" --name {docker_config['container_server']}" - f" --init" - f" {docker_config['image']}" - f" -c {shlex.quote(cmdline)}") + if self.config["docker"]["use_docker"] in self.yes_values: + docker_config = self.config["docker"] + cmdline = ( + f"docker run -d --restart=unless-stopped" + f" -u $(id -u):$(id -g)" + f" -it -v /etc/localtime:/etc/localtime:ro" + f" -v $(pwd):/index" + f" -p {server_config['port']}:{server_config['port']}" + f" -w /index" + f" --entrypoint bash" + f" --name {docker_config['container_server']}" + f" --init" + f" {docker_config['image']}" + f" -c {shlex.quote(cmdline)}" + ) else: cmdline = f"nohup {cmdline} &" @@ -593,41 +658,48 @@ def action_start(self, only_show=False): # When docker.USE_DOCKER=false, check if the binary for starting the # server exists and works. 
- if self.config['docker']['use_docker'] not in self.yes_values: + if self.config["docker"]["use_docker"] not in self.yes_values: try: check_binary_cmd = f"{self.config['server']['binary']} --help" - subprocess.run(check_binary_cmd, shell=True, check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + subprocess.run( + check_binary_cmd, + shell=True, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) except subprocess.CalledProcessError as e: - log.error(f"Running \"{check_binary_cmd}\" failed ({e}), " - f"set server.BINARY to a different binary or " - f"set docker.USE_DOCKER=true") + log.error( + f'Running "{check_binary_cmd}" failed ({e}), ' + f"set server.BINARY to a different binary or " + f"set docker.USE_DOCKER=true" + ) abort_script() # Check if a QLever server is already running on this port. - port = server_config['port'] + port = server_config["port"] if self.alive_check(port): - raise ActionException( - f"QLever server already running on port {port}") + raise ActionException(f"QLever server already running on port {port}") # Check if another process is already listening. if self.net_connections_enabled: - if port in [conn.laddr.port for conn - in psutil.net_connections()]: + if port in [conn.laddr.port for conn in psutil.net_connections()]: raise ActionException( - f"Port {port} is already in use by another process") + f"Port {port} is already in use by another process" + ) # Execute the command line. - subprocess.run(cmdline, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + subprocess.run( + cmdline, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) # Tail the server log until the server is ready (note that the `exec` # is important to make sure that the tail process is killed and not # just the bash process). 
- log.info(f"Follow {self.name}.server-log.txt until the server is ready" - f" (Ctrl-C stops following the log, but not the server)") + log.info( + f"Follow {self.name}.server-log.txt until the server is ready" + f" (Ctrl-C stops following the log, but not the server)" + ) log.info("") tail_cmd = f"exec tail -f {self.name}.server-log.txt" tail_proc = subprocess.Popen(tail_cmd, shell=True) @@ -635,20 +707,24 @@ def action_start(self, only_show=False): time.sleep(1) # Set the access token if specified. - access_token = server_config['access_token'] - access_arg = f"--data-urlencode \"access-token={access_token}\"" - if "index_description" in self.config['data']: - desc = self.config['data']['index_description'] - curl_cmd = (f"curl -Gs http://localhost:{port}/api" - f" --data-urlencode \"index-description={desc}\"" - f" {access_arg} > /dev/null") + access_token = server_config["access_token"] + access_arg = f'--data-urlencode "access-token={access_token}"' + if "index_description" in self.config["data"]: + desc = self.config["data"]["index_description"] + curl_cmd = ( + f"curl -Gs http://localhost:{port}/api" + f' --data-urlencode "index-description={desc}"' + f" {access_arg} > /dev/null" + ) log.debug(curl_cmd) subprocess.run(curl_cmd, shell=True) - if "text_description" in self.config['data']: - desc = self.config['data']['text_description'] - curl_cmd = (f"curl -Gs http://localhost:{port}/api" - f" --data-urlencode \"text-description={desc}\"" - f" {access_arg} > /dev/null") + if "text_description" in self.config["data"]: + desc = self.config["data"]["text_description"] + curl_cmd = ( + f"curl -Gs http://localhost:{port}/api" + f' --data-urlencode "text-description={desc}"' + f" {access_arg} > /dev/null" + ) log.debug(curl_cmd) subprocess.run(curl_cmd, shell=True) @@ -664,28 +740,39 @@ def action_stop(self, only_show=False, fail_if_not_running=True): """ # Show action description. 
- docker_container_name = self.config['docker']['container_server'] - cmdline_regex = (f"ServerMain.* -i [^ ]*{self.name}") - self.show(f"Checking for process matching \"{cmdline_regex}\" " - f"and for Docker container with name " - f"\"{docker_container_name}\"", only_show) + docker_container_name = self.config["docker"]["container_server"] + cmdline_regex = f"ServerMain.* -i [^ ]*{self.name}" + self.show( + f'Checking for process matching "{cmdline_regex}" ' + f"and for Docker container with name " + f'"{docker_container_name}"', + only_show, + ) if only_show: return # First check if there is docker container running. if self.docker_enabled: - docker_cmd = (f"docker stop {docker_container_name} && " - f"docker rm {docker_container_name}") + docker_cmd = ( + f"docker stop {docker_container_name} && " + f"docker rm {docker_container_name}" + ) try: - subprocess.run(docker_cmd, shell=True, check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - log.info(f"Docker container with name " - f"\"{docker_container_name}\" " - f"stopped and removed") + subprocess.run( + docker_cmd, + shell=True, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + log.info( + f"Docker container with name " + f'"{docker_container_name}" ' + f"stopped and removed" + ) return except Exception as e: - log.debug(f"Error running \"{docker_cmd}\": {e}") + log.debug(f'Error running "{docker_cmd}": {e}') # Check if there is a process running on the server port using psutil. 
# @@ -694,22 +781,24 @@ def action_stop(self, only_show=False, fail_if_not_running=True): for proc in psutil.process_iter(): try: pinfo = proc.as_dict( - attrs=['pid', 'username', 'create_time', - 'memory_info', 'cmdline']) - cmdline = " ".join(pinfo['cmdline']) + attrs=["pid", "username", "create_time", "memory_info", "cmdline"] + ) + cmdline = " ".join(pinfo["cmdline"]) except Exception as err: log.debug(f"Error getting process info: {err}") if re.match(cmdline_regex, cmdline): - log.info(f"Found process {pinfo['pid']} from user " - f"{pinfo['username']} with command line: {cmdline}") + log.info( + f"Found process {pinfo['pid']} from user " + f"{pinfo['username']} with command line: {cmdline}" + ) print() try: proc.kill() log.info(f"Killed process {pinfo['pid']}") except Exception as e: raise ActionException( - f"Could not kill process with PID " - f"{pinfo['pid']}: {e}") + f"Could not kill process with PID " f"{pinfo['pid']}: {e}" + ) return # No matching process found. @@ -726,8 +815,7 @@ def action_restart(self, only_show=False): """ # Show action description. - self.show("Stop running server if found, then start new server", - only_show) + self.show("Stop running server if found, then start new server", only_show) if only_show: return @@ -749,8 +837,10 @@ def action_log(self, only_show=False): return # Do it. - log.info(f"Follow {self.name}.server-log.txt (Ctrl-C stops" - f" following the log, but not the server)") + log.info( + f"Follow {self.name}.server-log.txt (Ctrl-C stops" + f" following the log, but not the server)" + ) log.info("") subprocess.run(log_cmd, shell=True) @@ -765,9 +855,12 @@ def action_status(self, only_show=False): # Show action description. 
cmdline_regex = "(ServerMain|IndexBuilderMain)" # cmdline_regex = f"(ServerMain|IndexBuilderMain).*{self.name}" - self.show(f"{BLUE}Show all processes on this machine where " - f"the command line matches {cmdline_regex}" - f" using Python's psutil library", only_show) + self.show( + f"{BLUE}Show all processes on this machine where " + f"the command line matches {cmdline_regex}" + f" using Python's psutil library", + only_show, + ) if only_show: return @@ -775,8 +868,9 @@ def action_status(self, only_show=False): num_processes_found = 0 for proc in psutil.process_iter(): show_heading = num_processes_found == 0 - process_shown = self.show_process_info(proc, cmdline_regex, - show_heading=show_heading) + process_shown = self.show_process_info( + proc, cmdline_regex, show_heading=show_heading + ) if process_shown: num_processes_found += 1 if num_processes_found == 0: @@ -789,10 +883,12 @@ def action_index_stats(self, only_show=False): index, based on the log file of th index build. """ - log_file_name = self.config['data']['name'] + ".index-log.txt" - log.info(f"{BLUE}Breakdown of the time for building the index, " - f"based on the timestamps for key lines in " - f"\"{log_file_name}{NORMAL}\"") + log_file_name = self.config["data"]["name"] + ".index-log.txt" + log.info( + f"{BLUE}Breakdown of the time for building the index, " + f"based on the timestamps for key lines in " + f'"{log_file_name}{NORMAL}"' + ) log.info("") if only_show: return @@ -802,8 +898,7 @@ def action_index_stats(self, only_show=False): with open(log_file_name, "r") as f: lines = f.readlines() except Exception as e: - raise ActionException(f"Could not read log file {log_file_name}: " - f"{e}") + raise ActionException(f"Could not read log file {log_file_name}: " f"{e}") current_line = 0 # Helper lambda that finds the next line matching the given `regex`, @@ -826,14 +921,19 @@ def find_next_line(regex, line_is_optional=False): regex_match = re.search(regex, line) if regex_match: try: - return 
datetime.strptime( + return ( + datetime.strptime( re.match(timestamp_regex, line).group(), - timestamp_format), regex_match + timestamp_format, + ), + regex_match, + ) except Exception as e: raise ActionException( - f"Could not parse timestamp of form " - f"\"{timestamp_regex}\" from line " - f" \"{line.rstrip()}\" ({e})") + f"Could not parse timestamp of form " + f'"{timestamp_regex}" from line ' + f' "{line.rstrip()}" ({e})' + ) # If we get here, we did not find a matching line. if line_is_optional: current_line = current_line_backup @@ -849,13 +949,15 @@ def find_next_line(regex, line_is_optional=False): perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", True) if perm_begin is None: break - _, perm_info = find_next_line(r"INFO:\s*Writing meta data for" - r" ([A-Z]+ and [A-Z]+)", True) + _, perm_info = find_next_line( + r"INFO:\s*Writing meta data for" r" ([A-Z]+ and [A-Z]+)", True + ) # if perm_info is None: # break perm_begin_and_info.append((perm_begin, perm_info)) - convert_end = (perm_begin_and_info[0][0] if - len(perm_begin_and_info) > 0 else None) + convert_end = ( + perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None + ) normal_end, _ = find_next_line(r"INFO:\s*Index build completed") text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True) text_end, _ = find_next_line(r"INFO:\s*DocsDB done", True) @@ -867,9 +969,11 @@ def find_next_line(regex, line_is_optional=False): if overall_begin is None: raise ActionException("Missing line that index build has started") if overall_begin and not merge_begin: - raise ActionException("According to the log file, the index build " - "has started, but is still in its first " - "phase (parsing the input)") + raise ActionException( + "According to the log file, the index build " + "has started, but is still in its first " + "phase (parsing the input)" + ) # Helper lambda that shows the duration for a phase (if the start and # end timestamps are available). 
@@ -904,22 +1008,25 @@ def show_duration(heading, start_end_pairs): show_duration("Convert to global IDs", [(convert_begin, convert_end)]) for i in range(len(perm_begin_and_info)): perm_begin, perm_info = perm_begin_and_info[i] - perm_end = perm_begin_and_info[i + 1][0] if i + 1 < len( - perm_begin_and_info) else normal_end - perm_info_text = (perm_info.group(1).replace(" and ", " & ") - if perm_info else f"#{i + 1}") - show_duration(f"Permutation {perm_info_text}", - [(perm_begin, perm_end)]) + perm_end = ( + perm_begin_and_info[i + 1][0] + if i + 1 < len(perm_begin_and_info) + else normal_end + ) + perm_info_text = ( + perm_info.group(1).replace(" and ", " & ") if perm_info else f"#{i + 1}" + ) + show_duration(f"Permutation {perm_info_text}", [(perm_begin, perm_end)]) show_duration("Text index", [(text_begin, text_end)]) if text_begin and text_end: log.info("") - show_duration("TOTAL index build time", - [(overall_begin, normal_end), - (text_begin, text_end)]) + show_duration( + "TOTAL index build time", + [(overall_begin, normal_end), (text_begin, text_end)], + ) elif normal_end: log.info("") - show_duration("TOTAL index build time", - [(overall_begin, normal_end)]) + show_duration("TOTAL index build time", [(overall_begin, normal_end)]) @track_action_rank def action_test_query(self, only_show=False): @@ -929,11 +1036,15 @@ def action_test_query(self, only_show=False): # Construct the curl command. query = "SELECT * WHERE { ?s ?p ?o } LIMIT 10" - headers = ["Accept: text/tab-separated-values", - "Content-Type: application/sparql-query"] - curl_cmd = (f"curl -s {self.config['server']['url']} " - f"-H \"{headers[0]}\" -H \"{headers[1]}\" " - f"--data \"{query}\"") + headers = [ + "Accept: text/tab-separated-values", + "Content-Type: application/sparql-query", + ] + curl_cmd = ( + f"curl -s {self.config['server']['url']} " + f'-H "{headers[0]}" -H "{headers[1]}" ' + f'--data "{query}"' + ) # Show it. 
self.show(curl_cmd, only_show) @@ -955,37 +1066,44 @@ def action_ui(self, only_show=False): server_url = f"http://{host_name}:{self.config['server']['port']}" docker_rm_cmd = f"docker rm -f {self.config['ui']['container']}" docker_pull_cmd = f"docker pull {self.config['ui']['image']}" - docker_run_cmd = (f"docker run -d -p {self.config['ui']['port']}:7000 " - f"--name {self.config['ui']['container']} " - f"{self.config['ui']['image']} ") - docker_exec_cmd = (f"docker exec -it " - f"{self.config['ui']['container']} " - f"bash -c \"python manage.py configure " - f"{self.config['ui']['config']} " - f"{server_url}\"") + docker_run_cmd = ( + f"docker run -d -p {self.config['ui']['port']}:7000 " + f"--name {self.config['ui']['container']} " + f"{self.config['ui']['image']} " + ) + docker_exec_cmd = ( + f"docker exec -it " + f"{self.config['ui']['container']} " + f'bash -c "python manage.py configure ' + f"{self.config['ui']['config']} " + f'{server_url}"' + ) # Show them. - self.show("\n".join([docker_rm_cmd, docker_pull_cmd, docker_run_cmd, - docker_exec_cmd]), only_show) + self.show( + "\n".join( + [docker_rm_cmd, docker_pull_cmd, docker_run_cmd, docker_exec_cmd] + ), + only_show, + ) if only_show: return # Execute them. 
try: - subprocess.run(docker_rm_cmd, shell=True, - stdout=subprocess.DEVNULL) - subprocess.run(docker_pull_cmd, shell=True, - stdout=subprocess.DEVNULL) - subprocess.run(docker_run_cmd, shell=True, - stdout=subprocess.DEVNULL) - subprocess.run(docker_exec_cmd, shell=True, - stdout=subprocess.DEVNULL) + subprocess.run(docker_rm_cmd, shell=True, stdout=subprocess.DEVNULL) + subprocess.run(docker_pull_cmd, shell=True, stdout=subprocess.DEVNULL) + subprocess.run(docker_run_cmd, shell=True, stdout=subprocess.DEVNULL) + subprocess.run(docker_exec_cmd, shell=True, stdout=subprocess.DEVNULL) except subprocess.CalledProcessError as e: raise ActionException(f"Failed to start the QLever UI {e}") - log.info(f"The QLever UI should now be up at " - f"http://{host_name}:{self.config['ui']['port']}") - log.info("You can log in as QLever UI admin with username and " - "password \"demo\"") + log.info( + f"The QLever UI should now be up at " + f"http://{host_name}:{self.config['ui']['port']}" + ) + log.info( + "You can log in as QLever UI admin with username and " 'password "demo"' + ) @track_action_rank def action_cache_stats_and_settings(self, only_show=False): @@ -994,10 +1112,14 @@ def action_cache_stats_and_settings(self, only_show=False): """ # Construct the two curl commands. - cache_stats_cmd = (f"curl -s {self.config['server']['url']} " - f"--data-urlencode \"cmd=cache-stats\"") - cache_settings_cmd = (f"curl -s {self.config['server']['url']} " - f"--data-urlencode \"cmd=get-settings\"") + cache_stats_cmd = ( + f"curl -s {self.config['server']['url']} " + f'--data-urlencode "cmd=cache-stats"' + ) + cache_settings_cmd = ( + f"curl -s {self.config['server']['url']} " + f'--data-urlencode "cmd=get-settings"' + ) # Show them. self.show("\n".join([cache_stats_cmd, cache_settings_cmd]), only_show) @@ -1007,8 +1129,7 @@ def action_cache_stats_and_settings(self, only_show=False): # Execute them. 
try: cache_stats = subprocess.check_output(cache_stats_cmd, shell=True) - cache_settings = subprocess.check_output(cache_settings_cmd, - shell=True) + cache_settings = subprocess.check_output(cache_settings_cmd, shell=True) # Print the key-value pairs of the stats JSON in tabular form. def print_json_as_tabular(raw_json): @@ -1020,12 +1141,12 @@ def print_json_as_tabular(raw_json): if re.match(r"^\d+\.\d+$", value): value = "{:.2f}".format(float(value)) log.info(f"{key.ljust(max_key_len)} : {value}") + print_json_as_tabular(cache_stats) log.info("") print_json_as_tabular(cache_settings) except Exception as e: - raise ActionException(f"Failed to get cache stats and settings: " - f"{e}") + raise ActionException(f"Failed to get cache stats and settings: " f"{e}") @track_action_rank def action_clear_cache(self, only_show=False): @@ -1034,8 +1155,10 @@ def action_clear_cache(self, only_show=False): """ # Construct the curl command. - clear_cache_cmd = (f"curl -s {self.config['server']['url']} " - f"--data-urlencode \"cmd=clear-cache\"") + clear_cache_cmd = ( + f"curl -s {self.config['server']['url']} " + f'--data-urlencode "cmd=clear-cache"' + ) # Show it. self.show(clear_cache_cmd, only_show) @@ -1044,8 +1167,7 @@ def action_clear_cache(self, only_show=False): # Execute it. try: - subprocess.run(clear_cache_cmd, shell=True, - stdout=subprocess.DEVNULL) + subprocess.run(clear_cache_cmd, shell=True, stdout=subprocess.DEVNULL) print("Cache cleared (only unpinned entries)") print() self.action_cache_stats_and_settings(only_show) @@ -1060,10 +1182,12 @@ def action_clear_cache_complete(self, only_show=False): """ # Construct the curl command. 
- access_token = self.config['server']['access_token'] - clear_cache_cmd = (f"curl -s {self.config['server']['url']} " - f"--data-urlencode \"cmd=clear-cache-complete\" " - f"--data-urlencode \"access-token={access_token}\"") + access_token = self.config["server"]["access_token"] + clear_cache_cmd = ( + f"curl -s {self.config['server']['url']} " + f'--data-urlencode "cmd=clear-cache-complete" ' + f'--data-urlencode "access-token={access_token}"' + ) # Show it. self.show(clear_cache_cmd, only_show) @@ -1072,8 +1196,7 @@ def action_clear_cache_complete(self, only_show=False): # Execute it. try: - subprocess.run(clear_cache_cmd, shell=True, - stdout=subprocess.DEVNULL) + subprocess.run(clear_cache_cmd, shell=True, stdout=subprocess.DEVNULL) print("Cache cleared (both pinned and unpinned entries)") print() self.action_cache_stats_and_settings(only_show) @@ -1095,7 +1218,7 @@ def action_autocompletion_warmup(self, only_show=False): access_token_ui = "top-secret" config_name = self.config["ui"]["config"] warmup_url = f"{self.config['ui']['url']}/warmup/{config_name}" - curl_cmd = (f"curl -s {warmup_url}/queries?token={access_token_ui}") + curl_cmd = f"curl -s {warmup_url}/queries?token={access_token_ui}" # Show it. 
self.show(f"Pin warmup queries obtained via: {curl_cmd}", only_show) @@ -1114,22 +1237,27 @@ def action_autocompletion_warmup(self, only_show=False): first = True timeout = "300s" access_token = self.config["server"]["access_token"] - for description, query in [line.split("\t") for line in - queries.decode("utf-8").split("\n")]: + for description, query in [ + line.split("\t") for line in queries.decode("utf-8").split("\n") + ]: if first: first = False else: log.info("") log.info(f"{BOLD}Pin query: {description}{NORMAL}") - pin_cmd = (f"curl -s {self.config['server']['url']}/api " - f"-H \"{header}\" " - f"--data-urlencode query={shlex.quote(query)} " - f"--data-urlencode timeout={timeout} " - f"--data-urlencode access-token={access_token} " - f"--data-urlencode pinresult=true " - f"--data-urlencode send=0") - clear_cache_cmd = (f"curl -s {self.config['server']['url']} " - f"--data-urlencode \"cmd=clear-cache\"") + pin_cmd = ( + f"curl -s {self.config['server']['url']}/api " + f'-H "{header}" ' + f"--data-urlencode query={shlex.quote(query)} " + f"--data-urlencode timeout={timeout} " + f"--data-urlencode access-token={access_token} " + f"--data-urlencode pinresult=true " + f"--data-urlencode send=0" + ) + clear_cache_cmd = ( + f"curl -s {self.config['server']['url']} " + f'--data-urlencode "cmd=clear-cache"' + ) log.info(pin_cmd) # Launch query and show the `resultsize` of the JSON response. try: @@ -1140,8 +1268,9 @@ def action_autocompletion_warmup(self, only_show=False): raise Exception(json_result["exception"]) log.info(f"Result size: {json_result['resultsize']:,}") log.info(clear_cache_cmd) - subprocess.check_output(clear_cache_cmd, shell=True, - stderr=subprocess.DEVNULL) + subprocess.check_output( + clear_cache_cmd, shell=True, stderr=subprocess.DEVNULL + ) except Exception as e: log.error(f"Query failed: {e}") @@ -1159,12 +1288,14 @@ def action_example_queries(self, only_show=False): curl_cmd = f"curl -s {example_queries_url}" # Show what the action does. 
- self.show(f"Launch example queries obtained via: {curl_cmd}\n" - f"SPARQL endpoint: {self.config['server']['url']}\n" - f"Clearing the cache before each query\n" - f"Using send={example_queries_send} and limit=" - f"{example_queries_limit}", - only_show) + self.show( + f"Launch example queries obtained via: {curl_cmd}\n" + f"SPARQL endpoint: {self.config['server']['url']}\n" + f"Clearing the cache before each query\n" + f"Using send={example_queries_send} and limit=" + f"{example_queries_limit}", + only_show, + ) if only_show: return @@ -1179,19 +1310,27 @@ def action_example_queries(self, only_show=False): count = 0 total_time_seconds = 0.0 total_result_size = 0 - for description, query in [line.split("\t") for line in - queries.decode("utf-8").splitlines()]: + for description, query in [ + line.split("\t") for line in queries.decode("utf-8").splitlines() + ]: # Launch query and show the `resultsize` of the JSON response. - clear_cache_cmd = (f"curl -s {self.config['server']['url']} " - f"--data-urlencode cmd=clear-cache") - query_cmd = (f"curl -s {self.config['server']['url']} " - f"-H \"Accept: application/qlever-results+json\" " - f"--data-urlencode query={shlex.quote(query)} " - f"--data-urlencode send={example_queries_send}") + clear_cache_cmd = ( + f"curl -s {self.config['server']['url']} " + f"--data-urlencode cmd=clear-cache" + ) + query_cmd = ( + f"curl -s {self.config['server']['url']} " + f'-H "Accept: application/qlever-results+json" ' + f"--data-urlencode query={shlex.quote(query)} " + f"--data-urlencode send={example_queries_send}" + ) try: - subprocess.run(clear_cache_cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + subprocess.run( + clear_cache_cmd, + shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) start_time = time.time() result = subprocess.check_output(query_cmd, shell=True) time_seconds = time.time() - start_time @@ -1203,15 +1342,13 @@ def action_example_queries(self, only_show=False): 
except Exception as e: time_seconds = 0.0 result_size = 0 - result_string = (f"{RED} FAILED{NORMAL}" - f" {RED}({e}){NORMAL}") + result_string = f"{RED} FAILED{NORMAL}" f" {RED}({e}){NORMAL}" # Print description, time, result in tabular form. log.debug(query) - if (len(description) > 60): + if len(description) > 60: description = description[:57] + "..." - log.info(f"{description:<60} {time_seconds:6.2f} s " - f"{result_string}") + log.info(f"{description:<60} {time_seconds:6.2f} s " f"{result_string}") count += 1 total_time_seconds += time_seconds total_result_size += result_size @@ -1220,14 +1357,16 @@ def action_example_queries(self, only_show=False): # Print total time. log.info("") - description = (f"TOTAL for {count} " - f"{'query' if count == 1 else 'queries'}") - log.info(f"{description:<60} {total_time_seconds:6.2f} s " - f"{total_result_size:>14,}") - description = (f"AVERAGE for {count} " - f"{'query' if count == 1 else 'queries'}") - log.info(f"{description:<60} {total_time_seconds / count:6.2f} s " - f"{round(total_result_size / count):>14,}") + description = f"TOTAL for {count} " f"{'query' if count == 1 else 'queries'}" + log.info( + f"{description:<60} {total_time_seconds:6.2f} s " + f"{total_result_size:>14,}" + ) + description = f"AVERAGE for {count} " f"{'query' if count == 1 else 'queries'}" + log.info( + f"{description:<60} {total_time_seconds / count:6.2f} s " + f"{round(total_result_size / count):>14,}" + ) @track_action_rank def action_memory_profile(self, only_show=False): @@ -1237,8 +1376,11 @@ def action_memory_profile(self, only_show=False): """ # Show what the action does. 
- self.show("Poll memory usage of the given process every second " - "and print it to a file", only_show) + self.show( + "Poll memory usage of the given process every second " + "and print it to a file", + only_show, + ) if only_show: return @@ -1249,8 +1391,9 @@ def action_memory_profile(self, only_show=False): pid = int(self.config["general"]["pid"]) proc = psutil.Process(pid) except Exception as e: - raise ActionException(f"Could not obtain information for process " - f"with PID {pid} ({e})") + raise ActionException( + f"Could not obtain information for process " f"with PID {pid} ({e})" + ) self.show_process_info(proc, "", show_heading=True) log.info("") @@ -1282,15 +1425,17 @@ def action_memory_profile_show(self, only_show=False): if "pid" not in self.config["general"]: raise ActionException("PID must be specified via general.PID") pid = int(self.config["general"]["pid"]) - gnuplot_script = (f"set datafile separator \"\t\"; " - f"set xdata time; " - f"set timefmt \"%Y-%m-%d %H:%M:%S\"; " - f"set xlabel \"Time\"; " - f"set ylabel \"Memory Usage\"; " - f"set grid; " - f"plot \"{pid}.memory-usage.tsv\" " - f"using 1:2 with lines; " - f"pause -1") + gnuplot_script = ( + f'set datafile separator "\t"; ' + f"set xdata time; " + f'set timefmt "%Y-%m-%d %H:%M:%S"; ' + f'set xlabel "Time"; ' + f'set ylabel "Memory Usage"; ' + f"set grid; " + f'plot "{pid}.memory-usage.tsv" ' + f"using 1:2 with lines; " + f"pause -1" + ) gnuplot_cmd = f"gnuplot -e {shlex.quote(gnuplot_script)}" # Show it. @@ -1316,8 +1461,9 @@ def setup_autocompletion_cmd(): # their appearance in the class (see the `@track_action_rank` decorator). 
methods = inspect.getmembers(Actions, predicate=inspect.isfunction) methods = [m for m in methods if m[0].startswith("action_")] - action_names = sorted([m[0] for m in methods], - key=lambda m: getattr(Actions, m).rank) + action_names = sorted( + [m[0] for m in methods], key=lambda m: getattr(Actions, m).rank + ) action_names = [_.replace("action_", "") for _ in action_names] action_names = [_.replace("_", "-") for _ in action_names] action_names = " ".join(action_names) @@ -1352,22 +1498,29 @@ def main(): version = "unknown" # If the script is called without argument, say hello and provide some # help to get started. - if len(sys.argv) == 1 or \ - (len(sys.argv) == 2 and sys.argv[1] == "help") or \ - (len(sys.argv) == 2 and sys.argv[1] == "--help") or \ - (len(sys.argv) == 2 and sys.argv[1] == "-h"): + if ( + len(sys.argv) == 1 + or (len(sys.argv) == 2 and sys.argv[1] == "help") + or (len(sys.argv) == 2 and sys.argv[1] == "--help") + or (len(sys.argv) == 2 and sys.argv[1] == "-h") + ): log.info("") - log.info(f"{BOLD}Hello, I am the OLD qlever script" - f" (version {version}){NORMAL}") + log.info( + f"{BOLD}Hello, I am the OLD qlever script" f" (version {version}){NORMAL}" + ) log.info("") if os.path.exists("Qleverfile"): - log.info("I see that you already have a \"Qleverfile\" in the " - "current directory, so you are ready to start") + log.info( + 'I see that you already have a "Qleverfile" in the ' + "current directory, so you are ready to start" + ) log.info("") show_available_action_names() else: - log.info("You need a Qleverfile in the current directory, which " - "you can create as follows:") + log.info( + "You need a Qleverfile in the current directory, which " + "you can create as follows:" + ) log.info("") log.info(f"{BLUE}qlever-old setup-config {NORMAL}") log.info("") @@ -1390,8 +1543,9 @@ def main(): # take the log level from the config file). 
log.setLevel(logging.NOTSET) if len(sys.argv) > 1: - set_log_level_match = re.match(r"general.log_level=(\w+)", - sys.argv[1], re.IGNORECASE) + set_log_level_match = re.match( + r"general.log_level=(\w+)", sys.argv[1], re.IGNORECASE + ) if set_log_level_match: log_level = set_log_level_match.group(1).upper() sys.argv = sys.argv[1:] @@ -1401,13 +1555,13 @@ def main(): log.debug(f"Log level set to {log_level}") log.debug("") except AttributeError: - log.error(f"Invalid log level: \"{log_level}\"") + log.error(f'Invalid log level: "{log_level}"') abort_script() # Helper function that executes an action. def execute_action(actions, action_name, **kwargs): log.info("") - log.info(f"{BOLD}Action \"{action_name}\"{NORMAL}") + log.info(f'{BOLD}Action "{action_name}"{NORMAL}') log.info("") action = f"action_{action_name.replace('-', '_')}" try: @@ -1417,8 +1571,10 @@ def execute_action(actions, action_name, **kwargs): abort_script() except Exception as err: line = traceback.extract_tb(err.__traceback__)[-1].lineno - print(f"{RED}Error in Python script (line {line}: {err})" - f", stack trace follows:{NORMAL}") + print( + f"{RED}Error in Python script (line {line}: {err})" + f", stack trace follows:{NORMAL}" + ) print() raise err @@ -1431,8 +1587,10 @@ def execute_action(actions, action_name, **kwargs): abort_script() if len(sys.argv) > 3: log.setLevel(logging.ERROR) - log.error("Action `setup-config` must be followed by at most one " - "argument (the name of the desied configuration)") + log.error( + "Action `setup-config` must be followed by at most one " + "argument (the name of the desired configuration)" + ) abort_script() log.setLevel(logging.INFO) config_name = sys.argv[2] if len(sys.argv) == 3 else "default" @@ -1465,8 +1623,10 @@ def execute_action(actions, action_name, **kwargs): continue # If the action name does not exist, exit. 
if action_name not in action_names: - log.error(f"Action \"{action_name}\" does not exist, available " - f"actions are: {', '.join(action_names)}") + log.error( + f'Action "{action_name}" does not exist, available ' + f"actions are: {', '.join(action_names)}" + ) abort_script() # Execute the action (or only show what would be executed). execute_action(actions, action_name, only_show=only_show) diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index dda55e23..f6fa0bda 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -41,162 +41,261 @@ def arg(*args, **kwargs): ui_args = all_args["ui"] = {} data_args["name"] = arg( - "--name", type=str, required=True, - help="The name of the dataset") + "--name", type=str, required=True, help="The name of the dataset" + ) data_args["get_data_cmd"] = arg( - "--get-data-cmd", type=str, required=True, - help="The command to get the data") + "--get-data-cmd", + type=str, + required=True, + help="The command to get the data", + ) data_args["description"] = arg( - "--description", type=str, required=True, - help="A concise description of the dataset") + "--description", + type=str, + required=True, + help="A concise description of the dataset", + ) data_args["text_description"] = arg( - "--text-description", type=str, default=None, - help="A concise description of the additional text data" - " if any") + "--text-description", + type=str, + default=None, + help="A concise description of the additional text data" " if any", + ) data_args["format"] = arg( - "--format", type=str, default="ttl", - choices=["ttl", "nt", "nq"], - help="The format of the data") + "--format", + type=str, + default="ttl", + choices=["ttl", "nt", "nq"], + help="The format of the data", + ) index_args["input_files"] = arg( - "--input-files", type=str, required=True, - help="A space-separated list of patterns that match " - "all the files of the dataset") + "--input-files", + type=str, + required=True, + help="A space-separated 
list of patterns that match " + "all the files of the dataset", + ) index_args["cat_input_files"] = arg( - "--cat-input-files", type=str, required=True, - help="The command that produces the input") + "--cat-input-files", + type=str, + required=True, + help="The command that produces the input", + ) index_args["settings_json"] = arg( - "--settings-json", type=str, default="{}", - help="The `.settings.json` file for the index") + "--settings-json", + type=str, + default="{}", + help="The `.settings.json` file for the index", + ) index_args["index_binary"] = arg( - "--index-binary", type=str, default="IndexBuilderMain", - help="The binary for building the index (this requires " - "that you have compiled QLever on your machine)") + "--index-binary", + type=str, + default="IndexBuilderMain", + help="The binary for building the index (this requires " + "that you have compiled QLever on your machine)", + ) index_args["stxxl_memory"] = arg( - "--stxxl-memory", type=str, default="5G", - help="The amount of memory to use for the index build " - "(the name of the option has historical reasons)") + "--stxxl-memory", + type=str, + default="5G", + help="The amount of memory to use for the index build " + "(the name of the option has historical reasons)", + ) index_args["only_pso_and_pos_permutations"] = arg( - "--only-pso-and-pos-permutations", action="store_true", - default=False, - help="Only create the PSO and POS permutations") + "--only-pso-and-pos-permutations", + action="store_true", + default=False, + help="Only create the PSO and POS permutations", + ) index_args["use_patterns"] = arg( - "--use-patterns", action="store_true", default=True, - help="Precompute so-called patterns needed for fast processing" - " of queries like SELECT ?p (COUNT(DISTINCT ?s) AS ?c) " - "WHERE { ?s ?p [] ... 
} GROUP BY ?p") + "--use-patterns", + action="store_true", + default=True, + help="Precompute so-called patterns needed for fast processing" + " of queries like SELECT ?p (COUNT(DISTINCT ?s) AS ?c) " + "WHERE { ?s ?p [] ... } GROUP BY ?p", + ) index_args["text_index"] = arg( - "--text-index", - choices=["none", "from_text_records", "from_literals", - "from_text_records_and_literals"], - default="none", - help="Whether to also build an index for text search" - "and for which texts") + "--text-index", + choices=[ + "none", + "from_text_records", + "from_literals", + "from_text_records_and_literals", + ], + default="none", + help="Whether to also build an index for text search " "and for which texts", + ) index_args["text_words_file"] = arg( - "--text-words-file", type=str, default=None, - help="File with the words for the text index (one line " - "per word, format: `word or IRI\t0 or 1\tdoc id\t1`)") + "--text-words-file", + type=str, + default=None, + help="File with the words for the text index (one line " + "per word, format: `word or IRI\t0 or 1\tdoc id\t1`)", + ) index_args["text_docs_file"] = arg( - "--text-docs-file", type=str, default=None, - help="File with the documents for the text index (one line " - "per document, format: `id\tdocument text`)") + "--text-docs-file", + type=str, + default=None, + help="File with the documents for the text index (one line " + "per document, format: `id\tdocument text`)", + ) server_args["server_binary"] = arg( - "--server-binary", type=str, default="ServerMain", - help="The binary for starting the server (this requires " - "that you have compiled QLever on your machine)") + "--server-binary", + type=str, + default="ServerMain", + help="The binary for starting the server (this requires " + "that you have compiled QLever on your machine)", + ) server_args["host_name"] = arg( - "--host-name", type=str, default=f"localhost", - help="The name of the host on which the server listens for " - "requests") + "--host-name", + 
type=str, + default=f"localhost", + help="The name of the host on which the server listens for " "requests", + ) server_args["port"] = arg( - "--port", type=int, - help="The port on which the server listens for requests") + "--port", type=int, help="The port on which the server listens for requests" + ) server_args["access_token"] = arg( - "--access-token", type=str, default=None, - help="The access token for privileged operations") + "--access-token", + type=str, + default=None, + help="The access token for privileged operations", + ) server_args["memory_for_queries"] = arg( - "--memory-for-queries", type=str, default="5G", - help="The maximal amount of memory used for query processing" - " (if a query needs more than what is available, the " - "query will not be processed)") + "--memory-for-queries", + type=str, + default="5G", + help="The maximal amount of memory used for query processing" + " (if a query needs more than what is available, the " + "query will not be processed)", + ) server_args["cache_max_size"] = arg( - "--cache-max-size", type=str, default="2G", - help="The maximal amount of memory used for caching") + "--cache-max-size", + type=str, + default="2G", + help="The maximal amount of memory used for caching", + ) server_args["cache_max_size_single_entry"] = arg( - "--cache-max-size-single-entry", type=str, default="1G", - help="The maximal amount of memory used for caching a single " - "query result") + "--cache-max-size-single-entry", + type=str, + default="1G", + help="The maximal amount of memory used for caching a single " + "query result", + ) server_args["cache_max_num_entries"] = arg( - "--cache-max-num-entries", type=int, default=200, - help="The maximal number of entries in the cache" - " (the eviction policy when the cache is full is LRU)") + "--cache-max-num-entries", + type=int, + default=200, + help="The maximal number of entries in the cache" + " (the eviction policy when the cache is full is LRU)", + ) server_args["timeout"] = arg( - 
"--timeout", type=str, default="30s", - help="The maximal time in seconds a query is allowed to run" - " (can be increased per query with the URL parameters " - "`timeout` and `access_token`)") + "--timeout", + type=str, + default="30s", + help="The maximal time in seconds a query is allowed to run" + " (can be increased per query with the URL parameters " + "`timeout` and `access_token`)", + ) server_args["num_threads"] = arg( - "--num-threads", type=int, default=8, - help="The number of threads used for query processing") + "--num-threads", + type=int, + default=8, + help="The number of threads used for query processing", + ) server_args["only_pso_and_pos_permutations"] = arg( - "--only-pso-and-pos-permutations", action="store_true", - default=False, - help="Only use the PSO and POS permutations (then each " - "triple pattern must have a fixed predicate)") + "--only-pso-and-pos-permutations", + action="store_true", + default=False, + help="Only use the PSO and POS permutations (then each " + "triple pattern must have a fixed predicate)", + ) server_args["use_patterns"] = arg( - "--use-patterns", action="store_true", default=True, - help="Use the patterns precomputed during the index build" - " (see `qlever index --help` for their utility)") + "--use-patterns", + action="store_true", + default=True, + help="Use the patterns precomputed during the index build" + " (see `qlever index --help` for their utility)", + ) server_args["use_text_index"] = arg( - "--use-text-index", choices=["yes", "no"], default="no", - help="Whether to use the text index (requires that one was " - "built, see `qlever index`)") + "--use-text-index", + choices=["yes", "no"], + default="no", + help="Whether to use the text index (requires that one was " + "built, see `qlever index`)", + ) server_args["warmup_cmd"] = arg( - "--warmup-cmd", type=str, - help="Command executed after the server has started " - " (executed as part of `qlever start` unless " - " `--no-warmup` is specified, or with 
`qlever warmup`)") + "--warmup-cmd", + type=str, + help="Command executed after the server has started " + " (executed as part of `qlever start` unless " + " `--no-warmup` is specified, or with `qlever warmup`)", + ) runtime_args["system"] = arg( - "--system", type=str, - choices=Containerize.supported_systems() + ["native"], - default="docker", - help=("Whether to run commands like `index` or `start` " - "natively or in a container, and if in a container, " - "which system to use")) + "--system", + type=str, + choices=Containerize.supported_systems() + ["native"], + default="docker", + help=( + "Whether to run commands like `index` or `start` " + "natively or in a container, and if in a container, " + "which system to use" + ), + ) runtime_args["image"] = arg( - "--image", type=str, - default="docker.io/adfreiburg/qlever", - help="The name of the image when running in a container") + "--image", + type=str, + default="docker.io/adfreiburg/qlever", + help="The name of the image when running in a container", + ) runtime_args["index_container"] = arg( - "--index-container", type=str, - help="The name of the container used by `qlever index`") + "--index-container", + type=str, + help="The name of the container used by `qlever index`", + ) runtime_args["server_container"] = arg( - "--server-container", type=str, - help="The name of the container used by `qlever start`") + "--server-container", + type=str, + help="The name of the container used by `qlever start`", + ) ui_args["ui_port"] = arg( - "--ui-port", type=int, default=8176, - help="The port of the Qlever UI when running `qlever ui`") + "--ui-port", + type=int, + default=8176, + help="The port of the Qlever UI when running `qlever ui`", + ) ui_args["ui_config"] = arg( - "--ui-config", type=str, default="default", - help="The name of the backend configuration for the QLever UI" - " (this determines AC queries and example queries)") + "--ui-config", + type=str, + default="default", + help="The name of the backend 
configuration for the QLever UI" + " (this determines AC queries and example queries)", + ) ui_args["ui_system"] = arg( - "--ui-system", type=str, - choices=Containerize.supported_systems(), - default="docker", - help="Which container system to use for `qlever ui`" - " (unlike for `qlever index` and `qlever start`, " - " \"native\" is not yet supported here)") + "--ui-system", + type=str, + choices=Containerize.supported_systems(), + default="docker", + help="Which container system to use for `qlever ui`" + " (unlike for `qlever index` and `qlever start`, " + ' "native" is not yet supported here)', + ) ui_args["ui_image"] = arg( - "--ui-image", type=str, - default="docker.io/adfreiburg/qlever-ui", - help="The name of the image used for `qlever ui`") + "--ui-image", + type=str, + default="docker.io/adfreiburg/qlever-ui", + help="The name of the image used for `qlever ui`", + ) ui_args["ui_container"] = arg( - "--ui-container", type=str, - help="The name of the container used for `qlever ui`") + "--ui-container", + type=str, + help="The name of the container used for `qlever ui`", + ) return all_args @@ -214,8 +313,7 @@ def read(qleverfile_path): # Read the Qleverfile. 
defaults = {"random": "83724324hztz", "version": "01.01.01"} - config = ConfigParser(interpolation=ExtendedInterpolation(), - defaults=defaults) + config = ConfigParser(interpolation=ExtendedInterpolation(), defaults=defaults) try: config.read(qleverfile_path) except Exception as e: @@ -230,13 +328,18 @@ def read(qleverfile_path): if match: try: value = subprocess.check_output( - match.group(1), shell=True, text=True, - stderr=subprocess.STDOUT).strip() + match.group(1), + shell=True, + text=True, + stderr=subprocess.STDOUT, + ).strip() except Exception as e: log.info("") - log.error(f"Error evaluating {value} for option " - f"{section}.{option.upper()} in " - f"{qleverfile_path}:") + log.error( + f"Error evaluating {value} for option " + f"{section}.{option.upper()} in " + f"{qleverfile_path}:" + ) log.info("") log.info(e.output if hasattr(e, "output") else e) exit(1) diff --git a/src/qlever/util.py b/src/qlever/util.py index e595fe08..41243fc7 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -29,8 +29,9 @@ def get_total_file_size(patterns: list[str]) -> int: return total_size -def run_command(cmd: str, return_output: bool = False, - show_output: bool = False) -> Optional[str]: +def run_command( + cmd: str, return_output: bool = False, show_output: bool = False +) -> Optional[str]: """ Run the given command and throw an exception if the exit code is non-zero. If `return_output` is `True`, return what the command wrote to `stdout`. @@ -45,7 +46,7 @@ def run_command(cmd: str, return_output: bool = False, "shell": True, "text": True, "stdout": None if show_output else subprocess.PIPE, - "stderr": subprocess.PIPE + "stderr": subprocess.PIPE, } result = subprocess.run(f"set -o pipefail; {cmd}", **subprocess_args) # If the exit code is non-zero, throw an exception. 
If something was @@ -63,17 +64,20 @@ def run_command(cmd: str, return_output: bool = False, raise Exception(result.stderr.replace("\n", " ").strip()) else: raise Exception( - f"Command failed with exit code {result.returncode}" - f" but nothing written to stderr") + f"Command failed with exit code {result.returncode}" + f" but nothing written to stderr" + ) # Optionally, return what was written to `stdout`. if return_output: return result.stdout -def run_curl_command(url: str, - headers: dict[str, str] = {}, - params: dict[str, str] = {}, - result_file: Optional[str] = None) -> str: +def run_curl_command( + url: str, + headers: dict[str, str] = {}, + params: dict[str, str] = {}, + result_file: Optional[str] = None, +) -> str: """ Run `curl` with the given `url`, `headers`, and `params`. If `result_file` is `None`, return the output, otherwise, write the output to the given file @@ -83,22 +87,29 @@ def run_curl_command(url: str, # Construct and run the `curl` command. default_result_file = "/tmp/qlever.curl.result" actual_result_file = result_file if result_file else default_result_file - curl_cmd = (f"curl -s -o \"{actual_result_file}\"" - f" -w \"%{{http_code}}\n\" {url}" - + "".join([f" -H \"{key}: {value}\"" - for key, value in headers.items()]) - + "".join([f" --data-urlencode {key}={shlex.quote(value)}" - for key, value in params.items()])) - result = subprocess.run(curl_cmd, shell=True, text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + curl_cmd = ( + f'curl -s -o "{actual_result_file}"' + f' -w "%{{http_code}}\n" {url}' + + "".join([f' -H "{key}: {value}"' for key, value in headers.items()]) + + "".join( + [ + f" --data-urlencode {key}={shlex.quote(value)}" + for key, value in params.items() + ] + ) + ) + result = subprocess.run( + curl_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) # Case 1: An error occurred, raise an exception. 
if result.returncode != 0: if len(result.stderr) > 0: raise Exception(result.stderr) else: - raise Exception(f"curl command failed with exit code " - f"{result.returncode}, stderr is empty") + raise Exception( + f"curl command failed with exit code " + f"{result.returncode}, stderr is empty" + ) # Case 2: Return output (read from `default_result_file`). if result_file is None: result_file_path = Path(default_result_file) @@ -117,9 +128,9 @@ def is_qlever_server_alive(port: str) -> bool: message = "from the qlever script".replace(" ", "%20") curl_cmd = f"curl -s http://localhost:{port}/ping?msg={message}" - exit_code = subprocess.call(curl_cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + exit_code = subprocess.call( + curl_cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) return exit_code == 0 @@ -152,15 +163,15 @@ def show_table_line(pid, user, start_time, rss, cmdline): try: pinfo = psutil_process.as_dict( - attrs=['pid', 'username', 'create_time', - 'memory_info', 'cmdline']) + attrs=["pid", "username", "create_time", "memory_info", "cmdline"] + ) # Note: pinfo[`cmdline`] is `None` if the process is a zombie. - cmdline = " ".join(pinfo['cmdline'] or []) + cmdline = " ".join(pinfo["cmdline"] or []) if len(cmdline) == 0 or not re.search(cmdline_regex, cmdline): return False - pid = pinfo['pid'] - user = pinfo['username'] if pinfo['username'] else "" - start_time = datetime.fromtimestamp(pinfo['create_time']) + pid = pinfo["pid"] + user = pinfo["username"] if pinfo["username"] else "" + start_time = datetime.fromtimestamp(pinfo["create_time"]) if start_time.date() == date.today(): start_time = start_time.strftime("%H:%M") else: @@ -193,7 +204,7 @@ def is_port_used(port: int) -> bool: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Ensure that the port is not blocked after the check. 
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.bind(('', port)) + sock.bind(("", port)) sock.close() return False except OSError as err: