-
Notifications
You must be signed in to change notification settings - Fork 440
Introduce cache database and reindex command #4650
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| # ------------------------------------------------------------------------- | ||
| # | ||
| # Part of the CodeChecker project, under the Apache License v2.0 with | ||
| # LLVM Exceptions. See LICENSE for license information. | ||
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| # | ||
| # ------------------------------------------------------------------------- | ||
|
|
||
| import sqlite3 | ||
| import itertools | ||
| import os | ||
| from typing import List | ||
|
|
||
|
|
||
class CacheDB:
    """
    SQLite database located in the report directory,
    designed to speed up the parsing process.

    The database consists of a single table, 'plist_lookup', in which every
    row maps a plist file to one of the source files it refers to.
    """

    # Maximum number of bound parameters used in a single SQL statement.
    # SQLite rejects statements that use more host parameters than its
    # SQLITE_MAX_VARIABLE_NUMBER limit (999 by default in older releases),
    # so large lookups are split into chunks of at most this size.
    _MAX_QUERY_VARIABLES = 900

    __sqlitedb_path: str
    __con: sqlite3.Connection
    __cur: sqlite3.Cursor

    def __init__(self, report_dir: str, clean: bool = False):
        """
        Initiates the cache database and creates the necessary tables.

        Args:
            report_dir (str): path to the report directory
            clean (bool): If set to True, the previous database
                will be dropped and a new one is created.
        """
        self.__sqlitedb_path = os.path.join(report_dir, "cache.sqlite")

        if clean and os.path.exists(self.__sqlitedb_path):
            os.remove(self.__sqlitedb_path)

        self.__create_connection()

    def __create_connection(self):
        """
        Opens the SQLite connection and makes sure the tables exist.
        """
        self.__con = sqlite3.connect(self.__sqlitedb_path)
        self.__cur = self.__con.cursor()
        self.__create_tables()

    def close_connection(self):
        """
        Closes the connection to the cache database and writes
        changes to the disk.
        """
        # Closing a sqlite3 connection does not commit by itself, so commit
        # explicitly to make sure nothing pending is thrown away.
        self.__con.commit()
        self.__con.close()

    def __create_tables(self):
        """
        Creates the 'plist_lookup' table unless it is already present.
        """
        self.__cur.execute("CREATE TABLE IF NOT EXISTS plist_lookup"
                           "(plist TEXT, source TEXT)")

    def insert_plist_sources(self, plist_file: str, source_files: List[str]):
        """
        Inserts the plist file and its associated source files into the
        cache database. These source files are located in the 'files' section
        of an individual plist file.

        Args:
            plist_file (str): path to the plist file
            source_files (List[str]): list of source files mapped to
                the plist file
        """
        # One (plist, source) row per source file.
        data = list(zip(itertools.repeat(plist_file), source_files))
        self.__cur.executemany("INSERT INTO plist_lookup VALUES(?, ?)", data)
        self.__con.commit()

    def plist_query(self, source_files: List[str]) -> List[str]:
        """
        Returns all plist files associated with any of the given source files
        by querying the cache database.

        The lookup is executed in chunks so that an arbitrarily long list of
        source files never exceeds the number of parameters SQLite accepts
        in one statement.

        Args:
            source_files (List[str]): list of source files to be looked up
                from the cache database.

        Returns:
            The matching plist files without duplicates, in the order in
            which they were first found. Empty if nothing matches or no
            source file was given.
        """
        sources = list(source_files)
        chunk_size = self._MAX_QUERY_VARIABLES

        # A dict is used as an insertion-ordered set of the results.
        found = {}
        for start in range(0, len(sources), chunk_size):
            chunk = sources[start:start + chunk_size]
            placeholders = ','.join('?' * len(chunk))
            res = self.__cur.execute(
                "SELECT DISTINCT plist FROM plist_lookup WHERE source"
                f" IN ({placeholders})", chunk)
            for row in res:
                found[row[0]] = None

        return list(found)

    def get_indexed_plist_files(self) -> List[str]:
        """
        Returns already indexed plist files from the cache database.
        """
        res = self.__cur.execute("SELECT DISTINCT plist FROM plist_lookup")
        return [row[0] for row in res]
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,108 @@ | ||||||
| # ------------------------------------------------------------------------- | ||||||
| # | ||||||
| # Part of the CodeChecker project, under the Apache License v2.0 with | ||||||
| # LLVM Exceptions. See LICENSE for license information. | ||||||
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
| # | ||||||
| # ------------------------------------------------------------------------- | ||||||
|
|
||||||
| import os | ||||||
| import sys | ||||||
| from codechecker_analyzer.cachedb import CacheDB | ||||||
| from codechecker_common import arg, logger | ||||||
| from codechecker_common.compatibility.multiprocessing import Pool, cpu_count | ||||||
| from codechecker_report_converter.report.parser import plist as plistparser | ||||||
| from typing import List, Tuple | ||||||
|
|
||||||
| LOG = logger.get_logger('system') | ||||||
|
|
||||||
|
|
||||||
def get_argparser_ctor_args():
    """
    Build the keyword arguments used for constructing the
    argparse.ArgumentParser of the 'reindex' subcommand (either directly or
    as a subparser).
    """
    # Long description shown in the help output of the subcommand.
    description = """
The analysis cache database is a SQLite database located in the
report directory, designed to speed up the parsing process.
In case it is missing or outdated, one can use the 'reindex' command to
recreate/update this database."""

    ctor_args = dict(
        prog='CodeChecker reindex',
        formatter_class=arg.RawDescriptionDefaultHelpFormatter,
        description=description,
        help="Recreate/update the cache database given a report directory.")
    return ctor_args
|
|
||||||
|
|
||||||
def add_arguments_to_parser(parser):
    """
    Register the command line arguments of the 'reindex' subcommand on the
    given argparse.ArgumentParser and bind 'main' as its handler.
    """
    input_help = ("The analysis result folder(s) containing "
                  "analysis results which should be "
                  "reindexed.")
    jobs_help = ("Number of threads to use for reindex. More "
                 "threads mean faster reindex at the cost of "
                 "using more memory.")
    force_help = ("Drop the previous cache database and do a "
                  "clean reindex.")

    # Positional: one or more report directories.
    parser.add_argument(
        'input',
        type=str,
        nargs='+',
        metavar='folder',
        help=input_help)

    # Degree of parallelism, defaulting to the value of cpu_count().
    parser.add_argument(
        '-j', '--jobs',
        type=int,
        dest="jobs",
        required=False,
        default=cpu_count(),
        help=jobs_help)

    # Start over from an empty cache database.
    parser.add_argument(
        '-f', '--force',
        action="store_true",
        dest="force",
        required=False,
        default=False,
        help=force_help)

    logger.add_verbose_arguments(parser)
    parser.set_defaults(func=main)
|
|
||||||
|
|
||||||
def main(args):
    """
    Entry point of the 'reindex' subcommand: set up logging, then update
    the cache database of every report directory given in 'args.input'.
    """
    verbosity = args.verbose if 'verbose' in args else None
    logger.setup_logger(verbosity)

    for report_dir in args.input:
        update_cache_db(report_dir, args.force, args.jobs)
|
|
||||||
|
|
||||||
def __process_file(file_path: str) -> Tuple[str, List[str]]:
    """
    Parse a single plist file and collect the source files listed in it.

    Args:
        file_path (str): path to the plist file

    Returns:
        A (plist path, source file list) tuple. The list is empty when the
        parser returns None for the file.
    """
    # The file is opened in binary mode on purpose, so no text encoding
    # (e.g. utf-8) is passed to open(). NOTE(review): according to the
    # review discussion the XML decoding is left to the plist parser
    # (lxml based) -- confirm against the parser implementation.
    with open(file_path, 'rb') as plist_fp:
        parsed = plistparser.parse(plist_fp)

        if parsed is None:
            return (file_path, [])

        sources = plistparser.get_file_list(
            parsed, os.path.dirname(file_path))
        return (file_path, sources)
|
|
||||||
|
|
||||||
def update_cache_db(report_dir: str, force: bool, jobs: int):
    """
    Index the not yet indexed plist files of a report directory into its
    cache database.

    Args:
        report_dir (str): path to the report directory; the process exits
            with status 1 if it is not an existing directory
        force (bool): if True, the previous cache database is dropped and
            every plist file is indexed again
        jobs (int): number of worker processes used for parsing
    """
    if not os.path.isdir(report_dir):
        LOG.error("Directory %s does not exist!", report_dir)
        sys.exit(1)

    report_dir = os.path.abspath(report_dir)
    cachedb = CacheDB(report_dir, force)

    # Make sure the connection is released even if listing, parsing or
    # inserting fails.
    try:
        # A set gives constant time membership tests while filtering.
        indexed_files = set(cachedb.get_indexed_plist_files())

        candidates = (
            os.path.abspath(os.path.join(report_dir, name))
            for name in os.listdir(report_dir)
            if name.endswith(plistparser.EXTENSION))
        plist_files = [path for path in candidates
                       if path not in indexed_files]

        # Nothing new to index: do not spawn worker processes for nothing.
        if not plist_files:
            return

        with Pool(jobs) as p:
            res = p.map(__process_file, plist_files)

        for plist_file, sources in res:
            # Plist files without any source file are not recorded.
            if sources:
                cachedb.insert_plist_sources(plist_file, sources)
    finally:
        cachedb.close_connection()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| # coding=utf-8 | ||
| # ------------------------------------------------------------------------- | ||
| # | ||
| # Part of the CodeChecker project, under the Apache License v2.0 with | ||
| # LLVM Exceptions. See LICENSE for license information. | ||
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| # | ||
| # ------------------------------------------------------------------------- | ||
|
|
||
| # This file is empty, and is only present so that this directory will form a | ||
| # package. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| #include <stdio.h> | ||
| #include "a.h" | ||
|
|
||
/*
 * Entry point of the analyzer test input: calls foo() and then divides by
 * zero.
 * NOTE(review): the division by zero looks deliberate -- presumably it gives
 * the static analyzer a defect to report; confirm before "fixing" it.
 */
int main()
{
    int a = foo();
    return 1 / 0;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
/*
 * Helper of the analyzer test input; its only statement divides by zero.
 * NOTE(review): presumably an intentional defect so that a report is also
 * attached to this file -- confirm before changing it.
 */
int foo()
{
    return 1 / 0;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| # | ||
| # ------------------------------------------------------------------------- | ||
| # | ||
| # Part of the CodeChecker project, under the Apache License v2.0 with | ||
| # LLVM Exceptions. See LICENSE for license information. | ||
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| # | ||
| # ------------------------------------------------------------------------- | ||
|
|
||
| """ | ||
| Test reindex functionality. | ||
| """ | ||
|
|
||
| import os | ||
| import shutil | ||
| import subprocess | ||
| import unittest | ||
|
|
||
| from libtest import env | ||
| from codechecker_analyzer.cachedb import CacheDB | ||
|
|
||
|
|
||
class TestReindex(unittest.TestCase):
    """
    End-to-end test of the 'CodeChecker reindex' command: log and analyze a
    small project, reindex its report directory and verify the content of
    the resulting cache database.
    """
    _ccClient = None

    def setup_class(self):
        """Setup the environment for the tests."""

        global TEST_WORKSPACE
        TEST_WORKSPACE = env.get_workspace('reindex')

        report_dir = os.path.join(TEST_WORKSPACE, 'reports')
        os.makedirs(report_dir)

        os.environ['TEST_WORKSPACE'] = TEST_WORKSPACE

    def teardown_class(self):
        """Delete the workspace associated with this test"""

        # TODO: If environment variable is set keep the workspace
        # and print out the path.
        global TEST_WORKSPACE

        print("Removing: " + TEST_WORKSPACE)
        shutil.rmtree(TEST_WORKSPACE)

    def setup_method(self, _):
        """Collect the paths and commands used by the test methods."""

        # TEST_WORKSPACE is exported to the environment by setup_class.
        self.test_workspace = os.environ['TEST_WORKSPACE']

        test_class = self.__class__.__name__
        print('Running ' + test_class + ' tests in ' + self.test_workspace)

        # Get the CodeChecker cmd if needed for the tests.
        self._codechecker_cmd = env.codechecker_cmd()
        self._tu_collector_cmd = env.tu_collector_cmd()
        self.report_dir = os.path.join(self.test_workspace, "reports")
        self.test_dir = os.path.join(os.path.dirname(__file__), 'test_files')

    def __run_cmd(self, cmd, cwd):
        """
        Run the command in the given working directory, print its output
        and assert that it exited with status 0.
        """
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            encoding="utf-8",
            errors="ignore")
        out, err = process.communicate()
        print(out)
        print(err)
        self.assertEqual(process.returncode, 0)

    def test_reindex(self):
        """
        The reindex command must create the cache database and map the
        source files of the test project to the clangsa plist of a.c.
        """
        build_json = os.path.join(self.test_workspace, "build.json")

        # Create and run log command
        log_cmd = [self._codechecker_cmd, "log", "-b", "gcc a.c",
                   "-o", build_json]
        self.__run_cmd(log_cmd, self.test_dir)

        # Create and run analyze command
        analyze_cmd = [
            self._codechecker_cmd, "analyze", "-c", build_json,
            "--analyzers", "clangsa", "-o", self.report_dir]
        self.__run_cmd(analyze_cmd, self.test_dir)

        plist_files_in_report_dir = [
            os.path.join(self.report_dir, f)
            for f in os.listdir(self.report_dir)
            if os.path.splitext(f)[1] == ".plist"]

        # Check if there are plist files in report_dir
        self.assertGreaterEqual(len(plist_files_in_report_dir), 1)

        # First plist whose file name refers to the clangsa result of a.c.
        a_c_clangsa_plist = next(
            (f for f in plist_files_in_report_dir
             if "a.c_clangsa" in os.path.basename(f)),
            None)

        # Check if a.c_clangsa plist was found
        self.assertIsNotNone(a_c_clangsa_plist)

        # Create and run reindex command
        reindex_cmd = [
            self._codechecker_cmd, "reindex", "-f", self.report_dir]
        self.__run_cmd(reindex_cmd, self.test_dir)

        # Check if CacheDB was created
        self.assertTrue(os.path.isfile(
            os.path.join(self.report_dir, "cache.sqlite")))

        # Load CacheDB
        cachedb = CacheDB(self.report_dir)

        # Close the connection even if an assertion fails, so that the
        # database file is not kept open while the workspace is removed.
        try:
            # Check if a.c_clangsa plist was indexed by the reindex command
            self.assertIn(a_c_clangsa_plist,
                          cachedb.get_indexed_plist_files())

            source_files_in_test_dir = [
                os.path.join(self.test_dir, f)
                for f in os.listdir(self.test_dir)
                if os.path.splitext(f)[1] in [".c", ".h"]]

            # Check if source files were mapped to a.c_clangsa plist
            for f in source_files_in_test_dir:
                self.assertIn(a_c_clangsa_plist, cachedb.plist_query([f]))
        finally:
            cachedb.close_connection()
Uh oh!
There was an error while loading. Please reload this page.