Skip to content

Commit 110a261

Browse files
Introduce cache database and reindex command
1 parent 49a939a commit 110a261

File tree

3 files changed

+219
-3
lines changed

3 files changed

+219
-3
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# -------------------------------------------------------------------------
2+
#
3+
# Part of the CodeChecker project, under the Apache License v2.0 with
4+
# LLVM Exceptions. See LICENSE for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# -------------------------------------------------------------------------
8+
9+
import sqlite3
10+
import itertools
11+
import os
12+
from typing import List
13+
14+
15+
class CacheDB:
    """
    SQLite database located in the report directory,
    designed to speed up the parsing process.

    The database consists of a single table, 'plist_lookup', in which every
    row pairs a plist file with one of the source files listed in it.
    """

    # Maximum number of bound parameters used in a single SQL statement.
    # SQLite refuses statements that contain more host parameters than its
    # compile-time limit (999 by default in builds older than 3.32), so
    # lookups are split into chunks that stay well below that limit.
    _MAX_SQL_VARIABLES = 500

    __sqlitedb_path: str
    __con: sqlite3.Connection
    __cur: sqlite3.Cursor

    def __init__(self, report_dir: str, clean: bool = False):
        """
        Initiates the cache database and creates the necessary tables.

        Args:
            report_dir (str): path to the report directory
            clean (bool): If set to True, the previous database
                will be dropped and a new one is created.
        """
        self.__sqlitedb_path = os.path.join(report_dir, "cache.db")

        if clean and os.path.exists(self.__sqlitedb_path):
            os.remove(self.__sqlitedb_path)

        self.__create_connection()

    def __create_connection(self):
        """
        Opens the database file, creates a cursor for it and makes sure
        that the required tables exist.
        """
        self.__con = sqlite3.connect(self.__sqlitedb_path)
        self.__cur = self.__con.cursor()
        self.__create_tables()

    def close_connection(self):
        """
        Closes the connection to the cache database and writes
        changes to the disk.
        """
        # Closing a sqlite3 connection does not commit by itself, so
        # pending changes are committed explicitly before closing.
        self.__con.commit()
        self.__con.close()

    def __create_tables(self):
        """
        Creates the 'plist_lookup' table unless it already exists.
        """
        self.__cur.execute(
            "CREATE TABLE IF NOT EXISTS plist_lookup(plist, source)")

    def insert_plist_sources(self, plist_file: str, source_files: List[str]):
        """
        Inserts the plist file and its associated source files into the
        cache database. These source files are located in the 'files' section
        of an individual plist file.

        Args:
            plist_file (str): path to the plist file
            source_files (List[str]): list of source files mapped to
                the plist file
        """
        rows = [(plist_file, source) for source in source_files]

        # Using the connection as a context manager commits the inserted
        # rows on success and rolls them back if the insertion fails.
        with self.__con:
            self.__cur.executemany(
                "INSERT INTO plist_lookup VALUES(?, ?)", rows)

    def plist_query(self, source_files: List[str]) -> List[str]:
        """
        Returns all plist files associated with any of the given source files
        by querying the cache database. Every plist file is returned at most
        once, even if several of the given source files belong to it.

        Args:
            source_files (List[str]): list of source files to be looked up
                from the cache database.
        """
        sources = list(source_files)
        chunk_size = self._MAX_SQL_VARIABLES

        # A dict is used as an insertion-ordered set so that a plist file
        # matched by several chunks is reported only once.
        found = {}

        for start in range(0, len(sources), chunk_size):
            chunk = sources[start:start + chunk_size]
            placeholders = ','.join('?' for _ in chunk)
            res = self.__cur.execute(
                "SELECT DISTINCT plist FROM plist_lookup WHERE source"
                f" IN ({placeholders})", chunk)
            for row in res:
                found[row[0]] = None

        return list(found)

    def get_indexed_plist_files(self) -> List[str]:
        """
        Returns already indexed plist files from the cache database.
        """
        res = self.__cur.execute("SELECT DISTINCT plist FROM plist_lookup")
        return [row[0] for row in res]
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# -------------------------------------------------------------------------
2+
#
3+
# Part of the CodeChecker project, under the Apache License v2.0 with
4+
# LLVM Exceptions. See LICENSE for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# -------------------------------------------------------------------------
8+
9+
import os
10+
import sys
11+
from codechecker_analyzer.cachedb import CacheDB
12+
from codechecker_common import arg, logger
13+
from codechecker_report_converter.report.parser import plist as plistparser
14+
from multiprocessing import Pool, cpu_count
15+
from typing import List, Tuple
16+
17+
LOG = logger.get_logger('system')
18+
19+
20+
def get_argparser_ctor_args():
    """
    Build the keyword arguments with which the argparse.ArgumentParser of
    the 'reindex' subcommand is constructed (either directly or as a
    subparser).
    """

    description = """
The analysis cache database is a SQLite database located in the
report directory, designed to speed up the parsing process.
In case it is missing or outdated, one can use the 'reindex' command to
recreate/update this database."""

    ctor_args = {
        'prog': 'CodeChecker reindex',
        'formatter_class': arg.RawDescriptionDefaultHelpFormatter,
        'description': description,
        'help': "Recreate/update the cache database given a report directory."
    }
    return ctor_args
36+
37+
38+
def add_arguments_to_parser(parser):
    """
    Register the command line arguments of the 'reindex' subcommand on the
    given argparse.ArgumentParser and set 'main' as its entry point.
    """

    # Positional argument: one or more report directories.
    parser.add_argument(
        'input',
        metavar='folder',
        nargs='+',
        type=str,
        help="The analysis result folder(s) containing analysis results "
             "which should be reindexed.")

    # Degree of parallelism; defaults to the CPU count of the machine.
    parser.add_argument(
        '-j', '--jobs',
        dest="jobs",
        type=int,
        default=cpu_count(),
        required=False,
        help="Number of threads to use for reindex. More threads mean faster "
             "reindex at the cost of using more memory.")

    # Flag requesting that the existing cache database is thrown away.
    parser.add_argument(
        '-f', '--force',
        dest="force",
        action="store_true",
        default=False,
        required=False,
        help="Drop the previous cache database and do a clean reindex.")

    logger.add_verbose_arguments(parser)
    parser.set_defaults(func=main)
70+
71+
72+
def main(args):
    """
    Entry point of the 'reindex' subcommand: set up logging, then update
    the cache database of every report directory given in args.input.
    """
    verbosity = args.verbose if 'verbose' in args else None
    logger.setup_logger(verbosity)

    for report_dir in args.input:
        update_cache_db(report_dir, args.force, args.jobs)
76+
77+
78+
def __process_file(file_path: str) -> Tuple[str, List[str]]:
    """
    Parse a single plist file and collect the source files listed in it.

    Returns a (plist file path, source file list) pair. The list is empty
    when the plist parser returns None for the file.
    """
    with open(file_path, 'rb') as plist_fp:
        parsed = plistparser.parse(plist_fp)

    if parsed is None:
        return (file_path, [])

    # The directory of the plist file is passed on as the base directory
    # of the listed source files.
    sources = plistparser.get_file_list(parsed, os.path.dirname(file_path))
    return (file_path, sources)
85+
86+
87+
def update_cache_db(report_dir: str, force: bool, jobs: int):
    """
    Index the not yet indexed plist files of a report directory in its
    cache database.

    Args:
        report_dir (str): path to the report directory. The process exits
            with status 1 if it is not an existing directory.
        force (bool): If set to True, the previous cache database is
            dropped and every plist file is indexed again.
        jobs (int): number of worker processes used to parse plist files.
    """
    if not os.path.isdir(report_dir):
        LOG.error(f"Directory {report_dir} does not exist!")
        sys.exit(1)

    report_dir = os.path.abspath(report_dir)
    cachedb = CacheDB(report_dir, force)

    # The connection is closed in the 'finally' branch so that it is not
    # leaked when listing, parsing or inserting raises an exception.
    try:
        # A set keeps the "already indexed?" test constant time per file
        # instead of scanning a list for every plist file.
        indexed_files = set(cachedb.get_indexed_plist_files())

        candidates = (
            os.path.abspath(os.path.join(report_dir, name))
            for name in os.listdir(report_dir)
            if name.endswith(plistparser.EXTENSION))
        plist_files = [path for path in candidates
                       if path not in indexed_files]

        # Nothing new to index: avoid spawning worker processes for an
        # empty work list.
        if not plist_files:
            return

        with Pool(jobs) as p:
            res = p.map(__process_file, plist_files)

        for plist_file, sources in res:
            if sources:
                cachedb.insert_plist_sources(plist_file, sources)
    finally:
        cachedb.close_connection()

tools/report-converter/codechecker_report_converter/report/parser/plist.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,15 +178,25 @@ def get_file_index_map(
178178
) -> Dict[int, File]:
179179
""" Get file index map from the given plist object. """
180180
file_index_map: Dict[int, File] = {}
181+
file_list = get_file_list(plist, source_dir_path)
181182

182-
for i, orig_file_path in enumerate(plist.get('files', [])):
183-
file_path = os.path.normpath(os.path.join(
184-
source_dir_path, orig_file_path))
183+
for i, file_path in enumerate(file_list):
185184
file_index_map[i] = get_or_create_file(file_path, file_cache)
186185

187186
return file_index_map
188187

189188

189+
def get_file_list(
    plist: Any,
    source_dir_path: str
) -> List[str]:
    """
    Get file list section from the given plist object.

    Every entry of the 'files' section is joined onto source_dir_path and
    normalized. An empty list is returned if the section is missing.
    """
    return [
        os.path.normpath(os.path.join(source_dir_path, listed_path))
        for listed_path in plist.get('files', [])]
198+
199+
190200
class Parser(BaseParser):
191201
def get_reports(
192202
self,

0 commit comments

Comments
 (0)