Skip to content

Commit 3ed0892

Browse files
flozendertsantalis
authored and committed
add git log -L baseline
1 parent 395dfe5 commit 3ed0892

File tree

9 files changed

+75433
-0
lines changed

9 files changed

+75433
-0
lines changed

experiments/gitLog-baseline/gitLog.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import git
2+
import csv
3+
import json
4+
5+
from collections import defaultdict
6+
import json
7+
import os
8+
from collections import defaultdict
9+
import subprocess
10+
11+
# Keyed by oracle file name: the list of commit ids that scanJSON stores for
# that file; dumped to JSON by the __main__ section.
oracle_commits = defaultdict(list)
12+
# Keyed by oracle file name: the [tp, fp, fn, empty_commits_count] list that
# scanJSON stores for that file; written out by write_to_csv.
oracle_stats = defaultdict(list)
13+
# Keyed by oracle file name: commit ids that is_git_diff_empty classified as
# "empty"; dumped to JSON by the __main__ section.
empty_commits = defaultdict(list)
14+
15+
def is_git_diff_empty(commit, parentCommit, filepath, repo_path, fileName):
    """Tell whether `commit` changed `filepath` (almost) only in CR line endings.

    ``git diff --stat`` is run between `parentCommit` and `commit`, first with
    ``--ignore-cr-at-eol`` and, if that still reports changes, once more
    without the flag. The commit counts as "empty" when the CR-ignoring diff
    is empty, or when more than 95% of the changed lines disappear once CR
    differences are ignored.

    Args:
        commit: Id of the commit being examined.
        parentCommit: Id of the commit to diff against. A falsy value means
            there is nothing to compare with and yields False.
        filepath: Path handed to git after ``--``.
        repo_path: Directory handed to ``git -C``.
        fileName: Key under which an "empty" commit is appended to the
            module-level `empty_commits` mapping.

    Returns:
        True if the commit is classified as empty, False otherwise. False is
        also returned (after printing a message) when git exits with an error
        or when its ``--stat`` output cannot be parsed.
    """
    if not parentCommit:
        return False

    def _diff_stat(*extra_flags):
        # One `git diff --stat` invocation; only the optional flags differ
        # between the two calls below.
        completed = subprocess.run(
            ['git', '-C', repo_path, 'diff', '--stat', *extra_flags, parentCommit, commit, '--', filepath],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
        return completed.stdout.strip()

    try:
        stat_ignoring_cr = _diff_stat('--ignore-cr-at-eol')
        isEmpty = stat_ignoring_cr == ''

        if not isEmpty:
            stat_full = _diff_stat()
            if stat_full == stat_ignoring_cr:
                # Ignoring CR made no difference, so the change is genuine.
                return False

            try:
                # Second line of the full stat is taken to be the summary
                # ("N files changed, X insertions(+), Y deletions(-)"); when
                # only one of insertions/deletions is present it has just
                # two comma-separated parts.
                info = stat_full.split("\n")[1].split(", ")
                if len(info) < 3:
                    diff_lines = int(info[1].split(" ")[0])
                else:
                    diff_lines = max(int(info[1].split(" ")[0]), int(info[2].split(" ")[0]))

                # Number right after " | " on the first line of the
                # CR-ignoring stat.
                ig_whitespace = int(stat_ignoring_cr.split("\n")[0].split(" | ")[1].split(" ")[0])
            except (IndexError, ValueError):
                # Unexpected stat layout: report it and treat the commit as a
                # real change instead of aborting the whole run.
                print(f"Could not parse git diff --stat output for {commit}: {stat_full!r}")
                return False

            # Guard the division; a zero line count cannot be "mostly CR".
            if diff_lines > 0:
                whitespace_percentage = (diff_lines - ig_whitespace) / diff_lines
                if whitespace_percentage > 0.95:
                    isEmpty = True

        if isEmpty:
            empty_commits[fileName].append(commit)
        return isEmpty
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while running git diff: {e.stderr}")
        return False
60+
61+
def write_to_csv(data, filename):
    """Write the per-file statistics in `data` to `filename` as CSV.

    Args:
        data: Mapping of name -> sequence whose first four items are the
            TP, FP, FN and empty-commit counts, in that order.
        filename: Path of the CSV file to (over)write.
    """
    header = ["Name", "TP", "FP", "FN", "Empty commits count"]
    with open(filename, mode='w', newline='') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        # One row per entry, in the mapping's iteration order.
        sheet.writerows(
            [name, counts[0], counts[1], counts[2], counts[3]]
            for name, counts in data.items()
        )
67+
68+
def getLogCommits(fileName, repo_path, start_commit, file_path, start_line, end_line, introduction_commit, mappings_file, manual_empty_file):
    """Collect the commit ids that ``git log -L`` reports for a line range.

    The log for ``start_line,end_line:file_path`` is read starting at
    `start_commit`. Whenever a reported commit has an entry in `mappings_file`
    and is "empty" (listed in `manual_empty_file`, or classified so by
    `is_git_diff_empty`), that commit is skipped and the log is restarted from
    its parent, using the line range and file recorded in the mapping entry.

    Args:
        fileName: Oracle file name; key into `mappings_file`,
            `manual_empty_file` and the empty-commit bookkeeping.
        repo_path: Path of the git repository to open.
        start_commit: Revision at which the first log starts.
        file_path: File the first log is taken for.
        start_line: First line of the tracked range.
        end_line: Last line of the tracked range.
        introduction_commit: Commit id at which collection stops (inclusive).
        mappings_file: Mapping fileName -> commit id -> dict with the keys
            'parent_commit_id', 'element_file_before', 'element_name_after'
            and 'element_file_after'.
        manual_empty_file: Mapping fileName -> collection of commit ids known
            to be empty.

    Returns:
        A tuple ``(commitIds, empty_commits_count)``: the collected commit ids
        in log order, and how many empty commits were skipped.
    """
    repo = git.Repo(repo_path)
    log = repo.git.log(start_commit, '-L', f'{start_line},{end_line}:{file_path}')

    header_prefix = 'commit '
    header_length = len(header_prefix) + 40

    commitIds = []
    empty_commits_count = 0
    process = True
    while process:
        process = False
        for line in log.split('\n'):
            # A header line is exactly "commit " followed by a 40-character id.
            # Checking the prefix and slicing (rather than unpacking
            # line.split(' ')) keeps other 47-character lines that merely
            # contain "commit " from raising ValueError.
            if not line.startswith(header_prefix) or len(line) != header_length:
                continue
            commitId = line[len(header_prefix):]
            if ' ' in commitId:
                continue

            # if a file is known to have an "empty" commit in its first set of commits
            # returned by gitLog
            if fileName in mappings_file and commitId in mappings_file[fileName]:
                info = mappings_file[fileName][commitId]
                # check if commit is known to be "empty" or if its
                # a new commit (succeeding the original empty commit) that could be "empty" by running git diff
                known_empty = fileName in manual_empty_file and commitId in manual_empty_file[fileName]
                if known_empty or is_git_diff_empty(commitId, info['parent_commit_id'], info['element_file_before'], repo_path, fileName):
                    empty_commits_count += 1
                    # The range is read from the "(<start>-<end>)" part that
                    # follows the first '$' in element_name_after.
                    sl, el = info['element_name_after'].split('$')[1].split('(')[1].split(')')[0].split('-')
                    # Continue from the parent; the empty commit itself is
                    # not recorded.
                    log = repo.git.log(info['parent_commit_id'], '-L', f'{sl},{el}:{info["element_file_after"]}')
                    process = True
                    break

            commitIds.append(commitId)
            if commitId == introduction_commit:
                return commitIds, empty_commits_count

    return commitIds, empty_commits_count
99+
100+
def get_file_names_in_directory(directory_path):
    """Return the names of the regular files located directly in `directory_path`.

    Sub-directories and other non-file entries are left out; the order is
    whatever `os.listdir` yields.
    """
    return [
        os.path.basename(entry)
        for entry in os.listdir(directory_path)
        if os.path.isfile(os.path.join(directory_path, entry))
    ]
106+
107+
def load_json_file(file_path):
    """Open `file_path` for reading and return its parsed JSON content."""
    with open(file_path, 'r') as handle:
        parsed = json.load(handle)
    return parsed
110+
111+
def getMethodName(signature):
    """Return the text between the first '#' and the next '(' in `signature`.

    Raises IndexError when `signature` contains no '#'.
    """
    after_hash = signature.split("#")[1]
    name, _, _ = after_hash.partition("(")
    return name
113+
114+
def getFileName(signature):
    """Return whatever follows the last '/' in `signature` (all of it if there is none)."""
    return signature.rsplit("/", 1)[-1]
117+
118+
def scanJSON(fileName, data, mappings_file, manual_empty_file):
    """Score the ``git log -L`` result for one oracle entry against its expected commits.

    Reads the start commit, line range, file path and repository URL from
    `data`, collects commits through `getLogCommits`, and compares them with
    the commit ids listed under 'expectedChanges'. The collected commits are
    stored in the module-level `oracle_commits` and the counts in
    `oracle_stats`, both under `fileName`.

    Args:
        fileName: Oracle file name, used as result key and for progress output.
        data: Parsed oracle JSON object.
        mappings_file: Passed through to `getLogCommits`.
        manual_empty_file: Passed through to `getLogCommits`.

    Returns:
        Always True.
    """
    print(f"Processing {fileName}")
    start_commit = data.get('startCommitId')
    start_line = data.get('blockStartLine')
    end_line = data.get('blockEndLine')
    file_path = data.get('filePath')

    repo_name = data.get('repositoryWebURL')
    repo_name = repo_name.replace('https://github.com/', '')
    # Drop only a trailing ".git"; replacing every occurrence would corrupt
    # names that merely contain it (e.g. "user/site.github.io").
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]

    repo_path = '../code-tracker/tmp/' + repo_name

    # Read the expected changes once so a missing or empty list is handled the
    # same way everywhere; without entries there is no commit to stop at.
    oracleChanges = data.get('expectedChanges') or []
    introduction_commit = oracleChanges[-1].get("commitId") if oracleChanges else None
    oracleCommitIds = {change.get('commitId') for change in oracleChanges}

    commitIds, empty_commits_count = getLogCommits(fileName, repo_path, start_commit, file_path, start_line, end_line, introduction_commit, mappings_file, manual_empty_file)

    # Each reported commit is either expected (TP) or not (FP); expected
    # commits that were never reported are FN.
    tp = sum(1 for commitId in commitIds if commitId in oracleCommitIds)
    fp = len(commitIds) - tp
    fn = len(oracleCommitIds - set(commitIds))

    oracle_commits[fileName] = commitIds
    oracle_stats[fileName] = [tp, fp, fn, empty_commits_count]
    return True
160+
161+
162+
def processFile(oracle_file, mappings_file, manual_empty_file):
    """Load one oracle JSON file and hand it to `scanJSON`.

    Args:
        oracle_file: File name inside "./oracle/block/training/".
        mappings_file: Passed through to `scanJSON`.
        manual_empty_file: Passed through to `scanJSON`.

    Returns:
        Whatever `scanJSON` returns.
    """
    # NOTE(review): the __main__ section lists file names from
    # "../../src/main/resources/oracle/block/training/" but they are read
    # from this directory -- confirm both point at the same files.
    oracle_dir = "./oracle/block/training/"
    oracle_data = load_json_file(oracle_dir + oracle_file)
    return scanJSON(oracle_file, oracle_data, mappings_file, manual_empty_file)
167+
168+
169+
if __name__ == "__main__":
    # Script entry point: process every training oracle file, then write the
    # per-file breakdown (CSV), the collected commits (JSON) and the commits
    # classified as empty (JSON) into ./stats.
    # NOTE(review): file names are listed from this directory, while
    # processFile reads them from "./oracle/block/training/" -- confirm both
    # locations hold the same files.
    directory_path = "../../src/main/resources/oracle/block/training/"
    file_names = get_file_names_in_directory(directory_path)
    mappings_file = load_json_file("./mappings/training-mappings.json")
    manual_empty_file = load_json_file("./mappings/empty-commits-training.json")

    for file_name in file_names:
        processFile(file_name, mappings_file, manual_empty_file)

    # Create the output directory up front so a missing ./stats cannot throw
    # away the results of the whole run.
    os.makedirs('./stats', exist_ok=True)

    write_to_csv(oracle_stats, './stats/gitLog-training-breakdown.csv')
    with open('./stats/gitLog-training.json', 'w') as json_file:
        json.dump(oracle_commits, json_file)
        json_file.write('\n')
    with open('./stats/empty-commits-training.json', 'w') as json_file:
        json.dump(empty_commits, json_file)
        json_file.write('\n')
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-1.json": [
3+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
4+
],
5+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-2.json": [
6+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
7+
],
8+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-3.json": [
9+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
10+
],
11+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-4.json": [
12+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
13+
],
14+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-5.json": [
15+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
16+
],
17+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-6.json": [
18+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
19+
],
20+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-7.json": [
21+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
22+
],
23+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-8.json": [
24+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
25+
],
26+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT-9.json": [
27+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
28+
],
29+
"lucene-solr-QueryParserBase-addClause-IF_STATEMENT.json": [
30+
"9af1a725691cbfe4593922e23af570f3fb18a46d"
31+
],
32+
"pmd-ClassTypeResolver-visit-CATCH_CLAUSE-1.json": [
33+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
34+
],
35+
"pmd-ClassTypeResolver-visit-CATCH_CLAUSE.json": [
36+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
37+
],
38+
"pmd-ClassTypeResolver-visit-FINALLY_BLOCK.json": [
39+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
40+
],
41+
"pmd-ClassTypeResolver-visit-IF_STATEMENT-1.json": [
42+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
43+
],
44+
"pmd-ClassTypeResolver-visit-IF_STATEMENT-2.json": [
45+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
46+
],
47+
"pmd-ClassTypeResolver-visit-IF_STATEMENT.json": [
48+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
49+
],
50+
"pmd-ClassTypeResolver-visit-TRY_STATEMENT.json": [
51+
"0ebcac0705f8afa2c62c805e35cd30ead84fc1b6"
52+
],
53+
"pmd-JUnitTestsShouldIncludeAssertRule-visit-IF_STATEMENT.json": [
54+
"d92688a07dc5e6edc3bfc81cced6f25c951a19f4"
55+
]
56+
}

0 commit comments

Comments
 (0)