Skip to content

Enhance Plagiarism Checker with Heatmap Visualization #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the student files
def _read_text(path):
    """Return the UTF-8 decoded contents of the file at *path*."""
    # The context manager closes the handle as soon as the text is read,
    # instead of leaving it open until garbage collection.
    with open(path, encoding='utf-8') as handle:
        return handle.read()


# Every .txt file in the current working directory is treated as a submission.
student_files = [doc for doc in os.listdir() if doc.endswith('.txt')]
# Contents of each file, in the same order as student_files.
student_notes = [_read_text(_file) for _file in student_files]

# Vectorize the text
def vectorize(Text):
    """Fit a TF-IDF vectorizer on the documents and return a dense array.

    Args:
        Text: Collection of document strings to vectorize together.

    Returns:
        The result of ``TfidfVectorizer().fit_transform(Text)`` converted
        with ``.toarray()``.
    """
    tfidf = TfidfVectorizer()
    weighted = tfidf.fit_transform(Text)
    return weighted.toarray()
def similarity(doc1, doc2):
    """Return the pairwise cosine-similarity matrix of two document vectors.

    Args:
        doc1: Vector for the first document.
        doc2: Vector for the second document.

    Returns:
        Whatever ``cosine_similarity`` returns for the two-row input
        ``[doc1, doc2]``.
    """
    stacked = [doc1, doc2]
    return cosine_similarity(stacked)

# Vectorize all submissions together.
vectors = vectorize(student_notes)
# Pair every filename with its vector so comparisons can be reported by name.
s_vectors = [(name, vector) for name, vector in zip(student_files, vectors)]
# Result tuples accumulated by check_plagiarism().
plagiarism_results = set()


# Check plagiarism
def check_plagiarism():
global s_vectors
for student_a, text_vector_a in s_vectors:
Expand All @@ -29,6 +33,27 @@ def check_plagiarism():
plagiarism_results.add(score)
return plagiarism_results

# Visualize plagiarism results
def visualize_results(plagiarism_results):
    """Draw an annotated heatmap of pairwise similarity scores.

    Args:
        plagiarism_results: Collection of ``(file_a, file_b, score)`` tuples.

    Returns:
        None. Displays the figure with ``plt.show()``. When
        ``plagiarism_results`` is empty nothing is plotted.
    """
    # With no pairs there are no files and the matrix would be empty, which
    # seaborn cannot draw; return before touching matplotlib at all.
    if not plagiarism_results:
        return

    # Every distinct filename that appears on either side of a pair.
    files = sorted({name
                    for file_a, file_b, _ in plagiarism_results
                    for name in (file_a, file_b)})
    file_index = {name: idx for idx, name in enumerate(files)}

    # Pairs that were never scored stay at 0. The diagonal is set to 1.0
    # because a file compared with itself is a perfect match; leaving it at 0
    # would colour it as "no similarity" in the heatmap.
    size = len(files)
    matrix = [[1.0 if row == col else 0.0 for col in range(size)]
              for row in range(size)]

    # Similarity is symmetric, so mirror each score across the diagonal.
    for file_a, file_b, score in plagiarism_results:
        i, j = file_index[file_a], file_index[file_b]
        matrix[i][j] = score
        matrix[j][i] = score

    # Plot the heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, xticklabels=files, yticklabels=files,
                cmap='coolwarm', annot=True)
    plt.title('Plagiarism Heatmap')
    plt.show()

# Check for plagiarism and visualize the results
# Run the pairwise comparison once and reuse the result; calling
# check_plagiarism() a second time would redo every comparison.
plagiarism_results = check_plagiarism()

# Print the text report before plotting: visualize_results() ends in
# plt.show(), which typically blocks until the figure window is closed.
for data in plagiarism_results:
    print(data)

visualize_results(plagiarism_results)
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
scikit_learn==0.24.2
scikit-learn==0.24.2
matplotlib==3.4.2
seaborn==0.11.1