-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOCR.py
83 lines (65 loc) · 2.7 KB
/
OCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import cv2
import pytesseract
from tkinter import *
import tkinter
from tkinter import filedialog
from PIL import ImageTk, Image
import os
from datetime import datetime
from textblob import TextBlob
# Path to the Tesseract executable; change this to wherever Tesseract is
# installed on your system.
pytesseract.pytesseract.tesseract_cmd = 'C://Program Files (x86)//Tesseract-OCR//tesseract.exe'
# Timestamp suffix (formatted as '_YYYY-MM-DD-HH-MM-SS'), captured once when
# this line runs; fn() inserts it into every generated file name.
curr_datetime = datetime.now().strftime('_%Y-%m-%d-%H-%M-%S')
# Directory prefix that fn() prepends to every generated path.
temp_directory = "temp/"
# File-name prefix; the script below passes it to fn() as `saved_at`.
file_name = "OCR'ed_"
def corrected_text(file):
    """Return a spell-corrected copy of the given text.

    Despite its name, ``file`` is the text itself (the script passes the
    OCR output string), not a path or file object. The text is wrapped in
    a ``TextBlob`` and the result of its ``correct()`` call is returned
    as a plain ``str``.
    """
    return str(TextBlob(file).correct())
def get_grayscale(image):
    """Convert a BGR image (as loaded by ``cv2.imread``) to grayscale.

    Returns whatever ``cv2.cvtColor`` produces for the ``COLOR_BGR2GRAY``
    conversion; the input image is passed through unchanged.
    """
    conversion_code = cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(image, conversion_code)
def thresholding(image):
    """Return a binarised version of ``image``.

    Calls ``cv2.threshold`` with the ``THRESH_BINARY`` and ``THRESH_OTSU``
    flags combined, a threshold argument of 0 and a maximum value of 255,
    and returns only the output image (the second element of the result).
    """
    flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _used_threshold, binary_image = cv2.threshold(image, 0, 255, flags)
    return binary_image
def fn(saved_at):
    """Build the output path for the currently selected image.

    The result is the concatenation of ``temp_directory``, the
    ``saved_at`` prefix, the base name of the module-level ``image_file``
    without its extension, the ``curr_datetime`` suffix and finally the
    original extension. Nothing is written to disk here; only the path
    string is returned.
    """
    stem, extension = os.path.splitext(os.path.basename(image_file))
    return "".join((temp_directory, saved_at, stem, curr_datetime, extension))
# ---------------------------------------------------------------------------
# Script body: let the user pick an image, show a thumbnail of it, run OCR on
# the pre-processed image, spell-correct the recognised text, display it and
# save it as a .txt file in the temp directory.
# ---------------------------------------------------------------------------
ui = tkinter.Tk()
ui.title('TEXT RECOGNITION')
ui.geometry('720x640+0+0')
ui.configure(bg='#8D3DAF')

# NOTE: fn() reads the module-level `image_file`, so this name must stay a
# global. The first filter previously advertised "png files" while matching
# only *.jpg; it now matches the common image extensions.
image_file = filedialog.askopenfilename(
    initialdir="/Images",
    title="select a file",
    filetypes=(
        ("image files", ("*.png", "*.jpg", "*.jpeg")),
        ("all file", "*.*"),
    ),
)
if not image_file:
    # The dialog was cancelled: there is nothing to process, so exit cleanly
    # instead of crashing later inside OpenCV.
    ui.destroy()
    raise SystemExit("No image selected.")

# Create each widget first and place it afterwards: `.place()` returns None,
# so chaining it would leave these names bound to None instead of the widget.
og_image_label = Label(ui, text="Original Image", bg='black', fg='white',
                       font=("Courier New", 14))
og_image_label.place(x=50, y=125)
text_gen_label = Label(ui, text="Text Generated", bg='black', fg='white',
                       font=("Terminal", 16))
text_gen_label.place(x=250, y=330)

# cv2.imread signals an unreadable file by returning None rather than raising.
img = cv2.imread(image_file)
if img is None:
    ui.destroy()
    raise SystemExit(f"Could not read image: {image_file}")
gray = get_grayscale(img)
thresh = thresholding(gray)

# The output location is computed once; every later use shares the same path.
os.makedirs(temp_directory, exist_ok=True)  # the save below fails without it
file_rn = fn(file_name)

# Save a display-sized copy of the image. `thumbnail` resizes in place and
# returns None, so its result is not assigned.
ima = Image.open(image_file)
ima.thumbnail((480, 360))
ima.save(file_rn)

# Keep `photo` referenced at module level: Tk only displays the image while
# the PhotoImage object is alive.
image = Image.open(file_rn)
photo = ImageTk.PhotoImage(image)
og_image = Label(ui, image=photo)
og_image.place(x=240, y=10)

# Run OCR on the thresholded full-resolution image. Previously the
# pre-processing result was discarded and OCR ran on the downscaled thumbnail.
txt = pytesseract.image_to_string(thresh)
text_rn = corrected_text(txt)
res = " ".join(text_rn.split())  # collapse all whitespace runs to one space
text_gen_ = Label(ui, text=res, bg='#f1c40f', fg='#0D0D0D',
                  font=('MS Serif', 14), width=60, height=8)
text_gen_.place(x=20, y=365)

# Write the recognised text next to the thumbnail. An explicit encoding keeps
# non-ASCII OCR output from failing on platforms with a narrow default codec.
txt_filename = file_rn + '.txt'
with open(txt_filename, 'w', encoding='utf-8') as f:
    f.write(res)
    f.write('\n')

fs = f"Text Generated File Saved at and as:\n {txt_filename}"
file_saved_label = Label(ui, text=fs, bg='#3498db', fg='white',
                         font=("Arial", 12))
file_saved_label.place(x=125, y=580)

ui.mainloop()