-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtri_matrix.py
53 lines (45 loc) · 2.04 KB
/
tri_matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from dataclasses import dataclass
@dataclass
class TriBool:
    """Pair of direction-specific boolean flags for a transcription/OCR word pair.

    Attributes:
        tr_in_ocr: True when the condition holds for the transcription word
            inside the OCR word.
        ocr_in_tr: True when the condition holds for the OCR word inside the
            transcription word.
    """

    # Field order matters: it defines the positional order of the generated __init__.
    tr_in_ocr: bool = False
    ocr_in_tr: bool = False

    def __str__(self) -> str:
        """Return both flags in ``name=value`` debug form, comma separated."""
        return f"{self.tr_in_ocr=}, {self.ocr_in_tr=}"
class IsHeadBodyTail:
    """Holds substring checking values for two words.

    Attributes:
        is_head (TriBool): if either word is the head of the other
            (one word is the start of the other word, but
            is not completely the same as the other word)
        is_body (TriBool): if either word is the body of the other
            (one word is a substring of the other word, but
            is not completely the same as the other word)
        is_tail (TriBool): if either word is the tail of the other
            (one word is the end of the other word, but
            is not completely the same as the other word)
    """

    # The union annotations below are quoted so they are never evaluated at
    # runtime; this keeps the explicit "may be None" typing usable on
    # interpreters that predate the ``X | None`` syntax.
    def __init__(
        self,
        is_head: "TriBool | None" = None,
        is_body: "TriBool | None" = None,
        is_tail: "TriBool | None" = None,
    ) -> None:
        """Store the three flag pairs.

        Args:
            is_head: flag pair for the head check; a fresh ``TriBool()`` is
                created when omitted.
            is_body: flag pair for the body check; a fresh ``TriBool()`` is
                created when omitted.
            is_tail: flag pair for the tail check; a fresh ``TriBool()`` is
                created when omitted.
        """
        # Defaults are built per instance so that no two objects share flags.
        self.is_head = TriBool() if is_head is None else is_head
        self.is_body = TriBool() if is_body is None else is_body
        self.is_tail = TriBool() if is_tail is None else is_tail

    def __str__(self) -> str:
        """Return the three flag pairs in ``name=value`` debug form."""
        return f"{self.is_head=}, {self.is_body=}, {self.is_tail=}"

    def set_headbodytail(
        self,
        head: bool,
        tail: bool,
        tr_len: int,
        ocr_len: int,
        body: "bool | None" = None,
    ) -> None:
        """Sets the head body and tail values, dependent on the length of the words.

        For each of head, tail and body, the ``tr_in_ocr`` flag is set when the
        given match flag is truthy and the transcription word is strictly
        shorter than the OCR word; ``ocr_in_tr`` is set when the match flag is
        truthy and the OCR word is strictly shorter. Words of equal length
        therefore clear every flag.

        Args:
            head: whether the head condition matched for the word pair.
            tail: whether the tail condition matched for the word pair.
            tr_len: length of the transcription word.
            ocr_len: length of the OCR word.
            body: whether the body condition matched; ``None`` (the default)
                is treated as "no match".
        """
        pairs = (
            (self.is_head, head),
            (self.is_tail, tail),
            (self.is_body, body),
        )
        for flags, matched in pairs:
            # Coerce first: a bare ``None and ...`` would store None in a field
            # that is declared (and printed) as a bool.
            matched = bool(matched)
            # ``and`` keeps the short-circuit, so lengths are only compared
            # when the corresponding match flag is actually set.
            flags.tr_in_ocr = matched and tr_len < ocr_len
            flags.ocr_in_tr = matched and ocr_len < tr_len