-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_icdar_to_mmocr.py
72 lines (62 loc) · 2.16 KB
/
convert_icdar_to_mmocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import json
import os
from PIL import Image
def polygon_to_bbox(polygon):
    """Return the axis-aligned bounding box of a flat polygon.

    Args:
        polygon: Flat coordinate sequence ``[x1, y1, x2, y2, ...]``.
            Values at even indices are read as x coordinates and values
            at odd indices as y coordinates.

    Returns:
        A list ``[min_x, min_y, max_x, max_y]``.

    Raises:
        ValueError: If ``polygon`` does not hold at least one x and one
            y value.
    """
    # Slice each axis once instead of once per min()/max() call.
    xs = polygon[::2]
    ys = polygon[1::2]
    # len() rather than truthiness so array-like inputs are accepted too.
    if len(xs) == 0 or len(ys) == 0:
        raise ValueError(
            "polygon must contain at least one (x, y) pair, "
            "got {} value(s)".format(len(polygon))
        )
    return [min(xs), min(ys), max(xs), max(ys)]
# Convert per-image ground-truth text files into a single JSON annotation
# file. Every image in ``img_dir`` is paired with ``gt_<image stem>.txt`` in
# ``gt_dir``; each non-blank line there is ``x1,y1,...,xn,yn,transcription``.


def _parse_gt_line(line):
    """Split one annotation line into ``(coordinates, transcription)``.

    The leading comma-separated fields that parse as integers are the
    polygon coordinates; everything after them is the transcription, which
    may itself contain commas. The last field is always kept as text, so a
    purely numeric transcription is not mistaken for a coordinate.
    Coordinates are expected to be integers.

    Args:
        line: One stripped, non-empty line of a ground-truth file.

    Returns:
        A tuple ``(coords, text)`` where ``coords`` is a list of ints with
        an even length (possibly empty) and ``text`` is the transcription.
    """
    fields = line.split(",")
    coords = []
    for field in fields[:-1]:
        try:
            coords.append(int(field))
        except ValueError:
            # First non-integer field: the transcription starts here.
            break
    # An odd count cannot form (x, y) pairs, so the trailing number must be
    # the beginning of a transcription that contains commas.
    if len(coords) % 2:
        coords.pop()
    return coords, ",".join(fields[len(coords):])


# Build the output data structure.
data = {
    "metainfo": {
        "dataset_type": "TextDetDataset",
        "task_name": "textdet",
        "category": [{"id": 0, "name": "text"}]
    },
    "data_list": []
}

gt_dir = "scene_text_detection/annotations"
img_dir = "scene_text_detection/imgs"
json_path = "scene_text_detection/output.json"

# Sorted so the generated JSON is reproducible; os.listdir order is arbitrary.
items = sorted(os.listdir(img_dir))
for item in items:
    gt_path = os.path.join(gt_dir, "gt_" + os.path.splitext(item)[0] + ".txt")
    # Check for the annotation first so files without one are never opened
    # as images.
    if not os.path.isfile(gt_path):
        print(gt_path + " is not exist!!!")
        continue

    img_path = os.path.join(img_dir, item)
    # Read the image width and height; the context manager closes the file.
    with Image.open(img_path) as image:
        new_data = {
            "img_path": "imgs/" + item,
            "height": image.height,
            "width": image.width,
            "instances": [],
        }

    # Collect the instances. "utf-8-sig" drops a leading BOM, which would
    # otherwise be glued to the first coordinate and break int().
    with open(gt_path, "r", encoding="utf-8-sig") as file:
        for line_no, line in enumerate(file, 1):
            line = line.strip()  # drop the trailing newline / padding
            if not line:
                # Blank lines (e.g. at end of file) carry no annotation.
                continue

            numbers, text_str = _parse_gt_line(line)
            if not numbers:
                raise ValueError(
                    "{}:{}: no x,y coordinate pair found in line {!r}".format(
                        gt_path, line_no, line
                    )
                )

            # Create one instance.
            new_instance = {
                "polygon": numbers,
                "bbox": polygon_to_bbox(numbers),
                "bbox_label": 0,
                "text": text_str,
                # "###" marks a region that should be ignored.
                "ignore": text_str == "###",
            }
            new_data["instances"].append(new_instance)

    data["data_list"].append(new_data)

# Serialize the data structure to a JSON string.
json_data = json.dumps(data, ensure_ascii=False, indent=4)
# Save the JSON string to the output file.
with open(json_path, "w", encoding="utf-8") as f:
    f.write(json_data)