-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathconvert.py
112 lines (96 loc) · 3.81 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python
# -*- coding:utf-8 -*-
###################################################
# Filename: convert.py
# Author: [email protected]
# Created: 2017-11-16 10:34:55
# Last Modified: 2017-11-23 18:41:54
###################################################
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from six.moves import xrange
from struct import pack, unpack
from collections import defaultdict
from PIL import Image, ImageDraw
import os
import numpy as np
import sample_data
# Input dataset directories (machine-specific absolute paths).
# NOTE(review): the names suggest train ("trn") / test ("tst") splits of
# .gnt and .pot sample data -- confirm against the dataset layout.
trn_gnt_dir = "/home/aib/datasets/HWDB1.1trn_gnt/"
tst_gnt_dir = "/home/aib/datasets/HWDB1.1tst_gnt/"
trn_pot_dir = "/home/aib/datasets/OLHWDB1.1trn_pot/"
tst_pot_dir = "/home/aib/datasets/OLHWDB1.1tst_pot/"
def convert_gnt(gnt_dir, fn_dst):
    """Serialize every sample found under ``gnt_dir`` into ``fn_dst``.

    For each ``(tagcode, img)`` pair yielded by
    ``sample_data.read_from_gnt_dir(gnt_dir)`` the tagcode is dumped with
    its ``tofile`` method, immediately followed by the ``tofile`` dump of
    ``sample_data.resize_image(img)``.

    Args:
        gnt_dir: directory passed to ``sample_data.read_from_gnt_dir``.
        fn_dst: path of the binary output file (created or truncated).
    """
    with open(fn_dst, 'wb') as out:
        for label, glyph in sample_data.read_from_gnt_dir(gnt_dir):
            # Record layout: label first, then the normalized image.
            label.tofile(out)
            sample_data.resize_image(glyph).tofile(out)
def convert_pot(pot_dir, fn_dst):
    """Render every sample of a .pot directory into one binary file.

    For each ``(tagcode, strokes)`` pair yielded by
    ``sample_data.read_from_pot_dir(pot_dir)`` the tagcode is written to
    ``fn_dst`` as a ``uint16`` (via ``tofile``), followed by the ``tofile``
    dump of ``sample_data.resize_image`` applied to the rendered strokes.

    The distinct tagcodes seen are additionally decoded as GB2312 and
    written, UTF-8 encoded and one character per line, to
    ``fn_dst + ".charset"``.

    Args:
        pot_dir: directory passed to ``sample_data.read_from_pot_dir``.
        fn_dst: path of the binary output file (created or truncated).
    """
    all_tagcode = defaultdict(int)
    with open(fn_dst, 'wb') as f:
        for tagcode, strokes in sample_data.read_from_pot_dir(pot_dir):
            tagcode = np.uint16(tagcode)
            tagcode.tofile(f)
            all_tagcode[tagcode] += 1

            # Draw the strokes in black on a large white canvas.
            # NOTE(review): assumes all stroke coordinates fall inside the
            # 10240x10240 canvas -- confirm against the .pot data.
            im = Image.new("L", (10240, 10240), 255)
            draw = ImageDraw.Draw(im)
            mins = []
            maxs = []
            for stroke in strokes:
                mins.append(np.min(stroke, 0))
                maxs.append(np.max(stroke, 0))
                draw.line(stroke, fill=0, width=4)
            del draw

            # Crop to the bounding box of all strokes, padded by 2 pixels
            # on every side.
            _min = np.min(mins, 0) - 2
            _max = np.max(maxs, 0) + 2
            box = (_min[0], _min[1], _max[0], _max[1])
            im = im.crop(box)

            # PIL reports size as (width, height); the array is laid out
            # as (rows, columns).
            shape = (im.size[1], im.size[0])
            img = np.reshape(bytearray(im.tobytes()), shape)
            norm_img = sample_data.resize_image(img)
            norm_img.tofile(f)

    chars = [pack('>H', v).decode('gb2312') for v in all_tagcode]
    # Binary mode: the lines are written as UTF-8 encoded bytes. Writing
    # bytes to a text-mode file raises TypeError on Python 3.
    with open(fn_dst + ".charset", "wb") as f:
        for char in chars:
            f.write((char + "\n").encode('utf8'))
def extract_pot(pot_dir, png_dir):
    """Render every sample of a .pot directory to an individual PNG file.

    For each ``(tagcode, strokes)`` pair yielded by
    ``sample_data.read_from_pot_dir(pot_dir)`` the strokes are drawn and
    saved as ``"%05d_%s.png" % (tagcode, k)`` inside ``png_dir``, where
    ``k`` is the running count of samples seen so far for that tagcode.

    Two index files are written next to the images:

    * ``charset`` -- the distinct tagcodes decoded as GB2312, UTF-8
      encoded, one character per line;
    * ``files`` -- the generated PNG file names, one per line, in the
      order they were produced.

    Args:
        pot_dir: directory passed to ``sample_data.read_from_pot_dir``.
        png_dir: output directory; created if it does not exist yet.
    """
    # Create the output directory up front so the first save cannot fail
    # merely because the directory is missing.
    if not os.path.isdir(png_dir):
        os.makedirs(png_dir)

    files = []
    all_tagcode = defaultdict(int)
    for tagcode, strokes in sample_data.read_from_pot_dir(pot_dir):
        all_tagcode[tagcode] += 1
        fn = "%05d_%s.png" % (tagcode, all_tagcode[tagcode])
        files.append(fn)
        pngf = os.path.join(png_dir, fn)

        # Draw the strokes in black on a large white canvas.
        # NOTE(review): assumes all stroke coordinates fall inside the
        # 10240x10240 canvas -- confirm against the .pot data.
        im = Image.new("L", (10240, 10240), 255)
        draw = ImageDraw.Draw(im)
        mins = []
        maxs = []
        for stroke in strokes:
            mins.append(np.min(stroke, 0))
            maxs.append(np.max(stroke, 0))
            draw.line(stroke, fill=0, width=4)
        del draw

        # Crop to the bounding box of all strokes, padded by 2 pixels on
        # every side.
        _min = np.min(mins, 0) - 2
        _max = np.max(maxs, 0) + 2
        box = (_min[0], _min[1], _max[0], _max[1])
        im = im.crop(box)
        im.save(pngf)

    chars = [pack('>H', v).decode('gb2312') for v in all_tagcode]
    # Binary mode: the lines are written as UTF-8 encoded bytes. Writing
    # bytes to a text-mode file raises TypeError on Python 3.
    with open(os.path.join(png_dir, "charset"), "wb") as f:
        for char in chars:
            f.write((char + "\n").encode('utf8'))

    with open(os.path.join(png_dir, "files"), "w") as f:
        for fn in files:
            f.write(fn + "\n")
# Script body: the calls below execute whenever this module is run or
# imported (there is no ``__main__`` guard).
#
# Disabled conversions, kept for reference -- uncomment to regenerate the
# .gnt binaries or the per-sample PNG dumps:
# convert_gnt(trn_gnt_dir, "/home/aib/datasets/HWDB1.1trn_gnt.bin")
# convert_gnt(tst_gnt_dir, "/home/aib/datasets/HWDB1.1tst_gnt.bin")
# extract_pot(trn_pot_dir, "/home/aib/datasets/OLHWDB1.1trn_png/")
# extract_pot(tst_pot_dir, "/home/aib/datasets/OLHWDB1.1tst_png/")
#
# Active conversions: render both .pot directories into .bin files (each
# call also writes a sibling "<name>.bin.charset" file).
convert_pot(trn_pot_dir, "/home/aib/datasets/OLHWDB1.1trn_pot.bin")
convert_pot(tst_pot_dir, "/home/aib/datasets/OLHWDB1.1tst_pot.bin")