1
1
from __future__ import absolute_import
2
+ import os
2
3
from math import log , ceil
3
4
4
- from ocrd import Processor , MIMETYPE_PAGE
5
- from ocrd .validator .page_validator import PageValidator , ConsistencyError
6
- from ocrd .utils import getLogger , concat_padded , xywh_from_points , points_from_xywh
7
- from ocrd .model .ocrd_page import from_file , to_xml , GlyphType , CoordsType , TextEquivType
8
- from ocrd .model .ocrd_page_generateds import MetadataItemType , LabelsType , LabelType
5
+ from ocrd import Processor
6
+ from ocrd_validators .page_validator import PageValidator , ConsistencyError
7
+ from ocrd_utils import (
8
+ getLogger , concat_padded ,
9
+ xywh_from_points , points_from_xywh ,
10
+ MIMETYPE_PAGE
11
+ )
12
+ from ocrd_modelfactory import page_from_file
13
+ from ocrd_models .ocrd_page import (
14
+ to_xml , GlyphType ,
15
+ MetadataItemType , LabelsType , LabelType ,
16
+ CoordsType , TextEquivType
17
+ )
9
18
10
19
import networkx as nx
11
20
12
- from ocrd_keraslm . wrapper .config import OCRD_TOOL
13
- from ocrd_keraslm import lib
21
+ from .config import OCRD_TOOL
22
+ from .. import lib
14
23
15
24
LOG = getLogger ('processor.KerasRate' )
16
25
@@ -54,13 +63,15 @@ def process(self):
54
63
55
64
prev_traceback = None
56
65
prev_pcgts = None
66
+ prev_file_id = None
57
67
for (n , input_file ) in enumerate (self .input_files ):
58
- LOG .info ("INPUT FILE %i / %s" , n , input_file )
59
- pcgts = from_file (self .workspace .download_file (input_file ))
68
+ page_id = input_file .pageId or input_file .ID
69
+ LOG .info ("INPUT FILE %i / %s" , n , page_id )
70
+ pcgts = page_from_file (self .workspace .download_file (input_file ))
60
71
LOG .info ("Scoring text in page '%s' at the %s level" , pcgts .get_pcGtsId (), level )
61
72
62
73
# annotate processing metadata:
63
- metadata = pcgts .get_Metadata () # ensured by from_file ()
74
+ metadata = pcgts .get_Metadata () # ensured by page_from_file ()
64
75
metadata .add_MetadataItem (
65
76
MetadataItemType (type_ = "processingStep" ,
66
77
name = OCRD_TOOL ['tools' ]['ocrd-keraslm-rate' ]['steps' ][0 ],
@@ -115,11 +126,13 @@ def process(self):
115
126
page_update_higher_textequiv_levels (level , pcgts )
116
127
117
128
# write back result
118
- file_id = concat_padded (self .output_file_grp , n )
129
+ file_id = input_file .ID .replace (self .input_file_grp , self .output_file_grp )
130
+ if file_id == input_file .ID :
131
+ file_id = concat_padded (self .output_file_grp , n )
119
132
self .workspace .add_file (
120
133
ID = file_id ,
121
134
file_grp = self .output_file_grp ,
122
- basename = file_id + '.xml' , # with suffix or bare?
135
+ local_filename = os . path . join ( self . output_file_grp , file_id + '.xml' ),
123
136
mimetype = MIMETYPE_PAGE ,
124
137
content = to_xml (pcgts ),
125
138
)
@@ -140,15 +153,18 @@ def process(self):
140
153
page_update_higher_textequiv_levels (level , prev_pcgts )
141
154
142
155
# write back result
143
- file_id = concat_padded (self .output_file_grp , n - 1 )
156
+ file_id = prev_file_id .replace (self .input_file_grp , self .output_file_grp )
157
+ if file_id == prev_file_id :
158
+ file_id = concat_padded (self .output_file_grp , n - 1 )
144
159
self .workspace .add_file (
145
160
ID = file_id ,
146
161
file_grp = self .output_file_grp ,
147
- basename = file_id + '.xml' , # with suffix or bare?
162
+ local_filename = os . path . join ( self . output_file_grp , file_id + '.xml' ),
148
163
mimetype = MIMETYPE_PAGE ,
149
164
content = to_xml (prev_pcgts ),
150
165
)
151
-
166
+
167
+ prev_file_id = input_file .ID
152
168
prev_pcgts = pcgts
153
169
prev_traceback = traceback
154
170
@@ -160,11 +176,13 @@ def process(self):
160
176
page_update_higher_textequiv_levels (level , prev_pcgts )
161
177
162
178
# write back result
163
- file_id = concat_padded (self .output_file_grp , n )
179
+ file_id = input_file .ID .replace (self .input_file_grp , self .output_file_grp )
180
+ if file_id == input_file .ID :
181
+ file_id = concat_padded (self .output_file_grp , n )
164
182
self .workspace .add_file (
165
183
ID = file_id ,
166
184
file_grp = self .output_file_grp ,
167
- basename = file_id + '.xml' , # with suffix or bare?
185
+ local_filename = os . path . join ( self . output_file_grp , file_id + '.xml' ),
168
186
mimetype = MIMETYPE_PAGE ,
169
187
content = to_xml (prev_pcgts ),
170
188
)
0 commit comments