Comparing changes

base repository: OCR-D/ocrd_keraslm
base: v0.4.0
head repository: OCR-D/ocrd_keraslm
compare: v0.4.1
  • 3 commits
  • 3 files changed
  • 2 contributors

Commits on Sep 24, 2020

  1. getLogger per method
     kba authored and bertsky committed Sep 24, 2020 (d73c0e2)
  2. Update CHANGELOG.md
     bertsky committed Sep 24, 2020 (ac0f008)
  3. 📦 v0.4.1
     bertsky committed Sep 24, 2020 (b11b51d)
Showing with 37 additions and 5 deletions.
  1. +29 −2 CHANGELOG.md
  2. +1 −1 ocrd_keraslm/wrapper/ocrd-tool.json
  3. +7 −2 ocrd_keraslm/wrapper/rate.py
31 changes: 29 additions & 2 deletions CHANGELOG.md
@@ -4,13 +4,40 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

## [0.4.1] - 2020-09-24

Changed:

* logging according to OCR-D/core#599

## [0.4.0] - 2020-08-21

Fixed:

* deps: relax tensorflow, use -gpu variant
* deps: restrict keras<2.4

Changed:

* adapt tests to core#397
* update tests from obsolete bags to assets repo
* create CircleCI config
* adapt to 1-output-file-group convention, use `make_file_id` and `assert_file_grp_cardinality`, #17
* set pcGtsId to file ID, #17

## [0.3.2] - 2019-11-18

Fixed:

* deps: restrict tensorflow<2
* deps: require ocrd>=2

## [0.3.1] - 2019-10-26



<!-- link-labels -->
[0.4.0]: ../../compare/v0.4.0...v0.3.1
[0.3.1]: ../../compare/HEAD...v0.3.1
[0.4.1]: ../../compare/v0.4.0...v0.4.1
[0.4.0]: ../../compare/0.3.2...v0.4.0
[0.3.2]: ../../compare/0.3.1...0.3.2
[0.3.1]: ../../compare/HEAD...0.3.1
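
The 0.4.0 entries above mention adapting to the 1-output-file-group convention via `make_file_id` and `assert_file_grp_cardinality`, and setting pcGtsId to the file ID. The following is a minimal sketch of that pattern, assuming OCR-D core 2.x APIs; `MyProcessor` and the output path layout are placeholders, not code from this repository.

```python
# Hedged sketch of the 1-output-file-group convention; MyProcessor is a placeholder,
# the helpers and MIMETYPE_PAGE come from OCR-D core 2.x (ocrd_utils).
from ocrd import Processor
from ocrd_utils import MIMETYPE_PAGE, assert_file_grp_cardinality, make_file_id
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import to_xml

class MyProcessor(Processor):
    def process(self):
        # exactly one input and one output file group are expected
        assert_file_grp_cardinality(self.input_file_grp, 1)
        assert_file_grp_cardinality(self.output_file_grp, 1)
        for input_file in self.input_files:
            pcgts = page_from_file(self.workspace.download_file(input_file))
            # derive the output file ID from the input file and the output group
            file_id = make_file_id(input_file, self.output_file_grp)
            pcgts.set_pcGtsId(file_id)  # "set pcGtsId to file ID"
            self.workspace.add_file(
                self.output_file_grp,
                ID=file_id,
                pageId=input_file.pageId,
                mimetype=MIMETYPE_PAGE,
                local_filename='%s/%s.xml' % (self.output_file_grp, file_id),
                content=to_xml(pcgts))
```

With a single output fileGrp, each input page maps to exactly one output PAGE-XML file, whose ID doubles as its pcGtsId.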
2 changes: 1 addition & 1 deletion ocrd_keraslm/wrapper/ocrd-tool.json
@@ -1,6 +1,6 @@
{
"git_url": "https://github.com/OCR-D/ocrd_keraslm",
"version": "0.4.0",
"version": "0.4.1",
"tools": {
"ocrd-keraslm-rate": {
"executable": "ocrd-keraslm-rate",
9 changes: 7 additions & 2 deletions ocrd_keraslm/wrapper/rate.py
@@ -22,8 +22,6 @@
from .config import OCRD_TOOL
from .. import lib

LOG = getLogger('processor.KerasRate')

CHOICE_THRESHOLD_NUM = 4 # maximum number of choices to try per element
CHOICE_THRESHOLD_CONF = 0.1 # maximum score drop from best choice to try per element
#beam_width = 100 # maximum number of best partial paths to consider during search with alternative_decoding
@@ -42,6 +40,7 @@ def __init__(self, *args, **kwargs):
if not hasattr(self, 'workspace') or not self.workspace: # no parameter/workspace for --dump-json or --version (no processing)
return

LOG = getLogger('processor.KerasRate')
self.rater = lib.Rater(logger=LOG)
self.rater.load_config(self.parameter['model_file'])
# overrides necessary before compilation:
@@ -58,6 +57,7 @@ def process(self):
... explain incremental page-wise processing here ...
"""
LOG = getLogger('processor.KerasRate')
assert_file_grp_cardinality(self.input_file_grp, 1)
assert_file_grp_cardinality(self.output_file_grp, 1)

@@ -192,6 +192,7 @@ def process(self):
)

def page_get_linear_graph_at(level, pcgts):
LOG = getLogger('processor.KerasRate')
problems = _page_get_tokenisation_problems(level, pcgts)

graph = nx.DiGraph(level=level) # initialise directed unigraph
@@ -270,6 +271,7 @@ def page_get_linear_graph_at(level, pcgts):
return graph, page_start_node, start_node

def _page_update_from_path(level, path, entropy):
LOG = getLogger('processor.KerasRate')
strlen = 0
for element, textequiv, score in path:
if element: # not just space
@@ -291,6 +293,7 @@ def page_update_higher_textequiv_levels(level, pcgts):
join all first TextEquiv (by the rules governing the respective level)
into TextEquiv of the next higher level, replacing them.
'''
LOG = getLogger('processor.KerasRate')
regions = pcgts.get_Page().get_TextRegion()
if level != 'region':
for region in regions:
@@ -309,6 +312,7 @@ def page_update_higher_textequiv_levels(level, pcgts):
region.set_TextEquiv([TextEquivType(Unicode=region_unicode)]) # remove old

def _page_get_tokenisation_problems(level, pcgts):
LOG = getLogger('processor.KerasRate')
# white space IFF between words, newline IFF between lines/regions: required for LM input
# as a minor mitigation, try to guess consistency a text annotation on multiple levels
# (i.e. infer wrong tokenisation when mother node has TextEquiv deviating from
@@ -353,6 +357,7 @@ def _add_space(graph, start_node, space, last_start_node, problem, textequivs):
return start_node

def _repair_tokenisation(tokenisation, concatenation, next_token):
LOG = getLogger('processor.KerasRate')
# invariant: text should contain a representation that concatenates into actual tokenisation
# ideally, both overlap (concatenation~tokenisation)
i = 0
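
The rate.py changes above implement the 0.4.1 CHANGELOG entry ("logging according to OCR-D/core#599"): the module-level LOG is dropped and each method fetches its own logger. Below is a minimal sketch of the resulting pattern, assuming OCR-D core's `getLogger`; `MyProcessor` and the log message are illustrative only.

```python
# Hedged sketch of the per-method logging pattern (cf. OCR-D/core#599);
# MyProcessor and the message are placeholders, getLogger is from ocrd_utils.
from ocrd import Processor
from ocrd_utils import getLogger

class MyProcessor(Processor):
    def process(self):
        # fetch the logger inside the method at call time, rather than binding a
        # module-level LOG at import time, so it reflects whatever logging
        # configuration OCR-D core has applied by the time processing starts
        log = getLogger('processor.KerasRate')
        log.info('processing with a per-method logger')
```

A module-level `getLogger` call runs at import time, before OCR-D core has set up its logging configuration, which is presumably the situation OCR-D/core#599 addresses.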