Open
Description
It seems that hfst-ospell does a better job of considering all possible
tokenisations of a word; divvunspell fails to offer some suggestions when there
are multiple tokenisations (due to multichar symbols).
After a lot of work, I derived this explanation by finding a minimal failing
example; I hope this effort helps with fixing the bug!
Script to reproduce (run in an empty directory):
# Write the two plain-text inputs used by the hfst-strings2fst calls below.
# printf is used instead of `echo -e`, whose flag/escape handling differs
# between shells (e.g. dash prints "-e" literally); output is one item per line.
# multichar: the multichar symbol list handed to hfst-strings2fst via -m.
printf '%s\n' cat ca cb cbt > multichar
# orth: the input:output string pairs the error model is built from.
printf '%s\n' cbt:cat cb:ca > orth
# anystar.hfst: transducer compiled from the regex ?* ; it is reused below to
# pad the error model on both sides and as an alternative branch.
printf '%s\n' '?*' | hfst-regexp2fst -o anystar.hfst

# errmodel.default.hfst: the pairs in orth (tokenised with the multichar
# symbol list), wrapped as anystar + pairs + anystar, repeated with
# -f 1 -t 3 (presumably 1 to 3 times), then disjuncted with anystar and
# rewritten by hfst-fst2fst -w for use in the speller archive.
hfst-strings2fst -j -m multichar < orth |
  hfst-concatenate anystar.hfst - |
  hfst-concatenate - anystar.hfst |
  hfst-repeat -f 1 -t 3 |
  hfst-disjunct - anystar.hfst |
  hfst-fst2fst -w > errmodel.default.hfst

# acceptor.default.hfst: a lexicon containing only the word "cat", built with
# the same multichar symbol list.
printf '%s\n' "cat" |
  hfst-strings2fst -j -m multichar |
  hfst-fst2fst -w > acceptor.default.hfst
# Speller archive metadata: names acceptor.default.hfst as the acceptor and
# errmodel.default.hfst as the error model. The delimiter is quoted so the
# XML is written out literally, with no shell expansion.
cat <<'EOF' > index.xml
<?xml version="1.0" encoding="utf-8"?>
<hfstspeller dtdversion="1.0" hfstversion="3">
<info>
<title>cat</title>
<locale>xxx</locale>
<producer>xxx</producer>
<description>cat</description>
</info>
<acceptor type="general" id="acceptor.default.hfst">
<title>cat</title>
<description>cat</description>
</acceptor>
<errmodel id="errmodel.default.hfst">
<title>error</title>
<description>cat</description>
<type type="default"/>
<model>errmodel.default.hfst</model>
</errmodel>
</hfstspeller>
EOF
# Bundle the metadata and both transducers into the speller archive.
zip test.zhfst \
  index.xml \
  errmodel.default.hfst \
  acceptor.default.hfst

# Feed the misspelling "cbt" to each speller so their suggestion output for
# the same archive can be compared side by side.
printf '%s\n' "cbt" | hfst-ospell -S test.zhfst
printf '%s\n' "cbt" | divvunspell -S -z test.zhfst