Open
Description
I've modified class phpMorphy_Morphier_MorphierAbstract this way:
/**
 * Looks the word up through the finder and hands the found annotations to
 * the helper, dumping every annotation to the output on the way (debug aid).
 *
 * Each annotation is rendered with print_r(), collapsed onto a single line,
 * and the rendered lines are sorted before being printed between a ' +'
 * and a ' -' marker line.
 *
 * @param mixed $word Word forwarded to the finder and to the helper.
 * @return mixed false when the finder returns false; otherwise the value
 *               returned by the helper's getParadigmCollection().
 */
function getParadigmCollection($word) {
    $annots = $this->finder->findWord($word);
    if ($annots === false) {
        return false;
    }

    // debug
    print ' +' . PHP_EOL;

    // Render one annotation as a single line: every run of line breaks
    // (with surrounding whitespace) becomes one space.
    $toSingleLine = function ($annot) {
        $dump = print_r($annot, true);
        return preg_replace('/\s*[\r\n]+\s*/', ' ', $dump);
    };
    $lines = array_map($toSingleLine, $annots);
    sort($lines);

    foreach ($lines as $line) {
        print ' ' . $line . PHP_EOL;
    }
    print ' -' . PHP_EOL;

    return $this->helper->getParadigmCollection($word, $annots);
}
Then I use the standard AOT dictionary from SourceForge and the following morphier:
// Reproduction step 1: construct phpMorphy with the two arguments below
// (presumably the dictionary path and the language code — confirm against the phpMorphy constructor).
$morphy = new phpMorphy('dicts/ru_ru_aot_withjo', 'ru_RU');
// Reproduction step 2: obtain the predict-by-database morphier and call getParadigmCollection() on it with the test word.
$morphy->getPredictByDatabaseMorphier()->getParadigmCollection('АБУШКАН');
And this shows me the following debug info:
+
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 11 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 12 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 19 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 23 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 3 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 4 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 6 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 6 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 6 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 6 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 7 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 9 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 49600 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 669 [forms_count] => 132 [packed_forms_count] => 76 [affixes_size] => 842 [form_no] => 64 [pos_id] => 21 [freq] => 3 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 49600 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 669 [forms_count] => 132 [packed_forms_count] => 76 [affixes_size] => 842 [form_no] => 64 [pos_id] => 21 [freq] => 3 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 49600 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 669 [forms_count] => 132 [packed_forms_count] => 76 [affixes_size] => 842 [form_no] => 64 [pos_id] => 21 [freq] => 4 [base_prefix] => [base_suffix] => ТЬ )
Array ( [count] => 1 [offset] => 680064 [cplen] => 0 [plen] => 0 [flen] => 6 [common_ancode] => 667 [forms_count] => 83 [packed_forms_count] => 54 [affixes_size] => 752 [form_no] => 42 [pos_id] => 21 [freq] => 11 [base_prefix] => [base_suffix] => КАТЬ )
Array ( [count] => 1 [offset] => 6976 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 683 [forms_count] => 12 [packed_forms_count] => 10 [affixes_size] => 70 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 6976 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 683 [forms_count] => 12 [packed_forms_count] => 10 [affixes_size] => 70 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 6976 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 683 [forms_count] => 12 [packed_forms_count] => 10 [affixes_size] => 70 [form_no] => 0 [pos_id] => 0 [freq] => 5 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 6976 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 687 [forms_count] => 12 [packed_forms_count] => 10 [affixes_size] => 70 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 7296 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 688 [forms_count] => 12 [packed_forms_count] => 9 [affixes_size] => 62 [form_no] => 0 [pos_id] => 0 [freq] => 5 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 8832 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 688 [forms_count] => 36 [packed_forms_count] => 29 [affixes_size] => 338 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 8832 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 691 [forms_count] => 36 [packed_forms_count] => 29 [affixes_size] => 338 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 8832 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 691 [forms_count] => 36 [packed_forms_count] => 29 [affixes_size] => 338 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 8832 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 691 [forms_count] => 36 [packed_forms_count] => 29 [affixes_size] => 338 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 1 [offset] => 8832 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 691 [forms_count] => 36 [packed_forms_count] => 29 [affixes_size] => 338 [form_no] => 0 [pos_id] => 0 [freq] => 3 [base_prefix] => [base_suffix] => )
Array ( [count] => 2 [offset] => 1383424 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 663 [forms_count] => 35 [packed_forms_count] => 21 [affixes_size] => 172 [form_no] => 13 [pos_id] => 1 [freq] => 6 [base_prefix] => [base_suffix] => ЫЙ )
Array ( [count] => 2 [offset] => 29376 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 680 [forms_count] => 6 [packed_forms_count] => 5 [affixes_size] => 36 [form_no] => 1 [pos_id] => 0 [freq] => 5 [base_prefix] => [base_suffix] => Ы )
Array ( [count] => 2 [offset] => 8832 [cplen] => 0 [plen] => 0 [flen] => 0 [common_ancode] => 691 [forms_count] => 36 [packed_forms_count] => 29 [affixes_size] => 338 [form_no] => 0 [pos_id] => 0 [freq] => 6 [base_prefix] => [base_suffix] => )
Array ( [offset] => 169664 [cplen] => 0 [plen] => 0 [flen] => 2 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 486 [form_no] => 34 [pos_id] => 21 [freq] => 7 [base_prefix] => [base_suffix] => ТЬ )
Array ( [offset] => 335264 [cplen] => 0 [plen] => 0 [flen] => 4 [common_ancode] => 667 [forms_count] => 76 [packed_forms_count] => 46 [affixes_size] => 560 [form_no] => 34 [pos_id] => 21 [freq] => 8 [base_prefix] => [base_suffix] => АТЬ )
Array ( [offset] => 410208 [cplen] => 0 [plen] => 0 [flen] => 6 [common_ancode] => 667 [forms_count] => 75 [packed_forms_count] => 46 [affixes_size] => 652 [form_no] => 34 [pos_id] => 21 [freq] => 9 [base_prefix] => [base_suffix] => КАТЬ )
-
I think there are too many duplicates: many entries differ only in `freq`, and some are completely identical. Is this behavior expected, or is it a bug?
Metadata
Metadata
Assignees
Labels
No labels