Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: dictionary as language module #1185

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions language-modules/ar/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Applies Gradle's 'java-library' plugin to this language module.
plugins {
id 'java-library'
}

// Dependencies of the Arabic language module.
// Libraries are resolved in one of two ways: from jar files found under
// `providedCoreLibsDir` when that is a directory, otherwise through the
// `libs` accessors (presumably a version catalog declared outside this script).
dependencies {
// The root project is only compiled against.
compileOnly(project.rootProject)
if (providedCoreLibsDir.directory) {
compileOnly fileTree(dir: providedCoreLibsDir,
includes: ['**/languagetool-core-*.jar', '**/commons-io-*.jar'])
// The LanguageTool Arabic jar is an `implementation` dependency, so it is part of
// runtimeClasspath, which the jar task of this script merges into the plugin jar.
implementation fileTree(dir: providedCoreLibsDir,
includes: ['**/language-ar-*.jar'])
// NOTE(review): the else-branch below adds lucene-analyzers-common to the compile
// classpath, this branch does not — confirm the root project already exposes it here.
} else {
compileOnly(libs.commons.io)
// LanguageTool core is compiled against with several transitive modules excluded.
compileOnly(libs.languagetool.core) {
exclude module: 'guava'
exclude module: 'language-detector'
exclude group: 'com.google.android'
exclude module: 'jackson-databind'
exclude group: 'org.jetbrains'
}
compileOnly(libs.lucene.analyzers.common)
// languagetool-core is excluded from the language artifact's transitive dependencies.
implementation(libs.languagetool.ar) {
exclude module: 'languagetool-core'
}
}
// Test-only dependencies, including the root project's test fixtures.
testImplementation(libs.junit4)
testImplementation(libs.assertj)
testImplementation(testFixtures(project.rootProject))
testImplementation(libs.commons.io)
}

// Packages the module as a single plugin jar named omegat-language-ar.<ext> under
// the root project's build/modules directory. Everything on runtimeClasspath is
// merged into the jar (directories as-is, archives unpacked via zipTree), and the
// manifest attributes are filled from plugin.properties.
jar {
    archiveFileName.set("omegat-language-ar.${archiveExtension.get()}")
    destinationDirectory.set(rootProject.layout.buildDirectory.dir("modules").get())
    from configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
    // loadProperties is not defined in this script; presumably supplied by the root build.
    def pluginProp = loadProperties(file('plugin.properties'))
    manifest {
        attributes('License': 'GNU Public License version 3 or later',
                'Implementation-Version': pluginProp.Version,
                'OmegaT-Plugins': pluginProp.Class,
                'Plugin-Author': pluginProp.Author,
                'Plugin-Version': pluginProp.Version,
                'Plugin-Name': pluginProp.Name,
                'Plugin-Category': pluginProp.Category,
                'Plugin-Description': pluginProp.Description
        )
    }
    // The block used to assign duplicatesStrategy twice (EXCLUDE, then INCLUDE).
    // Only the last assignment takes effect, so the single effective value is kept.
    // NOTE(review): if EXCLUDE was the intended policy for the merged jar, change it here.
    duplicatesStrategy = DuplicatesStrategy.INCLUDE
}
31 changes: 31 additions & 0 deletions language-modules/ar/plugin.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# OmegaT - Computer Assisted Translation (CAT) tool
# with fuzzy matching, translation memory, keyword search,
# glossaries, and translation leveraging into updated projects.
#
# Copyright (C) 2024 Hiroshi Miura
# Home page: https://www.omegat.org/
# Support center: https://omegat.org/support
#
# This file is part of OmegaT.
#
# OmegaT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OmegaT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
License=GNU Public License version 3 or later
Author=OmegaT team
Version=6.1.0
Category=language
Name=Arabic plugin
Class=org.omegat.languages.ar.ArabicPlugin
Description=Provide languagetool rules, and spell check dictionary for language-ar
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,21 @@

package org.omegat.languages.ar;

import org.omegat.core.spellchecker.SpellCheckDictionaryType;
import org.omegat.core.spellchecker.SpellCheckerManager;
import org.omegat.languagetools.LanguageManager;

public final class ArabicPlugin {

/** Fully qualified class name of the LanguageTool language registered for Arabic. */
private static final String ARABIC_LT = "org.languagetool.language.Arabic";
/** Fully qualified class name of the spell checker dictionary provider for Arabic. */
private static final String ARABIC_SCD = "org.omegat.languages.ar.ArabicSpellCheckerDictionary";

/** Not instantiable; the class only exposes static plugin entry points. */
private ArabicPlugin() {
}

/**
 * Registers the Arabic LanguageTool language under "ar-AR" and the Hunspell
 * spell checker dictionary provider under "ar".
 * <p>
 * The language is registered exactly once; the previous text carried a second,
 * identical registration through a duplicate constant.
 */
public static void loadPlugins() {
    LanguageManager.registerLTLanguage("ar-AR", ARABIC_LT);
    SpellCheckerManager.registerSpellCheckerDictionaryProvider("ar", SpellCheckDictionaryType.HUNSPELL,
            ARABIC_SCD);
}

public static void unloadPlugins() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package org.omegat.languages.ar;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.Collections;

import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.languagetool.JLanguageTool;

import org.omegat.core.spellchecker.ISpellCheckerDictionary;
import org.omegat.core.spellchecker.SpellCheckDictionaryType;

/**
 * Spell checker dictionary provider for Arabic, backed by the Hunspell
 * {@code ar.dic}/{@code ar.aff} resources obtained through the LanguageTool
 * data broker.
 * <p>
 * The streams opened by {@link #getHunspellDictionary()} are kept in fields
 * and released by {@link #close()}.
 */
public class ArabicSpellCheckerDictionary implements ISpellCheckerDictionary, AutoCloseable {

    /** Classpath-absolute location of the Hunspell resources. */
    private static final String DICTIONARY_BASE = "/org/languagetool/resource/ar/hunspell/";
    private static final String DICTIONARY_PATH = DICTIONARY_BASE + "ar.dic";
    private static final String AFFIX_PATH = DICTIONARY_BASE + "ar.aff";

    private InputStream affixInputStream;
    private InputStream dictInputStream;

    /**
     * Builds a Lucene Hunspell dictionary from the bundled resources.
     *
     * @return the dictionary, or {@code null} when a resource is missing or
     *         cannot be read or parsed
     */
    @Override
    public Dictionary getHunspellDictionary() {
        // NOTE(review): a repeated call overwrites the fields without closing
        // the streams opened by the previous call.
        affixInputStream = JLanguageTool.getDataBroker().getAsStream(AFFIX_PATH);
        dictInputStream = JLanguageTool.getDataBroker().getAsStream(DICTIONARY_PATH);
        if (affixInputStream == null || dictInputStream == null) {
            // A missing resource cannot produce a dictionary; release the other stream.
            close();
            return null;
        }
        try {
            return new Dictionary(affixInputStream, Collections.singletonList(dictInputStream), true);
        } catch (IOException | ParseException ignored) {
            // The method declares no checked exception; null signals the failure.
            close();
        }
        return null;
    }

    /**
     * Copies {@code ar.dic} and {@code ar.aff} into the given directory.
     *
     * @param dictionaryDir
     *            target directory for the two files
     * @return path of the written {@code ar.dic}, or {@code null} when a
     *         resource is missing or copying fails
     */
    @Override
    public Path installHunspellDictionary(Path dictionaryDir) {
        try {
            Path dictionaryPath = dictionaryDir.resolve("ar.dic");
            copyResource(DICTIONARY_PATH, dictionaryPath.toFile());
            copyResource(AFFIX_PATH, dictionaryDir.resolve("ar.aff").toFile());
            return dictionaryPath;
        } catch (IOException | RuntimeException ignored) {
            // Best effort, as before: every failure is reported as null.
        }
        return null;
    }

    @Override
    public SpellCheckDictionaryType getDictionaryType() {
        return SpellCheckDictionaryType.HUNSPELL;
    }

    /**
     * Releases the streams opened by {@link #getHunspellDictionary()}.
     * Safe to call when nothing was opened, and safe to call repeatedly.
     */
    @Override
    public void close() {
        closeQuietly(affixInputStream);
        affixInputStream = null;
        closeQuietly(dictInputStream);
        dictInputStream = null;
    }

    /**
     * Copies one resource to a file.
     * <p>
     * The resource constants are classpath-absolute, so they are looked up with
     * {@code getAsStream}, the same call {@link #getHunspellDictionary()} uses,
     * rather than with the resource-directory-relative lookup.
     */
    private static void copyResource(String resourcePath, File target) throws IOException {
        try (InputStream in = JLanguageTool.getDataBroker().getAsStream(resourcePath)) {
            if (in == null) {
                // Fail before the target is created so no empty file is left behind.
                throw new IOException("Resource not found: " + resourcePath);
            }
            try (FileOutputStream out = new FileOutputStream(target)) {
                IOUtils.copy(in, out);
            }
        }
    }

    /** Closes a stream if present; a failure must not keep the other stream open. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close.
        }
    }
}
50 changes: 50 additions & 0 deletions language-modules/ast/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Applies Gradle's 'java-library' plugin to this language module.
plugins {
id 'java-library'
}


// Dependencies of the Asturian language module.
// Libraries are resolved in one of two ways: from jar files found under
// `providedCoreLibsDir` when that is a directory, otherwise through the
// `libs` accessors (presumably a version catalog declared outside this script).
dependencies {
// The root project is only compiled against.
compileOnly(project.rootProject)
if (providedCoreLibsDir.directory) {
compileOnly fileTree(dir: providedCoreLibsDir,
includes: ['**/languagetool-core-*.jar', '**/commons-io-*.jar'])
// The LanguageTool Asturian jar is an `implementation` dependency, so it is part of
// runtimeClasspath, which the jar task of this script merges into the plugin jar.
implementation fileTree(dir: providedCoreLibsDir, includes: ['**/language-ast-*.jar'])
// NOTE(review): the else-branch below adds morfologik-stemming to the compile
// classpath, this branch does not — confirm the root project already exposes it here.
} else {
compileOnly(libs.commons.io)
// LanguageTool core is compiled against with several transitive modules excluded.
compileOnly(libs.languagetool.core) {
exclude module: 'guava'
exclude module: 'language-detector'
exclude group: 'com.google.android'
exclude module: 'jackson-databind'
exclude group: 'org.jetbrains'
}
compileOnly(libs.morfologik.stemming)
// languagetool-core is excluded from the language artifact's transitive dependencies.
implementation(libs.languagetool.ast) {
exclude module: 'languagetool-core'
}
}
// Test-only dependencies, including the root project's test fixtures.
testImplementation(libs.junit4)
testImplementation(libs.assertj)
testImplementation(testFixtures(project.rootProject))
testImplementation(libs.commons.io)
}

// Packages the module as a single plugin jar named omegat-language-ast.<ext> under
// the root project's build/modules directory. Everything on runtimeClasspath is
// merged into the jar (directories as-is, archives unpacked via zipTree), and the
// manifest attributes are filled from plugin.properties.
jar {
    archiveFileName.set("omegat-language-ast.${archiveExtension.get()}")
    destinationDirectory.set(rootProject.layout.buildDirectory.dir("modules").get())
    from configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
    // loadProperties is not defined in this script; presumably supplied by the root build.
    def pluginProp = loadProperties(file('plugin.properties'))
    manifest {
        attributes('License': 'GNU Public License version 3 or later',
                'Implementation-Version': pluginProp.Version,
                'OmegaT-Plugins': pluginProp.Class,
                'Plugin-Author': pluginProp.Author,
                'Plugin-Version': pluginProp.Version,
                'Plugin-Name': pluginProp.Name,
                'Plugin-Category': pluginProp.Category,
                'Plugin-Description': pluginProp.Description
        )
    }
    // The block used to assign duplicatesStrategy twice (EXCLUDE, then INCLUDE).
    // Only the last assignment takes effect, so the single effective value is kept.
    // NOTE(review): if EXCLUDE was the intended policy for the merged jar, change it here.
    duplicatesStrategy = DuplicatesStrategy.INCLUDE
}
31 changes: 31 additions & 0 deletions language-modules/ast/plugin.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# OmegaT - Computer Assisted Translation (CAT) tool
# with fuzzy matching, translation memory, keyword search,
# glossaries, and translation leveraging into updated projects.
#
# Copyright (C) 2024 Hiroshi Miura
# Home page: https://www.omegat.org/
# Support center: https://omegat.org/support
#
# This file is part of OmegaT.
#
# OmegaT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OmegaT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
License=GNU Public License version 3 or later
Author=OmegaT team
Version=6.1.0
Category=language
Name=Asturian plugin
Class=org.omegat.languages.ast.AsturianPlugin
Description=Provide languagetool rules, and spell check dictionary for language-ast
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,23 @@

package org.omegat.languages.ast;

import org.omegat.core.spellchecker.SpellCheckDictionaryType;
import org.omegat.core.spellchecker.SpellCheckerManager;
import org.omegat.languagetools.LanguageManager;

public final class AsturianPlugin {

/** Fully qualified class name of the LanguageTool language registered for Asturian. */
private static final String ASTURIAN = "org.languagetool.language.Asturian";
/**
 * Fully qualified class name of the spell checker dictionary provider.
 * It is handed over as a plain string, so it has to spell the class name
 * exactly: {@code AsturianSpellCheckerDictionary}.
 */
private static final String SPELLCHECK_DICTIONARY = "org.omegat.languages.ast"
        + ".AsturianSpellCheckerDictionary";

/** Not instantiable; the class only exposes static plugin entry points. */
private AsturianPlugin() {
}

/**
 * Registers the Asturian LanguageTool language under "ast-ES" and the
 * Morfologik spell checker dictionary provider under "ast".
 */
public static void loadPlugins() {
    LanguageManager.registerLTLanguage("ast-ES", ASTURIAN);
    SpellCheckerManager.registerSpellCheckerDictionaryProvider("ast", SpellCheckDictionaryType.MORFOLOGIK,
            SPELLCHECK_DICTIONARY);
}

public static void unloadPlugins() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.omegat.languages.ast;

import java.io.IOException;
import java.io.InputStream;

import morfologik.stemming.Dictionary;
import org.languagetool.JLanguageTool;

import org.omegat.core.spellchecker.ISpellCheckerDictionary;
import org.omegat.core.spellchecker.SpellCheckDictionaryType;

/**
 * Spell checker dictionary provider for Asturian, backed by the Morfologik
 * {@code ast_ES.dict}/{@code ast_ES.info} resources obtained through the
 * LanguageTool data broker.
 * <p>
 * The streams opened by {@link #getMofologikDictionary()} are kept in fields
 * and released by {@link #close()}.
 */
public class AsturianSpellCheckerDictionary implements ISpellCheckerDictionary, AutoCloseable {

    /** Classpath-absolute directory holding the dictionary resources. */
    private static final String DICTIONARY_PATH = "/org/languagetool/resource/ast/hunspell/";

    private InputStream infoInputStream;
    private InputStream dictInputStream;

    /**
     * Reads the Morfologik dictionary from the bundled resources.
     *
     * @return the dictionary, or {@code null} when a resource is missing or
     *         cannot be read
     */
    @Override
    public Dictionary getMofologikDictionary() {
        // NOTE(review): a repeated call overwrites the fields without closing
        // the streams opened by the previous call.
        infoInputStream = JLanguageTool.getDataBroker().getAsStream(DICTIONARY_PATH + "ast_ES.info");
        dictInputStream = JLanguageTool.getDataBroker().getAsStream(DICTIONARY_PATH + "ast_ES.dict");
        if (infoInputStream == null || dictInputStream == null) {
            // A missing resource cannot produce a dictionary; release the other stream.
            close();
            return null;
        }
        try {
            return Dictionary.read(dictInputStream, infoInputStream);
        } catch (IOException ignored) {
            // The method declares no checked exception; null signals the failure.
            close();
        }
        return null;
    }

    @Override
    public SpellCheckDictionaryType getDictionaryType() {
        return SpellCheckDictionaryType.MORFOLOGIK;
    }

    /**
     * Releases the streams opened by {@link #getMofologikDictionary()}.
     * Safe to call when nothing was opened, and safe to call repeatedly.
     */
    @Override
    public void close() {
        closeQuietly(infoInputStream);
        infoInputStream = null;
        closeQuietly(dictInputStream);
        dictInputStream = null;
    }

    /** Closes a stream if present; a failure must not keep the other stream open. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close.
        }
    }
}
50 changes: 50 additions & 0 deletions language-modules/be/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Applies Gradle's 'java-library' plugin to this language module.
plugins {
id 'java-library'
}

// Dependencies of the `be` language module.
// Libraries are resolved in one of two ways: from jar files found under
// `providedCoreLibsDir` when that is a directory, otherwise through the
// `libs` accessors (presumably a version catalog declared outside this script).
dependencies {
// The root project is only compiled against.
compileOnly(project.rootProject)
if (providedCoreLibsDir.directory) {
compileOnly fileTree(dir: providedCoreLibsDir,
includes: ['**/languagetool-core-*.jar', '**/commons-io-*.jar'])
// The LanguageTool `be` jar is an `implementation` dependency, so it is part of
// runtimeClasspath, which the jar task of this script merges into the plugin jar.
implementation fileTree(dir: providedCoreLibsDir,
includes: ['**/language-be-*.jar'])
// NOTE(review): the else-branch below adds morfologik-stemming to the compile
// classpath, this branch does not — confirm the root project already exposes it here.
} else {
compileOnly(libs.commons.io)
// LanguageTool core is compiled against with several transitive modules excluded.
compileOnly(libs.languagetool.core) {
exclude module: 'guava'
exclude module: 'language-detector'
exclude group: 'com.google.android'
exclude module: 'jackson-databind'
exclude group: 'org.jetbrains'
}
compileOnly(libs.morfologik.stemming)
// languagetool-core is excluded from the language artifact's transitive dependencies.
implementation(libs.languagetool.be) {
exclude module: 'languagetool-core'
}
}
// Test-only dependencies, including the root project's test fixtures.
testImplementation(libs.junit4)
testImplementation(libs.assertj)
testImplementation(testFixtures(project.rootProject))
testImplementation(libs.commons.io)
}

// Packages the module as a single plugin jar named omegat-language-be.<ext> under
// the root project's build/modules directory. Everything on runtimeClasspath is
// merged into the jar (directories as-is, archives unpacked via zipTree), and the
// manifest attributes are filled from plugin.properties.
jar {
    archiveFileName.set("omegat-language-be.${archiveExtension.get()}")
    destinationDirectory.set(rootProject.layout.buildDirectory.dir("modules").get())
    from configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
    // loadProperties is not defined in this script; presumably supplied by the root build.
    def pluginProp = loadProperties(file('plugin.properties'))
    manifest {
        attributes('License': 'GNU Public License version 3 or later',
                'Implementation-Version': pluginProp.Version,
                'OmegaT-Plugins': pluginProp.Class,
                'Plugin-Author': pluginProp.Author,
                'Plugin-Version': pluginProp.Version,
                'Plugin-Name': pluginProp.Name,
                'Plugin-Category': pluginProp.Category,
                'Plugin-Description': pluginProp.Description
        )
    }
    // The block used to assign duplicatesStrategy twice (EXCLUDE, then INCLUDE).
    // Only the last assignment takes effect, so the single effective value is kept.
    // NOTE(review): if EXCLUDE was the intended policy for the merged jar, change it here.
    duplicatesStrategy = DuplicatesStrategy.INCLUDE
}
Loading
Loading