Skip to content

Commit

Permalink
Merge pull request #26 from masaruh/analyzer
Browse files Browse the repository at this point in the history
Add analyzer
  • Loading branch information
masaruh committed Jan 7, 2016
2 parents d720d7c + cca0fbf commit 21f80bb
Show file tree
Hide file tree
Showing 14 changed files with 281 additions and 154 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.elasticsearch.analysis;
package org.elasticsearch.index.analysis;


import com.fasterxml.jackson.core.type.TypeReference;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;

import java.io.Reader;
import java.text.Normalizer;

/**
 * Base analyzer for Kuromoji-based suggestions.
 *
 * <p>Every reader is first wrapped in a {@link UnicodeNormalizationCharFilter} configured with
 * {@link Normalizer.Form#NFKC}; the tokenizer supplied by {@link #createTokenizer()} then feeds a
 * {@link LowerCaseFilter}. Subclasses only decide which tokenizer is used.
 */
public abstract class KuromojiSuggestAnalyzer extends Analyzer {

    /**
     * Supplies the tokenizer that sits at the head of the analysis chain.
     *
     * @return a new tokenizer instance
     */
    protected abstract Tokenizer createTokenizer();

    /**
     * Wraps the incoming reader so that text is NFKC-normalized before tokenization.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @param reader    raw character source
     * @return the reader wrapped in a {@link UnicodeNormalizationCharFilter}
     */
    @Override
    protected Reader initReader(String fieldName, Reader reader) {
        return new UnicodeNormalizationCharFilter(reader, Normalizer.Form.NFKC, false);
    }

    /**
     * Builds the token stream chain: subclass tokenizer followed by lower-casing.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @return components whose source is the tokenizer and whose sink is the lower-case filter
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        final Tokenizer source = createTokenizer();
        final TokenStream lowerCased = new LowerCaseFilter(source);
        return new TokenStreamComponents(source, lowerCased);
    }

    /**
     * Variant that builds its {@link KuromojiSuggestTokenizer} with {@code true} as the first
     * constructor argument.
     */
    public static class IndexKuromojiSuggestAnalyzer extends KuromojiSuggestAnalyzer {
        @Override
        protected Tokenizer createTokenizer() {
            // NOTE(review): meaning of (true, 512, false) is defined by KuromojiSuggestTokenizer — confirm there.
            return new KuromojiSuggestTokenizer(true, 512, false);
        }
    }

    /**
     * Variant that builds its {@link KuromojiSuggestTokenizer} with {@code false} as the first
     * constructor argument.
     */
    public static class SearchKuromojiSuggestAnalyzer extends KuromojiSuggestAnalyzer {
        @Override
        protected Tokenizer createTokenizer() {
            // Differs from the index variant only in the first argument.
            return new KuromojiSuggestTokenizer(false, 512, false);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettingsService;

/**
 * Index-level provider for the Kuromoji suggest analyzers.
 *
 * <p>The analyzer handed out by {@link #get()} is selected by the name the provider is created
 * with: {@link #INDEX_ANALYZER} selects
 * {@link KuromojiSuggestAnalyzer.IndexKuromojiSuggestAnalyzer}, {@link #SEARCH_ANALYZER} selects
 * {@link KuromojiSuggestAnalyzer.SearchKuromojiSuggestAnalyzer}. Any other name is rejected.
 */
public class KuromojiSuggestAnalyzerProvider extends AbstractIndexAnalyzerProvider<KuromojiSuggestAnalyzer> {
    /** Name that selects {@link KuromojiSuggestAnalyzer.IndexKuromojiSuggestAnalyzer}. */
    public static final String INDEX_ANALYZER = "kuromoji_suggest_index";
    /** Name that selects {@link KuromojiSuggestAnalyzer.SearchKuromojiSuggestAnalyzer}. */
    public static final String SEARCH_ANALYZER = "kuromoji_suggest_search";

    private final KuromojiSuggestAnalyzer analyzer;

    /**
     * Creates the provider and the analyzer matching {@code name}.
     *
     * @param index                the index this provider belongs to
     * @param indexSettingsService source of the index settings passed to the superclass
     * @param name                 analyzer name; must be {@link #INDEX_ANALYZER} or {@link #SEARCH_ANALYZER}
     * @param settings             analyzer settings passed to the superclass
     * @throws IllegalArgumentException if {@code name} is neither of the two supported names
     */
    @Inject
    public KuromojiSuggestAnalyzerProvider(Index index, IndexSettingsService indexSettingsService,
                                           @Assisted String name, @Assisted Settings settings) {
        // Use getSettings(), the same accessor KuromojiSuggestTokenizerFactory passes to its
        // superclass, so both components obtain the index settings the same way.
        super(index, indexSettingsService.getSettings(), name, settings);
        this.analyzer = createAnalyzer(name);
    }

    /** Maps a registered analyzer name to a new analyzer instance. */
    private static KuromojiSuggestAnalyzer createAnalyzer(String name) {
        switch (name) {
            case INDEX_ANALYZER:
                return new KuromojiSuggestAnalyzer.IndexKuromojiSuggestAnalyzer();
            case SEARCH_ANALYZER:
                return new KuromojiSuggestAnalyzer.SearchKuromojiSuggestAnalyzer();
            default:
                throw new IllegalArgumentException("Invalid name [" + name + "]");
        }
    }

    /**
     * @return the analyzer created in the constructor; the same instance on every call
     */
    @Override
    public KuromojiSuggestAnalyzer get() {
        return this.analyzer;
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.elasticsearch.analysis;
package org.elasticsearch.index.analysis;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package org.elasticsearch.analysis;
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
import org.elasticsearch.index.settings.IndexSettingsService;

public class KuromojiSuggestTokenizerFactory extends AbstractTokenizerFactory {
Expand All @@ -15,7 +13,7 @@ public class KuromojiSuggestTokenizerFactory extends AbstractTokenizerFactory {
private final boolean edgeNGram;

@Inject
public KuromojiSuggestTokenizerFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
public KuromojiSuggestTokenizerFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);

this.expand = settings.getAsBoolean("expand", false);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.elasticsearch.analysis;
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharFilter;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package org.elasticsearch.analysis;
package org.elasticsearch.index.analysis;

import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
import org.elasticsearch.index.settings.IndexSettingsService;

import java.io.Reader;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package org.elasticsearch.indices.analysis;

import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.KuromojiSuggestAnalyzer;
import org.elasticsearch.index.analysis.KuromojiSuggestAnalyzerProvider;
import org.elasticsearch.index.analysis.KuromojiSuggestTokenizer;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory;
import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.analysis.UnicodeNormalizationCharFilter;

import java.io.Reader;
import java.text.Normalizer;

/**
 * Registers this plugin's analysis components at the indices level so that, unless an index
 * configures them explicitly, the same components are shared among all indices.
 *
 * <p>Registered on construction: the two Kuromoji suggest analyzers, the {@code nfkc} char
 * filter, and the {@code kuromoji_suggest_index} / {@code kuromoji_suggest_search} tokenizers.
 */
public class KuromojiSuggestIndicesAnalysis extends AbstractComponent {

    /**
     * Performs all registrations against the given service.
     *
     * @param settings               settings handed to {@link AbstractComponent}
     * @param indicesAnalysisService registry that receives the pre-built components
     */
    @Inject
    public KuromojiSuggestIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
        super(settings);

        registerAnalyzer(indicesAnalysisService, KuromojiSuggestAnalyzerProvider.INDEX_ANALYZER,
                new KuromojiSuggestAnalyzer.IndexKuromojiSuggestAnalyzer());
        registerAnalyzer(indicesAnalysisService, KuromojiSuggestAnalyzerProvider.SEARCH_ANALYZER,
                new KuromojiSuggestAnalyzer.SearchKuromojiSuggestAnalyzer());

        registerNfkcCharFilter(indicesAnalysisService);

        registerTokenizer(indicesAnalysisService, "kuromoji_suggest_index", true);
        registerTokenizer(indicesAnalysisService, "kuromoji_suggest_search", false);
    }

    /** Registers {@code analyzer} under {@code analyzerName} with {@link AnalyzerScope#INDICES}. */
    private static void registerAnalyzer(IndicesAnalysisService service, String analyzerName,
                                         KuromojiSuggestAnalyzer analyzer) {
        service.analyzerProviderFactories().put(analyzerName,
                new PreBuiltAnalyzerProviderFactory(analyzerName, AnalyzerScope.INDICES, analyzer));
    }

    /** Registers the {@code nfkc} char filter, which wraps readers for NFKC normalization. */
    private static void registerNfkcCharFilter(IndicesAnalysisService service) {
        final CharFilterFactory nfkc = new CharFilterFactory() {
            @Override
            public String name() {
                return "nfkc";
            }

            @Override
            public Reader create(Reader reader) {
                return new UnicodeNormalizationCharFilter(reader, Normalizer.Form.NFKC, false);
            }
        };
        service.charFilterFactories().put("nfkc", new PreBuiltCharFilterFactoryFactory(nfkc));
    }

    /**
     * Registers a tokenizer factory under {@code tokenizerName}.
     *
     * @param tokenizerFlag first constructor argument of {@link KuromojiSuggestTokenizer};
     *                      {@code true} for the index tokenizer, {@code false} for the search one
     */
    private static void registerTokenizer(IndicesAnalysisService service, final String tokenizerName,
                                          final boolean tokenizerFlag) {
        final TokenizerFactory factory = new TokenizerFactory() {
            @Override
            public String name() {
                return tokenizerName;
            }

            @Override
            public Tokenizer create() {
                // The remaining arguments (512, false) are the same for both tokenizers.
                return new KuromojiSuggestTokenizer(tokenizerFlag, 512, false);
            }
        };
        service.tokenizerFactories().put(tokenizerName, new PreBuiltTokenizerFactoryFactory(factory));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.elasticsearch.indices.analysis;

import org.elasticsearch.common.inject.AbstractModule;

/**
 * Injection module whose only job is to bind {@link KuromojiSuggestIndicesAnalysis}
 * as an eager singleton.
 */
public class KuromojiSuggestIndicesAnalysisModule extends AbstractModule {

    /** Binds {@link KuromojiSuggestIndicesAnalysis} as an eager singleton. */
    @Override
    protected void configure() {
        final Class<KuromojiSuggestIndicesAnalysis> component = KuromojiSuggestIndicesAnalysis.class;
        bind(component).asEagerSingleton();
    }
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
package org.elasticsearch.plugin;

import org.elasticsearch.analysis.KuromojiSuggestTokenizerFactory;
import org.elasticsearch.analysis.UnicodeNormalizationCharFilterFactory;
import org.elasticsearch.common.inject.Module;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.KuromojiSuggestAnalyzerProvider;
import org.elasticsearch.index.analysis.KuromojiSuggestTokenizerFactory;
import org.elasticsearch.index.analysis.UnicodeNormalizationCharFilterFactory;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.indices.IndicesModule;
import org.elasticsearch.indices.analysis.KuromojiSuggestIndicesAnalysisModule;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.suggest.completion.JapaneseCompletionSuggester;

import java.util.Collection;
import java.util.Collections;

public class JapaneseSuggesterPlugin extends Plugin {
@Override
public String name() {
Expand All @@ -24,7 +30,14 @@ public void onModule(IndicesModule indicesModule) {
indicesModule.registerMapper("japanese_completion", new CompletionFieldMapper.TypeParser());
}

/**
 * Contributes the node-level modules of this plugin.
 *
 * @return a single-element list holding a new {@link KuromojiSuggestIndicesAnalysisModule}
 */
@Override
public Collection<Module> nodeModules() {
    final Module indicesAnalysisModule = new KuromojiSuggestIndicesAnalysisModule();
    return Collections.singletonList(indicesAnalysisModule);
}

/**
 * Registers this plugin's analysis components on the given module: both suggest analyzers
 * (backed by {@link KuromojiSuggestAnalyzerProvider}), the {@code kuromoji_suggest} tokenizer
 * and the {@code unicode_normalize} char filter.
 *
 * @param module analysis module receiving the registrations
 */
public void onModule(AnalysisModule module) {
    // Both analyzer names share one provider class; the provider picks the analyzer by name.
    final String[] analyzerNames = {
        KuromojiSuggestAnalyzerProvider.INDEX_ANALYZER,
        KuromojiSuggestAnalyzerProvider.SEARCH_ANALYZER
    };
    for (String analyzerName : analyzerNames) {
        module.addAnalyzer(analyzerName, KuromojiSuggestAnalyzerProvider.class);
    }

    module.addTokenizer("kuromoji_suggest", KuromojiSuggestTokenizerFactory.class);
    module.addCharFilter("unicode_normalize", UnicodeNormalizationCharFilterFactory.class);
}
Expand Down
Loading

0 comments on commit 21f80bb

Please sign in to comment.