From 32929a6276f8a92d5a6c75416d95fb433b6ff8e6 Mon Sep 17 00:00:00 2001 From: iilyak Date: Mon, 4 Dec 2023 06:23:45 -0800 Subject: [PATCH] Introduce types to supported analyzers take2 (#84) * Refactor createAnalyzer to remove polymorphic arguments The polymorphism makes it harder to introduce types because it requires union types, which are not natively supported by the version of Scala we use. Elimination of the polymorphism would allow us to introduce types. The introduction of types is the vehicle to solve the type erasure problem we would have to deal with when we upgrade Scala to the next version. The refactoring is done using the following steps: 1. Introduce `AnalyzerOptions` class with type-specific constructors - `def fromMap(map: Map[_, _])` - `def fromAnalyzerName(name: String)` - `def fromKVsList(options: List[_])` 2. Make sure we correctly go from `Any` to the concrete type. This PR uses the `.collect` combinator instead of relying on `ClassCastException`. Assumptions 1. The keys of `options` passed to `OpenIndexMsg` are strings. 2. It is ok to just ignore all non-string keys in `options`. 3. The analyzer name is either a `String` or a single `String` element wrapped in a `List`. 4. The keys of options passed to OpenIndexMsg are strings. 5. It is ok to just ignore all non-string keys in options. 6. The fields value is a list. 7. The elements of the fields list are tuples: `(String, String)` or `(String, [String])`. 8. The config is a String in the `('analyze, config, text)` message in `AnalyzerService.handleCall` and it should really be named `('analyze, analyzerName, text)`. * Avoid matching on `Some(stopwords: List[String])` The matching on inner types of the container will not work on the next version of Scala. Newer Scala causes type erasure when matching on the inner type of the container (such as List in this case). Assumptions 1. `stopwords` is a list. 2. The elements of `stopwords` are strings. 3. It is ok to skip non-string elements of `stopwords`. 
--- .../cloudant/clouseau/AnalyzerOptions.scala | 42 +++ .../cloudant/clouseau/AnalyzerService.scala | 2 +- .../clouseau/ClouseauTypeFactory.scala | 10 +- .../clouseau/IndexManagerService.scala | 2 +- .../com/cloudant/clouseau/IndexService.scala | 2 +- .../clouseau/SupportedAnalyzers.scala | 294 +++++++++--------- .../clouseau/IndexManagerServiceSpec.scala | 6 +- .../cloudant/clouseau/IndexServiceSpec.scala | 8 +- .../clouseau/SupportedAnalyzersSpec.scala | 189 ++++++++--- 9 files changed, 352 insertions(+), 203 deletions(-) create mode 100644 src/main/scala/com/cloudant/clouseau/AnalyzerOptions.scala diff --git a/src/main/scala/com/cloudant/clouseau/AnalyzerOptions.scala b/src/main/scala/com/cloudant/clouseau/AnalyzerOptions.scala new file mode 100644 index 00000000..ad9d82ad --- /dev/null +++ b/src/main/scala/com/cloudant/clouseau/AnalyzerOptions.scala @@ -0,0 +1,42 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
+ +package com.cloudant.clouseau + +class AnalyzerOptions private (inner: Map[String, Any]) { + def toMap = inner +} + +object AnalyzerOptions { + def fromMap(map: Map[_, _]) = + new AnalyzerOptions(map.asInstanceOf[Map[String, Any]]) + def fromAnalyzerName(name: String) = + new AnalyzerOptions(Map("name" -> name).asInstanceOf[Map[String, Any]]) + def fromKVsList(options: List[_]) = { + // options can be a List of key-value pairs or a single String element wrapped in a List + // the latter is a corner case which we should deprecate + options match { + case List(name: String) => new AnalyzerOptions(Map("name" -> name).asInstanceOf[Map[String, Any]]) + case list: List[_] => new AnalyzerOptions(collectKVs(list).toMap[String, Any].asInstanceOf[Map[String, Any]]) + } + } + def collectKVs(list: List[_]): List[(String, Any)] = + list.collect { case t @ (_: String, _: Any) => t }.asInstanceOf[List[(String, Any)]] + + def from(options: Any): Option[AnalyzerOptions] = + options match { + case map: Map[_, _] => Some(fromMap(map)) + case list: List[_] => Some(fromKVsList(list)) + case string: String => Some(fromAnalyzerName(string)) + case _ => None + } +} diff --git a/src/main/scala/com/cloudant/clouseau/AnalyzerService.scala b/src/main/scala/com/cloudant/clouseau/AnalyzerService.scala index 2918c137..3a03cb7f 100644 --- a/src/main/scala/com/cloudant/clouseau/AnalyzerService.scala +++ b/src/main/scala/com/cloudant/clouseau/AnalyzerService.scala @@ -26,7 +26,7 @@ class AnalyzerService(ctx: ServiceContext[ConfigurationArgs]) extends Service(ct override def handleCall(tag: (Pid, Any), msg: Any): Any = msg match { case ('analyze, analyzerConfig: Any, text: String) => - SupportedAnalyzers.createAnalyzer(analyzerConfig) match { + AnalyzerOptions.from(analyzerConfig).flatMap(SupportedAnalyzers.createAnalyzer) match { case Some(analyzer) => ('ok, tokenize(text, analyzer)) case None => diff --git a/src/main/scala/com/cloudant/clouseau/ClouseauTypeFactory.scala 
b/src/main/scala/com/cloudant/clouseau/ClouseauTypeFactory.scala index 94cbbc09..66ac80b4 100644 --- a/src/main/scala/com/cloudant/clouseau/ClouseauTypeFactory.scala +++ b/src/main/scala/com/cloudant/clouseau/ClouseauTypeFactory.scala @@ -30,7 +30,7 @@ import scala.collection.mutable.ArrayBuffer case class SearchRequest(options: Map[Symbol, Any]) -case class OpenIndexMsg(peer: Pid, path: String, options: Any) +case class OpenIndexMsg(peer: Pid, path: String, options: AnalyzerOptions) case class CleanupPathMsg(path: String) case class RenamePathMsg(dbName: String) case class CleanupDbMsg(dbName: String, activeSigs: List[String]) @@ -53,8 +53,12 @@ object ClouseauTypeFactory extends TypeFactory { val logger = LoggerFactory.getLogger("clouseau.tf") def createType(name: Symbol, arity: Int, reader: TermReader): Option[Any] = (name, arity) match { - case ('open, 4) => - Some(OpenIndexMsg(reader.readAs[Pid], reader.readAs[String], reader.readTerm)) + case ('open, 4) => { + val peer = reader.readAs[Pid] + val path = reader.readAs[String] + val options = reader.readTerm + AnalyzerOptions.from(options).map(OpenIndexMsg(peer, path, _)) + } case ('cleanup, 2) => Some(CleanupPathMsg(reader.readAs[String])) case ('rename, 2) => diff --git a/src/main/scala/com/cloudant/clouseau/IndexManagerService.scala b/src/main/scala/com/cloudant/clouseau/IndexManagerService.scala index 73429897..1ded6be2 100644 --- a/src/main/scala/com/cloudant/clouseau/IndexManagerService.scala +++ b/src/main/scala/com/cloudant/clouseau/IndexManagerService.scala @@ -109,7 +109,7 @@ class IndexManagerService(ctx: ServiceContext[ConfigurationArgs]) extends Servic } override def handleCall(tag: (Pid, Any), msg: Any): Any = msg match { - case OpenIndexMsg(peer: Pid, path: String, options: Any) => + case OpenIndexMsg(peer: Pid, path: String, options: AnalyzerOptions) => lru.get(path) match { case null => waiters.get(path) match { diff --git a/src/main/scala/com/cloudant/clouseau/IndexService.scala 
b/src/main/scala/com/cloudant/clouseau/IndexService.scala index 8bb8e182..f9d45c01 100644 --- a/src/main/scala/com/cloudant/clouseau/IndexService.scala +++ b/src/main/scala/com/cloudant/clouseau/IndexService.scala @@ -899,7 +899,7 @@ object IndexService { val FP = """([-+]?[0-9]+(?:\.[0-9]+)?)""" val DISTANCE_RE = "^([-+])?$".format(FP, FP).r - def start(node: Node, config: Configuration, path: String, options: Any): Any = { + def start(node: Node, config: Configuration, path: String, options: AnalyzerOptions): Any = { val rootDir = new File(config.getString("clouseau.dir", "target/indexes")) val dir = newDirectory(config, new File(rootDir, path)) try { diff --git a/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala b/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala index d4514898..bad0fb35 100644 --- a/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala +++ b/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala @@ -74,8 +74,8 @@ object SupportedAnalyzers { val logger = LoggerFactory.getLogger("clouseau.analyzers") - def createAnalyzer(options: Any): Option[Analyzer] = { - createAnalyzerInt(options) match { + def createAnalyzer(options: AnalyzerOptions): Option[Analyzer] = { + createAnalyzerInt(options.toMap) match { case Some(perfield: PerFieldAnalyzer) => Some(perfield) case Some(analyzer: Analyzer) => @@ -87,25 +87,13 @@ object SupportedAnalyzers { } } - def createAnalyzerInt(options: Any): Option[Analyzer] = options match { - case name: String => - createAnalyzerInt(Map("name" -> name)) - case list: List[(String, Any)] => - try { - createAnalyzerInt(list.toMap) - } catch { - case e: ClassCastException => None - } - case map: Map[String, Any] => - map.get("name") match { - case Some(name: String) => - createAnalyzerInt(name, map) - case None => - None - } - case _ => - None - } + def createAnalyzerInt(options: Map[String, Any]): Option[Analyzer] = + options.get("name").map(_.asInstanceOf[String]) match { + case 
Some(name: String) => + createAnalyzerInt(name, options) + case None => + None + } def createAnalyzerInt(name: String, options: Map[String, Any]): Option[Analyzer] = name match { case "keyword" => @@ -122,246 +110,246 @@ object SupportedAnalyzers { } }) case "arabic" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new ArabicAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new ArabicAnalyzer(IndexService.version)) } case "bulgarian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new BulgarianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new BulgarianAnalyzer(IndexService.version)) } case "brazilian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new BrazilianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new BrazilianAnalyzer(IndexService.version)) } case "catalan" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new CatalanAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new CatalanAnalyzer(IndexService.version)) } case "cjk" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new CJKAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new CJKAnalyzer(IndexService.version)) } case "chinese" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new SmartChineseAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new SmartChineseAnalyzer(IndexService.version)) } case "czech" => 
- options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new CzechAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new CzechAnalyzer(IndexService.version)) } case "danish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new DanishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new DanishAnalyzer(IndexService.version)) } case "german" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new GermanAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new GermanAnalyzer(IndexService.version)) } case "greek" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new GreekAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new GreekAnalyzer(IndexService.version)) } case "english" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new EnglishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new EnglishAnalyzer(IndexService.version)) } case "spanish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new SpanishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new SpanishAnalyzer(IndexService.version)) } case "basque" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new BasqueAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new BasqueAnalyzer(IndexService.version)) } case "persian" => - 
options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new PersianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new PersianAnalyzer(IndexService.version)) } case "finnish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new FinnishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new FinnishAnalyzer(IndexService.version)) } case "french" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new FrenchAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new FrenchAnalyzer(IndexService.version)) } case "irish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new IrishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new IrishAnalyzer(IndexService.version)) } case "galician" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new GalicianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new GalicianAnalyzer(IndexService.version)) } case "hindi" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new HindiAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new HindiAnalyzer(IndexService.version)) } case "hungarian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new HungarianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new HungarianAnalyzer(IndexService.version)) } case "armenian" 
=> - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new ArmenianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new ArmenianAnalyzer(IndexService.version)) } case "indonesian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new IndonesianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new IndonesianAnalyzer(IndexService.version)) } case "italian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new ItalianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new ItalianAnalyzer(IndexService.version)) } case "japanese" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new JapaneseAnalyzer(IndexService.version, null, JapaneseTokenizer.DEFAULT_MODE, stopwords, JapaneseAnalyzer.getDefaultStopTags)) - case _ => + case None => Some(new JapaneseAnalyzer(IndexService.version)) } case "latvian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new LatvianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new LatvianAnalyzer(IndexService.version)) } case "dutch" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new DutchAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new DutchAnalyzer(IndexService.version)) } case "norwegian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new NorwegianAnalyzer(IndexService.version, 
stopwords)) - case _ => + case None => Some(new NorwegianAnalyzer(IndexService.version)) } case "polish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new PolishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new PolishAnalyzer(IndexService.version)) } case "portuguese" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new PortugueseAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new PortugueseAnalyzer(IndexService.version)) } case "romanian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new RomanianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new RomanianAnalyzer(IndexService.version)) } case "russian" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new RussianAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new RussianAnalyzer(IndexService.version)) } case "classic" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new ClassicAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new ClassicAnalyzer(IndexService.version)) } case "standard" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new StandardAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new StandardAnalyzer(IndexService.version)) } case "email" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new 
UAX29URLEmailAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new UAX29URLEmailAnalyzer(IndexService.version)) } case "perfield" => val fallbackAnalyzer = new StandardAnalyzer(IndexService.version) - val defaultAnalyzer: Analyzer = options.get("default") match { + val defaultAnalyzer: Analyzer = options.get("default").flatMap(parseDefault) match { case Some(defaultOptions) => createAnalyzerInt(defaultOptions) match { case Some(defaultAnalyzer1) => @@ -372,16 +360,29 @@ object SupportedAnalyzers { case None => fallbackAnalyzer } + + def parseFields(fields: List[_]): List[(String, Option[Map[String, Any]])] = + // anaylyzerName can be a String or a single String element wrapped in a List + // the latter is a corner case which we should deprecate + fields.collect { + case (field: String, analyzerName: String) => (field, Some(Map("name" -> analyzerName))) + case (field: String, List(analyzerName: String)) => (field, Some(Map("name" -> analyzerName))) + case (field: String, _) => (field, None) + } + var fieldMap: Map[String, Analyzer] = options.get("fields") match { - case Some(fields: List[(String, Any)]) => - fields map { kv => - createAnalyzerInt(kv._2) match { - case Some(fieldAnalyzer) => - (kv._1, fieldAnalyzer) - case None => - (kv._1, defaultAnalyzer) - } - } toMap + case Some(fields: List[_]) => + parseFields(fields).map { + case (field, Some(options)) => + createAnalyzerInt(options) match { + case Some(fieldAnalyzer) => + (field, fieldAnalyzer) + case None => + (field, defaultAnalyzer) + } + case (field, None) => + (field, defaultAnalyzer) + }.toMap case _ => Map.empty } @@ -389,30 +390,39 @@ object SupportedAnalyzers { fieldMap += ("_partition" -> new KeywordAnalyzer()) Some(new PerFieldAnalyzer(defaultAnalyzer, fieldMap)) case "swedish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new SwedishAnalyzer(IndexService.version, stopwords)) - 
case _ => + case None => Some(new SwedishAnalyzer(IndexService.version)) } case "thai" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new ThaiAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new ThaiAnalyzer(IndexService.version)) } case "turkish" => - options.get("stopwords") match { - case Some(stopwords: List[String]) => + getStopWords(options) match { + case Some(stopwords) => Some(new TurkishAnalyzer(IndexService.version, stopwords)) - case _ => + case None => Some(new TurkishAnalyzer(IndexService.version)) } case _ => None } + def parseDefault(default: Any): Option[Map[String, Any]] = default match { + case list: List[_] => Some(AnalyzerOptions.fromKVsList(list).toMap) + case string: String => Some(AnalyzerOptions.fromAnalyzerName(string).toMap) + case _ => None + } + + def getStopWords(options: Map[String, Any]): Option[CharArraySet] = + options.get("stopwords").collect { case list: List[_] => list.collect { case word: String => word } } + implicit def listToJavaSet(list: List[String]): JSet[String] = { Set() ++ list } diff --git a/src/test/scala/com/cloudant/clouseau/IndexManagerServiceSpec.scala b/src/test/scala/com/cloudant/clouseau/IndexManagerServiceSpec.scala index 0b91485a..564b712f 100644 --- a/src/test/scala/com/cloudant/clouseau/IndexManagerServiceSpec.scala +++ b/src/test/scala/com/cloudant/clouseau/IndexManagerServiceSpec.scala @@ -23,13 +23,13 @@ class IndexManagerServiceSpec extends SpecificationWithJUnit { "the index manager" should { "open an index when asked" in new manager_service { - node.call(service, OpenIndexMsg(mbox.self, "foo", "standard")) must beLike { case ('ok, pid: Pid) => ok } + node.call(service, OpenIndexMsg(mbox.self, "foo", AnalyzerOptions.fromAnalyzerName("standard"))) must beLike { case ('ok, pid: Pid) => ok } } "return the same index if it's already open" in new manager_service { - node.call(service, 
OpenIndexMsg(mbox.self, "foo", "standard")) match { + node.call(service, OpenIndexMsg(mbox.self, "foo", AnalyzerOptions.fromAnalyzerName("standard"))) match { case ('ok, pid) => - node.call(service, OpenIndexMsg(mbox.self, "foo", "standard")) must be equalTo ('ok, pid) + node.call(service, OpenIndexMsg(mbox.self, "foo", AnalyzerOptions.fromAnalyzerName("standard"))) must be equalTo ('ok, pid) } } diff --git a/src/test/scala/com/cloudant/clouseau/IndexServiceSpec.scala b/src/test/scala/com/cloudant/clouseau/IndexServiceSpec.scala index 4f3baf8e..dd597824 100644 --- a/src/test/scala/com/cloudant/clouseau/IndexServiceSpec.scala +++ b/src/test/scala/com/cloudant/clouseau/IndexServiceSpec.scala @@ -596,8 +596,8 @@ trait index_service extends RunningNode { service = pid } - def options(): Any = { - "standard" + def options(): AnalyzerOptions = { + AnalyzerOptions.fromAnalyzerName("standard") } override def after { @@ -611,8 +611,8 @@ trait index_service extends RunningNode { trait index_service_perfield extends index_service { - override def options(): Any = { - Map("name" -> "perfield", "default" -> "english") + override def options(): AnalyzerOptions = { + AnalyzerOptions.fromMap(Map("name" -> "perfield", "default" -> "english")) } } diff --git a/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala b/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala index 87c6d099..debccdfa 100644 --- a/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala +++ b/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala @@ -61,150 +61,243 @@ class SupportedAnalyzersSpec extends SpecificationWithJUnit { "SupportedAnalyzers" should { "ignore unsupported analyzers" in { - createAnalyzer("foo") must beNone + val options = AnalyzerOptions.from("foo") + options must beSome + createAnalyzer(options.get) must beNone } "List of non-tuples yields no analyzer" in { - createAnalyzer(List("foo")) must beNone + val options = 
AnalyzerOptions.from(List("foo")) + options must beSome + createAnalyzer(options.get) must beNone } "keyword" in { - createAnalyzer("keyword") must haveClass[Some[KeywordAnalyzer]] + val options = AnalyzerOptions.from("keyword") + options must beSome + createAnalyzer(options.get) must haveClass[Some[KeywordAnalyzer]] } "simple" in { - createAnalyzer("simple") must haveClass[Some[SimpleAnalyzer]] + val options = AnalyzerOptions.from("simple") + options must beSome + createAnalyzer(options.get) must haveClass[Some[SimpleAnalyzer]] } "whitespace" in { - createAnalyzer("whitespace") must haveClass[Some[WhitespaceAnalyzer]] + val options = AnalyzerOptions.from("whitespace") + options must beSome + createAnalyzer(options.get) must haveClass[Some[WhitespaceAnalyzer]] } "simple_asciifolding" in { - createAnalyzer("simple_asciifolding") must haveClass[Some[Analyzer]] + val options = AnalyzerOptions.from("simple_asciifolding") + options must beSome + createAnalyzer(options.get) must haveClass[Some[Analyzer]] } "email" in { - createAnalyzer("email") must haveClass[Some[UAX29URLEmailAnalyzer]] + val options = AnalyzerOptions.from("email") + options must beSome + createAnalyzer(options.get) must haveClass[Some[UAX29URLEmailAnalyzer]] } "perfield" in { // basic - createAnalyzer("perfield") must haveClass[Some[PerFieldAnalyzer]] + val basicOptions = AnalyzerOptions.from("perfield") + basicOptions must beSome + createAnalyzer(basicOptions.get) must haveClass[Some[PerFieldAnalyzer]] // override default - createAnalyzer(Map("name" -> "perfield", "default" -> "english")).toString must + val overrideDefaultOptions = AnalyzerOptions.from(Map("name" -> "perfield", "default" -> "english")) + overrideDefaultOptions must beSome + createAnalyzer(overrideDefaultOptions.get).toString must contain("default=org.apache.lucene.analysis.en.EnglishAnalyzer") // override field - createAnalyzer(Map("name" -> "perfield", "fields" -> List("foo" -> "english"))).toString must + val overrideFieldOptions = 
AnalyzerOptions.from(Map("name" -> "perfield", "fields" -> List("foo" -> "english"))) + overrideFieldOptions must beSome + createAnalyzer(overrideFieldOptions.get).toString must contain("foo -> org.apache.lucene.analysis.en.EnglishAnalyzer") // unrecognized per-field becomes default - createAnalyzer(Map("name" -> "perfield", "default" -> "english", "fields" -> List("foo" -> "foo"))).toString must + val unrecognizedOptions = AnalyzerOptions.from(Map("name" -> "perfield", "default" -> "english", "fields" -> List("foo" -> "foo"))) + unrecognizedOptions must beSome + createAnalyzer(unrecognizedOptions.get).toString must contain("foo -> org.apache.lucene.analysis.en.EnglishAnalyzer") } "arabic" in { - createAnalyzer("arabic") must haveClass[Some[ArabicAnalyzer]] + val options = AnalyzerOptions.from("arabic") + options must beSome + createAnalyzer(options.get) must haveClass[Some[ArabicAnalyzer]] } "bulgarian" in { - createAnalyzer("bulgarian") must haveClass[Some[BulgarianAnalyzer]] + val options = AnalyzerOptions.from("bulgarian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[BulgarianAnalyzer]] } "brazilian" in { - createAnalyzer("brazilian") must haveClass[Some[BrazilianAnalyzer]] + val options = AnalyzerOptions.from("brazilian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[BrazilianAnalyzer]] } "catalan" in { - createAnalyzer("catalan") must haveClass[Some[CatalanAnalyzer]] + val options = AnalyzerOptions.from("catalan") + options must beSome + createAnalyzer(options.get) must haveClass[Some[CatalanAnalyzer]] } "cjk" in { - createAnalyzer("cjk") must haveClass[Some[CJKAnalyzer]] + val options = AnalyzerOptions.from("cjk") + options must beSome + createAnalyzer(options.get) must haveClass[Some[CJKAnalyzer]] } "chinese" in { - createAnalyzer("chinese") must haveClass[Some[SmartChineseAnalyzer]] + val options = AnalyzerOptions.from("chinese") + options must beSome + createAnalyzer(options.get) must 
haveClass[Some[SmartChineseAnalyzer]] } "czech" in { - createAnalyzer("czech") must haveClass[Some[CzechAnalyzer]] + val options = AnalyzerOptions.from("czech") + options must beSome + createAnalyzer(options.get) must haveClass[Some[CzechAnalyzer]] } "danish" in { - createAnalyzer("danish") must haveClass[Some[DanishAnalyzer]] + val options = AnalyzerOptions.from("danish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[DanishAnalyzer]] } "german" in { - createAnalyzer("german") must haveClass[Some[GermanAnalyzer]] + val options = AnalyzerOptions.from("german") + options must beSome + createAnalyzer(options.get) must haveClass[Some[GermanAnalyzer]] } "greek" in { - createAnalyzer("greek") must haveClass[Some[GreekAnalyzer]] + val options = AnalyzerOptions.from("greek") + options must beSome + createAnalyzer(options.get) must haveClass[Some[GreekAnalyzer]] } "english" in { - createAnalyzer("english") must haveClass[Some[EnglishAnalyzer]] + val options = AnalyzerOptions.from("english") + options must beSome + createAnalyzer(options.get) must haveClass[Some[EnglishAnalyzer]] } "spanish" in { - createAnalyzer("spanish") must haveClass[Some[SpanishAnalyzer]] + val options = AnalyzerOptions.from("spanish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[SpanishAnalyzer]] } "basque" in { - createAnalyzer("basque") must haveClass[Some[BasqueAnalyzer]] + val options = AnalyzerOptions.from("basque") + options must beSome + createAnalyzer(options.get) must haveClass[Some[BasqueAnalyzer]] } "persian" in { - createAnalyzer("persian") must haveClass[Some[PersianAnalyzer]] + val options = AnalyzerOptions.from("persian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[PersianAnalyzer]] } "finnish" in { - createAnalyzer("finnish") must haveClass[Some[FinnishAnalyzer]] + val options = AnalyzerOptions.from("finnish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[FinnishAnalyzer]] } 
"french" in { - createAnalyzer("french") must haveClass[Some[FrenchAnalyzer]] + val options = AnalyzerOptions.from("french") + options must beSome + createAnalyzer(options.get) must haveClass[Some[FrenchAnalyzer]] } "irish" in { - createAnalyzer("irish") must haveClass[Some[IrishAnalyzer]] + val options = AnalyzerOptions.from("irish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[IrishAnalyzer]] } "galician" in { - createAnalyzer("galician") must haveClass[Some[GalicianAnalyzer]] + val options = AnalyzerOptions.from("galician") + options must beSome + createAnalyzer(options.get) must haveClass[Some[GalicianAnalyzer]] } "hindi" in { - createAnalyzer("hindi") must haveClass[Some[HindiAnalyzer]] + val options = AnalyzerOptions.from("hindi") + options must beSome + createAnalyzer(options.get) must haveClass[Some[HindiAnalyzer]] } "hungarian" in { - createAnalyzer("hungarian") must haveClass[Some[HungarianAnalyzer]] + val options = AnalyzerOptions.from("hungarian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[HungarianAnalyzer]] } "armenian" in { - createAnalyzer("armenian") must haveClass[Some[ArmenianAnalyzer]] + val options = AnalyzerOptions.from("armenian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[ArmenianAnalyzer]] } "indonesian" in { - createAnalyzer("indonesian") must haveClass[Some[IndonesianAnalyzer]] + val options = AnalyzerOptions.from("indonesian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[IndonesianAnalyzer]] } "italian" in { - createAnalyzer("italian") must haveClass[Some[ItalianAnalyzer]] + val options = AnalyzerOptions.from("italian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[ItalianAnalyzer]] } "japanese" in { - createAnalyzer("japanese") must haveClass[Some[JapaneseAnalyzer]] + val options = AnalyzerOptions.from("japanese") + options must beSome + createAnalyzer(options.get) must 
haveClass[Some[JapaneseAnalyzer]] } "latvian" in { - createAnalyzer("latvian") must haveClass[Some[LatvianAnalyzer]] + val options = AnalyzerOptions.from("latvian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[LatvianAnalyzer]] } "dutch" in { - createAnalyzer("dutch") must haveClass[Some[DutchAnalyzer]] + val options = AnalyzerOptions.from("dutch") + options must beSome + createAnalyzer(options.get) must haveClass[Some[DutchAnalyzer]] } "norwegian" in { - createAnalyzer("norwegian") must haveClass[Some[NorwegianAnalyzer]] + val options = AnalyzerOptions.from("norwegian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[NorwegianAnalyzer]] } "polish" in { - createAnalyzer("polish") must haveClass[Some[PolishAnalyzer]] + val options = AnalyzerOptions.from("polish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[PolishAnalyzer]] } "portuguese" in { - createAnalyzer("portuguese") must haveClass[Some[PortugueseAnalyzer]] + val options = AnalyzerOptions.from("portuguese") + options must beSome + createAnalyzer(options.get) must haveClass[Some[PortugueseAnalyzer]] } "romanian" in { - createAnalyzer("romanian") must haveClass[Some[RomanianAnalyzer]] + val options = AnalyzerOptions.from("romanian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[RomanianAnalyzer]] } "russian" in { - createAnalyzer("russian") must haveClass[Some[RussianAnalyzer]] + val options = AnalyzerOptions.from("russian") + options must beSome + createAnalyzer(options.get) must haveClass[Some[RussianAnalyzer]] } "classic" in { - createAnalyzer("classic") must haveClass[Some[ClassicAnalyzer]] + val options = AnalyzerOptions.from("classic") + options must beSome + createAnalyzer(options.get) must haveClass[Some[ClassicAnalyzer]] } "standard" in { - createAnalyzer("standard") must haveClass[Some[StandardAnalyzer]] + val options = AnalyzerOptions.from("standard") + options must beSome + 
createAnalyzer(options.get) must haveClass[Some[StandardAnalyzer]] } "swedish" in { - createAnalyzer("swedish") must haveClass[Some[SwedishAnalyzer]] + val options = AnalyzerOptions.from("swedish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[SwedishAnalyzer]] } "thai" in { - createAnalyzer("thai") must haveClass[Some[ThaiAnalyzer]] + val options = AnalyzerOptions.from("thai") + options must beSome + createAnalyzer(options.get) must haveClass[Some[ThaiAnalyzer]] } "turkish" in { - createAnalyzer("turkish") must haveClass[Some[TurkishAnalyzer]] + val options = AnalyzerOptions.from("turkish") + options must beSome + createAnalyzer(options.get) must haveClass[Some[TurkishAnalyzer]] } - } }