Skip to content

Commit

Permalink
Introduce types to supported analyzers take2 (#84)
Browse files Browse the repository at this point in the history
* Refactor createAnalyzer to remove polymorphic arguments

The polymorphism makes it harder to introduce types because it requires union
types, which are not natively supported by the version of Scala we use.
Eliminating the polymorphism allows us to introduce types.

The introduction of types is the vehicle for solving the type erasure problem we
would have to deal with when we upgrade Scala to the next version.

The refactoring is done using the following steps:

1. Introduce `AnalyzerOptions` class with type specific constructors
  - `def fromMap(map: Map[_, _])`
  - `def fromAnalyzerName(name: String)`
  - `def fromKVsList(options: List[_])`
2. Make sure we correctly go from `Any` to the concrete type. This PR uses the
  `.collect` combinator instead of relying on `ClassCastException`.

Assumptions

1. The keys of `options` passed to `OpenIndexMsg` are strings.
2. It is ok to just ignore all non-string keys in `options`.
3. The analyzer name is either a `String` or a single `String` element wrapped
   in the `List`.
4. The keys of options passed to OpenIndexMsg are strings.
5. It is ok to just ignore all non-string keys in options.
6. The fields value is a list.
7. The elements of a fields list are tuples, either
   `(String, String)` or `(String, [String])`.
8. The config is a String in the `('analyze, config, text)` message in
   `AnalyzerService.handleCall` and it should really be named
   `('analyze, analyzerName, text)`.

* Avoid matching on `Some(stopwords: List[String])`

Matching on the inner types of a container will not work on the next version of
Scala. Due to type erasure, newer Scala cannot match on the inner type of a
container (such as `List` in this case).

Assumptions

1. `stopwords` is a list.
2. The elements of `stopwords` are strings.
3. It is ok to skip non-string elements of `stopwords`.
  • Loading branch information
iilyak authored Dec 4, 2023
1 parent af7e555 commit 32929a6
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 203 deletions.
42 changes: 42 additions & 0 deletions src/main/scala/com/cloudant/clouseau/AnalyzerOptions.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

package com.cloudant.clouseau

/**
 * Typed wrapper around analyzer options.
 *
 * The constructor is private: instances are created only through the factory
 * methods of the companion object, which normalize each supported input shape
 * into a `Map[String, Any]`.
 */
class AnalyzerOptions private (inner: Map[String, Any]) {
  /** Returns the wrapped options keyed by option name. */
  def toMap: Map[String, Any] = inner
}

object AnalyzerOptions {
  /**
   * Builds options from an untyped map.
   *
   * Entries whose key is not a `String` are dropped. An unchecked cast to
   * `Map[String, Any]` would silently keep them, because the key type is
   * erased at runtime.
   */
  def fromMap(map: Map[_, _]): AnalyzerOptions =
    new AnalyzerOptions(collectKVs(map.toList).toMap[String, Any])

  /** Builds options holding only the analyzer name under the `"name"` key. */
  def fromAnalyzerName(name: String): AnalyzerOptions =
    new AnalyzerOptions(Map[String, Any]("name" -> name))

  /**
   * Builds options from a list.
   *
   * The list is either a list of key-value pairs or a single `String`
   * (the analyzer name) wrapped in a list. The latter is a corner case
   * which we should deprecate.
   */
  def fromKVsList(options: List[_]): AnalyzerOptions =
    options match {
      case List(name: String) => fromAnalyzerName(name)
      case list: List[_] => new AnalyzerOptions(collectKVs(list).toMap[String, Any])
    }

  /**
   * Keeps only the elements of `list` that are pairs with a `String` key;
   * every other element is skipped.
   */
  def collectKVs(list: List[_]): List[(String, Any)] =
    list.collect { case (key: String, value: Any) => (key, value) }

  /**
   * Converts an untyped term into options.
   *
   * @param options a `Map`, a `List` or a `String`
   * @return `Some` options for a supported shape, `None` for anything else
   */
  def from(options: Any): Option[AnalyzerOptions] =
    options match {
      case map: Map[_, _] => Some(fromMap(map))
      case list: List[_] => Some(fromKVsList(list))
      case string: String => Some(fromAnalyzerName(string))
      case _ => None
    }
}
2 changes: 1 addition & 1 deletion src/main/scala/com/cloudant/clouseau/AnalyzerService.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class AnalyzerService(ctx: ServiceContext[ConfigurationArgs]) extends Service(ct

override def handleCall(tag: (Pid, Any), msg: Any): Any = msg match {
case ('analyze, analyzerConfig: Any, text: String) =>
SupportedAnalyzers.createAnalyzer(analyzerConfig) match {
AnalyzerOptions.from(analyzerConfig).flatMap(SupportedAnalyzers.createAnalyzer) match {
case Some(analyzer) =>
('ok, tokenize(text, analyzer))
case None =>
Expand Down
10 changes: 7 additions & 3 deletions src/main/scala/com/cloudant/clouseau/ClouseauTypeFactory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import scala.collection.mutable.ArrayBuffer

case class SearchRequest(options: Map[Symbol, Any])

case class OpenIndexMsg(peer: Pid, path: String, options: Any)
case class OpenIndexMsg(peer: Pid, path: String, options: AnalyzerOptions)
case class CleanupPathMsg(path: String)
case class RenamePathMsg(dbName: String)
case class CleanupDbMsg(dbName: String, activeSigs: List[String])
Expand All @@ -53,8 +53,12 @@ object ClouseauTypeFactory extends TypeFactory {
val logger = LoggerFactory.getLogger("clouseau.tf")

def createType(name: Symbol, arity: Int, reader: TermReader): Option[Any] = (name, arity) match {
case ('open, 4) =>
Some(OpenIndexMsg(reader.readAs[Pid], reader.readAs[String], reader.readTerm))
case ('open, 4) => {
val peer = reader.readAs[Pid]
val path = reader.readAs[String]
val options = reader.readTerm
AnalyzerOptions.from(options).map(OpenIndexMsg(peer, path, _))
}
case ('cleanup, 2) =>
Some(CleanupPathMsg(reader.readAs[String]))
case ('rename, 2) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class IndexManagerService(ctx: ServiceContext[ConfigurationArgs]) extends Servic
}

override def handleCall(tag: (Pid, Any), msg: Any): Any = msg match {
case OpenIndexMsg(peer: Pid, path: String, options: Any) =>
case OpenIndexMsg(peer: Pid, path: String, options: AnalyzerOptions) =>
lru.get(path) match {
case null =>
waiters.get(path) match {
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/com/cloudant/clouseau/IndexService.scala
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ object IndexService {
val FP = """([-+]?[0-9]+(?:\.[0-9]+)?)"""
val DISTANCE_RE = "^([-+])?<distance,([\\.\\w]+),([\\.\\w]+),%s,%s,(mi|km)>$".format(FP, FP).r

def start(node: Node, config: Configuration, path: String, options: Any): Any = {
def start(node: Node, config: Configuration, path: String, options: AnalyzerOptions): Any = {
val rootDir = new File(config.getString("clouseau.dir", "target/indexes"))
val dir = newDirectory(config, new File(rootDir, path))
try {
Expand Down
Loading

0 comments on commit 32929a6

Please sign in to comment.