How to create custom component for sentiment analyzer in Rasa 3.X.

Hi everyone,

First of all, thanks for this amazing project. I am working on a **sentiment analyzer** and am having some trouble understanding how this works. I need to train a naive Bayes classifier for sentiment analysis on my NLU training examples. Does anyone know how to do this? I am attaching the custom sentiment analyzer component that I wrote for Rasa **3.1.0**.

import logging
from typing import Any, Text, Dict, List

from joblib import dump, load
from nltk.classify import NaiveBayesClassifier

from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.graph import ExecutionContext, GraphComponent
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import (TEXT)
logger = logging.getLogger(__name__)


# Registered as an entity extractor: this component writes a "sentiment"
# entity onto each message and never produces tokens.
@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR, is_trainable=True
)
class DemoSentiment(GraphComponent):
    """Naive Bayes sentiment analyzer exposed as a Rasa graph component.

    Training builds a bag-of-words feature dict for every intent example
    that has text, pairs it with a label read line by line from a labels
    file, and fits an NLTK ``NaiveBayesClassifier``. At inference time the
    most probable label is attached to each message as an entity named
    ``"sentiment"``.

    The labels file path is read from the ``labels_file`` config key and
    defaults to ``labels.txt``. Labels are matched to the text-bearing
    intent examples by position, one label (e.g. positive / negative /
    neutral) per line.
    """

    name = "sentiment"
    provides = ["entities"]
    requires = ["tokens"]
    defaults = {}
    language_list = ["en"]
    logger.debug("initialised the class")

    def __init__(
            self,
            config: Dict[Text, Any],
            name: Text,
            model_storage: ModelStorage,
            resource: Resource,
    ) -> None:
        """Store the configuration and storage handles; no model is trained yet.

        Args:
            config: Component configuration; only ``labels_file`` is read.
            name: Name of this component instance; also used as the stem of
                the persisted model file name.
            model_storage: Storage used to persist the trained classifier.
            resource: Resource under which the classifier is stored.
        """
        self.name = name
        self._config = config or {}
        # Stays None until `train` or `load` provides a classifier, so that
        # `process` can detect an untrained component instead of crashing.
        self.clf = None

        # We need to use these later when saving the trained component.
        self._model_storage = model_storage
        self._resource = resource

    @staticmethod
    def get_default_config() -> Dict[Text, Any]:
        """Return the default configuration of the component."""
        return {"labels_file": "labels.txt"}

    def preprocessing(self, tokens):
        """Create a bag-of-words representation of one example.

        Args:
            tokens: Either an iterable of word tokens or a raw text string.
                A string is split on whitespace first; iterating it directly
                would yield single characters rather than words.

        Returns:
            A dict mapping every distinct token to ``True``.
        """
        if isinstance(tokens, str):
            tokens = tokens.split()
        return {word: True for word in tokens}

    def train(self, training_data: TrainingData) -> Resource:
        """Train the classifier from the training data and persist it.

        Args:
            training_data: NLU training data; the text of every intent
                example that has text is used.

        Returns:
            The resource under which the trained classifier is (or would
            be) stored.
        """
        texts = [e.get(TEXT) for e in training_data.intent_examples if e.get(TEXT)]

        # The labels file holds one sentiment label per line, in the same
        # order as the text-bearing intent examples.
        labels_file = self._config.get("labels_file", "labels.txt")
        with open(labels_file, "r", encoding="utf-8") as f:
            labels = f.read().splitlines()

        if len(texts) != len(labels):
            # zip() below stops at the shorter sequence; make that visible
            # because a mismatch usually means the labels are misaligned.
            logger.warning(
                "Found %d training texts but %d labels in '%s'; only the "
                "first %d pairs are used.",
                len(texts),
                len(labels),
                labels_file,
                min(len(texts), len(labels)),
            )

        labeled_data = [
            (self.preprocessing(text), label) for text, label in zip(texts, labels)
        ]
        if not labeled_data:
            logger.warning(
                "No labeled examples available; skipping training of '%s'.",
                self.name,
            )
            return self._resource

        self.clf = NaiveBayesClassifier.train(labeled_data)
        self.persist()
        return self._resource

    def convert_to_rasa(self, value, confidence):
        """Convert model output into the Rasa NLU compatible output format.

        Args:
            value: Predicted sentiment label.
            confidence: Probability assigned to ``value``.

        Returns:
            An entity dict with the keys ``value``, ``confidence``,
            ``entity`` and ``extractor``.
        """
        return {
            "value": value,
            "confidence": confidence,
            "entity": "sentiment",
            "extractor": "sentiment_extractor",
        }

    @classmethod
    def create(
            cls,
            config: Dict[Text, Any],
            model_storage: ModelStorage,
            resource: Resource,
            execution_context: ExecutionContext
    ) -> GraphComponent:
        """Create a new, untrained component named after the graph node."""
        logger.debug("Model_Create : %s", model_storage)
        return cls(config, execution_context.node_name, model_storage, resource)

    def process(self, messages: List[Message]) -> List[Message]:
        """Attach a sentiment entity to every message that has text.

        Messages are returned unchanged when the component has no trained
        classifier. Entities already present on a message are kept; the
        sentiment entity is appended to them.

        Args:
            messages: Messages to annotate in place.

        Returns:
            The same list of messages.
        """
        if self.clf is None:
            # Component is either not trained or didn't receive enough
            # training data.
            return messages

        for message in messages:
            text = message.get(TEXT)
            if not text:
                # Nothing to classify for messages without text.
                continue

            distribution = self.clf.prob_classify(self.preprocessing(text))
            sentiment = distribution.max()
            confidence = distribution.prob(sentiment)
            entity = self.convert_to_rasa(sentiment, confidence)

            # Append instead of overwriting so entities set by earlier
            # components survive.
            existing = list(message.get("entities") or [])
            message.set("entities", existing + [entity], add_to_output=True)
        return messages

    def persist(self) -> None:
        """Persist the trained classifier into the model storage.

        The classifier is written to ``<name>.joblib`` inside the directory
        of this component's resource. Nothing is written when no classifier
        has been trained.
        """
        if self.clf is None:
            logger.debug("No trained classifier to persist for '%s'.", self.name)
            return

        with self._model_storage.write_to(self._resource) as model_dir:
            dump(self.clf, model_dir / f"{self.name}.joblib")

    @classmethod
    def load(
            cls,
            config: Dict[Text, Any],
            model_storage: ModelStorage,
            resource: Resource,
            execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Load the trained component from disk.

        If the stored classifier cannot be read, an untrained component is
        returned so that `process` passes messages through unchanged.
        """
        component = cls(config, execution_context.node_name, model_storage, resource)
        try:
            with model_storage.read_from(resource) as model_dir:
                # NOTE(review): `persist` names the file after `self.name`
                # while this reads `resource.name`; this assumes both are
                # equal for the instance that trained the model - confirm.
                component.clf = load(model_dir / f"{resource.name}.joblib")
        except (ValueError, FileNotFoundError):
            # NOTE(review): presumably the storage raises ValueError for an
            # unknown resource - verify against the ModelStorage in use.
            logger.debug(
                "Couldn't load a trained classifier for resource '%s'; "
                "the component will not annotate messages.",
                resource.name,
            )
        return component

    def process_training_data(self, training_data: TrainingData) -> TrainingData:
        """Return the training data unchanged.

        The sentiment entity is only produced at inference time, so the
        training examples are passed through without annotation.
        """
        return training_data

    @classmethod
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Validates that the component is configured properly."""
        pass


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

How to create custom component for sentiment analyzer in Rasa 3.X. #171

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

How to create custom component for sentiment analyzer in Rasa 3.X. #171

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions