Skip to content

How to create custom component for sentiment analyzer in Rasa 3.X. #171

@shreyashgupta68

Description

@shreyashgupta68

Hi everyone,

First of all, thanks for this amazing project. I am working on a sentiment analyzer and am having some trouble understanding how it works. I need to use a naive Bayes classifier to train on my NLU training examples for sentiment analysis. Does anyone know how to do this? I am attaching the custom sentiment analyzer component that I have made for Rasa **3.1.0**.

import logging
from typing import Any, Text, Dict, List

from joblib import dump, load
from nltk.classify import NaiveBayesClassifier

from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.graph import ExecutionContext, GraphComponent
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import (TEXT)
# Module-level logger. The original referenced the undefined name ``name``
# (the dunder underscores were lost), which raises NameError at import time.
logger = logging.getLogger(__name__)

@DefaultV1Recipe.register(
    # The component emits entities, so it is registered as an entity
    # extractor. Registered as a tokenizer, Rasa would also route training
    # data through `process_training_data` and expect tokens from it.
    DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR, is_trainable=True
)
class DemoSentiment(GraphComponent):
    """Naive Bayes sentiment analyzer exposed as a Rasa 3.x graph component.

    Training pairs every NLU example that has text with one line of a labels
    file (one label per line, e.g. ``positive`` / ``negative`` / ``neutral``)
    and fits an NLTK ``NaiveBayesClassifier`` on bag-of-words features.
    At inference time the predicted label is attached to each message as an
    entity named ``sentiment``.

    Configuration keys read from ``config``:
        labels_file: path of the labels file (default ``"labels.txt"``,
            resolved relative to the current working directory).
    """

    # Legacy-style component metadata, kept so anything that still reads
    # these attributes keeps working.
    # NOTE(review): presumably not consulted by the Rasa 3.x graph - confirm.
    name = "sentiment"
    provides = ["entities"]
    requires = ["tokens"]
    defaults = {}
    language_list = ["en"]

    # Message property under which Rasa tokenizers store text tokens.
    # NOTE(review): assumed to be "text_tokens" (TOKENS_NAMES[TEXT]) - confirm.
    _TOKENS_KEY = "text_tokens"

    def __init__(
        self,
        config: Dict[Text, Any],
        name: Text,
        model_storage: ModelStorage,
        resource: Resource,
    ) -> None:
        """Store the configuration and the storage handles.

        Args:
            config: Component configuration from the model config.
            name: Name of the graph node this instance runs as.
            model_storage: Storage used to persist / read the classifier.
            resource: Resource that identifies this component in storage.
        """
        self.name = name
        self._config = config or {}

        # Must exist before `process` runs: the original never set it, so an
        # untrained component raised AttributeError instead of being skipped.
        self.clf = None

        # We need to use these later when saving the trained component.
        self._model_storage = model_storage
        self._resource = resource

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Create a new, untrained component."""
        logger.debug("Creating %s with model storage %s", cls.__name__, model_storage)
        return cls(config, execution_context.node_name, model_storage, resource)

    def _tokens_for(self, message: Message) -> List[Text]:
        """Return the word tokens of a message.

        Uses the tokens stored on the message when present and falls back to
        splitting the raw text on whitespace. The original iterated over the
        text string itself, which yields single characters, not words.
        """
        tokens = message.get(self._TOKENS_KEY)
        if tokens:
            # NOTE(review): assumes token objects expose `.text` - confirm.
            return [token.text for token in tokens]
        text = message.get(TEXT)
        return text.split() if text else []

    def preprocessing(self, tokens):
        """Create bag-of-words representation of the training examples."""
        return {word: True for word in tokens}

    def train(self, training_data: TrainingData) -> Resource:
        """Train the classifier, persist it and return its resource.

        Raises:
            ValueError: If the number of labels does not match the number of
                training examples that have text.
        """
        examples = [e for e in training_data.intent_examples if e.get(TEXT)]

        labels_file = self._config.get("labels_file", "labels.txt")
        # One label per line, in the same order as the training examples.
        with open(labels_file, "r", encoding="utf-8") as f:
            labels = f.read().splitlines()

        # zip() would silently truncate and misalign labels and examples.
        if len(labels) != len(examples):
            raise ValueError(
                f"Found {len(labels)} labels in '{labels_file}' but "
                f"{len(examples)} training examples with text; "
                f"they must match one-to-one."
            )

        labeled_data = [
            (self.preprocessing(self._tokens_for(example)), label)
            for example, label in zip(examples, labels)
        ]

        if labeled_data:
            self.clf = NaiveBayesClassifier.train(labeled_data)
        else:
            # Skip training on empty data; `process` will then do nothing.
            logger.warning("No training data for '%s'; skipping training.", self.name)
            self.clf = None

        self.persist()
        return self._resource

    def process_training_data(self, training_data: TrainingData) -> TrainingData:
        """Return the training data unchanged.

        The original returned None, which would break any component that
        receives this method's output.
        """
        return training_data

    def convert_to_rasa(self, value, confidence):
        """Convert model output into the Rasa NLU compatible output format."""
        return {
            "value": value,
            "confidence": confidence,
            "entity": "sentiment",
            "extractor": "sentiment_extractor",
        }

    def process(self, messages: List[Message]) -> List[Message]:
        """Attach a ``sentiment`` entity to every message during inference.

        Messages are returned untouched when the component is not trained or
        did not receive enough training data.
        """
        if not self.clf:
            return messages

        for message in messages:
            tokens = self._tokens_for(message)
            if not tokens:
                # Nothing to classify (e.g. a message without text).
                continue

            distribution = self.clf.prob_classify(self.preprocessing(tokens))
            sentiment = distribution.max()
            entity = self.convert_to_rasa(sentiment, distribution.prob(sentiment))

            # Append instead of overwrite so entities found by earlier
            # extractors in the pipeline are preserved.
            existing = message.get("entities") or []
            message.set("entities", existing + [entity], add_to_output=True)
        return messages

    def persist(self) -> None:
        """Persist the classifier into this component's model storage."""
        with self._model_storage.write_to(self._resource) as model_dir:
            # Keyed on the resource name because that is what `load` looks up.
            dump(self.clf, model_dir / f"{self._resource.name}.joblib")

    @classmethod
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Load the trained component from model storage.

        Returns an untrained component (``clf`` is None) when nothing was
        persisted for ``resource``.
        """
        component = cls(config, execution_context.node_name, model_storage, resource)
        try:
            with model_storage.read_from(resource) as model_dir:
                # `load` here is joblib's module-level function, not this
                # classmethod: names in a class body are not visible from
                # inside its methods.
                # SECURITY: joblib.load unpickles data; only load model
                # archives from trusted sources.
                component.clf = load(model_dir / f"{resource.name}.joblib")
        except (ValueError, FileNotFoundError):
            # NOTE(review): assumes a missing resource surfaces as ValueError
            # from `read_from` - confirm against the storage implementation.
            logger.warning(
                "Could not load classifier for resource '%s'; "
                "the component will not add sentiment entities.",
                resource.name,
            )
        return component

    @classmethod
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Validates that the component is configured properly."""
        pass

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions