-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yml
81 lines (79 loc) · 4.93 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Configuration for Rasa NLU for building a COVID-19 chatbot - Elementary Chatbot
# Language code of the assistant's training data (en = English)
language: en
# Configuration for the NLU pipeline: the list of NLU components, in the order
# they are written below
pipeline:
# 0. Language Model
# SpacyNLP loads a pre-trained spaCy language model so that later spaCy-based
# components (tokenizer, featurizer) can use its structures and word vectors.
- name: "SpacyNLP"
  # Medium-sized English (en) spaCy pipeline, optimized for CPU
  model: "en_core_web_md"
  # Not case-sensitive, i.e. "Hello" and "hello" are understood as the same
  case_sensitive: false
# 1. Tokenizers
# WhitespaceTokenizer (splits words at whitespace) is intentionally disabled:
# - name: WhitespaceTokenizer
# SpacyTokenizer splits each utterance into tokens using spaCy's tokenization.
- name: SpacyTokenizer
  # false: intent names are kept whole and never split into sub-intents
  intent_tokenization_flag: false
  # Delimiter on which intents would be split (only relevant if the flag is true)
  intent_split_symbol: " "
  # No extra regular expression for detecting tokens. YAML has no `None`
  # literal: a bare `None` is parsed as the string "None", which would then be
  # treated as a token regex, so the null value must be written as `null`.
  token_pattern: null
# 2. Featurizers
# SpacyFeaturizer creates vector representations of user messages and responses
# from the spaCy word vectors.
- name: SpacyFeaturizer
# RegexFeaturizer creates vector representations of user messages (only) based
# on regular expressions.
- name: RegexFeaturizer
# LexicalSyntacticFeaturizer creates lexical and syntactic features for user
# messages (only) to support entity extraction.
- name: LexicalSyntacticFeaturizer
# CountVectorsFeaturizer creates bag-of-n-grams features from user messages,
# intents and responses.
- name: CountVectorsFeaturizer
  # char_wb creates character n-grams (char) only from text inside the
  # specified word boundaries (wb)
  analyzer: char_wb
  min_ngram: 1  # Minimum of a unigram
  max_ngram: 4  # Maximum of a four-gram
# 3. Intent Classifier & 4. Entity Extractors
# DIETClassifier handles both intent classification and entity extraction.
- name: DIETClassifier
  # Train for 100 epochs, i.e. 100 passes over the training dataset
  epochs: 100
  # Apply a sigmoid cross-entropy loss over all similarity terms, which keeps
  # similarities between the input and wrong labels small
  constrain_similarities: true
  # Use softmax to normalize the computed similarities into confidences
  model_confidence: softmax
# EntitySynonymMapper maps extracted entity values to the synonym values
# defined in the training data.
- name: EntitySynonymMapper
# 5. Selectors
# Each ResponseSelector predicts a bot response, with a confidence, from the
# candidate responses of a retrieval intent.
# Default selector: no retrieval_intent is given, so it is not tied to a single
# retrieval intent.
- name: ResponseSelector
  # Train for 100 epochs, i.e. 100 passes over the training dataset
  epochs: 100
  # Sigmoid cross-entropy loss over all similarity terms (keeps similarities to
  # wrong responses small)
  constrain_similarities: true
# Selector dedicated to the "chitchat" retrieval intent
- name: ResponseSelector
  epochs: 100
  retrieval_intent: chitchat
  constrain_similarities: true
# Selector dedicated to the "faq" retrieval intent
- name: ResponseSelector
  epochs: 100
  retrieval_intent: faq
  constrain_similarities: true
# 6. FallbackClassifier assigns the boilerplate "nlu_fallback" intent when the
# actual classification is ambiguous or its confidence is below the threshold.
- name: FallbackClassifier
  # Classification threshold: any prediction with a confidence lower than 0.3
  # has the "nlu_fallback" intent assigned
  threshold: 0.3
  # Ambiguity threshold: if the confidences of the two top-ranked intents are
  # closer than 0.1, the "nlu_fallback" intent is assigned
  ambiguity_threshold: 0.1
# Configuration for Rasa Core (policies)
policies:
# MemoizationPolicy remembers the stories from the training data and replays
# them when the current conversation matches.
- name: MemoizationPolicy
  # Maximum history context of 3 turns (1 turn is user request + bot response)
  max_history: 3
# RulePolicy handles conversation parts that follow the specified ground rules.
- name: RulePolicy
# TEDPolicy (Transformer Embedding Dialogue) is a multi-task approach to action
# prediction and entity recognition.
- name: TEDPolicy
  # Maximum history context of 5 turns (1 turn is user request + bot response)
  max_history: 5
  # Train for 100 epochs, i.e. 100 passes over the training dataset
  epochs: 100
  # Sigmoid cross-entropy loss over all similarity terms (keeps similarities to
  # wrong actions small)
  constrain_similarities: true