@@ -63,7 +63,7 @@ from neofuzz import char_ngram_process
 # We create a process that takes character 1 to 5-grams as features for
 # vectorization and uses a tf-idf weighting scheme.
 # We will use cosine distance for the nearest neighbour search.
-process = char_ngram_process(ngram_range=(1, 5), metric="cosine", tf_idf=True)
+process = char_ngram_process(ngram_range=(1, 5), metric="angular", tf_idf=True)
 
 # We index the options that we are going to search in
 process.index(options)
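For context, once the options are indexed the process can be queried. Below is a minimal, self-contained sketch of how the surrounding README code fits together; the `options` list is hypothetical, and the `extract` call assumes neofuzz mirrors thefuzz's extraction API (the library presents itself as a thefuzz-style replacement):

```python
from neofuzz import char_ngram_process

# Hypothetical corpus to search in (not part of the diff)
options = ["New York", "New Orleans", "Atlanta", "Boston"]

# Same construction as the hunk above, using the new metric name
process = char_ngram_process(ngram_range=(1, 5), metric="angular", tf_idf=True)
process.index(options)

# Assumed thefuzz-style API: returns the closest options with scores
print(process.extract("new yerk", limit=2))
```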
@@ -103,7 +103,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 vectorizer = TfidfVectorizer()
 
 # We use cosine distance because it's way better for high-dimensional spaces.
-process = Process(vectorizer, metric="cosine")
+process = Process(vectorizer, metric="angular")
 ```
 
 ### Dimensionality Reduction
@@ -125,7 +125,7 @@ nmf = NMF(n_components=20)
 # Create a pipeline of the two
 pipeline = make_pipeline(vectorizer, nmf)
 
-process = Process(pipeline, metric="cosine")
+process = Process(pipeline, metric="angular")
 ```
 
 ### Semantic Search/Large Language Models
@@ -144,7 +144,7 @@ from neofuzz import Process
 # Here we will use a pretrained Bert sentence encoder as vectorizer
 vectorizer = SentenceEncoder("all-distilroberta-v1")
 # Then we make a process with the language model
-process = Process(vectorizer, metric="cosine")
+process = Process(vectorizer, metric="angular")
 
 # Remember that the options STILL have to be indexed even though you have a pretrained vectorizer
 process.index(options)
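To make this last hunk runnable on its own: `SentenceEncoder` is assumed to come from `embetter.text` (its import sits above the hunk's context window), and the query again assumes a thefuzz-style `extract`; treat this as a sketch under those assumptions, not the README's verbatim code:

```python
from embetter.text import SentenceEncoder  # assumed source of SentenceEncoder
from neofuzz import Process

# Hypothetical options; with a sentence encoder, matches are semantic
options = ["I ordered a pizza", "The weather is lovely today"]

vectorizer = SentenceEncoder("all-distilroberta-v1")
process = Process(vectorizer, metric="angular")
process.index(options)

# "sunshine" should match the weather sentence despite little character overlap
print(process.extract("Bright sunshine outside", limit=1))
```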