
Commit 7b61599

Made solution more robust (#7)
1 parent 79e296b · commit 7b61599

16 files changed: +289 −74 lines changed

.env  +4 −2

@@ -26,6 +26,10 @@ KB_BLOB_CONTAINER=kmoaidemo
 OUTPUT_BLOB_CONTAINER=kmoaiprocessed
 
 
+#### OPENAI
+OPENAI_RESOURCE_ENDPOINT=""
+OPENAI_API_KEY=""
+
 
 ############################################
 ###### No need to fill in the below sections
@@ -71,8 +75,6 @@ COSMOS_DB_NAME="KM_OAI_DB"
 
 
 #### OPENAI
-OPENAI_RESOURCE_ENDPOINT=""
-OPENAI_API_KEY=""
 MAX_QUERY_TOKENS = 500
 MAX_OUTPUT_TOKENS = 750
 MAX_HISTORY_TOKENS = 1000

.env.template  +4 −2

@@ -26,6 +26,10 @@ KB_BLOB_CONTAINER=kmoaidemo
 OUTPUT_BLOB_CONTAINER=kmoaiprocessed
 
 
+#### OPENAI
+OPENAI_RESOURCE_ENDPOINT=""
+OPENAI_API_KEY=""
+
 
 ############################################
 ###### No need to fill in the below sections
@@ -71,8 +75,6 @@ COSMOS_DB_NAME="KM_OAI_DB"
 
 
 #### OPENAI
-OPENAI_RESOURCE_ENDPOINT=""
-OPENAI_API_KEY=""
 MAX_QUERY_TOKENS = 500
 MAX_OUTPUT_TOKENS = 750
 MAX_HISTORY_TOKENS = 1000
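
A hedged aside (not part of the commit): moving OPENAI_RESOURCE_ENDPOINT and OPENAI_API_KEY above the "No need to fill in the below sections" divider makes them required user input in both .env and .env.template. For orientation, a minimal sketch of how such values are commonly wired into the pre-1.0 openai Python SDK against an Azure OpenAI resource; the api_version shown is an assumption, and the repo's actual consumption code lives elsewhere (e.g. its openai_helpers module).

```python
# Sketch only: wiring the two relocated .env values into the openai SDK (0.x style).
import os
import openai

openai.api_type = "azure"
openai.api_base = os.environ["OPENAI_RESOURCE_ENDPOINT"]  # e.g. https://<resource>.openai.azure.com/
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_version = "2022-12-01"  # assumed API version; the repo may pin a different one
```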

.gitignore  +4 −1

@@ -174,4 +174,7 @@ agent_name.csv
 pres.ipynb
 hukoomi/
 hukoomi_new/
-.env
+.env
+qna.ipynb
+notebooks/
+demo copy.ipynb

.vscode/settings.json  +34 −1

@@ -4,5 +4,38 @@
     "azureFunctions.pythonVenv": ".venv",
     "azureFunctions.projectLanguage": "Python",
     "azureFunctions.projectRuntime": "~4",
-    "debug.internalConsoleOptions": "neverOpen"
+    "debug.internalConsoleOptions": "neverOpen",
+    "appService.zipIgnorePattern": [
+        "__pycache__{,/**}",
+        "*.py[cod]",
+        "*$py.class",
+        ".Python{,/**}",
+        "build{,/**}",
+        "develop-eggs{,/**}",
+        "dist{,/**}",
+        "downloads{,/**}",
+        "eggs{,/**}",
+        ".eggs{,/**}",
+        "lib{,/**}",
+        "lib64{,/**}",
+        "parts{,/**}",
+        "sdist{,/**}",
+        "var{,/**}",
+        "wheels{,/**}",
+        "share/python-wheels{,/**}",
+        "*.egg-info{,/**}",
+        ".installed.cfg",
+        "*.egg",
+        "MANIFEST",
+        ".env{,/**}",
+        ".venv{,/**}",
+        "env{,/**}",
+        "venv{,/**}",
+        "ENV{,/**}",
+        "env.bak{,/**}",
+        "venv.bak{,/**}",
+        ".vscode{,/**}"
+    ],
+    "appService.defaultWebAppToDeploy": "/subscriptions/2a7eed04-714e-4ba9-96ba-47355c32a8d6/resourceGroups/km-demo/providers/Microsoft.Web/sites/kmaoiwebappdemo0001",
+    "appService.deploySubpath": "app"
 }

WISHLIST.md  +3 −5

@@ -9,17 +9,15 @@ The following features are added to the wishlist to be implemented:
 1. ARM: Implementing Dedicated Plan with B1 for the Function App, instead of the Premium Plan
 1. Code: Adding Form Recognizer either as a new data source to the "kmoaiprocessed" container, or as a new Custom Skill
 1. Code: Storing contents, embeddings, and queries in Cosmos. It's important to know which are the most asked queries.
-
+1. ARM: Adding Application Insights to the ARM template
+1. Code: Adding a custom skill that processes csv files
+1. GUI for triggering Cognitive Search and Form Recognizer document ingestion - streamlit
 
 ### TBD
 1. ARM: Adding Application Insights to the ARM template
-1. Automated Azure Functions deployment through ARM template
 1. Code: Adding a custom skill that processes csv files
-1. Code: Adding a demo for HTML crawling
-1. Code: Adding an embedding match filtering (in Redis) for filtering on metadata
 1. GUI for triggering Cognitive Search and Form Recognizer document ingestion - streamlit
 
 
 ### Future
-1. ChatGPT integration (when released on Azure)
 1. Code: Adding support for fine-tuned models.

app/.deployment  +2 −0

@@ -0,0 +1,2 @@
+[config]
+SCM_DO_BUILD_DURING_DEPLOYMENT=true

app/app.py  +85 −0

@@ -0,0 +1,85 @@
+import logging
+import os
+from flask import Flask, redirect, url_for, request, jsonify
+from flask_socketio import SocketIO
+from flask_socketio import send, emit
+
+import sys
+sys.path.insert(0, '../')
+
+from utils import bot_helpers
+
+
+CHOSEN_EMB_MODEL = os.environ['CHOSEN_EMB_MODEL']
+CHOSEN_QUERY_EMB_MODEL = os.environ['CHOSEN_QUERY_EMB_MODEL']
+CHOSEN_COMP_MODEL = os.environ['CHOSEN_COMP_MODEL']
+
+DAVINCI_003_COMPLETIONS_MODEL = os.environ['DAVINCI_003_COMPLETIONS_MODEL']
+NUM_TOP_MATCHES = int(os.environ['NUM_TOP_MATCHES'])
+
+app = Flask(__name__)
+app.config['SECRET_KEY'] = 'secret!'
+socketio = SocketIO(app)
+
+# source venv/bin/activate
+# flask --app app.py --debug run
+
+
+@app.route("/")
+def hello():
+    print("hello there 3")
+    return "<html><body><h1>Hello Enterprise Search with OpenAI Solution!</h1></body></html>\n"
+
+
+
+@app.route('/kmoai_request', methods=['POST'])
+def kmoai_request():
+    data = request.get_json()
+    return process_kmoai_request(data)
+
+
+
+def check_param(param):
+    if param == 'false':
+        param = False
+    else:
+        param = True
+
+    return param
+
+
+def get_param(req, param_name):
+    param = req.get(param_name, None)
+    return param
+
+
+def process_kmoai_request(req):
+    logging.info('Python HTTP trigger function processed a request.')
+
+    query = get_param(req, 'query')
+    session_id = get_param(req, 'session_id')
+    filter_param = get_param(req, 'filter')
+    search_method = get_param(req, 'search_method')
+
+    enable_unified_search = get_param(req, 'enable_unified_search')
+    enable_redis_search = get_param(req, 'enable_redis_search')
+    enable_cognitive_search = get_param(req, 'enable_cognitive_search')
+    evaluate_step = get_param(req, 'evaluate_step')
+    check_adequacy = get_param(req, 'check_adequacy')
+    check_intent = get_param(req, 'check_intent')
+
+    params_dict = {
+        'enable_unified_search': check_param(enable_unified_search),
+        'enable_redis_search': check_param(enable_redis_search),
+        'enable_cognitive_search': check_param(enable_cognitive_search),
+        'evaluate_step': check_param(evaluate_step),
+        'check_adequacy': check_param(check_adequacy),
+        'check_intent': check_param(check_intent)
+    }
+
+    if filter_param is None:
+        os.environ['redis_filter_param'] = '*'
+    else:
+        os.environ['redis_filter_param'] = filter_param
+
+    return bot_helpers.openai_interrogate_text(query, session_id=session_id, filter_param=filter_param, agent_name=search_method, params_dict=params_dict)
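
Not part of the diff, but a minimal usage sketch of the new endpoint, assuming the app runs locally (e.g. via the `flask --app app.py --debug run` comment above) on Flask's default port 5000. The query, session id, and search_method values are illustrative; note that check_param() maps only the literal string 'false' to False, so the boolean flags are best sent explicitly as 'true'/'false' strings.

```python
# Illustrative client call to the new /kmoai_request route (values are placeholders).
import requests

payload = {
    "query": "What does the knowledge base say about X?",  # placeholder question
    "session_id": "demo-session-001",                      # placeholder session id
    "filter": None,               # None makes the app fall back to the '*' Redis filter
    "search_method": "zero_shot", # assumption: forwarded as agent_name to bot_helpers
    # Only the string 'false' disables a flag; anything else (including omission) enables it.
    "enable_unified_search": "false",
    "enable_redis_search": "true",
    "enable_cognitive_search": "true",
    "evaluate_step": "false",
    "check_adequacy": "false",
    "check_intent": "false",
}

resp = requests.post("http://localhost:5000/kmoai_request", json=payload)
print(resp.text)
```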

host.json  +1 −1

@@ -19,7 +19,7 @@
     },
     "extensionBundle": {
         "id": "Microsoft.Azure.Functions.ExtensionBundle",
-        "version": "[3.*, 4.0.0)"
+        "version": "[3.3.*, 4.0.0)"
     },
     "concurrency": {
         "dynamicConcurrencyEnabled": true,

requirements.txt  +3 −1

@@ -22,4 +22,6 @@ azure-ai-formrecognizer
 beautifulsoup4
 lxml
 azure-ai-textanalytics
-langchain
+langchain
+flask
+flask-socketio

utils/helpers.py  +10 −7

@@ -219,28 +219,30 @@ def redis_search(query: str, filter_param: str):
     query = embedding_enc.decode(embedding_enc.encode(query)[:MAX_QUERY_TOKENS])
 
     query_embedding = openai_helpers.get_openai_embedding(query, CHOSEN_EMB_MODEL)
-    results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=NUM_TOP_MATCHES, filter_param=filter_param)
+    results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=25, filter_param=filter_param)
 
 
     if len(results) == 0:
         logging.warning("No embeddings found in Redis, attempting to load embeddings from Cosmos")
         cosmos_helpers.cosmos_restore_embeddings()
-        results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=NUM_TOP_MATCHES, filter_param=filter_param)
+        results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=25, filter_param=filter_param)
 
     context = []
 
     # r = [t['web_url'] + ' ' + t['container'] + ' ' + t['filename'] for t in results]
-    # print(results)
+    # [print(r['vector_score']) for r in results]
 
     for t in results:
+        t['text_en'] = t['text_en'].replace('\n', ' ').replace('\r', ' ')
+
         try:
             if ('web_url' in t.keys()) and (t['web_url'] is not None) and (t['web_url'] != ''):
-                context.append(f"[{t['web_url']}] " + t['text_en'].replace('\n', ' ') )
+                context.append(f"[{t['web_url']}] " + t['text_en'])
             else:
-                context.append(f"[{t['container']}/{t['filename']}] " + t['text_en'].replace('\n', ' ') )
+                context.append(f"[{t['container']}/{t['filename']}] " + t['text_en'])
         except Exception as e:
-            print("Exception in redis_search: ", e)
-            context.append(t['text_en'].replace('\n', ' ') )
+            print("------------------- Exception in redis_search: ", e)
+            context.append(t['text_en'] )
 
 
     for i in range(len(context)):
@@ -253,6 +255,7 @@ def redis_search(query: str, filter_param: str):
 
     for i in range(len(context)):
         total_tokens += len(completion_enc.encode(context[i]))
+        print(total_tokens)
         if total_tokens < MAX_SEARCH_TOKENS:
             final_context.append(context[i])
         else:
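
Beyond the topK and logging tweaks above, this hunk adds a print(total_tokens) inside the loop that budgets how much retrieved context is kept. A standalone sketch of that pattern (assumptions noted in comments), using tiktoken directly since the repo's completion_enc and MAX_SEARCH_TOKENS come from its own helpers and .env:

```python
# Sketch of the token-budget loop: keep context chunks, in ranked order, while the
# running token count stays under the budget.
import tiktoken

MAX_SEARCH_TOKENS = 2000  # assumed budget; the repo reads its value from configuration
completion_enc = tiktoken.get_encoding("cl100k_base")  # assumed encoding choice

context = [
    "[kmoaidemo/doc1.txt] first retrieved chunk ...",   # placeholder chunks
    "[kmoaidemo/doc2.txt] second retrieved chunk ...",
]

final_context = []
total_tokens = 0
for chunk in context:
    total_tokens += len(completion_enc.encode(chunk))
    if total_tokens < MAX_SEARCH_TOKENS:
        final_context.append(chunk)
    else:
        break  # assumption: the diff cuts off before the else-branch body
```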
