
Commit 7b61599

Made solution more robust (#7)
1 parent 79e296b · commit 7b61599

16 files changed: +289 −74 lines changed

.env  +4 −2

@@ -26,6 +26,10 @@ KB_BLOB_CONTAINER=kmoaidemo
 OUTPUT_BLOB_CONTAINER=kmoaiprocessed
 
 
+#### OPENAI
+OPENAI_RESOURCE_ENDPOINT=""
+OPENAI_API_KEY=""
+
 
 ############################################
 ###### No need to fill in the below sections
@@ -71,8 +75,6 @@ COSMOS_DB_NAME="KM_OAI_DB"
 
 
 #### OPENAI
-OPENAI_RESOURCE_ENDPOINT=""
-OPENAI_API_KEY=""
 MAX_QUERY_TOKENS = 500
 MAX_OUTPUT_TOKENS = 750
 MAX_HISTORY_TOKENS = 1000

.env.template  +4 −2

@@ -26,6 +26,10 @@ KB_BLOB_CONTAINER=kmoaidemo
 OUTPUT_BLOB_CONTAINER=kmoaiprocessed
 
 
+#### OPENAI
+OPENAI_RESOURCE_ENDPOINT=""
+OPENAI_API_KEY=""
+
 
 ############################################
 ###### No need to fill in the below sections
@@ -71,8 +75,6 @@ COSMOS_DB_NAME="KM_OAI_DB"
 
 
 #### OPENAI
-OPENAI_RESOURCE_ENDPOINT=""
-OPENAI_API_KEY=""
 MAX_QUERY_TOKENS = 500
 MAX_OUTPUT_TOKENS = 750
 MAX_HISTORY_TOKENS = 1000
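
A hedged aside (not part of the commit): moving OPENAI_RESOURCE_ENDPOINT and OPENAI_API_KEY above the "No need to fill in the below sections" divider makes them required user input in both .env and .env.template. For orientation, a minimal sketch of how such values are commonly wired into the pre-1.0 openai Python SDK against an Azure OpenAI resource; the api_version shown is an assumption, and the repo's actual consumption code lives elsewhere (e.g. its openai_helpers module).

```python
# Sketch only: wiring the two relocated .env values into the openai SDK (0.x style).
import os
import openai

openai.api_type = "azure"
openai.api_base = os.environ["OPENAI_RESOURCE_ENDPOINT"]  # e.g. https://<resource>.openai.azure.com/
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_version = "2022-12-01"  # assumed API version; the repo may pin a different one
```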

.gitignore  +4 −1

@@ -174,4 +174,7 @@ agent_name.csv
 pres.ipynb
 hukoomi/
 hukoomi_new/
-.env
+.env
+qna.ipynb
+notebooks/
+demo copy.ipynb

.vscode/settings.json  +34 −1

@@ -4,5 +4,38 @@
     "azureFunctions.pythonVenv": ".venv",
     "azureFunctions.projectLanguage": "Python",
     "azureFunctions.projectRuntime": "~4",
-    "debug.internalConsoleOptions": "neverOpen"
+    "debug.internalConsoleOptions": "neverOpen",
+    "appService.zipIgnorePattern": [
+        "__pycache__{,/**}",
+        "*.py[cod]",
+        "*$py.class",
+        ".Python{,/**}",
+        "build{,/**}",
+        "develop-eggs{,/**}",
+        "dist{,/**}",
+        "downloads{,/**}",
+        "eggs{,/**}",
+        ".eggs{,/**}",
+        "lib{,/**}",
+        "lib64{,/**}",
+        "parts{,/**}",
+        "sdist{,/**}",
+        "var{,/**}",
+        "wheels{,/**}",
+        "share/python-wheels{,/**}",
+        "*.egg-info{,/**}",
+        ".installed.cfg",
+        "*.egg",
+        "MANIFEST",
+        ".env{,/**}",
+        ".venv{,/**}",
+        "env{,/**}",
+        "venv{,/**}",
+        "ENV{,/**}",
+        "env.bak{,/**}",
+        "venv.bak{,/**}",
+        ".vscode{,/**}"
+    ],
+    "appService.defaultWebAppToDeploy": "/subscriptions/2a7eed04-714e-4ba9-96ba-47355c32a8d6/resourceGroups/km-demo/providers/Microsoft.Web/sites/kmaoiwebappdemo0001",
+    "appService.deploySubpath": "app"
 }

WISHLIST.md  +3 −5

@@ -9,17 +9,15 @@ The following features are added to the wishlist to be implemented:
 1. ARM: Implementing Dedicated Plan with B1 for the Function App, instead of the Premium Plan
 1. Code: Adding Form Recognizer either as a new data source to the "kmoaiprocessed" container, or as a new Custom Skill
 1. Code: Storing contents, embeddings, and queries in Cosmos. It's important to know which are the most asked queries.
-
+1. ARM: Adding Application Insights to the ARM template
+1. Code: Adding a custom skill that processes csv files
+1. GUI for triggering Cognitive Search and Form Recognizer document ingestion - streamlit
 
 ### TBD
 1. ARM: Adding Application Insights to the ARM template
-1. Automated Azure Functions deployment through ARM template
 1. Code: Adding a custom skill that processes csv files
-1. Code: Adding a demo for HTML crawling
-1. Code: Adding an embedding match filtering (in Redis) for filtering on metadata
 1. GUI for triggering Cognitive Search and Form Recognizer document ingestion - streamlit
 
 
 ### Future
-1. ChatGPT integration (when released on Azure)
 1. Code: Adding support for fine-tuned models.

app/.deployment  +2 −0

@@ -0,0 +1,2 @@
+[config]
+SCM_DO_BUILD_DURING_DEPLOYMENT=true

app/app.py  +85 −0

@@ -0,0 +1,85 @@
+import logging
+import os
+from flask import Flask, redirect, url_for, request, jsonify
+from flask_socketio import SocketIO
+from flask_socketio import send, emit
+
+import sys
+sys.path.insert(0, '../')
+
+from utils import bot_helpers
+
+
+CHOSEN_EMB_MODEL = os.environ['CHOSEN_EMB_MODEL']
+CHOSEN_QUERY_EMB_MODEL = os.environ['CHOSEN_QUERY_EMB_MODEL']
+CHOSEN_COMP_MODEL = os.environ['CHOSEN_COMP_MODEL']
+
+DAVINCI_003_COMPLETIONS_MODEL = os.environ['DAVINCI_003_COMPLETIONS_MODEL']
+NUM_TOP_MATCHES = int(os.environ['NUM_TOP_MATCHES'])
+
+app = Flask(__name__)
+app.config['SECRET_KEY'] = 'secret!'
+socketio = SocketIO(app)
+
+# source venv/bin/activate
+# flask --app app.py --debug run
+
+
+@app.route("/")
+def hello():
+    print("hello there 3")
+    return "<html><body><h1>Hello Enterprise Search with OpenAI Solution!</h1></body></html>\n"
+
+
+
+@app.route('/kmoai_request', methods=['POST'])
+def kmoai_request():
+    data = request.get_json()
+    return process_kmoai_request(data)
+
+
+
+def check_param(param):
+    if param == 'false':
+        param = False
+    else:
+        param = True
+
+    return param
+
+
+def get_param(req, param_name):
+    param = req.get(param_name, None)
+    return param
+
+
+def process_kmoai_request(req):
+    logging.info('Python HTTP trigger function processed a request.')
+
+    query = get_param(req, 'query')
+    session_id = get_param(req, 'session_id')
+    filter_param = get_param(req, 'filter')
+    search_method = get_param(req, 'search_method')
+
+    enable_unified_search = get_param(req, 'enable_unified_search')
+    enable_redis_search = get_param(req, 'enable_redis_search')
+    enable_cognitive_search = get_param(req, 'enable_cognitive_search')
+    evaluate_step = get_param(req, 'evaluate_step')
+    check_adequacy = get_param(req, 'check_adequacy')
+    check_intent = get_param(req, 'check_intent')
+
+    params_dict = {
+        'enable_unified_search': check_param(enable_unified_search),
+        'enable_redis_search': check_param(enable_redis_search),
+        'enable_cognitive_search': check_param(enable_cognitive_search),
+        'evaluate_step': check_param(evaluate_step),
+        'check_adequacy': check_param(check_adequacy),
+        'check_intent': check_param(check_intent)
+    }
+
+    if filter_param is None:
+        os.environ['redis_filter_param'] = '*'
+    else:
+        os.environ['redis_filter_param'] = filter_param
+
+    return bot_helpers.openai_interrogate_text(query, session_id=session_id, filter_param=filter_param, agent_name=search_method, params_dict=params_dict)
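
Not part of the diff, but a minimal usage sketch of the new endpoint, assuming the app runs locally (e.g. via the `flask --app app.py --debug run` comment above) on Flask's default port 5000. The query, session id, and search_method values are illustrative; note that check_param() maps only the literal string 'false' to False, so the boolean flags are best sent explicitly as 'true'/'false' strings.

```python
# Illustrative client call to the new /kmoai_request route (values are placeholders).
import requests

payload = {
    "query": "What does the knowledge base say about X?",  # placeholder question
    "session_id": "demo-session-001",                      # placeholder session id
    "filter": None,               # None makes the app fall back to the '*' Redis filter
    "search_method": "zero_shot", # assumption: forwarded as agent_name to bot_helpers
    # Only the string 'false' disables a flag; anything else (including omission) enables it.
    "enable_unified_search": "false",
    "enable_redis_search": "true",
    "enable_cognitive_search": "true",
    "evaluate_step": "false",
    "check_adequacy": "false",
    "check_intent": "false",
}

resp = requests.post("http://localhost:5000/kmoai_request", json=payload)
print(resp.text)
```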

host.json  +1 −1

@@ -19,7 +19,7 @@
     },
     "extensionBundle": {
         "id": "Microsoft.Azure.Functions.ExtensionBundle",
-        "version": "[3.*, 4.0.0)"
+        "version": "[3.3.*, 4.0.0)"
     },
     "concurrency": {
         "dynamicConcurrencyEnabled": true,

requirements.txt  +3 −1

@@ -22,4 +22,6 @@ azure-ai-formrecognizer
 beautifulsoup4
 lxml
 azure-ai-textanalytics
-langchain
+langchain
+flask
+flask-socketio

utils/helpers.py  +10 −7

@@ -219,28 +219,30 @@ def redis_search(query: str, filter_param: str):
     query = embedding_enc.decode(embedding_enc.encode(query)[:MAX_QUERY_TOKENS])
 
     query_embedding = openai_helpers.get_openai_embedding(query, CHOSEN_EMB_MODEL)
-    results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=NUM_TOP_MATCHES, filter_param=filter_param)
+    results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=25, filter_param=filter_param)
 
 
     if len(results) == 0:
         logging.warning("No embeddings found in Redis, attempting to load embeddings from Cosmos")
         cosmos_helpers.cosmos_restore_embeddings()
-        results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=NUM_TOP_MATCHES, filter_param=filter_param)
+        results = redis_helpers.redis_query_embedding_index(redis_conn, query_embedding, -1, topK=25, filter_param=filter_param)
 
     context = []
 
     # r = [t['web_url'] + ' ' + t['container'] + ' ' + t['filename'] for t in results]
-    # print(results)
+    # [print(r['vector_score']) for r in results]
 
     for t in results:
+        t['text_en'] = t['text_en'].replace('\n', ' ').replace('\r', ' ')
+
         try:
             if ('web_url' in t.keys()) and (t['web_url'] is not None) and (t['web_url'] != ''):
-                context.append(f"[{t['web_url']}] " + t['text_en'].replace('\n', ' ') )
+                context.append(f"[{t['web_url']}] " + t['text_en'])
             else:
-                context.append(f"[{t['container']}/{t['filename']}] " + t['text_en'].replace('\n', ' ') )
+                context.append(f"[{t['container']}/{t['filename']}] " + t['text_en'])
         except Exception as e:
-            print("Exception in redis_search: ", e)
-            context.append(t['text_en'].replace('\n', ' ') )
+            print("------------------- Exception in redis_search: ", e)
+            context.append(t['text_en'] )
 
 
     for i in range(len(context)):
@@ -253,6 +255,7 @@ def redis_search(query: str, filter_param: str):
 
     for i in range(len(context)):
         total_tokens += len(completion_enc.encode(context[i]))
+        print(total_tokens)
         if total_tokens < MAX_SEARCH_TOKENS:
             final_context.append(context[i])
         else:
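
Beyond the topK and logging tweaks above, this hunk adds a print(total_tokens) inside the loop that budgets how much retrieved context is kept. A standalone sketch of that pattern (assumptions noted in comments), using tiktoken directly since the repo's completion_enc and MAX_SEARCH_TOKENS come from its own helpers and .env:

```python
# Sketch of the token-budget loop: keep context chunks, in ranked order, while the
# running token count stays under the budget.
import tiktoken

MAX_SEARCH_TOKENS = 2000  # assumed budget; the repo reads its value from configuration
completion_enc = tiktoken.get_encoding("cl100k_base")  # assumed encoding choice

context = [
    "[kmoaidemo/doc1.txt] first retrieved chunk ...",   # placeholder chunks
    "[kmoaidemo/doc2.txt] second retrieved chunk ...",
]

final_context = []
total_tokens = 0
for chunk in context:
    total_tokens += len(completion_enc.encode(chunk))
    if total_tokens < MAX_SEARCH_TOKENS:
        final_context.append(chunk)
    else:
        break  # assumption: the diff cuts off before the else-branch body
```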
