
Commit 54568ab

Update to LlamaIndex v0.6.13 (#18)
* update streamlit demos to llama-index v0.6.13
* update flask demo to use llama_index v0.6.13
1 parent 336256b commit 54568ab

File tree

18 files changed: +213 additions, −104 deletions


flask_react/index_server.py

Lines changed: 7 additions & 7 deletions
@@ -6,13 +6,13 @@
 
 from multiprocessing import Lock
 from multiprocessing.managers import BaseManager
-from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex, Document, ServiceContext
+from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex, Document, ServiceContext, StorageContext, load_index_from_storage
 
 index = None
 stored_docs = {}
 lock = Lock()
 
-index_name = "./index.json"
+index_name = "./saved_index"
 pkl_name = "stored_documents.pkl"
 
 
@@ -23,10 +23,10 @@ def initialize_index():
     service_context = ServiceContext.from_defaults(chunk_size_limit=512)
     with lock:
         if os.path.exists(index_name):
-            index = GPTSimpleVectorIndex.load_from_disk(index_name, service_context=service_context)
+            index = load_index_from_storage(StorageContext.from_defaults(persist_dir=index_name), service_context=service_context)
         else:
-            index = GPTSimpleVectorIndex([], service_context=service_context)
-            index.save_to_disk(index_name)
+            index = GPTVectorStoreIndex([], service_context=service_context)
+            index.storage_context.persist(persist_dir=index_name)
         if os.path.exists(pkl_name):
             with open(pkl_name, "rb") as f:
                 stored_docs = pickle.load(f)
@@ -35,7 +35,7 @@ def initialize_index():
 def query_index(query_text):
     """Query the global index."""
     global index
-    response = index.query(query_text)
+    response = index.as_query_engine().query(query_text)
     return response
 
 
@@ -51,7 +51,7 @@ def insert_into_index(doc_file_path, doc_id=None):
         stored_docs[document.doc_id] = document.text[0:200]  # only take the first 200 chars
 
         index.insert(document)
-        index.save_to_disk(index_name)
+        index.storage_context.persist(persist_dir=index_name)
 
         with open(pkl_name, "wb") as f:
            pickle.dump(stored_docs, f)
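
Taken together, the index_server.py changes replace the old single-file persistence (save_to_disk / load_from_disk) with the v0.6 StorageContext API and move queries behind a query engine. Below is a minimal sketch of that flow; the persist directory and the inserted document are illustrative, everything else mirrors the calls in the diff above.

# Minimal sketch of the llama-index 0.6.x persistence and query flow used above;
# the persist directory and the example document are illustrative only.
import os

from llama_index import (
    Document,
    GPTVectorStoreIndex,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)

persist_dir = "./saved_index"
service_context = ServiceContext.from_defaults(chunk_size_limit=512)

if os.path.exists(persist_dir):
    # v0.6 replacement for GPTSimpleVectorIndex.load_from_disk(...)
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context, service_context=service_context)
else:
    # v0.6 replacement for GPTSimpleVectorIndex([], ...) + index.save_to_disk(...)
    index = GPTVectorStoreIndex([], service_context=service_context)
    index.storage_context.persist(persist_dir=persist_dir)

# Inserts still mutate the index in place; persist again to write the update to disk.
index.insert(Document(text="Example text for this sketch."))
index.storage_context.persist(persist_dir=persist_dir)

# Queries now go through a query engine instead of index.query(...).
response = index.as_query_engine().query("What is this example about?")
print(response)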

flask_react/requirements.txt

Lines changed: 3 additions & 3 deletions
@@ -1,5 +1,5 @@
 Flask==2.2.3
 Flask-Cors==3.0.10
-langchain==0.0.128
-llama-index==0.5.4
-PyPDF2==3.0.1
+langchain==0.0.154
+llama-index==0.6.13
+pypdf==3.9.0

streamlit_sql_sandbox/constants.py

Lines changed: 1 addition & 1 deletion
@@ -21,4 +21,4 @@
     "The restaurant KING-KONG had an routine unscheduled inspection on 2023/12/31. "
     "The business achieved a score of 50. We two violations, a high risk "
     "vermin infestation as well as a high risk food holding temperatures."
-)
+)
Lines changed: 4 additions & 2 deletions
@@ -1,4 +1,6 @@
-langchain==0.0.128
-llama-index==0.5.4
+altair==4.2.2
+langchain==0.0.154
+llama-index==0.6.13
 streamlit==1.19.0
 streamlit-chat==0.0.2.2
+transformers==4.29.2

streamlit_sql_sandbox/sql_index.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

streamlit_sql_sandbox/streamlit_demo.py

Lines changed: 100 additions & 48 deletions
@@ -15,13 +15,15 @@
     DEFAULT_BUSINESS_TABLE_DESCRP,
     DEFAULT_VIOLATIONS_TABLE_DESCRP,
     DEFAULT_INSPECTIONS_TABLE_DESCRP,
-    DEFAULT_LC_TOOL_DESCRP
+    DEFAULT_LC_TOOL_DESCRP,
 )
 from utils import get_sql_index_tool, get_llm
 
 
 @st.cache_resource
-def initialize_index(llm_name, model_temperature, table_context_dict, api_key, sql_path=DEFAULT_SQL_PATH):
+def initialize_index(
+    llm_name, model_temperature, table_context_dict, api_key, sql_path=DEFAULT_SQL_PATH
+):
     """Create the GPTSQLStructStoreIndex object."""
     llm = get_llm(llm_name, model_temperature, api_key)
 
@@ -30,78 +32,116 @@ def initialize_index(llm_name, model_temperature, table_context_dict, api_key, s
 
     context_container = None
     if table_context_dict is not None:
-        context_builder = SQLContextContainerBuilder(sql_database, context_dict=table_context_dict)
+        context_builder = SQLContextContainerBuilder(
+            sql_database, context_dict=table_context_dict
+        )
         context_container = context_builder.build_context_container()
-
+
     service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=llm))
-    index = GPTSQLStructStoreIndex([],
-                                   sql_database=sql_database,
-                                   sql_context_container=context_container,
-                                   service_context=service_context)
+    index = GPTSQLStructStoreIndex(
+        [],
+        sql_database=sql_database,
+        sql_context_container=context_container,
+        service_context=service_context,
+    )
 
     return index
 
 
 @st.cache_resource
 def initialize_chain(llm_name, model_temperature, lc_descrp, api_key, _sql_index):
     """Create a (rather hacky) custom agent and sql_index tool."""
-    sql_tool = Tool(name="SQL Index",
-                    func=get_sql_index_tool(_sql_index, _sql_index.sql_context_container.context_dict),
-                    description=lc_descrp)
+    sql_tool = Tool(
+        name="SQL Index",
+        func=get_sql_index_tool(
+            _sql_index, _sql_index.sql_context_container.context_dict
+        ),
+        description=lc_descrp,
+    )
 
     llm = get_llm(llm_name, model_temperature, api_key=api_key)
 
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
-    agent_chain = initialize_agent([sql_tool], llm, agent="chat-conversational-react-description", verbose=True, memory=memory)
+    agent_chain = initialize_agent(
+        [sql_tool],
+        llm,
+        agent="chat-conversational-react-description",
+        verbose=True,
+        memory=memory,
+    )
 
     return agent_chain
 
 
 st.title("🦙 Llama Index SQL Sandbox 🦙")
-st.markdown((
-    "This sandbox uses a sqlite database by default, powered by [Llama Index](https://gpt-index.readthedocs.io/en/latest/index.html) ChatGPT, and LangChain.\n\n"
-    "The database contains information on health violations and inspections at restaurants in San Francisco."
-    "This data is spread across three tables - businesses, inspections, and violations.\n\n"
-    "Using the setup page, you can adjust LLM settings, change the context for the SQL tables, and change the tool description for Langchain."
-    "The other tabs will perform chatbot and text2sql operations.\n\n"
-    "Read more about LlamaIndexes structured data support [here!](https://gpt-index.readthedocs.io/en/latest/guides/tutorials/sql_guide.html)"
-))
+st.markdown(
+    (
+        "This sandbox uses a sqlite database by default, powered by [Llama Index](https://gpt-index.readthedocs.io/en/latest/index.html) ChatGPT, and LangChain.\n\n"
+        "The database contains information on health violations and inspections at restaurants in San Francisco."
+        "This data is spread across three tables - businesses, inspections, and violations.\n\n"
+        "Using the setup page, you can adjust LLM settings, change the context for the SQL tables, and change the tool description for Langchain."
+        "The other tabs will perform chatbot and text2sql operations.\n\n"
+        "Read more about LlamaIndexes structured data support [here!](https://gpt-index.readthedocs.io/en/latest/guides/tutorials/sql_guide.html)"
+    )
+)
 
 
-setup_tab, llama_tab, lc_tab = st.tabs(["Setup", "Llama Index", "Langchain+Llama Index"])
+setup_tab, llama_tab, lc_tab = st.tabs(
+    ["Setup", "Llama Index", "Langchain+Llama Index"]
+)
 
 with setup_tab:
     st.subheader("LLM Setup")
     api_key = st.text_input("Enter your OpenAI API key here", type="password")
-    llm_name = st.selectbox('Which LLM?', ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"])
-    model_temperature = st.slider("LLM Temperature", min_value=0.0, max_value=1.0, step=0.1)
+    llm_name = st.selectbox(
+        "Which LLM?", ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"]
+    )
+    model_temperature = st.slider(
+        "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1
+    )
 
     st.subheader("Table Setup")
-    business_table_descrp = st.text_area("Business table description", value=DEFAULT_BUSINESS_TABLE_DESCRP)
-    violations_table_descrp = st.text_area("Business table description", value=DEFAULT_VIOLATIONS_TABLE_DESCRP)
-    inspections_table_descrp = st.text_area("Business table description", value=DEFAULT_INSPECTIONS_TABLE_DESCRP)
-
-    table_context_dict = {"businesses": business_table_descrp,
-                          "inspections": inspections_table_descrp,
-                          "violations": violations_table_descrp}
-
+    business_table_descrp = st.text_area(
+        "Business table description", value=DEFAULT_BUSINESS_TABLE_DESCRP
+    )
+    violations_table_descrp = st.text_area(
+        "Business table description", value=DEFAULT_VIOLATIONS_TABLE_DESCRP
+    )
+    inspections_table_descrp = st.text_area(
+        "Business table description", value=DEFAULT_INSPECTIONS_TABLE_DESCRP
+    )
+
+    table_context_dict = {
+        "businesses": business_table_descrp,
+        "inspections": inspections_table_descrp,
+        "violations": violations_table_descrp,
+    }
+
    use_table_descrp = st.checkbox("Use table descriptions?", value=True)
    lc_descrp = st.text_area("LangChain Tool Description", value=DEFAULT_LC_TOOL_DESCRP)
 
 with llama_tab:
     st.subheader("Text2SQL with Llama Index")
     if st.button("Initialize Index", key="init_index_1"):
-        st.session_state['llama_index'] = initialize_index(llm_name, model_temperature, table_context_dict if use_table_descrp else None, api_key)
-
+        st.session_state["llama_index"] = initialize_index(
+            llm_name,
+            model_temperature,
+            table_context_dict if use_table_descrp else None,
+            api_key,
+        )
+
     if "llama_index" in st.session_state:
-        query_text = st.text_input("Query:", value="Which restaurant has the most violations?")
+        query_text = st.text_input(
+            "Query:", value="Which restaurant has the most violations?"
+        )
+        use_nl = st.checkbox("Return natural language response?")
         if st.button("Run Query") and query_text:
             with st.spinner("Getting response..."):
                 try:
-                    response = st.session_state['llama_index'].query(query_text)
+                    response = st.session_state["llama_index"].as_query_engine(synthesize_response=use_nl).query(query_text)
                     response_text = str(response)
-                    response_sql = response.extra_info['sql_query']
+                    response_sql = response.extra_info["sql_query"]
                 except Exception as e:
                     response_text = "Error running SQL Query."
                     response_sql = str(e)
@@ -119,19 +159,31 @@ def initialize_chain(llm_name, model_temperature, lc_descrp, api_key, _sql_index
     st.subheader("Langchain + Llama Index SQL Demo")
 
     if st.button("Initialize Agent"):
-        st.session_state['llama_index'] = initialize_index(llm_name, model_temperature, table_context_dict if use_table_descrp else None, api_key)
-        st.session_state['lc_agent'] = initialize_chain(llm_name, model_temperature, lc_descrp, api_key, st.session_state['llama_index'])
-        st.session_state['chat_history'] = []
-
-    model_input = st.text_input("Message:", value="Which restaurant has the most violations?")
-    if 'lc_agent' in st.session_state and st.button("Send"):
+        st.session_state["llama_index"] = initialize_index(
+            llm_name,
+            model_temperature,
+            table_context_dict if use_table_descrp else None,
+            api_key,
+        )
+        st.session_state["lc_agent"] = initialize_chain(
+            llm_name,
+            model_temperature,
+            lc_descrp,
+            api_key,
+            st.session_state["llama_index"],
+        )
+        st.session_state["chat_history"] = []
+
+    model_input = st.text_input(
+        "Message:", value="Which restaurant has the most violations?"
+    )
+    if "lc_agent" in st.session_state and st.button("Send"):
         model_input = "User: " + model_input
-        st.session_state['chat_history'].append(model_input)
+        st.session_state["chat_history"].append(model_input)
         with st.spinner("Getting response..."):
-            response = st.session_state['lc_agent'].run(input=model_input)
-            st.session_state['chat_history'].append(response)
+            response = st.session_state["lc_agent"].run(input=model_input)
+            st.session_state["chat_history"].append(response)
 
-    if 'chat_history' in st.session_state:
-        for msg in st.session_state['chat_history']:
+    if "chat_history" in st.session_state:
+        for msg in st.session_state["chat_history"]:
             st_message(msg.split("User: ")[-1], is_user="User: " in msg)
-
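
The demo's query path now builds a query engine from the SQL index: synthesize_response controls whether the raw SQL result gets an extra natural-language pass, and the executed SQL is still exposed on response.extra_info. Below is a minimal sketch of that call pattern under stated assumptions; the sqlite path and the question are placeholders, the calls themselves mirror the diff above.

# Sketch of the 0.6.x text-to-SQL call pattern the demo migrates to.
# The sqlite path and the question are placeholders.
from langchain import OpenAI
from llama_index import GPTSQLStructStoreIndex, LLMPredictor, ServiceContext, SQLDatabase
from sqlalchemy import create_engine

sql_database = SQLDatabase(create_engine("sqlite:///example.db"))  # placeholder path

service_context = ServiceContext.from_defaults(
    llm_predictor=LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003"))
)
index = GPTSQLStructStoreIndex(
    [], sql_database=sql_database, service_context=service_context
)

# synthesize_response=True adds a natural-language answer on top of the raw SQL result.
query_engine = index.as_query_engine(synthesize_response=True)
response = query_engine.query("Which restaurant has the most violations?")

print(str(response))                     # the answer
print(response.extra_info["sql_query"])  # the SQL that was executed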

streamlit_sql_sandbox/utils.py

Lines changed: 6 additions & 4 deletions
@@ -5,20 +5,22 @@
 
 def get_sql_index_tool(sql_index, table_context_dict):
     table_context_str = "\n".join(table_context_dict.values())
+
     def run_sql_index_query(query_text):
         try:
-            response = sql_index.query(query_text)
+            response = sql_index.as_query_engine(synthesize_response=False).query(query_text)
         except Exception as e:
             return f"Error running SQL {e}.\nNot able to retrieve answer."
         text = str(response)
-        sql = response.extra_info['sql_query']
+        sql = response.extra_info["sql_query"]
         return f"Here are the details on the SQL table: {table_context_str}\nSQL Query Used: {sql}\nSQL Result: {text}\n"
-        #return f"SQL Query Used: {sql}\nSQL Result: {text}\n"
+        # return f"SQL Query Used: {sql}\nSQL Result: {text}\n"
+
     return run_sql_index_query
 
 
 def get_llm(llm_name, model_temperature, api_key):
-    os.environ['OPENAI_API_KEY'] = api_key
+    os.environ["OPENAI_API_KEY"] = api_key
     if llm_name == "text-davinci-003":
         return OpenAI(temperature=model_temperature, model_name=llm_name)
     else:
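
get_sql_index_tool returns a plain function, which the demo then wraps in a LangChain Tool and hands to a conversational agent (see initialize_chain in the streamlit_demo.py diff). Below is a rough sketch of that wiring under stated assumptions: the stubbed query function stands in for the real query-engine call above, and the tool description and model are placeholders.

# Sketch of wrapping the SQL query helper as a LangChain tool for the agent tab.
# The stub body, tool description, and model choice are placeholders.
from langchain.agents import Tool, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory


def run_sql_index_query(query_text: str) -> str:
    # In the demo this calls sql_index.as_query_engine(synthesize_response=False).query(...)
    return "SQL Query Used: ...\nSQL Result: ...\n"


sql_tool = Tool(
    name="SQL Index",
    func=run_sql_index_query,
    description="Useful for answering questions about the inspections database.",  # placeholder
)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
agent_chain = initialize_agent(
    [sql_tool],
    ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
    agent="chat-conversational-react-description",
    verbose=True,
    memory=memory,
)

print(agent_chain.run(input="Which restaurant has the most violations?"))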

streamlit_term_definition/constants.py

Lines changed: 4 additions & 4 deletions
@@ -13,7 +13,7 @@
     "---------------------\n"
     "{context_str}"
     "\n---------------------\n"
-    "Given the context information answer the following question "
+    "Given the context information, directly answer the following question "
     "(if you don't know the answer, use the best of your knowledge): {query_str}\n"
 )
 TEXT_QA_TEMPLATE = QuestionAnswerPrompt(DEFAULT_TEXT_QA_PROMPT_TMPL)
@@ -29,6 +29,7 @@
     "------------\n"
     "Given the new context and using the best of your knowledge, improve the existing answer. "
     "If you can't improve the existing answer, just repeat it again. "
+    "Do not include un-needed or un-helpful information that is shown in the new context. "
     "Do not mention that you've read the above context."
 )
 DEFAULT_REFINE_PROMPT = RefinePrompt(DEFAULT_REFINE_PROMPT_TMPL)
@@ -44,6 +45,7 @@
         "------------\n"
         "Given the new context and using the best of your knowledge, improve the existing answer. "
         "If you can't improve the existing answer, just repeat it again. "
+        "Do not include un-needed or un-helpful information that is shown in the new context. "
         "Do not mention that you've read the above context."
     ),
 ]
@@ -56,9 +58,7 @@
     default_prompt=DEFAULT_REFINE_PROMPT.get_langchain_prompt(),
     conditionals=[(is_chat_model, CHAT_REFINE_PROMPT.get_langchain_prompt())],
 )
-REFINE_TEMPLATE = RefinePrompt(
-    langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC
-)
+REFINE_TEMPLATE = RefinePrompt(langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC)
 
 DEFAULT_TERM_STR = (
     "Make a list of terms and definitions that are defined in the context, "

streamlit_term_definition/initial_index/docstore.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
