From 26607be730c7d407ad8333b9b2614bf6220221d0 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Wed, 18 Oct 2023 21:51:10 +0000 Subject: [PATCH 1/7] [RELEASE]: 0.3.8 --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5557e9fadc..f8ef0b95de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### [Deprecated] ### [Removed] +## [0.3.8] - 2023-10-18 + +* PR #1303: v0.3.8 - new release +* PR #1302: Reenable batch for release +* PR #1301: Add Documentation for UDF Unit Testing and Mocking +* PR #1232: Starting the change for XGBoost integration into EVADb. +* PR #1294: fix: improve testcase +* PR #1293: fix: make the table/function catalog insert operation atomic +* PR #1295: feat: add support for show databases +* PR #1296: feat: function_metadata supports boolean and float +* PR #1290: fix: text_summarization uses drop udf +* PR #1240: Add stable diffusion integration +* PR #1285: Update custom-ai-function.rst +* PR #1234: Added basic functionalities of REST apis +* PR #1281: Clickhouse integration +* PR #1273: Update custom-ai-function.rst +* PR #1274: Fix Notebook and Ray testcases at staging +* PR #1264: SHOW command for retrieveing configurations +* PR #1270: fix: Catalog init introduces significant overhead +* PR #1267: Improve the error message when there is a typo in the column name in the query. 
+* PR #1261: Remove dimensions from `TEXT` and `FLOAT` +* PR #1256: Remove table names from column names for `df +* PR #1253: Collection of fixes for the staging branch +* PR #1246: feat: insertion update index +* PR #1245: Documentation on vector stores + vector benchmark +* PR #1244: feat: create index from projection +* PR #1233: GitHub Data Source Integration +* PR #1115: Add support for Neuralforecast +* PR #1241: Bump Version to v0.3.8+dev +* PR #1239: release 0.3.7 + ## [0.3.7] - 2023-09-30 * PR #1239: release 0.3.7 From 59f85fa81efdcdd90ba935f69614eda9813b1151 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Wed, 18 Oct 2023 21:51:29 +0000 Subject: [PATCH 2/7] [BUMP]: v0.3.9+dev --- evadb/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evadb/version.py b/evadb/version.py index e583161907..671c0bd8a8 100644 --- a/evadb/version.py +++ b/evadb/version.py @@ -1,6 +1,6 @@ _MAJOR = "0" _MINOR = "3" -_REVISION = "8" +_REVISION = "9+dev" VERSION_SHORT = f"{_MAJOR}.{_MINOR}" -VERSION = f"{_MAJOR}.{_MINOR}.{_REVISION}" +VERSION = f"{_MAJOR}.{_MINOR}.{_REVISION}" \ No newline at end of file From 42a3850114f9ca785af4aac7a63a55cd3f9d55d3 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 22 Oct 2023 23:51:04 -0400 Subject: [PATCH 3/7] chekcpoint --- evadb/binder/function_expression_binder.py | 209 +++++++++++++++++++++ evadb/binder/statement_binder.py | 114 +---------- evadb/parser/evadb.lark | 1 + 3 files changed, 214 insertions(+), 110 deletions(-) create mode 100644 evadb/binder/function_expression_binder.py diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py new file mode 100644 index 0000000000..f355362f87 --- /dev/null +++ b/evadb/binder/function_expression_binder.py @@ -0,0 +1,209 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from pathlib import Path + +from evadb.binder.binder_utils import ( + BinderError, + extend_star, + resolve_alias_table_value_expression, +) +from evadb.binder.statement_binder import StatementBinder +from evadb.catalog.catalog_utils import ( + get_metadata_properties, + get_video_table_column_definitions, +) +from evadb.configuration.constants import EvaDB_INSTALLATION_DIR +from evadb.expression.function_expression import FunctionExpression +from evadb.expression.tuple_value_expression import TupleValueExpression +from evadb.parser.types import FunctionType +from evadb.third_party.huggingface.binder import assign_hf_function +from evadb.utils.generic_utils import ( + load_function_class_from_file, + string_comparison_case_insensitive, +) +from evadb.utils.logging_manager import logger + + +def bind_func_expr(binder: StatementBinder, node: FunctionExpression): + + + # handle the special case of "completion or chatgpt" + if string_comparison_case_insensitive(node.name, "chatgpt") or string_comparison_case_insensitive(node.name, "completion"): + handle_bind_llm_function(node, binder) + return + + # handle the special case of "extract_object" + if node.name.upper() == str(FunctionType.EXTRACT_OBJECT): + handle_bind_extract_object_function(node, binder) + return + + # Handle Func(*) + if ( + len(node.children) == 1 + and isinstance(node.children[0], TupleValueExpression) + and node.children[0].name == "*" + ): + node.children = extend_star(binder._binder_context) + # bind all the children + for child in node.children: + binder.bind(child) + + function_obj = 
binder._catalog().get_function_catalog_entry_by_name(node.name) + if function_obj is None: + err_msg = ( + f"Function '{node.name}' does not exist in the catalog. " + "Please create the function using CREATE FUNCTION command." + ) + logger.error(err_msg) + raise BinderError(err_msg) + + if string_comparison_case_insensitive(function_obj.type, "HuggingFace"): + node.function = assign_hf_function(function_obj) + + elif string_comparison_case_insensitive(function_obj.type, "Ludwig"): + function_class = load_function_class_from_file( + function_obj.impl_file_path, + "GenericLudwigModel", + ) + function_metadata = get_metadata_properties(function_obj) + assert "model_path" in function_metadata, "Ludwig models expect 'model_path'." + node.function = lambda: function_class( + model_path=function_metadata["model_path"] + ) + + else: + if function_obj.type == "ultralytics": + # manually set the impl_path for yolo functions we only handle object + # detection for now, hopefully this can be generalized + function_dir = Path(EvaDB_INSTALLATION_DIR) / "functions" + function_obj.impl_file_path = ( + Path(f"{function_dir}/yolo_object_detector.py").absolute().as_posix() + ) + + # Verify the consistency of the function. If the checksum of the function does not + # match the one stored in the catalog, an error will be thrown and the user + # will be asked to register the function again. + # assert ( + # get_file_checksum(function_obj.impl_file_path) == function_obj.checksum + # ), f"""Function file {function_obj.impl_file_path} has been modified from the + # registration. 
Please use DROP FUNCTION to drop it and re-create it # using CREATE FUNCTION.""" + + try: + function_class = load_function_class_from_file( + function_obj.impl_file_path, + function_obj.name, + ) + # certain functions take additional inputs like yolo needs the model_name + # these arguments are passed by the user as part of metadata + node.function = lambda: function_class( + **get_metadata_properties(function_obj) + ) + except Exception as e: + err_msg = ( + f"{str(e)}. Please verify that the function class name in the " + "implementation file matches the function name." + ) + logger.error(err_msg) + raise BinderError(err_msg) + + node.function_obj = function_obj + output_objs = binder._catalog().get_function_io_catalog_output_entries(function_obj) + if node.output: + for obj in output_objs: + if obj.name.lower() == node.output: + node.output_objs = [obj] + if not node.output_objs: + err_msg = f"Output {node.output} does not exist for {function_obj.name}." + logger.error(err_msg) + raise BinderError(err_msg) + node.projection_columns = [node.output] + else: + node.output_objs = output_objs + node.projection_columns = [obj.name.lower() for obj in output_objs] + + resolve_alias_table_value_expression(node) + + +def handle_bind_llm_function() + +def handle_bind_extract_object_function( + node: FunctionExpression, binder_context: StatementBinder +): + """Handles the binding of extract_object function. + 1. Bind the source video data + 2. Create and bind the detector function expression using the provided name. + 3. Create and bind the tracker function expression. + Its inputs are id, data, output of detector. + 4. Bind the EXTRACT_OBJECT function expression and append the new children. + 5. Handle the alias and populate the outputs of the EXTRACT_OBJECT function + + Args: + node (FunctionExpression): The function expression representing the extract object operation. + binder_context (StatementBinder): The binder object used to bind expressions in the statement. 
+ + Raises: + AssertionError: If the number of children in the `node` is not equal to 3. + """ + assert ( + len(node.children) == 3 + ), f"Invalid arguments provided to {node}. Example correct usage, (data, Detector, Tracker)" + + # 1. Bind the source video + video_data = node.children[0] + binder_context.bind(video_data) + + # 2. Construct the detector + # convert detector to FunctionExpression before binding + # eg. YoloV5 -> YoloV5(data) + detector = FunctionExpression(None, node.children[1].name) + detector.append_child(video_data.copy()) + binder_context.bind(detector) + + # 3. Construct the tracker + # convert tracker to FunctionExpression before binding + # eg. ByteTracker -> ByteTracker(id, data, labels, bboxes, scores) + tracker = FunctionExpression(None, node.children[2].name) + # create the video id expression + columns = get_video_table_column_definitions() + tracker.append_child( + TupleValueExpression(name=columns[1].name, table_alias=video_data.table_alias) + ) + tracker.append_child(video_data.copy()) + binder_context.bind(tracker) + # append the bound output of detector + for obj in detector.output_objs: + col_alias = "{}.{}".format(obj.function_name.lower(), obj.name.lower()) + child = TupleValueExpression( + obj.name, + table_alias=obj.function_name.lower(), + col_object=obj, + col_alias=col_alias, + ) + tracker.append_child(child) + + # 4. Bind the EXTRACT_OBJECT expression and append the new children. + node.children = [] + node.children = [video_data, detector, tracker] + + # 5. assign the outputs of tracker to the output of extract_object + node.output_objs = tracker.output_objs + node.projection_columns = [obj.name.lower() for obj in node.output_objs] + + # 5. 
resolve alias based on the what user provided + # we assign the alias to tracker as it governs the output of the extract object + resolve_alias_table_value_expression(node) + tracker.alias = node.alias + + diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index f1e949941c..cbafdad758 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import singledispatchmethod -from pathlib import Path from typing import Callable from evadb.binder.binder_utils import ( @@ -26,14 +25,12 @@ extend_star, get_bound_func_expr_outputs_as_tuple_value_expr, get_column_definition_from_select_target_list, - handle_bind_extract_object_function, - resolve_alias_table_value_expression, ) +from evadb.binder.function_expression_binder import bind_func_expr from evadb.binder.statement_binder_context import StatementBinderContext from evadb.catalog.catalog_type import ColumnType, TableType -from evadb.catalog.catalog_utils import get_metadata_properties, is_document_table +from evadb.catalog.catalog_utils import is_document_table from evadb.catalog.sql_config import RESTRICTED_COL_NAMES -from evadb.configuration.constants import EvaDB_INSTALLATION_DIR from evadb.expression.abstract_expression import AbstractExpression, ExpressionType from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression @@ -46,13 +43,7 @@ from evadb.parser.select_statement import SelectStatement from evadb.parser.statement import AbstractStatement from evadb.parser.table_ref import TableRef -from evadb.parser.types import FunctionType -from evadb.third_party.huggingface.binder import assign_hf_function -from evadb.utils.generic_utils import ( - load_function_class_from_file, - string_comparison_case_insensitive, -) -from evadb.utils.logging_manager 
import logger +from evadb.utils.generic_utils import string_comparison_case_insensitive class StatementBinder: @@ -273,101 +264,4 @@ def _bind_tuple_expr(self, node: TupleValueExpression): @bind.register(FunctionExpression) def _bind_func_expr(self, node: FunctionExpression): - # handle the special case of "extract_object" - if node.name.upper() == str(FunctionType.EXTRACT_OBJECT): - handle_bind_extract_object_function(node, self) - return - - # Handle Func(*) - if ( - len(node.children) == 1 - and isinstance(node.children[0], TupleValueExpression) - and node.children[0].name == "*" - ): - node.children = extend_star(self._binder_context) - # bind all the children - for child in node.children: - self.bind(child) - - function_obj = self._catalog().get_function_catalog_entry_by_name(node.name) - if function_obj is None: - err_msg = ( - f"Function '{node.name}' does not exist in the catalog. " - "Please create the function using CREATE FUNCTION command." - ) - logger.error(err_msg) - raise BinderError(err_msg) - - if string_comparison_case_insensitive(function_obj.type, "HuggingFace"): - node.function = assign_hf_function(function_obj) - - elif string_comparison_case_insensitive(function_obj.type, "Ludwig"): - function_class = load_function_class_from_file( - function_obj.impl_file_path, - "GenericLudwigModel", - ) - function_metadata = get_metadata_properties(function_obj) - assert ( - "model_path" in function_metadata - ), "Ludwig models expect 'model_path'." - node.function = lambda: function_class( - model_path=function_metadata["model_path"] - ) - - else: - if function_obj.type == "ultralytics": - # manually set the impl_path for yolo functions we only handle object - # detection for now, hopefully this can be generalized - function_dir = Path(EvaDB_INSTALLATION_DIR) / "functions" - function_obj.impl_file_path = ( - Path(f"{function_dir}/yolo_object_detector.py") - .absolute() - .as_posix() - ) - - # Verify the consistency of the function. 
If the checksum of the function does not - # match the one stored in the catalog, an error will be thrown and the user - # will be asked to register the function again. - # assert ( - # get_file_checksum(function_obj.impl_file_path) == function_obj.checksum - # ), f"""Function file {function_obj.impl_file_path} has been modified from the - # registration. Please use DROP FUNCTION to drop it and re-create it # using CREATE FUNCTION.""" - - try: - function_class = load_function_class_from_file( - function_obj.impl_file_path, - function_obj.name, - ) - # certain functions take additional inputs like yolo needs the model_name - # these arguments are passed by the user as part of metadata - node.function = lambda: function_class( - **get_metadata_properties(function_obj) - ) - except Exception as e: - err_msg = ( - f"{str(e)}. Please verify that the function class name in the " - "implementation file matches the function name." - ) - logger.error(err_msg) - raise BinderError(err_msg) - - node.function_obj = function_obj - output_objs = self._catalog().get_function_io_catalog_output_entries( - function_obj - ) - if node.output: - for obj in output_objs: - if obj.name.lower() == node.output: - node.output_objs = [obj] - if not node.output_objs: - err_msg = ( - f"Output {node.output} does not exist for {function_obj.name}." - ) - logger.error(err_msg) - raise BinderError(err_msg) - node.projection_columns = [node.output] - else: - node.output_objs = output_objs - node.projection_columns = [obj.name.lower() for obj in output_objs] - - resolve_alias_table_value_expression(node) + bind_func_expr(self, node) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index e834d1a7d0..f44412f726 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -482,6 +482,7 @@ IMPL: "IMPL"i ABS: "ABS"i + // Operators // Operators. 
Assignment From 960d33b4d8bfcc411978afc2443cc33aa94d3cfa Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 23 Oct 2023 17:02:17 -0400 Subject: [PATCH 4/7] fix linter --- evadb/binder/function_expression_binder.py | 30 +++++--------- evadb/binder/statement_binder.py | 2 +- evadb/constants.py | 2 +- evadb/executor/llm_executor.py | 3 +- evadb/expression/expression_utils.py | 7 ++-- evadb/optimizer/operators.py | 9 +---- evadb/optimizer/rules/rules.py | 2 +- evadb/optimizer/rules/rules_manager.py | 2 +- evadb/optimizer/statement_to_opr_converter.py | 39 ++++++------------- evadb/plan_nodes/llm_plan.py | 1 - .../short/test_llm_executor.py | 15 ++----- .../short/test_select_executor.py | 2 - 12 files changed, 35 insertions(+), 79 deletions(-) diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py index d01af6f7e0..b1919bdbdd 100644 --- a/evadb/binder/function_expression_binder.py +++ b/evadb/binder/function_expression_binder.py @@ -25,6 +25,7 @@ get_video_table_column_definitions, ) from evadb.configuration.constants import EvaDB_INSTALLATION_DIR +from evadb.executor.execution_context import Context from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression from evadb.parser.types import FunctionType @@ -34,11 +35,9 @@ string_comparison_case_insensitive, ) from evadb.utils.logging_manager import logger -from evadb.executor.execution_context import Context def bind_func_expr(binder: StatementBinder, node: FunctionExpression): - # setup the context # we read the GPUs from the catalog and populate in the context gpus_ids = binder._catalog().get_configuration_catalog_value("gpu_ids") @@ -49,11 +48,12 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): handle_bind_extract_object_function(node, binder) return - # handle the special case of "completion or chatgpt" - if string_comparison_case_insensitive(node.name, 
"chatgpt") or string_comparison_case_insensitive(node.name, "completion"): + if string_comparison_case_insensitive( + node.name, "chatgpt" + ) or string_comparison_case_insensitive(node.name, "completion"): handle_bind_llm_function(node, binder) - + # Handle Func(*) if ( len(node.children) == 1 @@ -83,9 +83,7 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): "GenericLudwigModel", ) function_metadata = get_metadata_properties(function_obj) - assert ( - "model_path" in function_metadata - ), "Ludwig models expect 'model_path'." + assert "model_path" in function_metadata, "Ludwig models expect 'model_path'." node.function = lambda: function_class( model_path=function_metadata["model_path"] ) @@ -96,9 +94,7 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): # detection for now, hopefully this can be generalized function_dir = Path(EvaDB_INSTALLATION_DIR) / "functions" function_obj.impl_file_path = ( - Path(f"{function_dir}/yolo_object_detector.py") - .absolute() - .as_posix() + Path(f"{function_dir}/yolo_object_detector.py").absolute().as_posix() ) # Verify the consistency of the function. If the checksum of the function does not @@ -128,17 +124,13 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): raise BinderError(err_msg) node.function_obj = function_obj - output_objs = binder._catalog().get_function_io_catalog_output_entries( - function_obj - ) + output_objs = binder._catalog().get_function_io_catalog_output_entries(function_obj) if node.output: for obj in output_objs: if obj.name.lower() == node.output: node.output_objs = [obj] if not node.output_objs: - err_msg = ( - f"Output {node.output} does not exist for {function_obj.name}." - ) + err_msg = f"Output {node.output} does not exist for {function_obj.name}." 
logger.error(err_msg) raise BinderError(err_msg) node.projection_columns = [node.output] @@ -162,8 +154,6 @@ def handle_bind_llm_function(node, binder): properties["openai_api_key"] = openapi_key - - def handle_bind_extract_object_function( node: FunctionExpression, binder_context: StatementBinder ): @@ -231,5 +221,3 @@ def handle_bind_extract_object_function( # we assign the alias to tracker as it governs the output of the extract object resolve_alias_table_value_expression(node) tracker.alias = node.alias - - diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index e73fbd3dc3..e08085a1d8 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -264,5 +264,5 @@ def _bind_tuple_expr(self, node: TupleValueExpression): @bind.register(FunctionExpression) def _bind_func_expr(self, node: FunctionExpression): from evadb.binder.function_expression_binder import bind_func_expr - + bind_func_expr(self, node) diff --git a/evadb/constants.py b/evadb/constants.py index 44af637b19..c8693ca203 100644 --- a/evadb/constants.py +++ b/evadb/constants.py @@ -21,4 +21,4 @@ IFRAMES = "IFRAMES" AUDIORATE = "AUDIORATE" DEFAULT_FUNCTION_EXPRESSION_COST = 100 -LLM_FUNCTIONS = ["chatgpt", "completion"] \ No newline at end of file +LLM_FUNCTIONS = ["chatgpt", "completion"] diff --git a/evadb/executor/llm_executor.py b/evadb/executor/llm_executor.py index 74ae8ef561..86dc2626a1 100644 --- a/evadb/executor/llm_executor.py +++ b/evadb/executor/llm_executor.py @@ -21,7 +21,6 @@ class LLMExecutor(AbstractExecutor): - def __init__(self, db: EvaDBDatabase, node: LLMPlan): super().__init__(db, node) self.llm_expr = node.llm_expr @@ -33,5 +32,5 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]: llm_result = self.llm_expr.evaluate(batch) output = Batch.merge_column_wise([batch, llm_result]) - + yield output diff --git a/evadb/expression/expression_utils.py b/evadb/expression/expression_utils.py index c0a758fd27..5fd413c631 100644 --- 
a/evadb/expression/expression_utils.py +++ b/evadb/expression/expression_utils.py @@ -14,8 +14,8 @@ # limitations under the License. from typing import List, Set -from evadb.constants import LLM_FUNCTIONS +from evadb.constants import LLM_FUNCTIONS from evadb.expression.abstract_expression import AbstractExpression, ExpressionType from evadb.expression.comparison_expression import ComparisonExpression from evadb.expression.constant_value_expression import ConstantValueExpression @@ -304,7 +304,8 @@ def is_llm_expression(expr: AbstractExpression): if isinstance(expr, FunctionExpression) and expr.name.lower() in LLM_FUNCTIONS: return True return False - + + def extract_llm_expressions_from_project(exprs: List[AbstractExpression]): remaining_exprs = [] llm_exprs = [] @@ -313,4 +314,4 @@ def extract_llm_expressions_from_project(exprs: List[AbstractExpression]): llm_exprs.append(expr.copy()) else: remaining_exprs.append(expr) - return llm_exprs, remaining_exprs \ No newline at end of file + return llm_exprs, remaining_exprs diff --git a/evadb/optimizer/operators.py b/evadb/optimizer/operators.py index 0fa330185b..4f756208e5 100644 --- a/evadb/optimizer/operators.py +++ b/evadb/optimizer/operators.py @@ -1273,7 +1273,6 @@ def __hash__(self) -> int: class LogicalLLM(Operator): - def __init__( self, llm_expr: FunctionExpression, @@ -1282,15 +1281,11 @@ def __init__( super().__init__(OperatorType.LOGICAL_LLM, children) self.llm_expr = llm_expr - def __eq__(self, other): is_subtree_equal = super().__eq__(other) if not isinstance(other, LogicalLLM): return False - return ( - is_subtree_equal - and self.llm_expr == other.llm_expr - ) + return is_subtree_equal and self.llm_expr == other.llm_expr def __hash__(self) -> int: return hash( @@ -1298,4 +1293,4 @@ def __hash__(self) -> int: super().__hash__(), self.llm_expr, ) - ) \ No newline at end of file + ) diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index d87c98d2ee..2b82ef4820 100644 --- 
a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -70,8 +70,8 @@ LogicalGroupBy, LogicalInsert, LogicalJoin, - LogicalLLM, LogicalLimit, + LogicalLLM, LogicalLoadData, LogicalOrderBy, LogicalProject, diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index 9dc7ba7626..57169ec78a 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -44,9 +44,9 @@ LogicalInsertToPhysical, LogicalJoinToPhysicalHashJoin, LogicalJoinToPhysicalNestedLoopJoin, - LogicalLLMToPhysical, LogicalLateralJoinToPhysical, LogicalLimitToPhysical, + LogicalLLMToPhysical, LogicalLoadToPhysical, LogicalOrderByToPhysical, LogicalProjectNoTableToPhysical, diff --git a/evadb/optimizer/statement_to_opr_converter.py b/evadb/optimizer/statement_to_opr_converter.py index be1d6785f6..885e156d9f 100644 --- a/evadb/optimizer/statement_to_opr_converter.py +++ b/evadb/optimizer/statement_to_opr_converter.py @@ -14,7 +14,7 @@ # limitations under the License. 
from evadb.binder.binder_utils import get_bound_func_expr_outputs_as_tuple_value_expr from evadb.expression.abstract_expression import AbstractExpression -from evadb.expression.expression_utils import extract_llm_expressions_from_project, to_conjunction_list +from evadb.expression.expression_utils import extract_llm_expressions_from_project from evadb.expression.function_expression import FunctionExpression from evadb.optimizer.operators import ( LogicalCreate, @@ -30,8 +30,8 @@ LogicalGroupBy, LogicalInsert, LogicalJoin, - LogicalLLM, LogicalLimit, + LogicalLLM, LogicalLoadData, LogicalOrderBy, LogicalProject, @@ -61,21 +61,6 @@ from evadb.parser.types import FunctionType, JoinType from evadb.utils.logging_manager import logger -def extract_llm_expressions_from_projection(): - - llm_expr_list = ["chatgpt", "completion"] - # Extracts the LLM calls - projection_columns = [] - llm_exprs = [] - for expr in proj_list: - if isinstance(expr, FunctionExpression) and expr.name.lower() in llm_expr_list: - llm_exprs.append(expr.copy()) - projection_columns.extend( - get_bound_func_expr_outputs_as_tuple_value_expr(expr) - ) - else: - projection_columns.append(expr) - class StatementToPlanConverter: def __init__(self): @@ -245,20 +230,18 @@ def _visit_union(self, target, all): self._plan.append_child(right_child_plan) def _visit_projection(self, select_columns): - def __construct_llm_nodes(llm_exprs): return [LogicalLLM(llm_expr) for llm_expr in llm_exprs] - - llm_exprs, remaining_exprs = extract_llm_expressions_from_project(select_columns) + llm_exprs, remaining_exprs = extract_llm_expressions_from_project( + select_columns + ) llm_nodes = __construct_llm_nodes(llm_exprs) - + select_columns = [] for expr in llm_exprs: - select_columns.extend( - get_bound_func_expr_outputs_as_tuple_value_expr(expr) - ) - + select_columns.extend(get_bound_func_expr_outputs_as_tuple_value_expr(expr)) + select_columns.extend(remaining_exprs) # add llm plan nodes @@ -269,13 +252,13 @@ def 
__construct_llm_nodes(llm_exprs): for expr in llm_exprs[1:]: new_root = LogicalLLM(expr) new_root.append_child(plan_root) - plan_root = new_root + plan_root = new_root self._plan = plan_root - + projection_opr = LogicalProject(select_columns) if self._plan is not None: projection_opr.append_child(self._plan) - + self._plan = projection_opr def _visit_select_predicate(self, predicate: AbstractExpression): diff --git a/evadb/plan_nodes/llm_plan.py b/evadb/plan_nodes/llm_plan.py index c0044a9d01..ead61ad5aa 100644 --- a/evadb/plan_nodes/llm_plan.py +++ b/evadb/plan_nodes/llm_plan.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. from evadb.expression.function_expression import FunctionExpression -from evadb.parser.alias import Alias from evadb.plan_nodes.abstract_plan import AbstractPlan from evadb.plan_nodes.types import PlanOprType diff --git a/test/integration_tests/short/test_llm_executor.py b/test/integration_tests/short/test_llm_executor.py index b64e7a1727..0a17454510 100644 --- a/test/integration_tests/short/test_llm_executor.py +++ b/test/integration_tests/short/test_llm_executor.py @@ -13,13 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import unittest -from evadb.plan_nodes.llm_plan import LLMPlan -from test.util import ( # file_remove, - DummyLLM, - create_dummy_4d_batches, - create_dummy_batches, - create_sample_video, - create_table, +from test.util import ( file_remove, get_evadb_for_testing, get_logical_query_plan, @@ -28,13 +22,12 @@ shutdown_ray, ) -import numpy as np import pandas as pd import pytest -from evadb.binder.binder_utils import BinderError from evadb.models.storage.batch import Batch -from evadb.optimizer.operators import LogicalFilter, LogicalLLM, OperatorType +from evadb.optimizer.operators import LogicalLLM +from evadb.plan_nodes.llm_plan import LLMPlan from evadb.server.command_handler import execute_query_fetch_all NUM_FRAMES = 10 @@ -45,7 +38,7 @@ class LLMExecutorTest(unittest.TestCase): @classmethod def setUpClass(cls): # add DummyLLM to LLM_FUNCTIONS, CACHEABLE_FUNCTIONS - from evadb.constants import LLM_FUNCTIONS, CACHEABLE_FUNCTIONS + from evadb.constants import CACHEABLE_FUNCTIONS, LLM_FUNCTIONS LLM_FUNCTIONS += ["DummyLLM".lower()] CACHEABLE_FUNCTIONS += ["DummyLLM".lower()] diff --git a/test/integration_tests/short/test_select_executor.py b/test/integration_tests/short/test_select_executor.py index 6e901dfc20..b108150458 100644 --- a/test/integration_tests/short/test_select_executor.py +++ b/test/integration_tests/short/test_select_executor.py @@ -452,5 +452,3 @@ def test_function_with_no_input_arguments(self): pd.DataFrame([{"dummynoinputfunction.label": "DummyNoInputFunction"}]) ) self.assertEqual(actual_batch, expected) - - From affa8ecd50aaf1280317524873e19ae8b4461b23 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 23 Oct 2023 17:04:09 -0400 Subject: [PATCH 5/7] revert changes --- evadb/parser/evadb.lark | 3 +-- evadb/version.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index e645727acf..e834d1a7d0 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ 
-84,7 +84,7 @@ set_statement: SET config_name (EQUAL_SYMBOL | TO) config_value config_name: uid -config_value: constant +config_value: (string_literal | decimal_literal | boolean_literal | real_literal) // Data Manipulation Language @@ -482,7 +482,6 @@ IMPL: "IMPL"i ABS: "ABS"i - // Operators // Operators. Assignment diff --git a/evadb/version.py b/evadb/version.py index 671c0bd8a8..c6ac2fe6fb 100644 --- a/evadb/version.py +++ b/evadb/version.py @@ -3,4 +3,4 @@ _REVISION = "9+dev" VERSION_SHORT = f"{_MAJOR}.{_MINOR}" -VERSION = f"{_MAJOR}.{_MINOR}.{_REVISION}" \ No newline at end of file +VERSION = f"{_MAJOR}.{_MINOR}.{_REVISION}" From 55d172794f92ec9c8ea100f78f27ba0bf5abb91f Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 23 Oct 2023 17:05:16 -0400 Subject: [PATCH 6/7] revert --- evadb/parser/evadb.lark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index e834d1a7d0..d3f455fbb6 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -84,7 +84,7 @@ set_statement: SET config_name (EQUAL_SYMBOL | TO) config_value config_name: uid -config_value: (string_literal | decimal_literal | boolean_literal | real_literal) +config_value: constant // Data Manipulation Language From 8414c3e27859bfbd92266878f0932b47a1e6c4e7 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Wed, 25 Oct 2023 15:37:54 -0400 Subject: [PATCH 7/7] llm cost added --- evadb/functions/llms/base.py | 91 ++++++++++++++++++++ evadb/functions/llms/llm_stats.json | 128 ++++++++++++++++++++++++++++ evadb/functions/llms/openai.py | 113 ++++++++++++++++++++++++ evadb/utils/generic_utils.py | 10 +++ setup.py | 19 +++-- 5 files changed, 355 insertions(+), 6 deletions(-) create mode 100644 evadb/functions/llms/base.py create mode 100644 evadb/functions/llms/llm_stats.json create mode 100644 evadb/functions/llms/openai.py diff --git a/evadb/functions/llms/base.py b/evadb/functions/llms/base.py new file mode 100644 
index 0000000000..726552b7ba --- /dev/null +++ b/evadb/functions/llms/base.py @@ -0,0 +1,91 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import json +import os +from abc import abstractmethod +from typing import List + +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe + + +class BaseLLM(AbstractFunction): + """ """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.model_stats = None + + @setup(cacheable=True, function_type="chat-completion", batchable=True) + def setup(self, *args, **kwargs) -> None: + super().setup(*args, **kwargs) + + @forward( + input_signatures=[ + PandasDataframe( + columns=["prompt"], + column_types=[ + NdArrayType.STR, + ], + column_shapes=[(None,)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["response"], + column_types=[ + NdArrayType.STR, + ], + column_shapes=[(None,)], + ) + ], + ) + def forward(self, text_df): + prompts = text_df[text_df.columns[0]] + responses = self.generate(prompts) + return pd.DataFrame({"response": responses}) + + @abstractmethod + def generate(self, prompts: List[str]) -> List[str]: + """ + All the child classes should overload this function + """ + raise 
NotImplementedError + + @abstractmethod + def get_cost(self, prompt: str, response: str = "") -> "tuple[tuple, float]": + """ + Return the token usage as a tuple of (input_token_usage, output_token_usage) and the dollar cost of running the LLM on the prompt and getting the provided response. + """ + pass + + def get_model_stats(self, model_name: str): + # read the statistics if not already read + if self.model_stats is None: + current_file_path = os.path.dirname(os.path.realpath(__file__)) + with open(f"{current_file_path}/llm_stats.json") as f: + self.model_stats = json.load(f) + + assert ( + model_name in self.model_stats + ), f"we do not have statistics for the model {model_name}" + + return self.model_stats[model_name] diff --git a/evadb/functions/llms/llm_stats.json b/evadb/functions/llms/llm_stats.json new file mode 100644 index 0000000000..4910fe4cbd --- /dev/null +++ b/evadb/functions/llms/llm_stats.json @@ -0,0 +1,128 @@ +{ + "gpt-4": { + "max_token_context": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "provider": "openai", + "mode": "chat" + }, + "gpt-4-0314": { + "max_token_context": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "provider": "openai", + "mode": "chat" + }, + "gpt-4-0613": { + "max_token_context": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "provider": "openai", + "mode": "chat" + }, + "gpt-4-32k": { + "max_token_context": 32768, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "provider": "openai", + "mode": "chat" + }, + "gpt-4-32k-0314": { + "max_token_context": 32768, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "provider": "openai", + "mode": "chat" + }, + "gpt-4-32k-0613": { + "max_token_context": 32768, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "provider": "openai", + "mode": "chat" + }, + "gpt-3.5-turbo": { + "max_token_context": 4097, + 
"input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "chat" + }, + "gpt-3.5-turbo-0301": { + "max_token_context": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "chat" + }, + "gpt-3.5-turbo-0613": { + "max_token_context": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "chat" + }, + "gpt-3.5-turbo-16k": { + "max_token_context": 16385, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "provider": "openai", + "mode": "chat" + }, + "gpt-3.5-turbo-16k-0613": { + "max_token_context": 16385, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "provider": "openai", + "mode": "chat" + }, + "text-davinci-003": { + "max_token_context": 4097, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "completion" + }, + "text-curie-001": { + "max_token_context": 2049, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "completion" + }, + "text-babbage-001": { + "max_token_context": 2049, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "provider": "openai", + "mode": "completion" + }, + "text-ada-001": { + "max_token_context": 2049, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "provider": "openai", + "mode": "completion" + }, + "babbage-002": { + "max_token_context": 16384, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "provider": "openai", + "mode": "completion" + }, + "davinci-002": { + "max_token_context": 16384, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "completion" + }, + "gpt-3.5-turbo-instruct": { + "max_token_context": 8192, + "input_cost_per_token": 0.0000015, + 
"output_cost_per_token": 0.000002, + "provider": "openai", + "mode": "completion" + } +} \ No newline at end of file diff --git a/evadb/functions/llms/openai.py b/evadb/functions/llms/openai.py new file mode 100644 index 0000000000..240c98c7d7 --- /dev/null +++ b/evadb/functions/llms/openai.py @@ -0,0 +1,113 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +from typing import List + +from retry import retry + +from evadb.functions.llms.base import BaseLLM +from evadb.utils.generic_utils import ( + try_to_import_openai, + try_to_import_tiktoken, + validate_kwargs, +) + +_DEFAULT_MODEL = "gpt-3.5-turbo" +_DEFAULT_PARAMS = { + "model": _DEFAULT_MODEL, + "temperature": 0.0, + "request_timeout": 30, + "max_tokens": 1000, +} + + +class OpenAILLM(BaseLLM): + @property + def name(self) -> str: + return "OpenAILLM" + + def setup( + self, + openai_api_key="", + **kwargs, + ) -> None: + super().setup(**kwargs) + + try_to_import_openai() + import openai + + openai.api_key = openai_api_key + if len(openai.api_key) == 0: + openai.api_key = os.environ.get("OPENAI_API_KEY", "") + assert ( + len(openai.api_key) != 0 + ), "Please set your OpenAI API key using SET OPENAI_API_KEY = 'sk-' or environment variable (OPENAI_API_KEY)" + + validate_kwargs(kwargs, allowed_keys=_DEFAULT_PARAMS.keys()) + self.model_params = {**_DEFAULT_PARAMS, **kwargs} + + self.model_name = self.model_params["model"] + + def generate(self, prompts: 
List[str]) -> List[str]: + import openai + + @retry(tries=6, delay=20) + def completion_with_backoff(**kwargs): + return openai.ChatCompletion.create(**kwargs) + + results = [] + + for prompt in prompts: + def_sys_prompt_message = { + "role": "system", + "content": "You are a helpful assistant that accomplishes user tasks.", + } + + self.model_params["messages"] = [def_sys_prompt_message] + self.model_params["messages"].extend( + [ + { + "role": "user", + "content": f"Complete the following task: {prompt}", + }, + ], + ) + + response = completion_with_backoff(**self.model_params) + answer = response.choices[0].message.content + results.append(answer) + + return results + + def get_cost(self, prompt: str, response: str = ""): + try_to_import_tiktoken() + import tiktoken + + encoding = tiktoken.encoding_for_model(self.model_name) + num_prompt_tokens = len(encoding.encode(prompt)) + num_response_tokens = self.model_params["max_tokens"] + if response: + num_response_tokens = len(encoding.encode(response)) + + model_stats = self.get_model_stats(self.model_name) + + token_consumed = (num_prompt_tokens, num_response_tokens) + dollar_cost = ( + model_stats["input_cost_per_token"] * num_prompt_tokens + + model_stats["output_cost_per_token"] * num_response_tokens + ) + return token_consumed, dollar_cost diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index fb6bd9986a..3a1a75ee72 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -526,6 +526,16 @@ def try_to_import_openai(): ) + +def try_to_import_tiktoken(): + try: + import tiktoken # noqa: F401 + except ImportError: + raise ValueError( + """Could not import tiktoken python package. 
+ Please install them with `pip install tiktoken`.""" + ) + + def try_to_import_langchain(): try: import langchain # noqa: F401 diff --git a/setup.py b/setup.py index a18796d847..ca2f86d41c 100644 --- a/setup.py +++ b/setup.py @@ -81,6 +81,7 @@ def read(path, encoding="utf-8"): "protobuf", "bs4", "openai>=0.27.4", # CHATGPT + "tiktoken >= 0.3.3", # For calculating tokens "gpt4all", # PRIVATE GPT "sentencepiece", # TRANSFORMERS ] @@ -123,13 +124,11 @@ def read(path, encoding="utf-8"): xgboost_libs = ["flaml[automl]"] forecasting_libs = [ - "statsforecast", # MODEL TRAIN AND FINE TUNING - "neuralforecast" # MODEL TRAIN AND FINE TUNING + "statsforecast", # MODEL TRAIN AND FINE TUNING + "neuralforecast", # MODEL TRAIN AND FINE TUNING ] -imagegen_libs = [ - "replicate" -] +imagegen_libs = ["replicate"] ### NEEDED FOR DEVELOPER TESTING ONLY @@ -174,7 +173,15 @@ def read(path, encoding="utf-8"): "xgboost": xgboost_libs, "forecasting": forecasting_libs, # everything except ray, qdrant, ludwig and postgres. The first three fail on pyhton 3.11. - "dev": dev_libs + vision_libs + document_libs + function_libs + notebook_libs + forecasting_libs + sklearn_libs + imagegen_libs + xgboost_libs + "dev": dev_libs + + vision_libs + + document_libs + + function_libs + + notebook_libs + + forecasting_libs + + sklearn_libs + + imagegen_libs + + xgboost_libs, } setup(