diff --git a/README.md b/README.md
index 8c3af2373..e1b48a08c 100644
--- a/README.md
+++ b/README.md
@@ -173,7 +173,6 @@
 A `Microservices` can be created by using the decorator `register_microservice`.
 ```python
 from langchain_community.embeddings import HuggingFaceHubEmbeddings
-from langsmith import traceable
 from comps import register_microservice, EmbedDoc, ServiceType, TextDoc
@@ -187,7 +186,6 @@ from comps import register_microservice, EmbedDoc, ServiceType, TextDoc
     input_datatype=TextDoc,
     output_datatype=EmbedDoc,
 )
-@traceable(run_type="embedding")
 def embedding(input: TextDoc) -> EmbedDoc:
     embed_vector = embeddings.embed_query(input.text)
     res = EmbedDoc(text=input.text, embedding=embed_vector)
diff --git a/comps/agent/langchain/requirements.txt b/comps/agent/langchain/requirements.txt
index 16859ac2c..fa1c4423b 100644
--- a/comps/agent/langchain/requirements.txt
+++ b/comps/agent/langchain/requirements.txt
@@ -11,7 +11,6 @@ langchain-openai
 langchain_community
 langchainhub
 langgraph
-langsmith
 numpy
 # used by cloud native
diff --git a/comps/dataprep/milvus/prepare_doc_milvus.py b/comps/dataprep/milvus/prepare_doc_milvus.py
index 143010c06..a293963c4 100644
--- a/comps/dataprep/milvus/prepare_doc_milvus.py
+++ b/comps/dataprep/milvus/prepare_doc_milvus.py
@@ -22,7 +22,6 @@
 from langchain_core.documents import Document
 from langchain_milvus.vectorstores import Milvus
 from langchain_text_splitters import HTMLHeaderTextSplitter
-from langsmith import traceable
 from pyspark import SparkConf, SparkContext
 from comps import DocPath, opea_microservices, register_microservice
@@ -167,7 +166,6 @@ async def ingest_link_to_milvus(link_list: List[str]):
 @register_microservice(name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep", host="0.0.0.0", port=6010)
-@traceable(run_type="tool")
 async def ingest_documents(
     files: Optional[Union[UploadFile, List[UploadFile]]] = File(None),
     link_list: Optional[str] = Form(None),
@@ -239,7 +237,6 @@ def process_files_wrapper(files):
 @register_microservice(
     name="opea_service@prepare_doc_milvus_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6011
 )
-@traceable(run_type="tool")
 async def rag_get_file_structure():
     print("[ dataprep - get file ] start to get file structure")
@@ -270,7 +267,6 @@ def delete_by_partition_field(my_milvus, partition_field):
 @register_microservice(
     name="opea_service@prepare_doc_milvus_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6012
 )
-@traceable(run_type="tool")
 async def delete_single_file(file_path: str = Body(..., embed=True)):
     """Delete file according to `file_path`.
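Note: after this change the embedding microservice example in the README no longer imports `langsmith` or applies `@traceable`. A minimal sketch of the resulting service is shown below; the service name, port, and TEI endpoint URL are illustrative placeholders, not values taken from this diff.

```python
from langchain_community.embeddings import HuggingFaceHubEmbeddings

from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice

# Placeholder embedding endpoint; point this at a real TEI/embedding server.
embeddings = HuggingFaceHubEmbeddings(model="http://localhost:8090")


@register_microservice(
    name="embedding",  # illustrative name
    service_type=ServiceType.EMBEDDING,
    endpoint="/v1/embeddings",
    host="0.0.0.0",
    port=6000,
    input_datatype=TextDoc,
    output_datatype=EmbedDoc,
)
def embedding(input: TextDoc) -> EmbedDoc:
    embed_vector = embeddings.embed_query(input.text)
    res = EmbedDoc(text=input.text, embedding=embed_vector)
    return res


if __name__ == "__main__":
    opea_microservices["embedding"].start()
```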
diff --git a/comps/dataprep/milvus/requirements.txt b/comps/dataprep/milvus/requirements.txt
index cf088a1c0..48b8be1d9 100644
--- a/comps/dataprep/milvus/requirements.txt
+++ b/comps/dataprep/milvus/requirements.txt
@@ -9,7 +9,6 @@ langchain
 langchain-community
 langchain-text-splitters
 langchain_milvus
-langsmith
 markdown
 numpy
 openai
diff --git a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py
index efb394991..f46e466ba 100644
--- a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py
+++ b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py
@@ -13,7 +13,6 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import PGVector
-from langsmith import traceable
 from comps import DocPath, opea_microservices, register_microservice
 from comps.dataprep.utils import (
@@ -174,7 +173,6 @@ async def ingest_link_to_pgvector(link_list: List[str]):
     host="0.0.0.0",
     port=6007,
 )
-@traceable(run_type="tool")
 async def ingest_documents(
     files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None)
 ):
@@ -214,7 +212,6 @@ async def ingest_documents(
 @register_microservice(
     name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007
 )
-@traceable(run_type="tool")
 async def rag_get_file_structure():
     print("[ dataprep - get file ] start to get file structure")
@@ -229,7 +226,6 @@ async def rag_get_file_structure():
 @register_microservice(
     name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007
 )
-@traceable(run_type="tool")
 async def delete_single_file(file_path: str = Body(..., embed=True)):
     """Delete file according to `file_path`.
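For context, the dataprep endpoints touched above are plain FastAPI multipart endpoints, so dropping `@traceable` does not change how they are called. A hypothetical client call against the pgvector ingest service, sketched from the signature above (fields `files` and `link_list`, port 6007), could look like this; URL and file name are placeholders.

```python
import requests

# Upload a local document for ingestion (matches the File(None) parameter).
with open("sample.txt", "rb") as f:
    resp = requests.post(
        "http://localhost:6007/v1/dataprep",
        files={"files": ("sample.txt", f)},
    )
print(resp.status_code, resp.text)

# Alternatively, ingest web pages via the Form(None) field instead of a file:
# requests.post("http://localhost:6007/v1/dataprep",
#               data={"link_list": '["https://example.com"]'})
```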
diff --git a/comps/dataprep/pgvector/langchain/requirements.txt b/comps/dataprep/pgvector/langchain/requirements.txt
index 84fd48e52..b5b4f168b 100644
--- a/comps/dataprep/pgvector/langchain/requirements.txt
+++ b/comps/dataprep/pgvector/langchain/requirements.txt
@@ -8,7 +8,6 @@ huggingface_hub
 langchain
 langchain-community
 langchain-text-splitters
-langsmith
 markdown
 numpy
 opentelemetry-api
diff --git a/comps/dataprep/pinecone/prepare_doc_pinecone.py b/comps/dataprep/pinecone/prepare_doc_pinecone.py
index ec0e200a2..cbee8cd94 100644
--- a/comps/dataprep/pinecone/prepare_doc_pinecone.py
+++ b/comps/dataprep/pinecone/prepare_doc_pinecone.py
@@ -14,7 +14,6 @@
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings
 from langchain_pinecone import PineconeVectorStore
 from langchain_text_splitters import HTMLHeaderTextSplitter
-from langsmith import traceable
 from pinecone import Pinecone, ServerlessSpec
 from comps import DocPath, opea_microservices, opea_telemetry, register_microservice
@@ -172,7 +171,6 @@ async def ingest_link_to_pinecone(link_list: List[str]):
 @register_microservice(name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep", host="0.0.0.0", port=6007)
-@traceable(run_type="tool")
 async def ingest_documents(
     files: Optional[Union[UploadFile, List[UploadFile]]] = File(None),
     link_list: Optional[str] = Form(None),
@@ -223,7 +221,6 @@ async def ingest_documents(
 @register_microservice(
     name="opea_service@prepare_doc_pinecone_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008
 )
-@traceable(run_type="tool")
 async def rag_get_file_structure():
     print("[ dataprep - get file ] start to get file structure")
@@ -238,7 +235,6 @@ async def rag_get_file_structure():
 @register_microservice(
     name="opea_service@prepare_doc_pinecone_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009
 )
-@traceable(run_type="tool")
 async def delete_all(file_path: str = Body(..., embed=True)):
     """Delete file according to `file_path`.
diff --git a/comps/dataprep/pinecone/requirements.txt b/comps/dataprep/pinecone/requirements.txt
index a2d5c4298..06ab12ecd 100644
--- a/comps/dataprep/pinecone/requirements.txt
+++ b/comps/dataprep/pinecone/requirements.txt
@@ -10,7 +10,6 @@ langchain-community
 langchain-openai
 langchain-pinecone
 langchain-text-splitters
-langsmith
 markdown
 numpy
 opentelemetry-api
diff --git a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py
index c55165061..07ad81da8 100644
--- a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py
+++ b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py
@@ -26,7 +26,6 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import Redis
-from langsmith import traceable
 cur_path = pathlib.Path(__file__).parent.resolve()
 comps_path = os.path.join(cur_path, "../../../../")
@@ -322,7 +321,6 @@ async def ingest_documents(files: List[UploadFile] = File(None), link_list: str
 @register_microservice(
     name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008
 )
-@traceable(run_type="tool")
 async def rag_get_file_structure():
     print("[ get_file_structure] ")
@@ -337,7 +335,6 @@ async def rag_get_file_structure():
 @register_microservice(
     name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009
 )
-@traceable(run_type="tool")
 async def delete_single_file(file_path: str = Body(..., embed=True)):
     """Delete file according to `file_path`.
diff --git a/comps/dataprep/redis/langchain_ray/requirements.txt b/comps/dataprep/redis/langchain_ray/requirements.txt
index 50d57b20e..a77ba5c4d 100644
--- a/comps/dataprep/redis/langchain_ray/requirements.txt
+++ b/comps/dataprep/redis/langchain_ray/requirements.txt
@@ -7,7 +7,6 @@ fastapi
 huggingface_hub
 langchain
 langchain-community
-langsmith
 numpy
 opentelemetry-api
 opentelemetry-exporter-otlp
diff --git a/comps/dataprep/redis/llama_index/prepare_doc_redis.py b/comps/dataprep/redis/llama_index/prepare_doc_redis.py
index da176555b..ae4d00461 100644
--- a/comps/dataprep/redis/llama_index/prepare_doc_redis.py
+++ b/comps/dataprep/redis/llama_index/prepare_doc_redis.py
@@ -8,7 +8,6 @@
 from config import EMBED_MODEL, INDEX_NAME, REDIS_URL
 from fastapi import Body, File, HTTPException, UploadFile
-from langsmith import traceable
 from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
 from llama_index.core.settings import Settings
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
@@ -55,7 +54,6 @@ async def ingest_data_to_redis(doc_path: DocPath):
 @register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007)
-@traceable(run_type="tool")
 # llama index only support upload files now
 async def ingest_documents(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)):
     print(f"files:{files}")
@@ -81,7 +79,6 @@ async def ingest_documents(files: Optional[Union[UploadFile, List[UploadFile]]]
 @register_microservice(
     name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008
 )
-@traceable(run_type="tool")
 async def rag_get_file_structure():
     print("[ get_file_structure] ")
@@ -96,7 +93,6 @@ async def rag_get_file_structure():
 @register_microservice(
     name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009
 )
-@traceable(run_type="tool")
 async def delete_single_file(file_path: str = Body(..., embed=True)):
     """Delete file according to `file_path`.
diff --git a/comps/dataprep/redis/llama_index/requirements.txt b/comps/dataprep/redis/llama_index/requirements.txt
index ad75869c1..2f808e534 100644
--- a/comps/dataprep/redis/llama_index/requirements.txt
+++ b/comps/dataprep/redis/llama_index/requirements.txt
@@ -1,7 +1,6 @@
 docarray[full]
 fastapi
 huggingface_hub
-langsmith
 llama-index
 llama-index-embeddings-huggingface==0.2.0
 llama-index-readers-file
diff --git a/comps/embeddings/langchain-mosec/embedding_mosec.py b/comps/embeddings/langchain-mosec/embedding_mosec.py
index f13c23eca..702b1937c 100644
--- a/comps/embeddings/langchain-mosec/embedding_mosec.py
+++ b/comps/embeddings/langchain-mosec/embedding_mosec.py
@@ -6,7 +6,6 @@
 from typing import List, Optional
 from langchain_community.embeddings import OpenAIEmbeddings
-from langsmith import traceable
 from comps import (
     EmbedDoc,
@@ -53,7 +52,6 @@ def empty_embedding() -> List[float]:
     input_datatype=TextDoc,
     output_datatype=EmbedDoc,
 )
-@traceable(run_type="embedding")
 @register_statistics(names=["opea_service@embedding_mosec"])
 def embedding(input: TextDoc) -> EmbedDoc:
     start = time.time()
diff --git a/comps/embeddings/langchain/embedding_tei.py b/comps/embeddings/langchain/embedding_tei.py
index 6a5fbc614..a318ff0bf 100644
--- a/comps/embeddings/langchain/embedding_tei.py
+++ b/comps/embeddings/langchain/embedding_tei.py
@@ -6,7 +6,6 @@
 from typing import Union
 from langchain_huggingface import HuggingFaceEndpointEmbeddings
-from langsmith import traceable
 from comps import (
     EmbedDoc,
@@ -32,7 +31,6 @@
     host="0.0.0.0",
     port=6000,
 )
-@traceable(run_type="embedding")
 @register_statistics(names=["opea_service@embedding_tei_langchain"])
 def embedding(
     input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
diff --git a/comps/embeddings/langchain/requirements.txt b/comps/embeddings/langchain/requirements.txt
index 8f0dd3ad4..1bfe6f44c 100644
--- a/comps/embeddings/langchain/requirements.txt
+++ b/comps/embeddings/langchain/requirements.txt
@@ -3,7 +3,6 @@ fastapi
 huggingface_hub
 langchain
 langchain_huggingface
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/embeddings/llama_index/embedding_tei.py b/comps/embeddings/llama_index/embedding_tei.py
index 4f3920d32..9042a61a7 100644
--- a/comps/embeddings/llama_index/embedding_tei.py
+++ b/comps/embeddings/llama_index/embedding_tei.py
@@ -3,7 +3,6 @@
 import os
-from langsmith import traceable
 from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
 from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
@@ -18,7 +17,6 @@
     input_datatype=TextDoc,
     output_datatype=EmbedDoc,
 )
-@traceable(run_type="embedding")
 def embedding(input: TextDoc) -> EmbedDoc:
     embed_vector = embeddings._get_query_embedding(input.text)
     res = EmbedDoc(text=input.text, embedding=embed_vector)
diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py
index f6a69afaf..53cc30e15 100644
--- a/comps/embeddings/llama_index/local_embedding.py
+++ b/comps/embeddings/llama_index/local_embedding.py
@@ -1,7 +1,6 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-from langsmith import traceable
 from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding
 from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
@@ -16,7 +15,6 @@
     input_datatype=TextDoc,
     output_datatype=EmbedDoc,
 )
-@traceable(run_type="embedding")
 def embedding(input: TextDoc) -> EmbedDoc:
     embed_vector = embeddings.get_text_embedding(input.text)
     res = EmbedDoc(text=input.text, embedding=embed_vector)
diff --git a/comps/embeddings/llama_index/requirements.txt b/comps/embeddings/llama_index/requirements.txt
index 908c38b06..4f1457e4a 100644
--- a/comps/embeddings/llama_index/requirements.txt
+++ b/comps/embeddings/llama_index/requirements.txt
@@ -1,7 +1,6 @@
 docarray[full]
 fastapi
 huggingface_hub
-langsmith
 llama-index-embeddings-huggingface-api
 llama-index-embeddings-text-embeddings-inference
 opentelemetry-api
diff --git a/comps/guardrails/llama_guard/guardrails_tgi.py b/comps/guardrails/llama_guard/guardrails_tgi.py
index b415876ed..93b046e97 100644
--- a/comps/guardrails/llama_guard/guardrails_tgi.py
+++ b/comps/guardrails/llama_guard/guardrails_tgi.py
@@ -7,7 +7,6 @@
 from langchain_community.utilities.requests import JsonRequestsWrapper
 from langchain_huggingface import ChatHuggingFace
 from langchain_huggingface.llms import HuggingFaceEndpoint
-from langsmith import traceable
 from comps import GeneratedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
@@ -63,7 +62,6 @@ def get_tgi_service_model_id(endpoint_url, default=DEFAULT_MODEL):
     input_datatype=Union[GeneratedDoc, TextDoc],
     output_datatype=TextDoc,
 )
-@traceable(run_type="llm")
 def safety_guard(input: Union[GeneratedDoc, TextDoc]) -> TextDoc:
     if isinstance(input, GeneratedDoc):
         messages = [{"role": "user", "content": input.prompt}, {"role": "assistant", "content": input.text}]
diff --git a/comps/guardrails/llama_guard/requirements.txt b/comps/guardrails/llama_guard/requirements.txt
index 5eda60170..9f35ee74d 100644
--- a/comps/guardrails/llama_guard/requirements.txt
+++ b/comps/guardrails/llama_guard/requirements.txt
@@ -4,7 +4,6 @@ fastapi
 huggingface-hub<=0.24.0
 langchain-community
 langchain-huggingface
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/guardrails/pii_detection/pii_detection.py b/comps/guardrails/pii_detection/pii_detection.py
index feecf3baf..1ae0dddae 100644
--- a/comps/guardrails/pii_detection/pii_detection.py
+++ b/comps/guardrails/pii_detection/pii_detection.py
@@ -8,7 +8,6 @@
 from pathlib import Path
 from fastapi import File, Form, HTTPException, UploadFile
-from langsmith import traceable
 cur_path = pathlib.Path(__file__).parent.resolve()
 comps_path = os.path.join(cur_path, "../../../")
diff --git a/comps/guardrails/pii_detection/requirements.txt b/comps/guardrails/pii_detection/requirements.txt
index e9bb7ba66..bc95de0fd 100644
--- a/comps/guardrails/pii_detection/requirements.txt
+++ b/comps/guardrails/pii_detection/requirements.txt
@@ -8,7 +8,6 @@ gibberish-detector
 huggingface_hub
 langchain
 langchain-community
-langsmith
 numpy
 opentelemetry-api
 opentelemetry-exporter-otlp
diff --git a/comps/knowledgegraphs/langchain/knowledge_graph.py b/comps/knowledgegraphs/langchain/knowledge_graph.py
index 9ed2c5b65..01d1a5a5a 100755
--- a/comps/knowledgegraphs/langchain/knowledge_graph.py
+++ b/comps/knowledgegraphs/langchain/knowledge_graph.py
@@ -22,7 +22,6 @@
 from langchain_community.graphs import Neo4jGraph
 from langchain_community.llms import HuggingFaceEndpoint
 from langchain_community.vectorstores.neo4j_vector import Neo4jVector
-from langsmith import traceable
 from comps import GeneratedDoc, GraphDoc, ServiceType, opea_microservices, register_microservice
diff --git a/comps/knowledgegraphs/requirements.txt b/comps/knowledgegraphs/requirements.txt
index ecb5228af..39b351530 100755
--- a/comps/knowledgegraphs/requirements.txt
+++ b/comps/knowledgegraphs/requirements.txt
@@ -8,7 +8,6 @@ langchain
 langchain_community==0.2.5
 langchain_openai
 langchainhub
-langsmith
 neo4j
 numpy
 opentelemetry-api
diff --git a/comps/llms/faq-generation/tgi/llm.py b/comps/llms/faq-generation/tgi/llm.py
index af23a74af..beaa5700b 100644
--- a/comps/llms/faq-generation/tgi/llm.py
+++ b/comps/llms/faq-generation/tgi/llm.py
@@ -9,12 +9,10 @@
 from langchain.prompts import PromptTemplate
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.llms import HuggingFaceEndpoint
-from langsmith import traceable
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
-@traceable(run_type="tool")
 def post_process_text(text: str):
     if text == " ":
         return "data: @#$\n\n"
@@ -33,7 +31,6 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
     llm = HuggingFaceEndpoint(
diff --git a/comps/llms/faq-generation/tgi/requirements.txt b/comps/llms/faq-generation/tgi/requirements.txt
index 623a8f667..fbe54af95 100644
--- a/comps/llms/faq-generation/tgi/requirements.txt
+++ b/comps/llms/faq-generation/tgi/requirements.txt
@@ -3,7 +3,6 @@ fastapi
 huggingface_hub
 langchain==0.1.16
 langserve
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/llms/summarization/tgi/llm.py b/comps/llms/summarization/tgi/llm.py
index a9db6248d..43a583a96 100644
--- a/comps/llms/summarization/tgi/llm.py
+++ b/comps/llms/summarization/tgi/llm.py
@@ -8,12 +8,10 @@
 from langchain.docstore.document import Document
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_huggingface import HuggingFaceEndpoint
-from langsmith import traceable
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
-@traceable(run_type="tool")
 def post_process_text(text: str):
     if text == " ":
         return "data: @#$\n\n"
@@ -32,7 +30,6 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
     llm = HuggingFaceEndpoint(
diff --git a/comps/llms/summarization/tgi/requirements.txt b/comps/llms/summarization/tgi/requirements.txt
index c6c151f6e..e074ba8c8 100644
--- a/comps/llms/summarization/tgi/requirements.txt
+++ b/comps/llms/summarization/tgi/requirements.txt
@@ -6,7 +6,6 @@ langchain-huggingface
 langchain-openai
 langchain_community
 langchainhub
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/llms/text-generation/ollama/llm.py b/comps/llms/text-generation/ollama/llm.py
index aadb2e2fa..1f6d330c8 100644
--- a/comps/llms/text-generation/ollama/llm.py
+++ b/comps/llms/text-generation/ollama/llm.py
@@ -5,7 +5,6 @@
 from fastapi.responses import StreamingResponse
 from langchain_community.llms import Ollama
-from langsmith import traceable
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
@@ -17,7 +16,6 @@
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 def llm_generate(input: LLMParamsDoc):
     ollama = Ollama(
         base_url=ollama_endpoint,
diff --git a/comps/llms/text-generation/ollama/requirements.txt b/comps/llms/text-generation/ollama/requirements.txt
index e224aaaa5..78b323c73 100644
--- a/comps/llms/text-generation/ollama/requirements.txt
+++ b/comps/llms/text-generation/ollama/requirements.txt
@@ -2,7 +2,6 @@ docarray[full]
 fastapi
 huggingface_hub
 langchain==0.1.16
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/llms/text-generation/ray_serve/llm.py b/comps/llms/text-generation/ray_serve/llm.py
new file mode 100644
index 000000000..c86025625
--- /dev/null
+++ b/comps/llms/text-generation/ray_serve/llm.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from fastapi.responses import StreamingResponse
+from langchain_openai import ChatOpenAI
+
+from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
+
+
+def post_process_text(text: str):
+    if text == " ":
+        return "data: @#$\n\n"
+    if text == "\n":
+        return "data: <br/>\n\n"
+    if text.isspace():
+        return None
+    new_text = text.replace(" ", "@#$")
+    return f"data: {new_text}\n\n"
+
+
+@register_microservice(
+    name="opea_service@llm_ray",
+    service_type=ServiceType.LLM,
+    endpoint="/v1/chat/completions",
+    host="0.0.0.0",
+    port=9000,
+)
+def llm_generate(input: LLMParamsDoc):
+    llm_endpoint = os.getenv("RAY_Serve_ENDPOINT", "http://localhost:8080")
+    llm_model = os.getenv("LLM_MODEL", "Llama-2-7b-chat-hf")
+    if "/" in llm_model:
+        llm_model = llm_model.split("/")[-1]
+    llm = ChatOpenAI(
+        openai_api_base=llm_endpoint + "/v1",
+        model_name=llm_model,
+        openai_api_key=os.getenv("OPENAI_API_KEY", "not_needed"),
+        max_tokens=input.max_new_tokens,
+        temperature=input.temperature,
+        streaming=input.streaming,
+        request_timeout=600,
+    )
+
+    if input.streaming:
+
+        async def stream_generator():
+            chat_response = ""
+            async for text in llm.astream(input.query):
+                text = text.content
+                chat_response += text
+                processed_text = post_process_text(text)
+                if text and processed_text:
+                    if "</s>" in text:
+                        res = text.split("</s>")[0]
+                        if res != "":
+                            yield res
+                        break
+                    yield processed_text
+            print(f"[llm - chat_stream] stream response: {chat_response}")
+            yield "data: [DONE]\n\n"
+
+        return StreamingResponse(stream_generator(), media_type="text/event-stream")
+    else:
+        response = llm.invoke(input.query)
+        response = response.content
+        return GeneratedDoc(text=response, prompt=input.query)
+
+
+if __name__ == "__main__":
+    opea_microservices["opea_service@llm_ray"].start()
diff --git a/comps/llms/text-generation/ray_serve/requirements.txt b/comps/llms/text-generation/ray_serve/requirements.txt
new file mode 100644
index 000000000..d97ce32e2
--- /dev/null
+++ b/comps/llms/text-generation/ray_serve/requirements.txt
@@ -0,0 +1,13 @@
+docarray[full]
+fastapi
+huggingface_hub
+langchain==0.1.16
+langchain_openai
+openai
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+prometheus-fastapi-instrumentator
+ray[serve]>=2.10
+shortuuid
+transformers
diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py
index c202aede7..6597d5b57 100644
--- a/comps/llms/text-generation/tgi/llm.py
+++ b/comps/llms/text-generation/tgi/llm.py
@@ -8,7 +8,6 @@
 from fastapi.responses import StreamingResponse
 from huggingface_hub import AsyncInferenceClient
 from langchain_core.prompts import PromptTemplate
-from langsmith import traceable
 from openai import OpenAI
 from template import ChatTemplate
@@ -37,7 +36,6 @@
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 @register_statistics(names=["opea_service@llm_tgi"])
 async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest]):
diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/text-generation/tgi/requirements.txt
index 9670813d6..6b6f11ee4 100644
--- a/comps/llms/text-generation/tgi/requirements.txt
+++ b/comps/llms/text-generation/tgi/requirements.txt
@@ -3,7 +3,6 @@ docarray[full]
 fastapi
 httpx
 huggingface_hub
-langsmith
 openai==1.35.13
 opentelemetry-api
 opentelemetry-exporter-otlp
diff --git a/comps/llms/text-generation/vllm-ray/llm.py b/comps/llms/text-generation/vllm-ray/llm.py
index 6d8abd028..d3de026e0 100644
--- a/comps/llms/text-generation/vllm-ray/llm.py
+++ b/comps/llms/text-generation/vllm-ray/llm.py
@@ -16,7 +16,6 @@
 from fastapi.responses import StreamingResponse
 from langchain_openai import ChatOpenAI
-from langsmith import traceable
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
@@ -28,7 +27,6 @@
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("vLLM_RAY_ENDPOINT", "http://localhost:8006")
     llm_model = os.getenv("LLM_MODEL", "meta-llama/Llama-2-7b-chat-hf")
diff --git a/comps/llms/text-generation/vllm-ray/requirements.txt b/comps/llms/text-generation/vllm-ray/requirements.txt
index 083a2910b..1f9020583 100644
--- a/comps/llms/text-generation/vllm-ray/requirements.txt
+++ b/comps/llms/text-generation/vllm-ray/requirements.txt
@@ -4,7 +4,6 @@ huggingface_hub
 langchain==0.1.16
 langchain_openai
 langserve
-langsmith
 openai
 opentelemetry-api
 opentelemetry-exporter-otlp
diff --git a/comps/llms/text-generation/vllm-xft/llm.py b/comps/llms/text-generation/vllm-xft/llm.py
index 02446baa6..2c479d90b 100644
--- a/comps/llms/text-generation/vllm-xft/llm.py
+++ b/comps/llms/text-generation/vllm-xft/llm.py
@@ -5,7 +5,6 @@
 from fastapi.responses import StreamingResponse
 from langchain_community.llms import VLLMOpenAI
-from langsmith import traceable
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
@@ -17,7 +16,6 @@
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("vLLM_LLM_ENDPOINT", "http://localhost:18688")
     llm = VLLMOpenAI(
diff --git a/comps/llms/text-generation/vllm-xft/requirements.txt b/comps/llms/text-generation/vllm-xft/requirements.txt
index bc9f457c4..a4accaed2 100644
--- a/comps/llms/text-generation/vllm-xft/requirements.txt
+++ b/comps/llms/text-generation/vllm-xft/requirements.txt
@@ -1,7 +1,6 @@
 docarray[full]
 fastapi
 langchain==0.1.16
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/llms/text-generation/vllm/llm.py b/comps/llms/text-generation/vllm/llm.py
index ea8691f1a..61bebbe27 100644
--- a/comps/llms/text-generation/vllm/llm.py
+++ b/comps/llms/text-generation/vllm/llm.py
@@ -5,7 +5,6 @@
 from fastapi.responses import StreamingResponse
 from langchain_community.llms import VLLMOpenAI
-from langsmith import traceable
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, opea_telemetry, register_microservice
@@ -29,7 +28,6 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-@traceable(run_type="llm")
 def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008")
     model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
diff --git a/comps/ragas/tgi/llm.py b/comps/ragas/tgi/llm.py
index 895705703..03c214d30 100644
--- a/comps/ragas/tgi/llm.py
+++ b/comps/ragas/tgi/llm.py
@@ -11,7 +11,6 @@
     HuggingFaceInstructEmbeddings,
 )
 from langchain_community.llms import HuggingFaceEndpoint
-from langsmith import traceable
 from ragas import evaluate
 from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness
@@ -30,7 +29,6 @@
     input_datatype=RAGASParams,
     output_datatype=RAGASScores,
 )
-@traceable(run_type="llm")
 def llm_generate(input: RAGASParams):
     llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
diff --git a/comps/ragas/tgi/requirements.txt b/comps/ragas/tgi/requirements.txt
index 2c8fad29f..7839ef15d 100644
--- a/comps/ragas/tgi/requirements.txt
+++ b/comps/ragas/tgi/requirements.txt
@@ -3,7 +3,6 @@ docarray[full]
 fastapi
 huggingface_hub
 langchain==0.1.16
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/reranks/fastrag/local_reranking.py b/comps/reranks/fastrag/local_reranking.py
index f11ebc1f9..4548ef9a7 100644
--- a/comps/reranks/fastrag/local_reranking.py
+++ b/comps/reranks/fastrag/local_reranking.py
@@ -4,7 +4,6 @@
 from config import RANKER_MODEL
 from fastrag.rankers import IPEXBiEncoderSimilarityRanker
 from haystack import Document
-from langsmith import traceable
 from comps.cores.mega.micro_service import ServiceType, opea_microservices, register_microservice
 from comps.cores.proto.docarray import RerankedDoc, SearchedDoc, TextDoc
@@ -19,7 +18,6 @@
     input_datatype=SearchedDoc,
     output_datatype=RerankedDoc,
 )
-@traceable(run_type="llm")
 def reranking(input: SearchedDoc) -> RerankedDoc:
     documents = []
     for i, d in enumerate(input.retrieved_docs):
diff --git a/comps/reranks/fastrag/requirements.txt b/comps/reranks/fastrag/requirements.txt
index c4ded91c8..e8166a77a 100644
--- a/comps/reranks/fastrag/requirements.txt
+++ b/comps/reranks/fastrag/requirements.txt
@@ -2,7 +2,6 @@ docarray[full]
 fastapi
 haystack-ai
 langchain
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/reranks/langchain-mosec/reranking_mosec_xeon.py b/comps/reranks/langchain-mosec/reranking_mosec_xeon.py
index 4640433af..da3d7854a 100644
--- a/comps/reranks/langchain-mosec/reranking_mosec_xeon.py
+++ b/comps/reranks/langchain-mosec/reranking_mosec_xeon.py
@@ -22,7 +22,6 @@
 import requests
 from langchain_core.prompts import ChatPromptTemplate
-from langsmith import traceable
 from comps import (
     LLMParamsDoc,
@@ -44,7 +43,6 @@
     input_datatype=SearchedDoc,
     output_datatype=LLMParamsDoc,
 )
-@traceable(run_type="llm")
 @register_statistics(names=["opea_service@reranking_mosec_xeon"])
 def reranking(input: SearchedDoc) -> LLMParamsDoc:
     print("reranking input: ", input)
diff --git a/comps/reranks/requirements.txt b/comps/reranks/requirements.txt
index 67503038f..7260862a3 100644
--- a/comps/reranks/requirements.txt
+++ b/comps/reranks/requirements.txt
@@ -2,7 +2,6 @@ aiohttp
 docarray[full]
 fastapi
 httpx
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/reranks/tei/local_reranking.py b/comps/reranks/tei/local_reranking.py
index d0fa8a79c..f02a95823 100644
--- a/comps/reranks/tei/local_reranking.py
+++ b/comps/reranks/tei/local_reranking.py
@@ -1,7 +1,6 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-from langsmith import traceable
 from sentence_transformers import CrossEncoder
 from comps import RerankedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
@@ -16,7 +15,6 @@
     input_datatype=SearchedDoc,
     output_datatype=RerankedDoc,
 )
-@traceable(run_type="llm")
 def reranking(input: SearchedDoc) -> RerankedDoc:
     query_and_docs = [(input.initial_query, doc.text) for doc in input.retrieved_docs]
     scores = reranker_model.predict(query_and_docs)
diff --git a/comps/reranks/tei/reranking_tei.py b/comps/reranks/tei/reranking_tei.py
index 2440f800a..5575aa88f 100644
--- a/comps/reranks/tei/reranking_tei.py
+++ b/comps/reranks/tei/reranking_tei.py
@@ -9,7 +9,6 @@
 from typing import Union
 import requests
-from langsmith import traceable
 from comps import (
     LLMParamsDoc,
@@ -37,7 +36,6 @@
     input_datatype=SearchedDoc,
     output_datatype=LLMParamsDoc,
 )
-@traceable(run_type="llm")
 @register_statistics(names=["opea_service@reranking_tgi_gaudi"])
 def reranking(
     input: Union[SearchedDoc, RerankingRequest, ChatCompletionRequest]
diff --git a/comps/retrievers/haystack/qdrant/requirements.txt b/comps/retrievers/haystack/qdrant/requirements.txt
index 9b99c00fb..24458a6fb 100644
--- a/comps/retrievers/haystack/qdrant/requirements.txt
+++ b/comps/retrievers/haystack/qdrant/requirements.txt
@@ -3,7 +3,6 @@ easyocr
 fastapi
 haystack-ai==2.2.4
 langchain_community
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/retrievers/haystack/qdrant/retriever_qdrant.py b/comps/retrievers/haystack/qdrant/retriever_qdrant.py
index c2cc8735b..30d1bd72d 100644
--- a/comps/retrievers/haystack/qdrant/retriever_qdrant.py
+++ b/comps/retrievers/haystack/qdrant/retriever_qdrant.py
@@ -4,7 +4,6 @@
 from haystack.components.embedders import HuggingFaceTEITextEmbedder, SentenceTransformersTextEmbedder
 from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
 from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
-from langsmith import traceable
 from qdrant_config import EMBED_DIMENSION, EMBED_ENDPOINT, EMBED_MODEL, INDEX_NAME, QDRANT_HOST, QDRANT_PORT
 from comps import EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
@@ -28,7 +27,6 @@ def initialize_qdrant_retriever() -> QdrantEmbeddingRetriever:
     host="0.0.0.0",
     port=7000,
 )
-@traceable(run_type="retriever")
 def retrieve(input: EmbedDoc) -> SearchedDoc:
     search_res = retriever.run(query_embedding=input.embedding)["documents"]
     searched_docs = [TextDoc(text=r.content) for r in search_res if r.content]
diff --git a/comps/retrievers/langchain/milvus/retriever_milvus.py b/comps/retrievers/langchain/milvus/retriever_milvus.py
index 1625eed0a..0c81e76ce 100644
--- a/comps/retrievers/langchain/milvus/retriever_milvus.py
+++ b/comps/retrievers/langchain/milvus/retriever_milvus.py
@@ -15,9 +15,8 @@
     MODEL_ID,
     MOSEC_EMBEDDING_ENDPOINT,
 )
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, OpenAIEmbeddings
 from langchain_milvus.vectorstores import Milvus
-from langsmith import traceable
 from comps import (
     EmbedDoc,
@@ -63,7 +62,6 @@ def empty_embedding() -> List[float]:
     host="0.0.0.0",
     port=7000,
 )
-@traceable(run_type="retriever")
 @register_statistics(names=["opea_service@retriever_milvus"])
 def retrieve(input: EmbedDoc) -> SearchedDoc:
     vector_db = Milvus(
diff --git a/comps/retrievers/langchain/pgvector/requirements.txt b/comps/retrievers/langchain/pgvector/requirements.txt
index 18609d361..d3d95dee9 100644
--- a/comps/retrievers/langchain/pgvector/requirements.txt
+++ b/comps/retrievers/langchain/pgvector/requirements.txt
@@ -2,7 +2,6 @@ docarray[full]
 easyocr
 fastapi
 langchain_community
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/retrievers/langchain/pgvector/retriever_pgvector.py b/comps/retrievers/langchain/pgvector/retriever_pgvector.py
index 2fba1f1c0..014a616a5 100644
--- a/comps/retrievers/langchain/pgvector/retriever_pgvector.py
+++ b/comps/retrievers/langchain/pgvector/retriever_pgvector.py
@@ -7,7 +7,6 @@
 from config import EMBED_MODEL, INDEX_NAME, PG_CONNECTION_STRING, PORT
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import PGVector
-from langsmith import traceable
 from comps import (
     EmbedDoc,
@@ -30,7 +29,6 @@
     host="0.0.0.0",
     port=PORT,
 )
-@traceable(run_type="retriever")
 @register_statistics(names=["opea_service@retriever_pgvector"])
 def retrieve(input: EmbedDoc) -> SearchedDoc:
     start = time.time()
diff --git a/comps/retrievers/langchain/pinecone/requirements.txt b/comps/retrievers/langchain/pinecone/requirements.txt
index 32df7f40f..7536f591c 100644
--- a/comps/retrievers/langchain/pinecone/requirements.txt
+++ b/comps/retrievers/langchain/pinecone/requirements.txt
@@ -6,7 +6,6 @@ huggingface_hub
 langchain
 langchain-community
 langchain-pinecone
-langsmith
 numpy
 opentelemetry-api
 opentelemetry-exporter-otlp
diff --git a/comps/retrievers/langchain/pinecone/retriever_pinecone.py b/comps/retrievers/langchain/pinecone/retriever_pinecone.py
index ba8e6526f..73e77d111 100644
--- a/comps/retrievers/langchain/pinecone/retriever_pinecone.py
+++ b/comps/retrievers/langchain/pinecone/retriever_pinecone.py
@@ -7,7 +7,6 @@
 from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_pinecone import PineconeVectorStore
-from langsmith import traceable
 from pinecone import Pinecone, ServerlessSpec
 from comps import (
@@ -31,7 +30,6 @@
     host="0.0.0.0",
     port=7000,
 )
-@traceable(run_type="retriever")
 @register_statistics(names=["opea_service@retriever_pinecone"])
 def retrieve(input: EmbedDoc) -> SearchedDoc:
     start = time.time()
diff --git a/comps/retrievers/langchain/redis/requirements.txt b/comps/retrievers/langchain/redis/requirements.txt
index 3720190d3..c68c3d274 100644
--- a/comps/retrievers/langchain/redis/requirements.txt
+++ b/comps/retrievers/langchain/redis/requirements.txt
@@ -2,7 +2,6 @@ docarray[full]
 easyocr
 fastapi
 langchain_community
-langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
diff --git a/comps/retrievers/langchain/redis/retriever_redis.py b/comps/retrievers/langchain/redis/retriever_redis.py
index 43f3e0c05..a4ab5dc4e 100644
--- a/comps/retrievers/langchain/redis/retriever_redis.py
+++ b/comps/retrievers/langchain/redis/retriever_redis.py
@@ -7,7 +7,6 @@
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import Redis
-from langsmith import traceable
 from redis_config import EMBED_MODEL, INDEX_NAME, REDIS_URL
 from comps import (
@@ -37,7 +36,6 @@
     host="0.0.0.0",
     port=7000,
 )
-@traceable(run_type="retriever")
 @register_statistics(names=["opea_service@retriever_redis"])
 def retrieve(
     input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest]
diff --git a/comps/retrievers/llamaindex/requirements.txt b/comps/retrievers/llamaindex/requirements.txt
index 236ea9af8..83228c259 100644
--- a/comps/retrievers/llamaindex/requirements.txt
+++ b/comps/retrievers/llamaindex/requirements.txt
@@ -3,7 +3,6 @@ docarray[full]
 easyocr
 fastapi
 httpx
-langsmith
 llama-index-vector-stores-redis
 llama_index
 opentelemetry-api
diff --git a/comps/retrievers/llamaindex/retriever_redis.py b/comps/retrievers/llamaindex/retriever_redis.py
index 4999a7235..3c387010e 100644
--- a/comps/retrievers/llamaindex/retriever_redis.py
+++ b/comps/retrievers/llamaindex/retriever_redis.py
@@ -3,7 +3,6 @@
 import os
-from langsmith import traceable
 from llama_index.core.vector_stores.types import VectorStoreQuery
 from llama_index.vector_stores.redis import RedisVectorStore
 from redis_config import INDEX_NAME, REDIS_URL
@@ -20,7 +19,6 @@
     host="0.0.0.0",
     port=7000,
 )
-@traceable(run_type="retriever")
 def retrieve(input: EmbedDoc) -> SearchedDoc:
     vector_store_query = VectorStoreQuery(query_embedding=input.embedding)
     search_res = vector_store.query(query=vector_store_query)
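Finally, a quick way to exercise the newly added `ray_serve` text-generation microservice: the decorator registers it on `0.0.0.0:9000` at `/v1/chat/completions`, and `llm_generate` reads `query`, `max_new_tokens`, `temperature`, and `streaming` from the incoming `LLMParamsDoc`. The sketch below assumes those fields can be posted as a flat JSON body; adjust it if the actual `LLMParamsDoc` wire format differs.

```python
import requests

# Hypothetical client call; host, port, and field names are taken from
# comps/llms/text-generation/ray_serve/llm.py above, but the exact request
# schema is an assumption.
payload = {
    "query": "What is a microservice?",
    "max_new_tokens": 128,
    "temperature": 0.01,
    "streaming": False,
}
resp = requests.post("http://localhost:9000/v1/chat/completions", json=payload)
print(resp.json())

# The service itself must be pointed at a running Ray Serve endpoint first, e.g.:
# export RAY_Serve_ENDPOINT=http://localhost:8080
# export LLM_MODEL=Llama-2-7b-chat-hf
```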