Skip to content

Commit

Permalink
Embedding compatible with OpenAI API (#892)
Browse files Browse the repository at this point in the history
* Embedding TEI Langchain compatible with OpenAI API

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* TextDoc support list

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* support tei llama index openai compatible API

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* support mosec langchain openai compatible API

Signed-off-by: Xinyao Wang <[email protected]>

* update UT for embedding tests

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix ut bug

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* support embedding predictionguard  openai compatible API

Signed-off-by: Xinyao Wang <[email protected]>

* support embedding multimodal clip OpenAI compatible API

Signed-off-by: Xinyao Wang <[email protected]>

* fix bug

Signed-off-by: Xinyao Wang <[email protected]>

* enable debug mode for embedding UT

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Xinyao Wang <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <[email protected]>
Co-authored-by: ZePan110 <[email protected]>
  • Loading branch information
4 people authored Nov 14, 2024
1 parent 4418824 commit 7bf1953
Show file tree
Hide file tree
Showing 16 changed files with 429 additions and 58 deletions.
8 changes: 4 additions & 4 deletions comps/cores/proto/docarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class TopologyInfo:


class TextDoc(BaseDoc, TopologyInfo):
text: str = None
text: Union[str, List[str]] = None


class Audio2text(BaseDoc, TopologyInfo):
Expand Down Expand Up @@ -93,15 +93,15 @@ class DocPath(BaseDoc):


class EmbedDoc(BaseDoc):
text: str
embedding: conlist(float, min_length=0)
text: Union[str, List[str]]
embedding: Union[conlist(float, min_length=0), List[conlist(float, min_length=0)]]
search_type: str = "similarity"
k: int = 4
distance_threshold: Optional[float] = None
fetch_k: int = 20
lambda_mult: float = 0.5
score_threshold: float = 0.2
constraints: Optional[Union[Dict[str, Any], None]] = None
constraints: Optional[Union[Dict[str, Any], List[Dict[str, Any]], None]] = None


class EmbedMultimodalDoc(EmbedDoc):
Expand Down
35 changes: 30 additions & 5 deletions comps/embeddings/mosec/langchain/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,34 @@ docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_prox

## run client test

```
curl localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'
Use our basic API.

```bash
## query with single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

This service is also compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).

```bash
## Input single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":"Hello, world!"}' \
-H 'Content-Type: application/json'

## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-H 'Content-Type: application/json'
```
39 changes: 35 additions & 4 deletions comps/embeddings/mosec/langchain/embedding_mosec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import asyncio
import os
import time
from typing import List, Optional
from typing import List, Optional, Union

from langchain_community.embeddings import OpenAIEmbeddings

Expand All @@ -18,6 +18,12 @@
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)

logger = CustomLogger("embedding_mosec")
logflag = os.getenv("LOGFLAG", False)
Expand Down Expand Up @@ -62,18 +68,43 @@ async def get_embedding(e: Optional[List[float]]) -> List[float]:
output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_mosec"])
async def embedding(input: TextDoc) -> EmbedDoc:
async def embedding(
input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
if logflag:
logger.info(input)
start = time.time()
embed_vector = await embeddings.aembed_query(input.text)
res = EmbedDoc(text=input.text, embedding=embed_vector)
if isinstance(input, TextDoc):
embed_vector = await get_embeddings(input.text)
embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
res = EmbedDoc(text=input.text, embedding=embedding_res)
else:
embed_vector = await get_embeddings(input.input)
if input.dimensions is not None:
embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]

# for standard openai embedding format
res = EmbeddingResponse(
data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
)

if isinstance(input, ChatCompletionRequest):
input.embedding = res
# keep
res = input

statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None)
if logflag:
logger.info(res)
return res


async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
    """Embed a single text or a list of texts via the module-level ``embeddings`` client.

    Args:
        text: One string, or a list of strings, to embed.

    Returns:
        The result of ``embeddings.aembed_documents`` for the batch. A lone
        string is wrapped in a one-element list first, so the client always
        receives a list.
    """
    batch = text if not isinstance(text, str) else [text]
    return await embeddings.aembed_documents(batch)


if __name__ == "__main__":
MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:8080")
os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT
Expand Down
31 changes: 28 additions & 3 deletions comps/embeddings/multimodal_clip/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,34 @@ curl http://localhost:6000/v1/health_check\

### 2.2 Consume Embedding Service

Use our basic API.

```bash
## query with single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

This service is also compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).

```bash
curl http://localhost:6000/v1/embeddings \
-X POST -d '{"text":"Sample text"}' \
-H 'Content-Type: application/json'
## Input single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":"Hello, world!"}' \
-H 'Content-Type: application/json'

## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-H 'Content-Type: application/json'
```
52 changes: 46 additions & 6 deletions comps/embeddings/multimodal_clip/embedding_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
# SPDX-License-Identifier: Apache-2.0

import datetime
import os
import time
from typing import List, Optional, Union

from dateparser.search import search_dates
from embeddings_clip import vCLIP

from comps import (
CustomLogger,
EmbedDoc,
ServiceType,
TextDoc,
Expand All @@ -16,6 +19,15 @@
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)

logger = CustomLogger("embedding_multimodal")
logflag = os.getenv("LOGFLAG", False)


def filtler_dates(prompt):
Expand Down Expand Up @@ -64,21 +76,49 @@ def filtler_dates(prompt):
output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_multimodal"])
def embedding(input: TextDoc) -> EmbedDoc:
async def embedding(
input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
if logflag:
logger.info(input)
start = time.time()

if isinstance(input, TextDoc):
# Handle text input
embed_vector = embeddings.embed_query(input.text).tolist()[0]
res = EmbedDoc(text=input.text, embedding=embed_vector, constraints=filtler_dates(input.text))

embed_vector = await get_embeddings(input.text)
if isinstance(input.text, str):
embedding_res = embed_vector[0]
constraints_res = filtler_dates(input.text)
else:
embedding_res = embed_vector
constraints_res = [filtler_dates(input.text[i]) for i in range(len(input.text))]
res = EmbedDoc(text=input.text, embedding=embedding_res, constraints=constraints_res)
else:
raise ValueError("Invalid input type")
embed_vector = await get_embeddings(input.input)
if input.dimensions is not None:
embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]

# for standard openai embedding format
res = EmbeddingResponse(
data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
)

if isinstance(input, ChatCompletionRequest):
input.embedding = res
# keep
res = input

statistics_dict["opea_service@embedding_multimodal"].append_latency(time.time() - start, None)
if logflag:
logger.info(res)
return res


async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
    """Embed a single text or a list of texts with the module-level ``embeddings`` model.

    Args:
        text: One string, or a list of strings, to embed.

    Returns:
        ``embeddings.embed_query(...)`` converted with ``.tolist()``. A lone
        string is wrapped in a one-element list before the call, so the model
        always receives a list.
    """
    if isinstance(text, str):
        text = [text]
    return embeddings.embed_query(text).tolist()


if __name__ == "__main__":
embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 4})
opea_microservices["opea_service@embedding_multimodal"].start()
33 changes: 29 additions & 4 deletions comps/embeddings/predictionguard/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,34 @@ docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD

## 🚀 Consume Embeddings Service

Use our basic API.

```bash
curl localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'
## query with single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

This service is also compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).

```bash
## Input single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":"Hello, world!"}' \
-H 'Content-Type: application/json'

## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-H 'Content-Type: application/json'
```
51 changes: 46 additions & 5 deletions comps/embeddings/predictionguard/embedding_predictionguard.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import os
import time
from typing import List, Optional, Union

from predictionguard import PredictionGuard

from comps import (
CustomLogger,
EmbedDoc,
ServiceType,
TextDoc,
Expand All @@ -16,6 +18,15 @@
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)

logger = CustomLogger("embedding_predictionguard")
logflag = os.getenv("LOGFLAG", False)

# Initialize Prediction Guard client
client = PredictionGuard()
Expand All @@ -31,16 +42,46 @@
output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_predictionguard"])
def embedding(input: TextDoc) -> EmbedDoc:
async def embedding(
input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
if logflag:
logger.info(input)
start = time.time()
response = client.embeddings.create(model=pg_embedding_model_name, input=[{"text": input.text}])
embed_vector = response["data"][0]["embedding"]
embed_vector = embed_vector[:512] # Keep only the first 512 elements
res = EmbedDoc(text=input.text, embedding=embed_vector)

if isinstance(input, TextDoc):
embed_vector = await get_embeddings(input.text)
embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
res = EmbedDoc(text=input.text, embedding=embedding_res)
else:
embed_vector = await get_embeddings(input.input)
input.dimensions = input.dimensions if input.dimensions is not None else 512
embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]

# for standard openai embedding format
res = EmbeddingResponse(
data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
)

if isinstance(input, ChatCompletionRequest):
input.embedding = res
# keep
res = input

statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None)
if logflag:
logger.info(res)
return res


async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
    """Request embeddings for one text or a list of texts from the module-level ``client``.

    Args:
        text: One string, or a list of strings, to embed.

    Returns:
        A list holding the ``"embedding"`` field of every entry in the
        ``"data"`` section of the client's response, in response order.
    """
    batch = text if not isinstance(text, str) else [text]
    # Each input is sent to the client as a {"text": ...} mapping.
    payload = [{"text": item} for item in batch]
    data = client.embeddings.create(model=pg_embedding_model_name, input=payload)["data"]
    return [entry["embedding"] for entry in data]


if __name__ == "__main__":
pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")
print("Prediction Guard Embedding initialized.")
Expand Down
Loading

0 comments on commit 7bf1953

Please sign in to comment.