-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #56 from enoch3712/55-vision-without-documentloader-is-not-working
55 vision without documentloader is not working
- Loading branch information
Showing 14 changed files with 1,811 additions and 1,459 deletions.
There are no files selected for viewing
68 changes: 24 additions & 44 deletions
68
extract_thinker/document_loader/document_loader_llm_image.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,34 @@ | ||
from abc import ABC | ||
from io import BytesIO | ||
from typing import Any, List, Union | ||
from PIL import Image | ||
from extract_thinker.document_loader.cached_document_loader import CachedDocumentLoader | ||
from extract_thinker.utils import extract_json | ||
|
||
|
||
class DocumentLoaderLLMImage(CachedDocumentLoader, ABC):
    """Document loader that prepares images for, or sends them to, an LLM.

    The ``load_content_from_*`` methods turn a file path or an in-memory
    stream into a list of ``{"image": BytesIO}`` entries, one per image
    returned by ``convert_to_images``. ``extract_image_content`` sends a
    single image to the configured LLM and returns the JSON it produced.

    NOTE(review): ``convert_to_images`` and ``encode_image`` are not defined
    in this class; presumably they are inherited from
    ``CachedDocumentLoader`` -- confirm against the base class.
    """

    SUPPORTED_FORMATS = ['pdf', 'jpg', 'jpeg', 'png']

    def __init__(self, content=None, cache_ttl=300, llm=None):
        """Initialise the loader.

        Args:
            content: Forwarded unchanged to the base-class initialiser.
            cache_ttl: Forwarded unchanged to the base-class initialiser.
            llm: Client object exposing ``completion(model=..., messages=...)``.
                Only required by ``extract_image_content``.
        """
        super().__init__(content, cache_ttl)
        self.llm = llm

    def extract_image_content(
        self,
        image_stream: BytesIO,
        model: str = "claude-3-sonnet-20240229",
    ) -> str:
        """Extract structured data from an image using the configured LLM.

        Args:
            image_stream: Binary stream positioned at the start of an image
                that ``PIL.Image.open`` can read.
            model: Model identifier passed to ``self.llm.completion``. The
                default is the value that used to be hard-coded here.

        Returns:
            Whatever ``extract_json`` returns for the text of the first
            completion choice.

        Raises:
            ValueError: If the loader was created without an ``llm``.
        """
        # Fail with a clear message instead of an AttributeError on None.
        if self.llm is None:
            raise ValueError(
                "DocumentLoaderLLMImage requires an 'llm' to extract image content."
            )

        image = Image.open(image_stream)
        base64_image = self.encode_image(image)

        # NOTE(review): the data URL always declares image/jpeg; this assumes
        # encode_image produces JPEG data -- confirm against its implementation.
        resp = self.llm.completion(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": 'You are a worldclass Image data extractor. You receive an image and extract useful information from it. You output a JSON with the extracted information.',
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "data:image/jpeg;base64," + base64_image
                            },
                        },
                        {"type": "text", "text": "###JSON Output\n"},
                    ],
                },
            ],
        )

        # The model may wrap the JSON in extra text; extract_json isolates it.
        return extract_json(resp.choices[0].message.content)

    def _images_as_streams(self, source: Union[str, BytesIO]) -> List[Any]:
        """Convert *source* to images and wrap each one in a fresh BytesIO."""
        images = self.convert_to_images(source)
        # Only the image bytes are used; the mapping keys are ignored.
        return [{"image": BytesIO(image_bytes)} for image_bytes in images.values()]

    def load_content_from_file(self, file_path: str) -> Union[str, object]:
        """Return one ``{"image": BytesIO}`` entry per image in *file_path*."""
        return self._images_as_streams(file_path)

    def load_content_from_stream(self, stream: BytesIO) -> Union[str, object]:
        """Return one ``{"image": BytesIO}`` entry per image in *stream*."""
        return self._images_as_streams(stream)

    def load_content_from_stream_list(self, stream: BytesIO) -> List[Any]:
        """List-returning alias of ``load_content_from_stream``."""
        return self.load_content_from_stream(stream)

    def load_content_from_file_list(self, file_path: str) -> List[Any]:
        """List-returning alias of ``load_content_from_file``."""
        return self.load_content_from_file(file_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.