Skip to content

Commit 9ecc011

Browse files
committed
Fix images and multimodality.
1 parent 3acbb6c commit 9ecc011

File tree

15 files changed

+411
-251
lines changed

15 files changed

+411
-251
lines changed

chars/joshu/config.json

Lines changed: 0 additions & 25 deletions
This file was deleted.

examples/image-compare/main.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
args = p.parse_args()
1515

1616
# get a ghostbox
17-
box = ghostbox.from_generic(character_folder="art_critic")
17+
box = ghostbox.from_generic(character_folder="art_critic",
18+
stderr=False,
19+
quiet=True)
1820

1921
# let's make sure the images exist
2022
# ghostbox will not raise an error if they don't
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
You are a game master in a role playing game.
2+
You tell a story collaboratively with a user, who is playing as {{chat_user}}.

examples/mini-adventure/main.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env python
2+
# This is an example of a very basic interaction loop
3+
# it probably won't feel good, since it blocks and doesn't use streaming
4+
# but we want to keep it simple
5+
import ghostbox, time, random
6+
7+
# the generic adapter will work with anything that supports the OAI API
8+
box = ghostbox.from_generic(
9+
character_folder="game_master", # see below
10+
stderr=False, # since this is a CLI program, we don't want clutter
11+
quiet=True, # we do printing and tts ourselves
12+
tts=True, # this means responses will be spoken automatically
13+
tts_model="kokoro", # kokoro is nice because it's small and good
14+
tts_voice="bm_daniel", # daniel is real GM material
15+
)
16+
17+
if name := input("What is your cool adventurer name?\nName: "):
18+
print(f"Welcome, {name}! A game master will be with you shortly...")
19+
else:
20+
name = "Drizzt Do'Urden"
21+
print("Better sharpen your scimitars...")
22+
23+
# this will make {{chat_user}} expand to whatever the user just typed
24+
box.set_vars({"chat_user": name})
25+
26+
print(
27+
box.text(
28+
"Come up with an adventure scenario and give an introduction to the player."
29+
)
30+
)
31+
32+
# we start conservative, but the adventure will get wilder as we go on
33+
current_temperature, escalation_factor = 0.3, 0.05
34+
while True:
35+
user_msg = input("Your response (q to quit): ")
36+
box.tts_stop() # users usually like it when the tts shuts up after they hit enter
37+
38+
if user_msg == "q":
39+
print(
40+
box.text(
41+
"{{chat_user}} will quit the game now. Please conclude the adventure and write a proper goodbye."
42+
)
43+
)
44+
break
45+
46+
with box.options(
47+
temperature=current_temperature, # this changes every loop iteration
48+
max_length=100
49+
+ 10
50+
* random.randint(
51+
-3, 3
52+
), # keep it from talking for too long, but give some variety
53+
):
54+
print(box.text(user_msg))
55+
56+
current_temperature = min(current_temperature + escalation_factor, 1.3)
57+
58+
time.sleep(10) # give time to finish the speech

examples/structured-output.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
# This example shows how to use pydantic classes to get structured output from an LLM
3+
# You could also use json schemas, but I recommend against it.
4+
# Pydantic is quite wonderful.
5+
from pydantic import BaseModel
6+
from typing import *
7+
import ghostbox, json
8+
9+
box = ghostbox.from_generic(character_folder="ghost-writer")
10+
11+
12+
# this is the type for the object that we will let the LLM create
13+
# how we name things here really matters
14+
class BlogPost(BaseModel):
15+
title: str
16+
content: str
17+
tags: List[str]
18+
19+
20+
post = box.new(
21+
BlogPost, # this tells ghostbox and the backend what the structure should be
22+
"Write an extremely argumentative post about how an overabundance of busking is ruining berlin.",
23+
) # the prompt will provide context for filling in the python object
24+
print(json.dumps(post.model_dump(), indent=4))

ghostbox/Story.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,15 @@ class Story(object):
77

88
data: List[ChatMessage] = []
99

10-
def addUserText(self, w: str, image_id:Optional[int]=None, **kwargs) -> None:
11-
new_data = ChatMessage(role = "user", content = w, image_id=image_id, **kwargs)
10+
def addUserText(self, w: str, image_context: Dict[int, ImageRef]={}, **kwargs) -> None:
11+
"""Adds a user message to the story.
12+
:param w: The user's prompt or message as plaintext.
13+
:param image_context: A dict mapping image ids to 0 or more images to include with the message. The images, confusingly, may be http URLs, filenames, or binary data.
14+
"""
15+
if image_context == {}:
16+
new_data = ChatMessage(role = "user", content = w, **kwargs)
17+
else:
18+
new_data = ChatMessage.make_image_message(w, image_context.values(), **kwargs)
1219
self.data.append(new_data)
1320

1421
def addAssistantText(self, w: str, **kwargs):
@@ -45,7 +52,7 @@ def extendAssistantText(self, w: str) -> None:
4552
if msg.content is None:
4653
# this case is too weird, we just skip empty content
4754
continue
48-
elif type(msg) == str:
55+
elif type(msg.content) == str:
4956
# easy case
5057
msg.content += w
5158
return

ghostbox/__VERSION__

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.21.2
1+
0.21.3

ghostbox/_argparse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def makeTaggedParser(default_params) -> TaggedArgumentParser:
5555
tag=mktag(type=AT.Porcelain, group=AG.Backend))
5656
parser.add_argument("--backend", type=str, default=LLMBackend.generic.name, help="Backend to use. The default is `generic`, which conforms to the OpenAI REST API, and is supported by most LLM providers. Choosing a more specific backend may provide additional functionality. Other possible values are " + ", ".join([e.name for e in LLMBackend]) + ".",
5757
tag=mktag(type=AT.Porcelain, group=AG.Backend, very_important=True))
58-
parser.add_argument("--openai_api_key", type=str, default="", help="API key for OpenAI. Without the `--backend openai` option, this has no effect.",
58+
parser.add_argument("--api_key", type=str, default="", help="API key for OpenAI. Without the `--backend openai` option, this has no effect.",
5959
tag=mktag(type=AT.Plumbing, group=AG.OpenAI))
6060
parser.add_argument("--max_length", type=int, default=300, help="Number of tokens to request from backend for generation. Generation is stopped when this number is exceeded. Negative values mean generation is unlimited and will terminate when the backend generates a stop token.",
6161
tag=mktag(type=AT.Porcelain, group=AG.Generation, very_important=True))

ghostbox/backends.py

Lines changed: 1 addition & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,6 @@ def generate(self, payload):
394394

395395
if self._config["llamacpp_use_chat_completion_endpoint"]:
396396
endpoint_suffix = "/chat/completions"
397-
# /chat/completions expects a more OAI like payload
398-
llama_payload |= OpenAIBackend.dataFromPayload(llama_payload)
399397
else:
400398
endpoint_suffix = "/completion"
401399
if "tools" in payload:
@@ -446,8 +444,7 @@ def one_line_lambdas_for_python(r):
446444

447445
if self._config["llamacpp_use_chat_completion_endpoint"]:
448446
endpoint_suffix = "/chat/completions"
449-
# /chat/completions expects a more OAI like payload
450-
llama_payload |= OpenAIBackend.dataFromPayload(llama_payload)
447+
451448
final_callback = OpenAIBackend.makeOpenAICallback(
452449
callback, last_result_callback=one_line_lambdas_for_python
453450
)
@@ -658,8 +655,6 @@ def generate(self, payload):
658655
"Content-Type": "application/json",
659656
}
660657
data = payload | {"max_tokens": payload["max_length"], "stream": False}
661-
# the /V1/chat/completions endpoint expects structured data of user/assistant pairs
662-
data |= self.dataFromPayload(payload)
663658

664659
if "tools" in data:
665660
# see the llamacpp generate method fixme
@@ -720,8 +715,6 @@ def generateStreaming(self, payload, callback=lambda w: print(w)):
720715
}
721716

722717
data = payload | {"stream": True, "stream_options": {"include_usage": True}}
723-
# the /V1/chat/completions endpoint expects structured data of user/assistant pairs
724-
data |= self.dataFromPayload(payload)
725718
self._last_request = data
726719

727720
def one_line_lambdas_for_python(r):
@@ -744,47 +737,6 @@ def one_line_lambdas_for_python(r):
744737
return True
745738
return False
746739

747-
@staticmethod
748-
def dataFromPayload(payload: Dict[str, Any]) -> Dict[str, Any]:
749-
"""Take a payload dictionary from Plumbing and return dictionary with elements specific to the chat/completions endpoint.
750-
This expects payload to include the messages key, with various dictionaries in it, unlike other backends.
751-
"""
752-
messages = [{"role": "system", "content": payload["system"]}]
753-
# story is list of dicts with role and content keys
754-
# we go through story one by one, mostly because of images
755-
for story_item in payload["story"]:
756-
if "image_id" in story_item:
757-
# images is more complicated, see https://platform.openai.com/docs/guides/vision
758-
# API wants the content field of an image message to be a list of dicts, not a string
759-
# the dicts have the type field, which determines whether it's a user msg (text) or image (image-url)
760-
image_id = story_item["image_id"]
761-
image_content_list = []
762-
image_content_list.append(
763-
{"type": "text", "content": story_item["content"]}
764-
)
765-
if "images" not in payload or image_id not in payload["images"]:
766-
printerr("warning: image with id " + str(image_id) + " not found.")
767-
continue
768-
769-
# actually packaging the image
770-
image_data = payload["images"][image_id]
771-
ext = getImageExtension(image_data["url"], default="png")
772-
base64_image = image_data["data"].decode("utf-8")
773-
image_content_list.append(
774-
{
775-
"type": "image_url",
776-
"image_url": {"url": f"data:image/{ext};base64,{base64_image}"},
777-
}
778-
)
779-
780-
messages.append(
781-
{"role": story_item["role"], "content": image_content_list}
782-
)
783-
else:
784-
messages.append(story_item)
785-
786-
return {"messages": messages}
787-
788740
def tokenize(self, w):
789741
headers = {
790742
"Authorization": f"Bearer {self.api_key}",

0 commit comments

Comments
 (0)