
Commit 4b0c0fb

Allow multiple OpenAI clients per Pipeline
This change allows a user to construct a PipelineContext with multiple OpenAI clients, such as:

```python
PipelineContext(
    clients={
        "default": OpenAI(base_url="https://foo.local"),
        "server_a": OpenAI(base_url="https://server_a.local"),
        "server_b": OpenAI(base_url="https://server_b.local"),
    }
)
```

And then, within the pipeline yaml, choose which client to apply to which LLMBlock via a new `client` key, such as:

```yaml
version: "1.0"
blocks:
  - name: server_a_client
    type: LLMBlock
    config:
      client: server_a
      ...
  - name: server_b_client
    type: LLMBlock
    config:
      client: server_b
      ...
```

See `docs/examples/multiple_llm_clients` for more details and a full example.

Resolves #521

Signed-off-by: Ben Browning <[email protected]>
1 parent 3890d99 commit 4b0c0fb

10 files changed: +327 -56 lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -2,6 +2,12 @@
 
 ### Features
 
+### Pipelines can now have LLMBlocks with different OpenAI clients
+
+For advanced use cases, PipelineContext now accepts a `clients` dictionary mapping strings to OpenAI clients. The special key "default" sets the OpenAI client used by LLMBlocks by default, but individual LLMBlocks can override which client they use via the `client` parameter in their yaml config.
+
+Backwards compatibility is maintained for Pipelines that only need a single client: setting the `client` property on a PipelineContext simply sets the default client in the `clients` dictionary automatically.
+
 ### LLMBlocks can now specify `model_family` or `model_id` in their config
 
 Each `LLMBlock` in a `Pipeline` can now specify `model_family` or `model_id` in their yaml configuration to set the values to use for these blocks, as opposed to setting this for the entire `Pipeline` in the `PipelineContext` object. This is useful for the cases where multiple `LLMBlocks` exist in the same `Pipeline` where each one uses a different model.
```
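To make the backwards-compatibility note above concrete, here is a minimal sketch; the `base_url` and `api_key` values are placeholders, not real endpoints or credentials, and the import paths follow the tests added in this commit:

```python
# Minimal sketch of the backwards-compatible behavior described in the
# changelog entry above. The base_url/api_key values are placeholders.
from openai import OpenAI

from instructlab.sdg.pipeline import PipelineContext

# Legacy form: a single client, which becomes the "default" entry.
single = PipelineContext(
    client=OpenAI(base_url="https://foo.local/v1", api_key="EMPTY")
)
assert single.clients["default"] is single.client

# New form: several named clients, with "default" picked up by LLMBlocks
# that don't name a client in their yaml config.
multi = PipelineContext(
    clients={
        "default": OpenAI(base_url="https://foo.local/v1", api_key="EMPTY"),
        "server_a": OpenAI(base_url="https://server_a.local/v1", api_key="EMPTY"),
        "server_b": OpenAI(base_url="https://server_b.local/v1", api_key="EMPTY"),
    }
)
assert multi.client is multi.clients["default"]
```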
Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+# Multiple LLM clients in a single Pipeline
+
+For advanced use cases, PipelineContext accepts a `clients` dictionary mapping strings to OpenAI clients. The special key "default" sets the OpenAI client used by LLMBlocks by default, but individual LLMBlocks can override which client they use via the `client` parameter in their yaml config.
+
+See `pipeline.yaml` in this directory for an example of a Pipeline that uses different clients per `LLMBlock`.
```
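As a rough sketch of how the example might be driven from Python, following the pattern in the functional test further below; the `MagicMock` clients and the pipeline path are assumptions for illustration only:

```python
# Sketch of loading the example pipeline with per-block clients, following
# tests/functional/test_examples.py below. MagicMock stands in for real
# OpenAI clients, and the pipeline path is illustrative.
from unittest.mock import MagicMock

from instructlab.sdg.pipeline import Pipeline, PipelineContext

context = PipelineContext(
    clients={
        "default": MagicMock(),
        "server_a": MagicMock(),
        "server_b": MagicMock(),
    }
)
pipeline = Pipeline.from_file(
    context, "docs/examples/multiple_llm_clients/pipeline.yaml"
)
# A real run would then call something like pipeline.generate(dataset)
# with actual OpenAI clients rather than mocks.
```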
Lines changed: 16 additions & 0 deletions
```diff
@@ -0,0 +1,16 @@
+system: You are a helpful AI assistant.
+
+introduction: |
+  Repeat the document below back to me verbatim.
+
+principles: |
+  Do not change anything.
+
+examples: ""
+
+generation: |
+  Document:
+  {{document}}
+
+start_tags: [""]
+end_tags: [""]
```
Lines changed: 44 additions & 0 deletions
```diff
@@ -0,0 +1,44 @@
+version: "1.0"
+blocks:
+  # This uses the default client, since we don't specify one
+  - name: default_client
+    type: LLMBlock
+    config:
+      model_family: mixtral
+      model_id: Mixtral-8x7B-Instruct-v0.1
+      config_path: llm_config.yaml
+      output_cols:
+        - column_one
+
+  # We can also explicitly specify the default client
+  - name: also_default_client
+    type: LLMBlock
+    config:
+      client: default
+      model_family: mixtral
+      model_id: Mixtral-8x7B-Instruct-v0.1
+      config_path: llm_config.yaml
+      output_cols:
+        - column_two
+
+  # This uses the "server_a" client explicitly
+  - name: server_a_client
+    type: LLMBlock
+    config:
+      client: server_a
+      model_family: granite
+      model_id: granite-7b-lab
+      config_path: llm_config.yaml
+      output_cols:
+        - column_three
+
+  # This uses the "server_b" client explicitly
+  - name: server_b_client
+    type: LLMBlock
+    config:
+      client: server_b
+      model_family: granite
+      model_id: granite-7b-lab
+      config_path: llm_config.yaml
+      output_cols:
+        - column_four
```

src/instructlab/sdg/blocks/llmblock.py

Lines changed: 17 additions & 6 deletions
```diff
@@ -63,6 +63,15 @@ def template_from_struct_and_config(struct, config):
     return PromptRegistry.template_from_string(struct.format(**filtered_config))
 
 
+def _resolve_client(client_name, context, block):
+    client = context.clients.get(client_name, None)
+    if not client:
+        raise BlockConfigParserError(
+            f"{type(block).__name__} {block.block_name} requests a client named {client_name} but no client of that name was found in the PipelineContext clients"
+        )
+    return client
+
+
 def _resolve_model_id(model_id, ctx_model_id, block):
     # If a model id was passed in the PipelineContext, use that
     if ctx_model_id:
@@ -105,6 +114,7 @@ def __init__(
         model_id=None,
         model_family=None,
         model_prompt=None,
+        client="default",
         gen_kwargs={},
         parser_kwargs={},
         batch_kwargs={},
@@ -117,6 +127,7 @@
         self.prompt_template = template_from_struct_and_config(
             self.prompt_struct, self.block_config
         )
+        self.client = _resolve_client(client, self.ctx, self)
        self.model_id = _resolve_model_id(model_id, self.ctx.model_id, self)
         self.model_family = models.get_model_family(
             _resolve_model_family(model_family, self.ctx.model_family),
@@ -146,7 +157,7 @@ def __init__(
         # Whether the LLM server supports a list of input prompts
         # and supports the n parameter to generate n outputs per input
         self.server_supports_batched = server_supports_batched(
-            self.ctx.client, self.model_id
+            self.client, self.model_id
         )
 
     def _parse(self, generated_string) -> dict:
@@ -236,9 +247,7 @@ def _generate(self, samples) -> list:
         logger.debug(f"STARTING GENERATION FOR LLMBlock USING PROMPTS: {prompts}")
         logger.debug(f"Generation arguments: {self.gen_kwargs}")
         if self.server_supports_batched:
-            response = self.ctx.client.completions.create(
-                prompt=prompts, **self.gen_kwargs
-            )
+            response = self.client.completions.create(prompt=prompts, **self.gen_kwargs)
             return [choice.text.strip() for choice in response.choices]
 
         results = []
@@ -248,7 +257,7 @@
         for prompt in prompts:
             logger.debug(f"CREATING COMPLETION FOR PROMPT: {prompt}")
             for _ in range(self.gen_kwargs.get("n", 1)):
-                response = self.ctx.client.completions.create(
+                response = self.client.completions.create(
                     prompt=prompt, **self.gen_kwargs
                 )
                 results.append(response.choices[0].text.strip())
@@ -514,9 +523,11 @@ def __init__(
         input_col,
         output_col,
         model_id=None,
+        client="default",
         gen_kwargs={},
     ) -> None:
         super().__init__(ctx, pipe, block_name)
+        self.client = _resolve_client(client, self.ctx, self)
         self.model_id = _resolve_model_id(model_id, self.ctx.model_id, self)
         self.input_col = input_col
         self.output_col = output_col
@@ -553,7 +564,7 @@ def _generate(self, samples) -> list:
         n = self.gen_kwargs.get("n", 1)
         for message in messages:
             logger.debug(f"CREATING CHAT COMPLETION FOR MESSAGE: {message}")
-            responses = self.ctx.client.chat.completions.create(
+            responses = self.client.chat.completions.create(
                 messages=message, **self.gen_kwargs
             )
             if n > 1:
```

src/instructlab/sdg/pipeline.py

Lines changed: 34 additions & 1 deletion
```diff
@@ -60,7 +60,10 @@ class PipelineContext: # pylint: disable=too-many-instance-attributes
     # on individual datasets
     DEFAULT_DATASET_NUM_PROCS = 8
 
-    client: OpenAI
+    # The key of our default client
+    DEFAULT_CLIENT_KEY = "default"
+
+    client: Optional[OpenAI] = None
     model_family: Optional[str] = None
     model_id: Optional[str] = None
     num_instructions_to_generate: Optional[int] = None
@@ -70,6 +73,9 @@ class PipelineContext: # pylint: disable=too-many-instance-attributes
     max_num_tokens: Optional[int] = llmblock.DEFAULT_MAX_NUM_TOKENS
     batch_size: int = DEFAULT_BATCH_SIZE
     batch_num_workers: Optional[int] = None
+    clients: Optional[Dict[str, OpenAI]] = None
+
+    _clients = None
 
     @property
     def batching_enabled(self) -> bool:
@@ -78,6 +84,33 @@ def batching_enabled(self) -> bool:
         """
         return self.batch_size > 0 and self.batch_num_workers != 1
 
+    @property  # type: ignore
+    def client(self):
+        return self.clients.get(self.DEFAULT_CLIENT_KEY, None)
+
+    @client.setter
+    def client(self, value):
+        if isinstance(value, property):
+            # No default value
+            value = None
+        self.clients[self.DEFAULT_CLIENT_KEY] = value
+
+    @property  # type: ignore
+    def clients(self):
+        if self._clients is None:
+            self._clients = {}
+        return self._clients
+
+    @clients.setter
+    def clients(self, value):
+        if isinstance(value, property):
+            # Empty hash default value
+            value = {}
+        if value:
+            # Only set _clients if passed in a value, so we don't
+            # override it with the default of None from the @dataclass
+            self._clients = value
+
 
 # This is part of the public API.
 class PipelineBlockError(Exception):
```
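A small sketch of how these property-backed defaults behave in practice, using `MagicMock` in place of real OpenAI clients as the tests in this commit do:

```python
# Sketch of the client/clients property behavior defined above; MagicMock
# stands in for real OpenAI clients.
from unittest.mock import MagicMock

from instructlab.sdg.pipeline import PipelineContext

ctx = PipelineContext(clients={"server_a": MagicMock()})

# No "default" entry yet, so the legacy `client` property reads as None.
assert ctx.client is None

# Assigning the legacy attribute routes through the setter and lands
# under the DEFAULT_CLIENT_KEY ("default") entry of the clients mapping.
ctx.client = MagicMock()
assert ctx.clients[PipelineContext.DEFAULT_CLIENT_KEY] is ctx.client
```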

tests/functional/test_examples.py

Lines changed: 28 additions & 0 deletions
```diff
@@ -2,6 +2,7 @@
 
 # Standard
 from pathlib import Path
+from unittest.mock import MagicMock, patch
 import shlex
 import shutil
 import subprocess
@@ -11,6 +12,7 @@
 from docling.document_converter import DocumentConverter
 
 # First Party
+from instructlab.sdg.pipeline import Pipeline, PipelineContext, _lookup_block_type
 from instructlab.sdg.utils.json import jlload
 
 
@@ -74,3 +76,29 @@ def test_example_iterblock(tmp_path: Path, examples_path: Path):
     output = jlload(output_jsonl)
     assert len(output) == 5
     assert output[4]["baz"] == "bar"
+
+
+def test_example_multiple_llm_clients(examples_path: Path):
+    pipeline_path = examples_path.joinpath("multiple_llm_clients", "pipeline.yaml")
+    default_client = MagicMock()
+    server_a_client = MagicMock()
+    server_b_client = MagicMock()
+    context = PipelineContext(
+        clients={
+            "default": default_client,
+            "server_a": server_a_client,
+            "server_b": server_b_client,
+        }
+    )
+    pipeline = Pipeline.from_file(context, pipeline_path)
+    blocks = []
+    for block_prop in pipeline.chained_blocks:
+        block_name = block_prop["name"]
+        block_type = _lookup_block_type(block_prop["type"])
+        block_config = block_prop["config"]
+        block = block_type(pipeline.ctx, pipeline, block_name, **block_config)
+        blocks.append(block)
+    assert blocks[0].client == default_client
+    assert blocks[1].client == default_client
+    assert blocks[2].client == server_a_client
+    assert blocks[3].client == server_b_client
```

tests/test_default_pipeline_configs.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -2,7 +2,7 @@
 
 # Standard
 from importlib import resources
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 import unittest
 
 # Third Party
@@ -53,7 +53,7 @@ def setUp(self):
 
     def test_pipeline_from_config(self):
         ctx = PipelineContext(
-            client=None,
+            client=MagicMock(),
            model_family="mixtral",
             model_id="model",
             num_instructions_to_generate=1,
```
