Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d1a7a31
feat:add mlflow as default
linusseelinger Dec 18, 2025
4b7b9bf
Try mlflow-skinny as lightweight mlflow alternative
linusseelinger Dec 19, 2025
925cb8d
Cherry-pick integrated mlflow server for CI, various improvements
linusseelinger Dec 23, 2025
7ed85c2
Merge branch 'main' into linus/feat-mlflow-default-dep
linusseelinger Jan 7, 2026
2e748b6
Merge branch 'main' into linus/feat-mlflow-default-dep
linusseelinger Jan 7, 2026
533ecb1
Remove tests for obsolete config values
linusseelinger Jan 7, 2026
e75d681
Fix dep version regression
linusseelinger Jan 7, 2026
b5c3730
Merge branch 'main' into linus/feat-mlflow-default-dep
linusseelinger Jan 7, 2026
c845ed5
Improved error message
linusseelinger Jan 7, 2026
859f879
Minor readability improvement
linusseelinger Jan 7, 2026
ef10e1a
Upper bound for mlflow-skinny version
linusseelinger Jan 7, 2026
b650ddd
MLflow reachability check: Clarify connection error vs. bad return st…
linusseelinger Jan 7, 2026
b13cc31
suppress git warnings again
linusseelinger Jan 7, 2026
675b86f
mlflow reachable from host.docker.internal:*, always mapped to port 5000
linusseelinger Jan 7, 2026
ac09adb
Merge branch 'main' into linus/feat-mlflow-default-dep
linusseelinger Jan 7, 2026
de2a4ed
Ephemeral port for mlflow, document its behavior
linusseelinger Jan 8, 2026
4649c52
Merge branch 'linus/feat-mlflow-default-dep' of github.com:pasteurlab…
linusseelinger Jan 8, 2026
68be07e
Remove obsolete line in test
linusseelinger Jan 8, 2026
9f4ba19
Merge branch 'main' into linus/feat-mlflow-default-dep
linusseelinger Jan 8, 2026
c89b93c
Avoid connection check and mlflow server in test_mlflow_run_extra_args
linusseelinger Jan 8, 2026
bcc951f
Merge branch 'linus/feat-mlflow-default-dep' of github.com:pasteurlab…
linusseelinger Jan 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions docs/content/using-tesseracts/advanced.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,10 @@ Alternatively, you can log metrics and artifacts to an MLflow server by setting
docker-compose -f extra/mlflow/docker-compose-mlflow.yml up
```

This MLflow server shows logged items in the MLflow GUI at `http://localhost:5000`.

Launch the `metrics` example Tesseract with the following volume mount and `TESSERACT_MLFLOW_TRACKING_URI` to ensure that it connects to that MLflow server. Note that your Tesseract now needs to declare `mlflow` as a required package in its `requirements.txt` file.

MacOS and Windows with Docker Desktop requires the `host.docker.internal` hostname to access services running on the host machine:

```bash
tesseract serve --env=TESSERACT_MLFLOW_TRACKING_URI=http://host.docker.internal:5000 --volume mlflow-data:/mlflow-data:rw metrics
```

For Linux, set your Tesseracts to use the host network and access the MLflow server at `localhost`:
Launch the `metrics` example Tesseract with the following volume mount, network and `TESSERACT_MLFLOW_TRACKING_URI` to ensure that it connects to that MLflow server.

```bash
tesseract serve --env=TESSERACT_MLFLOW_TRACKING_URI=http://localhost:5000 --network=host --volume mlflow-data:/mlflow-data:rw metrics
tesseract serve --network=tesseract-mlflow-server --env=TESSERACT_MLFLOW_TRACKING_URI=http://mlflow-server:5000 --volume mlflow-data:/mlflow-data:rw metrics
```

The same options apply when executing Tesseracts through `tesseract run`.
Expand Down
2 changes: 0 additions & 2 deletions examples/metrics/tesseract_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
# Tesseract requirements file
# Generated by tesseract 0.9.2.dev16+g7ca45a2.d20250627 on 2025-06-27T11:44:45.333107

mlflow==3.1.1
21 changes: 18 additions & 3 deletions extra/mlflow/docker-compose-mlflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,30 @@ services:
image: ghcr.io/mlflow/mlflow:latest
restart: unless-stopped
user: 1000:1000
command: mlflow server --backend-store-uri sqlite:///mlflow-data/mlflow.db --default-artifact-root file:///mlflow-data/mlruns --host 0.0.0.0 --port 5000
command: >
mlflow server
--backend-store-uri sqlite:///mlflow-data/mlflow.db
--serve-artifacts
--artifacts-destination file:///mlflow-data/mlruns/mlartifacts
--host 0.0.0.0
--allowed-hosts "mlflow-server:5000,localhost:*,host.docker.internal:*"
--port 5000
volumes:
- mlflow-data:/mlflow-data
- mlflow-data:/mlflow-data:rw
ports:
- "5000:5000"
- 5000
depends_on:
mlflow-init:
condition: service_completed_successfully
networks:
- mlflow-network

volumes:
mlflow-data:
name: mlflow-data

networks:
# Use a deterministic network name so we can attach Tesseract
# containers to it more easily
mlflow-network:
name: tesseract-mlflow-server
1 change: 0 additions & 1 deletion inject_runtime_pyproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
"fastapi",
"httpx", # required by fastapi test client
"jsf",
"mlflow",
"numpy",
"pre-commit",
"pytest",
Expand Down
4,559 changes: 1,855 additions & 2,704 deletions production.uv.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tesseract_core/runtime/meta/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies = [
"pybase64<=1.4.3,>=1.4",
"numpy<=2.4.0,>=1.26",
"debugpy<=1.8.19,>=1.8.14",
"mlflow-skinny<=3.8.1,>=3.7.0",
]

[project.scripts]
Expand Down
98 changes: 52 additions & 46 deletions tesseract_core/runtime/mpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
from io import UnsupportedOperation
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

import mlflow
import requests

from tesseract_core.runtime.config import get_config
Expand Down Expand Up @@ -132,77 +134,81 @@ def __init__(self, base_dir: str | None = None) -> None:
"quiet" # Suppress potential MLflow git warnings
)

try:
import mlflow
except ImportError as exc:
raise ImportError(
"MLflow is required for MLflowBackend but is not installed"
) from exc

self._ensure_mlflow_reachable()
self.mlflow = mlflow

config = get_config()
tracking_uri = config.mlflow_tracking_uri

if not tracking_uri.startswith(("http://", "https://")):
# If it's a db file URI, convert to local path
tracking_uri = tracking_uri.replace("sqlite:///", "")
parsed = urlparse(tracking_uri)
if not parsed.scheme:
tracking_uri = f"https://{tracking_uri}"

# Relative paths are resolved against the base output path
if not Path(tracking_uri).is_absolute():
tracking_uri = (Path(get_config().output_path) / tracking_uri).resolve()

tracking_uri = f"sqlite:///{tracking_uri}"
parsed = urlparse(tracking_uri)
if parsed.scheme not in ("http", "https"):
raise ValueError(
f"Tesseract only supports accessing MLflow server via HTTP/HTTPS (got URI scheme: {parsed.scheme})"
)

self._ensure_mlflow_reachable(tracking_uri)
mlflow.set_tracking_uri(tracking_uri)

def _ensure_mlflow_reachable(self) -> None:
def _ensure_mlflow_reachable(self, mlflow_tracking_uri: str) -> None:
"""Check if the MLflow tracking server is reachable."""
config = get_config()
mlflow_tracking_uri = config.mlflow_tracking_uri
if mlflow_tracking_uri.startswith(("http://", "https://")):
try:
# Check for MLflow credentials in environment variables
username = os.environ.get("MLFLOW_TRACKING_USERNAME")
password = os.environ.get("MLFLOW_TRACKING_PASSWORD")

auth = None
if username and password:
auth = (username, password)

response = requests.get(mlflow_tracking_uri, timeout=5, auth=auth)
response.raise_for_status()
except requests.RequestException as e:
raise RuntimeError(
f"Failed to connect to MLflow tracking server at {mlflow_tracking_uri}. "
"Please make sure an MLflow server is running and TESSERACT_MLFLOW_TRACKING_URI is set correctly, "
"or switch to file-based logging by setting TESSERACT_MLFLOW_TRACKING_URI to an empty string. "
"If your MLflow server has authentication enabled, please make sure that "
"MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD are set correctly."
) from e
# Check for MLflow credentials in environment variables
username = os.environ.get("MLFLOW_TRACKING_USERNAME")
password = os.environ.get("MLFLOW_TRACKING_PASSWORD")

if (username and not password) or (password and not username):
raise RuntimeError(
"If one of MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD is defined, "
"both must be defined."
)

auth = None
if username and password:
auth = (username, password)

try:
response = requests.get(mlflow_tracking_uri, timeout=5, auth=auth)
response.raise_for_status()
except requests.HTTPError as e:
raise RuntimeError(
f"MLflow tracking server at {mlflow_tracking_uri} returned an error response: "
f"{e.response.status_code} {e.response.reason}. "
"Please check that the server is configured correctly. "
"If your MLflow server has authentication enabled, please make sure that "
"MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD are set correctly. "
"To switch to file-based logging instead, set TESSERACT_MLFLOW_TRACKING_URI "
"to an empty string."
) from e
except requests.RequestException as e:
raise RuntimeError(
f"Failed to connect to MLflow tracking server at {mlflow_tracking_uri}. "
"Please make sure an MLflow server is running at this address and "
"TESSERACT_MLFLOW_TRACKING_URI is set correctly. "
"To switch to file-based logging instead, set TESSERACT_MLFLOW_TRACKING_URI "
"to an empty string."
) from e

def log_parameter(self, key: str, value: Any) -> None:
"""Log a parameter to MLflow."""
self.mlflow.log_param(key, value)
mlflow.log_param(key, value)

def log_metric(self, key: str, value: float, step: int | None = None) -> None:
"""Log a metric to MLflow."""
self.mlflow.log_metric(key, value, step=step)
mlflow.log_metric(key, value, step=step)

def log_artifact(self, local_path: str) -> None:
"""Log an artifact to MLflow."""
self.mlflow.log_artifact(local_path)
mlflow.log_artifact(local_path)

def start_run(self) -> None:
"""Start a new MLflow run with optional extra arguments from config."""
config = get_config()
run_extra_args = config.mlflow_run_extra_args
self.mlflow.start_run(**run_extra_args)
mlflow.start_run(**run_extra_args)

def end_run(self) -> None:
"""End the current MLflow run."""
self.mlflow.end_run()
mlflow.end_run()


def _create_backend(base_dir: str | None) -> BaseBackend:
Expand Down
103 changes: 103 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
import random
import string
import subprocess
import time
from pathlib import Path
from shutil import copytree
from textwrap import indent
from traceback import format_exception
from typing import Any

import pytest
import requests

# NOTE: Do NOT import tesseract_core here, as it will cause typeguard to fail

Expand Down Expand Up @@ -473,3 +475,104 @@ def hacked_get(url, *args, **kwargs):
monkeypatch.setattr(engine.requests, "get", hacked_get)

yield mock_instance


@pytest.fixture(scope="module")
def mlflow_server():
    """Module-scoped fixture that runs a disposable MLflow server via docker compose.

    Starts the compose stack in ``extra/mlflow/docker-compose-mlflow.yml`` under a
    unique project name, waits until the server answers HTTP 200, and yields the
    tracking URI (``http://localhost:<ephemeral-port>``). Containers, volumes, and
    networks are torn down in the finally block; server logs are printed first to
    aid debugging of failed tests.

    Fails the test (rather than erroring) if docker compose is unavailable, the
    stack cannot start, or the server does not become ready in time.
    """
    # Check that the docker compose plugin is available before doing anything else.
    try:
        subprocess.run(
            ["docker", "compose", "version"],
            capture_output=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        pytest.fail("docker-compose not available")

    # Unique project name so concurrent test sessions don't collide on
    # container / network / volume names.
    project_name = f"test_mlflow_{int(time.time())}"

    compose_file = (
        Path(__file__).parent.parent / "extra" / "mlflow" / "docker-compose-mlflow.yml"
    )

    def _compose(*args: str, **kwargs) -> subprocess.CompletedProcess:
        """Run a docker compose subcommand against this fixture's project."""
        return subprocess.run(
            ["docker", "compose", "-f", str(compose_file), "-p", project_name, *args],
            **kwargs,
        )

    try:
        # Start the services in the background. Don't use check=True here:
        # on failure we want stderr in the test report, not a bare
        # CalledProcessError with captured (hidden) output.
        res = _compose("up", "-d", capture_output=True, text=True)
        if res.returncode != 0:
            pytest.fail(f"docker compose up failed:\n{res.stderr}")

        res = _compose(
            "ps",
            "--format",
            "json",
            check=True,
            capture_output=True,
            text=True,
        )
        # Compose < v2.21 prints a single JSON array; newer versions emit
        # JSON-lines (one object per line). Accept both formats.
        ps_output = res.stdout.strip()
        try:
            service_data = json.loads(ps_output)
        except json.JSONDecodeError:
            service_data = [
                json.loads(line) for line in ps_output.splitlines() if line.strip()
            ]
        if isinstance(service_data, list):
            service_data = service_data[0]
        service_port = service_data["Publishers"][0]["PublishedPort"]

        # Note: We don't track containers/volumes here because docker-compose down -v
        # will handle cleanup automatically in the finally block

        # Wait for MLflow to be ready (with timeout)
        tracking_uri = f"http://localhost:{service_port}"
        max_wait = 30  # seconds
        start_time = time.time()

        while time.time() - start_time < max_wait:
            try:
                response = requests.get(tracking_uri, timeout=2)
                if response.status_code == 200:
                    break
            except requests.RequestException:
                # Server not up yet; keep polling until the deadline.
                pass
            time.sleep(1)
        else:
            pytest.fail(f"MLflow server did not become ready within {max_wait}s")

        yield tracking_uri

    finally:
        # Print server logs for debugging before tearing anything down.
        result = _compose("logs", capture_output=True, text=True)
        print(result.stdout)
        # Stop and remove containers, networks, and the data volume.
        _compose("down", "-v", capture_output=True)
4 changes: 3 additions & 1 deletion tests/endtoend_tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def build_tesseract(
print_debug_info(result)
assert result.exit_code == 0, result.exception

image_tag = json.loads(result.stdout.strip())[0]
# Parse the last line of stdout which contains the JSON array of image tags
stdout_lines = result.stdout.strip().split("\n")
image_tag = json.loads(stdout_lines[-1])[0]

# This raises an error if the image does not exist
client.images.get(image_tag)
Expand Down
Loading