@@ -20,7 +20,8 @@
 import contextlib
 import textwrap
 
-from fastapi import Depends, HTTPException, Request, Response, status
+from fastapi import Depends, HTTPException, Request, status
+from fastapi.responses import StreamingResponse
 from itsdangerous import BadSignature, URLSafeSerializer
 from pydantic import NonNegativeInt
 from sqlalchemy.orm import joinedload
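
The import swap above is the heart of the change: Response buffers the entire body in memory before sending it, while StreamingResponse writes chunks to the socket as an iterator produces them. A minimal standalone sketch of that behavior, with a hypothetical endpoint and generator that are not part of this PR:

from collections.abc import Iterator

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


def fake_log_lines() -> Iterator[str]:
    # Stand-in for the task log reader: yields one NDJSON record at a time.
    for i in range(3):
        yield f'{{"event": "line {i}"}}\n'


@app.get("/stream")
def stream_logs() -> StreamingResponse:
    # Each yielded chunk is flushed to the client immediately, so callers can
    # tail the output instead of waiting for the full body to be assembled.
    return StreamingResponse(content=fake_log_lines(), media_type="application/x-ndjson")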
@@ -119,12 +120,17 @@ def get_log(
     )
     ti = session.scalar(query)
     if ti is None:
-        query = select(TaskInstanceHistory).where(
-            TaskInstanceHistory.task_id == task_id,
-            TaskInstanceHistory.dag_id == dag_id,
-            TaskInstanceHistory.run_id == dag_run_id,
-            TaskInstanceHistory.map_index == map_index,
-            TaskInstanceHistory.try_number == try_number,
+        query = (
+            select(TaskInstanceHistory)
+            .where(
+                TaskInstanceHistory.task_id == task_id,
+                TaskInstanceHistory.dag_id == dag_id,
+                TaskInstanceHistory.run_id == dag_run_id,
+                TaskInstanceHistory.map_index == map_index,
+                TaskInstanceHistory.try_number == try_number,
+            )
+            .options(joinedload(TaskInstanceHistory.dag_run))
+            # we need to joinedload the dag_run, since FileTaskHandler._render_filename needs ti.dag_run
         )
         ti = session.scalar(query)
 
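For context on the joinedload: FileTaskHandler._render_filename reads ti.dag_run after the query returns, and without eager loading that attribute access would trigger a separate lazy-load SELECT (and would fail with DetachedInstanceError if the instance is no longer attached to a session). A self-contained sketch with toy models (deliberately simplified, not Airflow's real mappings) showing the difference:

from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    joinedload,
    mapped_column,
    relationship,
)


class Base(DeclarativeBase):
    pass


class DagRun(Base):
    __tablename__ = "dag_run"
    id: Mapped[int] = mapped_column(primary_key=True)


class TaskInstance(Base):
    __tablename__ = "task_instance"
    id: Mapped[int] = mapped_column(primary_key=True)
    run_id: Mapped[int] = mapped_column(ForeignKey("dag_run.id"))
    dag_run: Mapped[DagRun] = relationship()  # lazy by default


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(TaskInstance(id=1, dag_run=DagRun(id=1)))
    session.commit()

with Session(engine) as session:
    # joinedload pulls the DagRun row in the same SELECT via a LEFT OUTER JOIN.
    ti = session.scalar(select(TaskInstance).options(joinedload(TaskInstance.dag_run)))

# The session is closed, but dag_run was eagerly loaded, so this still works;
# without the joinedload it would raise DetachedInstanceError here.
print(ti.dag_run.id)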
@@ -137,21 +143,24 @@ def get_log(
         with contextlib.suppress(TaskNotFound):
             ti.task = dag.get_task(ti.task_id)
 
-    if accept == Mimetype.JSON or accept == Mimetype.ANY:  # default
-        logs, metadata = task_log_reader.read_log_chunks(ti, try_number, metadata)
-        encoded_token = None
+    if accept == Mimetype.NDJSON:  # only an explicit application/x-ndjson Accept header gets a streaming response
+        # LogMetadata(TypedDict) is used as type annotation for log_reader; added ignore to suppress mypy error
+        log_stream = task_log_reader.read_log_stream(ti, try_number, metadata)  # type: ignore[arg-type]
+        headers = None
         if not metadata.get("end_of_log", False):
-            encoded_token = URLSafeSerializer(request.app.state.secret_key).dumps(metadata)
-        return TaskInstancesLogResponse.model_construct(continuation_token=encoded_token, content=logs)
-    # text/plain, or something else we don't understand. Return raw log content
-
-    # We need to exhaust the iterator before we can generate the continuation token.
-    # We could improve this by making it a streaming/async response, and by then setting the header using
-    # HTTP Trailers
-    logs = "".join(task_log_reader.read_log_stream(ti, try_number, metadata))
-    headers = None
-    if not metadata.get("end_of_log", False):
-        headers = {
-            "Airflow-Continuation-Token": URLSafeSerializer(request.app.state.secret_key).dumps(metadata)
-        }
-    return Response(media_type="application/x-ndjson", content=logs, headers=headers)
+            headers = {
+                "Airflow-Continuation-Token": URLSafeSerializer(request.app.state.secret_key).dumps(metadata)
+            }
+        return StreamingResponse(media_type="application/x-ndjson", content=log_stream, headers=headers)
+
+    # application/json, or something else we don't understand.
+    # Return JSON format, which is easier for users to debug.
+
+    # LogMetadata(TypedDict) is used as type annotation for log_reader; added ignore to suppress mypy error
+    structured_log_stream, out_metadata = task_log_reader.read_log_chunks(ti, try_number, metadata)  # type: ignore[arg-type]
+    encoded_token = None
+    if not out_metadata.get("end_of_log", False):
+        encoded_token = URLSafeSerializer(request.app.state.secret_key).dumps(out_metadata)
+    return TaskInstancesLogResponse.model_construct(
+        continuation_token=encoded_token, content=list(structured_log_stream)
+    )
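
From a client's perspective, the two branches now differ in where the continuation token travels: the NDJSON branch streams the body and carries the token in the Airflow-Continuation-Token response header, while the JSON branch buffers the content and embeds the token as continuation_token in the payload. A hypothetical client sketch, where the base URL and route shape are assumed rather than taken from this diff:

import requests

BASE = "http://localhost:8080/api/v2"  # assumed host and API prefix
URL = f"{BASE}/dags/d1/dagRuns/r1/taskInstances/t1/logs/1"  # assumed route shape

# NDJSON: read the body incrementally; the token (if the log is not finished)
# arrives in a response header.
with requests.get(URL, headers={"Accept": "application/x-ndjson"}, stream=True) as resp:
    for line in resp.iter_lines():
        print(line.decode())
    token = resp.headers.get("Airflow-Continuation-Token")

# JSON: the body is buffered server-side and the token rides in the payload.
resp = requests.get(URL, headers={"Accept": "application/json"})
token = resp.json().get("continuation_token")

# Either token can be sent back on a follow-up request (e.g. as a query
# parameter) to resume reading where the previous call left off.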