Skip to content

[BUG] DD_CONTAINER_ENV_AS_TAGS not working in docker swarm mode #41257

@Twisterking

Description

@Twisterking

Agent Environment
Version: Agent 7.70.2

Describe what happened:
I am running several docker app containers in docker swarm / stack mode.

Here is a minimal docker-compose for my docker stack:

services:
  # Traefik (image traefik:v3.1) with the Swarm provider enabled; attached to
  # the webswarm network and publishing ports 80, 443 and 8080.
  traefik:
    image: traefik:v3.1
    command:
      # Swarm provider (talks to the Docker socket mounted under `volumes`)
      - --providers.swarm=true
      - --providers.swarm.endpoint=unix:///var/run/docker.sock
      - --providers.swarm.exposedbydefault=false

      # Docker Providers (disabled: left commented out in favour of the Swarm provider)
      # - --providers.docker=true
      # - --providers.docker.watch=true
      # - --providers.docker.exposedbydefault=false

      # Entrypoints: "web" on :80 and "websecure" on :443 (both published under `ports`)
      - --entrypoints.web.address=:80
      - --entrypoints.websecure.address=:443

      # Ping healthcheck endpoint, served on the "web" entrypoint
      - --ping=true
      - --ping.entrypoint=web

      # API and dashboard
      - --api=true
      - --api.dashboard=true
      # ACME certificate resolver named "le"; the e-mail value is elided ("...")
      # in the report. Storage path /acme.json matches the bind mount below, and
      # the HTTP challenge uses the "web" entrypoint.
      - --certificatesresolvers.le.acme.email=...
      - --certificatesresolvers.le.acme.storage=/acme.json
      - --certificatesresolvers.le.acme.httpchallenge.entrypoint=web
      - --certificatesresolvers.le.acme.httpchallenge=true
      # Traefik's own log: written to /var/log/traefik.log (host /var/log is
      # bind-mounted below) at WARN level
      - --log=true
      - --log.filepath=/var/log/traefik.log
      - --log.level=WARN
      # Access log (disabled: left commented out)
      # - --accesslog=true
      # - --accesslog.filepath=/var/log/traefik_access.log
      # - --accesslog.bufferingsize=100
    ports:
      - 80:80
      - 443:443
      - 8080:8080 # optional dashboard
    volumes:
      # Docker socket, mounted read-only, used by the Swarm provider endpoint above
      - '/var/run/docker.sock:/var/run/docker.sock:ro'
      # Host file backing the ACME storage path configured above
      - '/files/traefik/acme.json:/acme.json'
      # Host log directory, so /var/log/traefik.log lands on the host
      - '/var/log:/var/log'
      # Named volume declared in the top-level `volumes` section
      - loadbalancerdata:/data
    deploy:
      mode: global
      placement:
        constraints:
          - node.role == manager # run Traefik only on manager(s)
      restart_policy:
        condition: on-failure
    networks:
      - webswarm

  # Application service whose logs are expected to be tagged per replica.
  # Runs as 7 replicas of the image supplied through the MYIMAGE variable.
  my-app:
    image: ${MYIMAGE}
    environment:
      - PORT=3000
      - SERVER_ENV=production
      - DD_ENV=production
      # Uses the Swarm template placeholder {{.Task.Slot}}; the intent is one
      # distinct value per replica (my-app-1, my-app-2, ...). This is the
      # variable that the dd-agent service maps to a tag through
      # DD_CONTAINER_ENV_AS_TAGS.
      - MY_CONTAINER_NAME=my-app-{{.Task.Slot}}
    volumes:
      - /files:/files
      # Docker socket, mounted read-only
      - '/var/run/docker.sock:/var/run/docker.sock:ro'
    # NOTE(review): depends_on is believed to be ignored by `docker stack deploy`
    # in swarm mode; confirm against the Compose file reference.
    depends_on:
      - traefik
    networks:
      - webswarm
    healthcheck:
      # Probes the app on port 3000 (same value as PORT above); a non-zero curl
      # exit is turned into exit code 1
      test: curl -f http://localhost:3000/healthcheck || exit 1
      interval: 30s
      timeout: 15s
      retries: 3
      start_period: 10s
    deploy:
      labels:
        # Label contents are elided ("...") in the report
        ...
      mode: replicated
      replicas: 7
      placement:
        preferences:
          - spread: node.id # makes sure to spread the containers across the nodes
      restart_policy:
        condition: on-failure
      update_config:
        # Rolling update: one task at a time, 15s between tasks, and the new
        # task is started before the old one is stopped (start-first)
        parallelism: 1
        delay: 15s
        order: start-first
      resources:
        # Per-task ceiling
        limits:
          cpus: '1.5'
          memory: 4096M
        # Per-task reservation
        reservations:
          cpus: '0.5'
          memory: 1536M

  # Datadog Agent, deployed in global mode on the webswarm network with log
  # collection enabled for all containers.
  dd-agent:
    # NOTE(review): the `latest` tag is not pinned; the report states the running
    # version is Agent 7.70.2.
    image: gcr.io/datadoghq/agent:latest
    volumes:
      # Read-only host mounts: Docker socket, container directories, /proc and
      # the cgroup filesystem
      - '/var/run/docker.sock:/var/run/docker.sock:ro'
      - '/var/lib/docker/containers:/var/lib/docker/containers:ro'
      - '/proc:/host/proc:ro'
      - '/sys/fs/cgroup/:/host/sys/fs/cgroup:ro'
      # Read-write host directory for the Agent's run path
      - '/opt/datadog-agent/run:/opt/datadog-agent/run:rw'
    environment:
      # API key is elided ("...") in the report
      - DD_API_KEY=...
      - DD_LOGS_ENABLED=true
      # NOTE(review): DD_LOGS_INJECTION looks like a tracing-library setting
      # rather than an Agent setting; confirm whether it has any effect here.
      - DD_LOGS_INJECTION=true
      - DD_ENV=production
      - DD_SITE=datadoghq.eu
      - DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL=true
      # Space-separated list of name: rules for containers to exclude
      - DD_CONTAINER_EXCLUDE=name:dd-agent name:mongomyadmin name:orderlion-temp name:traefik name:exitelink name:autoheal name:bullboard
      # JSON map from container env var name to tag name: here the my-app
      # variable MY_CONTAINER_NAME is mapped to the tag "container_name".
      # This is the setting the report says is not taking effect.
      # NOTE(review): "container_name" may collide with a tag the Agent already
      # assigns on its own; verify against the Datadog tag-extraction docs and
      # try a different tag name to rule this out.
      - DD_CONTAINER_ENV_AS_TAGS={"MY_CONTAINER_NAME":"container_name"}
    networks:
      - webswarm
    deploy:
      # global mode with no placement constraints (unlike traefik, which is
      # restricted to manager nodes)
      mode: global
      restart_policy:
        condition: on-failure

# Named volume mounted by the traefik service at /data.
volumes:
  loadbalancerdata:

# Network that all three services attach to; `external: true` marks it as
# defined outside this file.
networks:
  webswarm:
    external: true

Describe what you expected:
I would expect the logs in DD to show up as coming from docker containers with names like my-app-1, my-app-2, ...
But they simply don't!

I ran docker inspect on my main app containers in the swarm, and the env var MY_CONTAINER_NAME shows up there with the right value! Somehow, the dd-agent is simply ignoring the DD_CONTAINER_ENV_AS_TAGS env var. I tried almost everything but can't get it to work.

Additional environment details (Operating System, Cloud provider, etc):
Ubuntu 22.04 LTS, DD Agent 7.70.2, all hosted on AWS

Metadata

Metadata

Assignees

No one assigned

    Labels

    pending (Label for issues waiting a Datadog member's response.), team/container-platform (The Container Platform Team)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions