# diffy/docker-compose.yml (opendiffy/diffy, master branch)

# Compose file format version declared for this stack.
# NOTE(review): recent Docker Compose releases are understood to treat the
# top-level `version` key as informational only — confirm before relying on it.
version: "3.9"

services:
  # MongoDB instance that the diffy service connects to (host "mongodb",
  # port 27017, root credentials below).
  mongodb:
    image: mongo
    container_name: mongodb
    environment:
      # Local-development credentials. The diffy service's
      # spring.data.mongodb.username/password must match these values.
      - MONGO_INITDB_ROOT_USERNAME=root
      - MONGO_INITDB_ROOT_PASSWORD=pass12345
    ports:
      - "27017:27017"
    healthcheck:
      # `ping` can be run without authenticating (root auth is enabled above),
      # and `--eval` makes mongosh exit non-zero when the server is unreachable
      # or the command throws, so the exit status reflects server health.
      test:
        - "CMD"
        - "mongosh"
        - "localhost:27017/test"
        - "--quiet"
        - "--eval"
        - "db.adminCommand('ping').ok"
      interval: 10s
      timeout: 10s
      retries: 10
    restart: unless-stopped

  # Grafana Loki, started as user 0 with the local config file mounted from
  # ./etc. Container port 3100 is published on host port 3101.
  loki:
    image: grafana/loki:3.0.0
    user: "0"
    command:
      - "-config.file=/etc/loki/loki-local.yaml"
    volumes:
      - ./etc/loki-local.yaml:/etc/loki/loki-local.yaml
      - ./data/loki-data:/tmp/loki
    ports:
      - "3101:3100"
    healthcheck:
      # Probe the /ready endpoint from inside the container.
      test:
        - "CMD-SHELL"
        - "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"
      retries: 12
      interval: 10s
      timeout: 5s

  # Grafana Tempo, the OTLP endpoint that diffy exports traces to
  # (http://tempo:4317). Config and data live under ./etc and ./data.
  tempo:
    image: grafana/tempo:2.4.0
    restart: unless-stopped
    command:
      - "-config.file=/etc/tempo.yaml"
    volumes:
      - ./etc/tempo-local.yaml:/etc/tempo.yaml
      - ./data/tempo-data:/tmp/tempo
    ports:
      # tempo HTTP API
      - "3102:3100"
      # OTLP gRPC
      - "4317:4317"
    healthcheck:
      # Probe the /ready endpoint from inside the container.
      test:
        - "CMD-SHELL"
        - "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"
      retries: 12
      interval: 15s
      timeout: 5s

  # ── LLM inference ────────────────────────────────────────────────────────────
  # Downloads Qwen2.5-1.5B-Instruct Q4_K_M (~1 GB) once into a named volume.
  # Subsequent starts find the model already present and exit immediately,
  # without installing curl or touching the network.
  #
  # The download is written to a ".part" file and renamed only after curl
  # succeeds, so an interrupted or failed transfer can never leave a truncated
  # model at the final path. Any failure makes the container exit non-zero,
  # which keeps llama-server (service_completed_successfully) from starting.
  model-init:
    profiles: ["llm-bundled"]
    image: alpine:3.19
    volumes:
      - llm-models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -eu
        if [ -f /models/qwen2.5-1.5b-instruct-q4_k_m.gguf ]; then
          echo '==> Model already present, skipping download.'
          exit 0
        fi
        apk add --no-cache curl -q
        echo '==> Downloading Qwen2.5-1.5B-Instruct Q4_K_M (~1 GB) — first run only...'
        curl -fL --retry 5 --retry-delay 3 --progress-bar \
          -o /models/qwen2.5-1.5b-instruct-q4_k_m.gguf.part \
          'https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf'
        mv /models/qwen2.5-1.5b-instruct-q4_k_m.gguf.part \
          /models/qwen2.5-1.5b-instruct-q4_k_m.gguf
        echo '==> Download complete.'
    restart: "no"

  # llama.cpp HTTP server serving the model fetched by model-init from the
  # shared llm-models volume. Only started with the "llm-bundled" profile and
  # only after model-init has completed successfully.
  llama-server:
    profiles: ["llm-bundled"]
    image: ghcr.io/ggerganov/llama.cpp:server
    volumes:
      - llm-models:/models
    command:
      - "-m"
      - "/models/qwen2.5-1.5b-instruct-q4_k_m.gguf"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8080"
      - "--ctx-size"
      - "2048"
      - "--alias"
      - "qwen2.5"
      - "-ngl"
      - "0"        # CPU-only; set to 99 if the host has a GPU
    ports:
      - "8082:8080"   # exposed for local debugging; diffy uses internal hostname
    healthcheck:
      # Try curl first and fall back to wget, so the probe works whichever
      # HTTP client the image provides; healthy only when /health reports "ok".
      # NOTE(review): the llama.cpp server image is understood to ship curl
      # but not wget — confirm against the image actually pulled.
      test:
        - "CMD-SHELL"
        - "(curl -fsS http://localhost:8080/health || wget -qO- http://localhost:8080/health) 2>/dev/null | grep -q ok"
      interval: 15s
      timeout: 5s
      retries: 12
      start_period: 30s
    depends_on:
      model-init:
        condition: service_completed_successfully
    restart: unless-stopped
  # ─────────────────────────────────────────────────────────────────────────────

  # Example-service instances that diffy compares. primary and secondary both
  # run the "production" tag; candidate runs the "candidate" tag. diffy reaches
  # each of them on port 5000 via its candidate / master.* settings.
  primary:
    image: "diffy/example-service:production"

  secondary:
    image: "diffy/example-service:production"

  candidate:
    image: "diffy/example-service:candidate"

  # The diffy service. Publishes 8888 (http.port) and 8880 (proxy.port) on the
  # host and is started after tempo, mongodb and the three example services.
  diffy:
    image: diffy/diffy:latest
    depends_on:
      - tempo
      - mongodb
      - primary
      - secondary
      - candidate
    ports:
      - "8888:8888"
      - "8880:8880"
    volumes:
      - ./data/logs:/app/logs
    # env_file: "./diffy.env"
    environment:
      # MongoDB connection; must match the credentials set on the mongodb service.
      spring.data.mongodb.authentication-database: "admin"
      spring.data.mongodb.host: "mongodb"
      spring.data.mongodb.port: "27017"
      spring.data.mongodb.username: "root"
      spring.data.mongodb.password: "pass12345"

      # Upstream targets and proxy behaviour.
      candidate: "candidate:5000"
      master.primary: "primary:5000"
      master.secondary: "secondary:5000"
      responseMode: "primary"
      service.protocol: "http"
      allowHttpSideEffects: "true"
      serviceName: "Sample Service"
      proxy.port: "8880"
      http.port: "8888"
      rootUrl: "localhost:8888"

      # OpenTelemetry: traces go to the tempo service over OTLP; metrics export is off.
      otel.javaagent.debug: "false"
      otel.metrics.exporter: "none"
      otel.traces.exporter: "otlp"
      otel.exporter.otlp.endpoint: "http://tempo:4317"
      otel.resource.attributes: "service.name=diffy"
      spring.application.name: "diffy"

      # Logging; the log file lands in the ./data/logs bind mount.
      logging.level.web: "INFO"
      logging.level.io.opentelemetry: "WARN"
      logging.level.root: "INFO"
      logging.file.name: "/app/logs/diffy.log"
      logging.file.max-size: "10MB"
      logging.file.max-history: "1"
      logging.file.clean-history-on-start: "true"
      dockerComposeLocal: "true"

      # LLM settings, overridable through LLM_PROVIDER / LLM_URL / LLM_MODEL.
      # The default URL points at llama-server, which only runs when the
      # "llm-bundled" profile is enabled.
      llm.provider: "${LLM_PROVIDER:-ollama}"
      llm.ollama.url: "${LLM_URL:-http://llama-server:8080}"
      llm.ollama.model: "${LLM_MODEL:-qwen2.5}"

  # Promtail, configured from ./etc/promtail-local.yaml. It mounts the same
  # ./data/logs directory that diffy writes its log file into and is started
  # after diffy and loki.
  promtail:
    image: grafana/promtail:2.9.8
    depends_on:
      - diffy
      - loki
    command:
      - "-config.file=/etc/promtail/promtail-local.yaml"
    volumes:
      - ./etc/promtail-local.yaml:/etc/promtail/promtail-local.yaml
      - ./data/logs:/app/logs

  # Prometheus, launched with an explicit entrypoint so it reads the config
  # mounted at /etc/prometheus.yaml. Started after diffy; UI on host port 9090.
  prometheus:
    image: prom/prometheus:latest
    depends_on:
      - diffy
    entrypoint: ["/bin/prometheus", "--config.file=/etc/prometheus.yaml"]
    volumes:
      - ./etc/prometheus.yaml:/etc/prometheus.yaml
    ports:
      - "9090:9090"

  # Grafana on host port 3000, provisioned from ./data/grafana-data and started
  # after prometheus, tempo and loki.
  grafana:
    image: grafana/grafana:11.2.0
    depends_on:
      - prometheus
      - tempo
      - loki
    ports:
      - "3000:3000"
    environment:
      # Anonymous access with the Admin role and no login form.
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
      GF_AUTH_DISABLE_LOGIN_FORM: "true"
    volumes:
      - ./data/grafana-data/datasources:/etc/grafana/provisioning/datasources
      - ./data/grafana-data/dashboards-provisioning:/etc/grafana/provisioning/dashboards
      - ./data/grafana-data/dashboards:/var/lib/grafana/dashboards

volumes:
  # Persists the model across restarts; ~1 GB, downloaded once.
  llm-models: {}