# Source: radis/docker-compose.prod.yml (openradx/radis, branch main)

# Shared settings for the RADIS application containers. Services pull them in
# with `<<: *default-app` (a shallow merge: keys set on the service win).
x-app: &default-app
  image: ghcr.io/openradx/radis:latest
  volumes:
    - web_data:/var/www/web
    # The TLS certificate and private key are mounted read-only so that no
    # container can modify or replace the host's key material. Both variables
    # are required; Compose aborts with the given message when one is missing.
    - '${SSL_SERVER_CERT_FILE:?must point to the TLS certificate file}:/etc/web/ssl/cert.pem:ro'
    - '${SSL_SERVER_KEY_FILE:?must point to the TLS private key file}:/etc/web/ssl/key.pem:ro'
  environment:
    # `:?` makes the variable mandatory (unset or empty aborts the deployment).
    DJANGO_EMAIL_URL: '${DJANGO_EMAIL_URL:?must be set and non-empty}'
    # Defaults to true when DJANGO_SECURE_SSL_REDIRECT is unset or empty.
    DJANGO_SECURE_SSL_REDIRECT: ${DJANGO_SECURE_SSL_REDIRECT:-true}
    DJANGO_SETTINGS_MODULE: radis.settings.production
    # Lives inside the web_data volume mounted at /var/www/web above.
    DJANGO_STATIC_ROOT: /var/www/web/static/
    POSTGRES_PASSWORD: '${POSTGRES_PASSWORD:?must be set and non-empty}'

# Baseline `deploy:` settings, pulled into each service with `<<: *deploy`.
# A service can override single keys after the merge (e.g. its own `replicas`).
x-deploy: &deploy
  restart_policy:
    # Restart only after a failure, and give up after three attempts.
    max_attempts: 3
    condition: on-failure
  # One instance unless the service says otherwise.
  replicas: 1

services:
  # Setup service. The web service may run as several replicas, so the manage
  # commands below cannot live in the web containers. They are run here, in a
  # single place, and the web containers wait until this service has finished.
  init:
    <<: *default-app
    deploy:
      <<: *deploy
    # The web service probes this host name on port 8000 before it starts.
    hostname: init.local
    # Steps are chained with `&&`, so the final `ok_server` on port 8000 is
    # only reached when every earlier step succeeded (presumably it exists
    # just to signal "setup done" to the waiting web containers).
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t ${WAIT_POSTGRES_TIMEOUT:-180} &&
        ./manage.py migrate &&
        ./manage.py collectstatic --no-input &&
        ./manage.py create_superuser &&
        ./manage.py retry_stalled_jobs &&
        ./manage.py ok_server --host 0.0.0.0 --port 8000
      "

  # Web server: daphne serving radis.asgi:application on port 80 (plain) and
  # port 443 (TLS, using the key and certificate mounted under /etc/web/ssl).
  web:
    <<: *default-app
    build:
      target: production
    deploy:
      <<: *deploy
      # Overrides the single replica from the shared deploy defaults.
      replicas: 3
    # Host ports default to 80 and 443; override with WEB_HTTP_PORT and
    # WEB_HTTPS_PORT.
    ports:
      - '${WEB_HTTP_PORT:-80}:80'
      - '${WEB_HTTPS_PORT:-443}:443'
    healthcheck:
      # `-k` skips certificate verification: the check talks to localhost.
      test:
        - 'CMD'
        - 'curl'
        - '-fk'
        - 'https://localhost/health/'
    # Blocks until init.local:8000 answers (timeout 300), then starts daphne.
    # The inner lines are more indented than `bash -c`, so the folded scalar
    # keeps their newlines and `\\` ends up as a shell line continuation.
    command: >
      bash -c "
        wait-for-it -s init.local:8000 -t 300 &&
        echo 'Starting web server ...' &&
        daphne -b 0.0.0.0 -p 80 \\
          -e ssl:443:privateKey=/etc/web/ssl/key.pem:certKey=/etc/web/ssl/cert.pem \\
          radis.asgi:application
      "

  # Background worker started with `-q default` (presumably the queue name).
  default_worker:
    <<: *default-app
    deploy:
      <<: *deploy
    # Wait for PostgreSQL first; the timeout defaults to 180 and can be changed
    # through WAIT_POSTGRES_TIMEOUT.
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t ${WAIT_POSTGRES_TIMEOUT:-180} &&
        ./manage.py bg_worker -q default
      "

  # Background worker started with `-q llm` (presumably the queue name).
  llm_worker:
    <<: *default-app
    deploy:
      <<: *deploy
    # Wait for PostgreSQL first; the timeout defaults to 180 and can be changed
    # through WAIT_POSTGRES_TIMEOUT.
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t ${WAIT_POSTGRES_TIMEOUT:-180} &&
        ./manage.py bg_worker -q llm
      "

  # Only the password and deploy settings are given here; no image is set in
  # this block (presumably it comes from a base compose file; TODO confirm).
  postgres:
    deploy:
      <<: *deploy
    environment:
      # Mandatory: deployment aborts when POSTGRES_PASSWORD is unset or empty.
      POSTGRES_PASSWORD: '${POSTGRES_PASSWORD:?}'

  # SGLang inference server for the model named by LLM_MODEL_NAME, listening
  # on port 8080 and reserving one GPU.
  llm_gpu:
    image: lmsysorg/sglang:latest
    hostname: llm.local

    # The container shares the host's network and IPC namespaces and runs
    # privileged.
    network_mode: host
    ipc: host
    privileged: true
    ulimits:
      # -1 lifts the locked-memory limit.
      memlock: -1
      # 64 MiB stack.
      stack: 67108864

    environment:
      # All four are optional and fall back to an empty string.
      HF_TOKEN: '${HF_TOKEN:-}'
      HTTP_PROXY: '${HTTP_PROXY:-}'
      HTTPS_PROXY: '${HTTPS_PROXY:-}'
      NO_PROXY: '${NO_PROXY:-}'
    volumes:
      # Hugging Face cache kept in a named volume.
      - models_data:/root/.cache/huggingface

    entrypoint: python3 -m sglang.launch_server
    # Folded into a single argument line; LLM_MODEL_NAME is mandatory.
    command: >
      --model-path ${LLM_MODEL_NAME:?}
      --host 0.0.0.0
      --port 8080
    healthcheck:
      test:
        - 'CMD-SHELL'
        - 'curl -f http://localhost:8080/health || exit 1'

    deploy:
      <<: *deploy
      resources:
        reservations:
          # https://gist.github.com/medihack/6a6d24dc6376939e1919f32409c2119f
          generic_resources:
            - discrete_resource_spec:
                value: 1
                kind: gpu

# Named volumes used by the services above; both are declared without options.
volumes:
  # Mounted at /root/.cache/huggingface in llm_gpu.
  models_data:
  # Mounted at /var/www/web in the application containers.
  web_data: