# docker-compose.prod.yml
# Shared settings for every service that runs the RADIS application image.
# Services pull these in with `<<: *default-app`. The YAML merge is shallow:
# a service that declares its own `volumes` or `environment` key replaces the
# whole key rather than extending it.
x-app: &default-app
  image: ghcr.io/openradx/radis:latest
  volumes:
    - web_data:/var/www/web
    # `${VAR:?}` makes Compose abort with an error when VAR is unset or empty,
    # so a missing certificate or key path is caught before anything starts.
    - "${SSL_SERVER_CERT_FILE:?}:/etc/web/ssl/cert.pem"
    - "${SSL_SERVER_KEY_FILE:?}:/etc/web/ssl/key.pem"
  environment:
    DJANGO_EMAIL_URL: "${DJANGO_EMAIL_URL:?}"
    # `${VAR:-true}` falls back to "true" when VAR is unset or empty.
    DJANGO_SECURE_SSL_REDIRECT: "${DJANGO_SECURE_SSL_REDIRECT:-true}"
    DJANGO_SETTINGS_MODULE: radis.settings.production
    DJANGO_STATIC_ROOT: /var/www/web/static/
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?}"
# Baseline deploy settings. Services merge these into their own `deploy` key
# with `<<: *deploy` and may override individual entries such as `replicas`.
x-deploy: &deploy
  replicas: 1
  restart_policy:
    # Restart a container only after it fails, and give up after 3 attempts.
    condition: on-failure
    max_attempts: 3
services:
  # The manage commands below must not run inside the web container in
  # production, because the web service may have multiple replicas. They are
  # run exactly once by this service instead; the web containers wait until
  # it is listening on port 8000 before they start.
  init:
    <<: *default-app
    hostname: init.local
    # The script lines are indented deeper than `bash -c "` so that the folded
    # scalar (`>`) keeps their line breaks instead of joining them with spaces.
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t ${WAIT_POSTGRES_TIMEOUT:-180} &&
        ./manage.py migrate &&
        ./manage.py collectstatic --no-input &&
        ./manage.py create_superuser &&
        ./manage.py retry_stalled_jobs &&
        ./manage.py ok_server --host 0.0.0.0 --port 8000
      "
    deploy:
      <<: *deploy

  web:
    <<: *default-app
    build:
      target: production
    # Quoted so the mappings are always read as strings, whatever the
    # substituted host port looks like.
    ports:
      - "${WEB_HTTP_PORT:-80}:80"
      - "${WEB_HTTPS_PORT:-443}:443"
    # Waits up to 300 seconds for the init service before starting daphne.
    # The script lines must stay more-indented than `bash -c "`: the `\\`
    # line continuations only work if the folded scalar keeps the line breaks.
    # NOTE(review): `\\` is presumably reduced to a single backslash when
    # Compose splits the command string - confirm before changing it.
    command: >
      bash -c "
        wait-for-it -s init.local:8000 -t 300 &&
        echo 'Starting web server ...' &&
        daphne -b 0.0.0.0 -p 80 \\
          -e ssl:443:privateKey=/etc/web/ssl/key.pem:certKey=/etc/web/ssl/cert.pem \\
          radis.asgi:application
      "
    healthcheck:
      # -f: fail on HTTP error status; -k: skip TLS certificate verification
      # (the check connects to `localhost`).
      test: ["CMD", "curl", "-fk", "https://localhost/health/"]
    deploy:
      <<: *deploy
      replicas: 3

  # Background worker for the `default` queue.
  default_worker:
    <<: *default-app
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t ${WAIT_POSTGRES_TIMEOUT:-180} &&
        ./manage.py bg_worker -q default
      "
    deploy:
      <<: *deploy

  # Background worker for the `llm` queue.
  llm_worker:
    <<: *default-app
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t ${WAIT_POSTGRES_TIMEOUT:-180} &&
        ./manage.py bg_worker -q llm
      "
    deploy:
      <<: *deploy

  # NOTE(review): no image is declared here; presumably the service is defined
  # in a base Compose file that this file is layered on - confirm.
  postgres:
    environment:
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?}"
    deploy:
      <<: *deploy

  # SGLang server listening on port 8080, reserving one GPU.
  llm_gpu:
    image: lmsysorg/sglang:latest
    hostname: llm.local
    environment:
      # `${VAR:-}` substitutes an empty string when VAR is unset, so all of
      # these are optional.
      HF_TOKEN: "${HF_TOKEN:-}"
      HTTP_PROXY: "${HTTP_PROXY:-}"
      HTTPS_PROXY: "${HTTPS_PROXY:-}"
      NO_PROXY: "${NO_PROXY:-}"
    volumes:
      # Keeps the Hugging Face cache in a named volume.
      - models_data:/root/.cache/huggingface
    network_mode: host
    privileged: true
    entrypoint: python3 -m sglang.launch_server
    # Lines at the same indentation are folded into one space-separated
    # argument string. LLM_MODEL_NAME is required (`:?`).
    command: >
      --model-path ${LLM_MODEL_NAME:?}
      --host 0.0.0.0
      --port 8080
    ulimits:
      # -1 removes the locked-memory limit; the stack limit is 64 MiB.
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"]
    deploy:
      <<: *deploy
      resources:
        reservations:
          # https://gist.github.com/medihack/6a6d24dc6376939e1919f32409c2119f
          generic_resources:
            - discrete_resource_spec:
                kind: gpu
                value: 1
# Named volumes used by the services in this file, declared without options:
# `web_data` is mounted at /var/www/web by the application services, and
# `models_data` holds the Hugging Face cache of the LLM server.
volumes:
  web_data:
  models_data: