forked from tensorlayer/TensorLayer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtutorial_mnist_distributed.yml
More file actions
87 lines (86 loc) · 2.33 KB
/
Copy pathtutorial_mnist_distributed.yml
File metadata and controls
87 lines (86 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# https://docs.docker.com/compose/compose-file/
#
# reference: https://docs.microsoft.com/en-us/azure/container-service/dcos-swarm/container-service-swarm-walkthrough
# 1. Create a Swarm cluster on Azure:
#    $ az group create -l southeastasia -n tensorlayer-swarm -o table --debug
#    $ az acs create -n tl-swarm-cluster --orchestrator-type Swarm -g tensorlayer-swarm --agent-count 3 -o table --debug
#
# 2. Create an SSH tunnel to the Swarm master and point the Docker client at it:
#    $ master=$(az acs show -n tl-swarm-cluster -g tensorlayer-swarm --query 'masterProfile.fqdn' | jq -r .)
#    $ ssh -p 2200 -fNL 2375:localhost:2375 azureuser@$master
#    $ export DOCKER_HOST=:2375
#
# 3. Start the services:
#    $ docker-compose -f tutorial_mnist_distributed.yml up
---
# Docker Compose definition of a three-container TensorFlow cluster for the
# distributed MNIST tutorial: two worker tasks (`master`, `worker`) and one
# "ps" task (`ps`).
#
# Every service uses the same image and runs the same script. The only
# difference between them is the "task" entry of TF_CONFIG, which tells each
# container which member of the shared "cluster" spec it is. The host names
# used in the cluster spec (ps, master, worker) are the service names below.
version: '3'

services:
  # Worker task 0; appears in the cluster spec as "master:3002".
  master:
    image: tensorlayer/tensorlayer:latest
    entrypoint:
      - python
      - /tensorlayer/example/tutorial_mnist_distributed.py
    environment:
      # Deliberately empty string - presumably hides all GPUs from the
      # process so the task runs on CPU (TODO confirm).
      CUDA_VISIBLE_DEVICES: ''
      # JSON text passed verbatim; `|-` keeps the line breaks and strips the
      # trailing newline. The body must stay indented deeper than the key.
      TF_CONFIG: |-
        {
          "cluster": {
            "ps": [
              "ps:3001"
            ],
            "worker": [
              "master:3002",
              "worker:3003"
            ]
          },
          "task": {
            "type": "worker",
            "index": 0
          }
        }

  # Worker task 1; appears in the cluster spec as "worker:3003".
  worker:
    image: tensorlayer/tensorlayer:latest
    entrypoint:
      - python
      - /tensorlayer/example/tutorial_mnist_distributed.py
    environment:
      # Deliberately empty string - presumably hides all GPUs from the
      # process so the task runs on CPU (TODO confirm).
      CUDA_VISIBLE_DEVICES: ''
      # Same cluster spec as the other services; only "task" differs.
      TF_CONFIG: |-
        {
          "cluster": {
            "ps": [
              "ps:3001"
            ],
            "worker": [
              "master:3002",
              "worker:3003"
            ]
          },
          "task": {
            "type": "worker",
            "index": 1
          }
        }

  # "ps" task 0; appears in the cluster spec as "ps:3001".
  ps:
    image: tensorlayer/tensorlayer:latest
    entrypoint:
      - python
      - /tensorlayer/example/tutorial_mnist_distributed.py
    environment:
      # Deliberately empty string - presumably hides all GPUs from the
      # process so the task runs on CPU (TODO confirm).
      CUDA_VISIBLE_DEVICES: ''
      # Same cluster spec as the other services; only "task" differs.
      TF_CONFIG: |-
        {
          "cluster": {
            "ps": [
              "ps:3001"
            ],
            "worker": [
              "master:3002",
              "worker:3003"
            ]
          },
          "task": {
            "type": "ps",
            "index": 0
          }
        }