-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdsub_beast.sh
executable file
·164 lines (146 loc) · 6.11 KB
/
dsub_beast.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/bin/bash
# This is only a demonstration of how to run beast via dsub.
# The accelerator (GPU) type and count should be set according to
# the size of the data and number of partitions.
# --accelerator-count should scale with number of partitions in data
# --nvidia-driver-version must match compatible CUDA version
#
# GPU model requested for the job unless overridden later in this script.
# Available models are listed at: https://cloud.google.com/compute/docs/gpus/
GPU_TYPE='nvidia-tesla-p4'
# Docker image repository (the tag is appended separately).
DOCKER_IMAGE='quay.io/broadinstitute/beast-beagle-cuda'
# Resolve a path to the file it ultimately refers to, following symlinks.
# Arguments: $1 - path to resolve
# Outputs:   the resolved path on stdout; a path that is not a symlink is
#            printed unchanged (it is NOT made absolute)
function absolute_path() {
  local SOURCE="$1"
  # DIR is local so that calling this function directly does not overwrite
  # a variable of the same name in the caller's scope
  local DIR
  while [ -h "$SOURCE" ]; do # keep resolving while $SOURCE is still a symlink
    DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
    if [[ "$OSTYPE" == "darwin"* ]]; then
      # on darwin readlink is called without -f
      SOURCE="$(readlink "$SOURCE")"
    else
      SOURCE="$(readlink -f "$SOURCE")"
    fi
    # a relative link target is relative to the directory holding the symlink
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
  done
  echo "$SOURCE"
}
# Work out where this script lives so that files next to it can be found.
SOURCE="${BASH_SOURCE[0]}"
SCRIPT=$(absolute_path "$SOURCE")
# NOTE(review): the directory is taken from the unresolved $SOURCE, so when
# the script is invoked through a symlink SCRIPTPATH is the symlink's
# directory rather than the target's - confirm this is intended.
SCRIPT_DIRNAME="$(dirname "$SOURCE")"
# The directory is passed quoted (no intermediate echo) so that whitespace
# or glob characters in the path cannot be split or expanded.
SCRIPTPATH="$(cd -P "$SCRIPT_DIRNAME" &> /dev/null && pwd)"
SCRIPT="$SCRIPTPATH/$(basename "$SCRIPT")" # absolute path for this script
# Print the command-line help text to stdout.
# Globals: $0 (read) - its final path component is shown as the program name
function print_usage(){
  # Strip the directory part with parameter expansion: computed once, and
  # safe when $0 contains whitespace (an unquoted `basename $0` is not).
  local prog="${0##*/}"
  printf '%s\n' \
    "Usage: " \
    " ${prog} gs://path/to/in.xml gcp-project-name num_gpus [beagle_order]" \
    "" \
    " Note: The version of BEAST used should match the version of BEAUTi" \
    " used to generate the input xml file." \
    "" \
    " Docker images have been built for several versions of BEAST." \
    " The Docker image to be used can be selected by the BEAST_VERSION environment variable." \
    " For example:" \
    " BEAST_VERSION='1.10.5pre_thorney_v0.1.2' ${prog} gs://path/to/in.xml gcp-project-name num_gpus [beagle_order]" \
    " For available versions of BEAST, see the tags on Quay.io:" \
    " https://quay.io/repository/broadinstitute/beast-beagle-cuda?tab=tags" \
    " If BEAST_VERSION is not specified the 'latest' tag will be used." \
    "" \
    " The GPU type can be set via the BEAST_GPU_MODEL environment variable." \
    " For example:" \
    " BEAST_GPU_MODEL='nvidia-tesla-v100' ${prog} gs://path/to/in.xml gcp-project-name num_gpus [beagle_order]" \
    " For available GPU models, see:" \
    " https://cloud.google.com/compute/docs/gpus/" \
    "" \
    " If 'beagle_order' is not specified, the number of partitions will be read from" \
    " the input xml file and spread across the number of GPUs specified." \
    " Note: *the entire xml file will be downloaded from its bucket if 'beagle_order' is not specified*" \
    "" \
    " Extra arguments for BEAST may be passed via the BEAST_EXTRA_ARGS environment variable." \
    " For example:" \
    " BEAST_EXTRA_ARGS='-beagle_instances 4' ${prog} gs://path/to/in.xml gcp-project-name num_gpus [beagle_order]"
}
# dsub is required: when it cannot be found, print setup pointers and stop.
if ! hash dsub &> /dev/null; then
  printf '%s\n' \
    "" \
    "IMPORTANT: dsub must be installed and available to use this script" \
    "" \
    " -> Follow the dsub instructions to Get Started on Google Cloud:" \
    " https://github.com/DataBiosphere/dsub#getting-started-on-google-cloud" \
    " -> Install dsub:" \
    " https://github.com/DataBiosphere/dsub"
  exit 1
fi

# Fewer than three positional arguments: show the help text and stop.
if (( $# < 3 )); then
  print_usage
  exit 1
fi
# Docker image tag: the value of BEAST_VERSION when it is set and non-empty,
# otherwise the 'latest' tag.
DOCKER_IMAGE_TAG=":${BEAST_VERSION:-latest}"

# A non-empty BEAST_GPU_MODEL replaces the current GPU_TYPE value.
if [[ -n "${BEAST_GPU_MODEL}" ]]; then
  GPU_TYPE="${BEAST_GPU_MODEL}"
fi
# input args for this script
IN_XML="$1"
OUT_BUCKET="$(dirname "${IN_XML}")"
GCP_PROJECT="$2"
NUM_GPUS="$3"

# Build a comma-separated BEAGLE resource order, one entry per partition.
# Arguments: $1 - number of partitions
#            $2 - number of GPUs (0 puts every partition on resource 0, the CPU)
# Outputs:   the order on stdout; with GPUs, partitions are dealt out
#            round-robin over resources 1..$2 (e.g. 5 partitions on 2 GPUs
#            gives "1,2,1,2,1"); empty when there are no partitions
function generate_beagle_order() {
  local partitions="${1:-0}"
  local gpus="${2:-0}"
  local -a resources=()
  local i
  for (( i = 0; i < partitions; i++ )); do
    if (( gpus > 0 )); then
      resources+=( "$(( i % gpus + 1 ))" )
    else
      resources+=( "0" )
    fi
  done
  # joining the array with IFS=, cannot produce empty fields or a trailing comma
  local IFS=,
  printf '%s\n' "${resources[*]}"
}

# NUM_GPUS is used in arithmetic below, so anything other than digits is
# rejected up front (an empty value is treated as 0).
if [[ -n "${NUM_GPUS}" && ! "${NUM_GPUS}" =~ ^[0-9]+$ ]]; then
  echo "num_gpus must be a non-negative integer (got '${NUM_GPUS}')."
  echo "Exiting..."
  exit 1
fi

# if the user HAS NOT specified a beagle_order
# generate one based on the number of GPUs specified
# and the number of partitions in the input XML file
if [[ -z "$4" ]]; then
  # count the lines of the input XML that contain "<partition>"
  number_of_partitions="$(gsutil cat "${IN_XML}" | grep -c "<partition>")"
  # force base 10 so a value with leading zeros is not read as octal
  num_gpus=$(( 10#${NUM_GPUS:-0} ))
  # compare numerically: a string comparison would claim that 2 > 10
  if (( num_gpus > number_of_partitions )); then
    echo "More GPUs (${NUM_GPUS}) have been requested than there are partitions (${number_of_partitions})."
    echo "Consider reducing the number of GPUs, or specify the 'beagle_order' yourself."
    echo "Exiting..."
    exit 1
  fi
  BEAGLE_ORDER="$(generate_beagle_order "${number_of_partitions}" "${num_gpus}")"
else
  BEAGLE_ORDER="$4"
fi
# Accelerator flags for dsub; left empty when the GPU test below is false.
# Note: `>` inside [[ ]] compares strings lexicographically, not numerically.
if [[ ${NUM_GPUS} > 0 ]]; then
  ACCELERATOR_SPEC="--accelerator-type ${GPU_TYPE} --accelerator-count ${NUM_GPUS}"
else
  ACCELERATOR_SPEC=""
fi
# Print a summary of the job settings before submitting.
printf '%s\n' \
  "" \
  "Input file: ${IN_XML}" \
  "OUT_BUCKET: ${OUT_BUCKET}" \
  "NUM_GPUs: ${NUM_GPUS}" \
  "BEAGLE_ORDER: ${BEAGLE_ORDER}" \
  "GPU_TYPE: ${GPU_TYPE}" \
  "DOCKER_IMAGE: ${DOCKER_IMAGE}${DOCKER_IMAGE_TAG}" \
  "BEAST_EXTRA_ARGS: ${BEAST_EXTRA_ARGS}"

# Arguments that are always passed to dsub, one array element per word.
dsub_args=(
  --provider=google-cls-v2
  --project "${GCP_PROJECT}"
  --zone "us*"
  --image "${DOCKER_IMAGE}${DOCKER_IMAGE_TAG}"
  --input "INPUT_FILE=${IN_XML}"
  --output "OUTPUT_FILES=${OUT_BUCKET}/*"
  --logging "${OUT_BUCKET}"
  --env "BEAGLE_ORDER=${BEAGLE_ORDER}" "BEAST_EXTRA_ARGS=${BEAST_EXTRA_ARGS}"
  --script "${SCRIPTPATH}/run_beast.sh"
  --boot-disk-size 15
)

# ACCELERATOR_SPEC is expanded unquoted on purpose: it holds several
# space-separated flags (or nothing) that must become separate words.
# shellcheck disable=SC2086
dsub "${dsub_args[@]}" ${ACCELERATOR_SPEC}
#--wait