Commit

Add deploy example, add remove folder only
AlessioDelConte committed Sep 2, 2024
1 parent e19041b commit 9bf0b68
Showing 11 changed files with 391 additions and 4 deletions.
22 changes: 22 additions & 0 deletions docker/deploy-example/.env
@@ -0,0 +1,22 @@
### DRMAAtic settings ###
# Maximum length of parameter values; values longer than this raise an error on job submission
PARAMS_VALUES_MAX_LENGTH=5000
# Set true if you want to remove the task directory when the task is deleted
REMOVE_JOB_FILES_ON_DELETE=True
# DRMAA library for drmaatic
DRMAA_LIBRARY_PATH="/usr/lib/slurm-drmaa/lib/libdrmaa.so.1"
# Set true if you want to use the SLURM database daemon to get the job status when the job is completed
SLURM_DRMAA_USE_SLURMDBD=1
# Set to True to expose only the JSON APIs; set to False to also include the browsable APIs
ONLY_JSON_APIS=False
# Maximum number of items for a paginated response
MAX_PAGE_SIZE=1000

### Django settings ###
DJANGO_SETTINGS_MODULE=server.settings
CORS_ALLOW_ALL_ORIGINS=True
# CORS_ALLOWED_ORIGINS=""
ALLOWED_HOSTS=*
TIME_ZONE=Europe/Rome

PYTHONUNBUFFERED=1
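Since the application reads its configuration from these `.env` files, a quick pre-flight check can catch a missing key before the container starts. This is only a sketch: the file path and the list of required keys are illustrative, taken from the example file above.

```shell
# Write a minimal example .env (contents from the file above) and verify required keys are present
cat > /tmp/drmaatic-example.env <<'EOF'
PARAMS_VALUES_MAX_LENGTH=5000
REMOVE_JOB_FILES_ON_DELETE=True
ALLOWED_HOSTS=*
EOF

for key in PARAMS_VALUES_MAX_LENGTH REMOVE_JOB_FILES_ON_DELETE ALLOWED_HOSTS; do
  if grep -q "^${key}=" /tmp/drmaatic-example.env; then
    echo "ok: $key"
  else
    echo "missing: $key"
  fi
done
```

The same loop can be dropped into a deploy script (e.g. `start-prod.sh`) to fail fast on an incomplete configuration.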
64 changes: 64 additions & 0 deletions docker/deploy-example/Dockerfile
@@ -0,0 +1,64 @@
FROM ubuntu:20.04 AS build

RUN apt update && apt install -y software-properties-common
RUN add-apt-repository ppa:natefoo/slurm-drmaa
RUN apt update && apt install -y libmunge-dev slurm-client libslurm-dev slurm-drmaa-dev python3.8-dev python3.8-venv \
python3-pip pkg-config build-essential default-libmysqlclient-dev python3-mysqldb gosu apache2 libapache2-mod-wsgi-py3 \
locales language-pack-en

# Set the locale to UTF-8 to avoid Python 3 falling back to ASCII encoding
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

RUN mkdir /run/munge
RUN chown -R munge: /etc/munge/ /var/log/munge/ /run/munge/
RUN chmod 0700 /etc/munge/ /var/log/munge/

# ATTENTION: If you are using user remapping, you need to set the same GID as the users group on the cluster.
# To do so, we create an internal user whose UID/GID, once remapped, matches the users group on the cluster.
# This is necessary to allow the container to write to the shared volume, and for the external user to read and
# write in the directories created by the container.
# The user created here is the one running the Django application with Apache2 and mod_wsgi (see the Apache2 configuration)
RUN groupdel users
RUN groupadd -g 997001 users
RUN useradd -u 1000042 myuser
RUN usermod -a -G users myuser
# Add users to the root user, so root can write to the shared volume
RUN usermod -a -G users root

# Create a virtual environment for Python 3 and use the requirements.txt file to install the dependencies
RUN python3.8 -m venv /opt/venv
RUN /opt/venv/bin/pip3 install --upgrade pip

# Copy the files from the host to the container
COPY . /app
WORKDIR /app

# Copy the munge key to the container
COPY --chown=munge:munge docker/deploy/munge.key /etc/munge/munge.key
RUN chmod 400 /etc/munge/munge.key

# Install the dependencies for DRMAAtic
RUN /opt/venv/bin/pip3 install -r requirements.txt

# Copy the entrypoint script to the container and make it executable
COPY docker/deploy/drmaa-entrypoint.sh /usr/local/bin/drmaa-entrypoint.sh
RUN chmod +x /usr/local/bin/drmaa-entrypoint.sh

# Copy the SLURM configuration files to the container
COPY docker/deploy/slurm.conf /etc/slurm-llnl/slurm.conf
COPY docker/deploy/slurmdbd.conf /etc/slurm-llnl/slurmdbd.conf

# Create the static directory and set the permissions
RUN mkdir -p /app/static
RUN chown -R myuser:users /app/static
RUN chown -R myuser:users /app/drmaatic/migrations

# Configure Apache2 to serve the Django application using mod_wsgi
RUN a2enmod wsgi

# Add ServerName to apache2.conf
RUN echo "ServerName localhost" >> /etc/apache2/apache2.conf

ENTRYPOINT ["/usr/local/bin/drmaa-entrypoint.sh"]
EXPOSE 80
63 changes: 63 additions & 0 deletions docker/deploy-example/README.md
@@ -0,0 +1,63 @@
# Docker deploy configuration for DRMAAtic

This directory contains a Docker deploy configuration for DRMAAtic. It is intended to be used for the
automatic deployment of DRMAAtic on a cluster, using GitHub Actions.
The action builds the Docker images and pushes them to the BioComputing Docker registry.

### Deployment

To deploy, you first need to commit and push your changes to the `main` (for prod) or `dev` (for staging) branch.
This will build and push the image to the private registry.

Then, you need to run the `start-staging.sh` or `start-prod.sh` script. This copies the needed files
to the server, and then runs the correct `docker-compose.yml` file.

### First run

Upon the first run, the database for DRMAAtic needs to be created and the migrations executed.
This can be done by accessing the correct database (e.g. using phpMyAdmin) as root (or a user with write privileges)
and creating a database named `drmaatic` (or `drmaatic_dev` for the staging environment).

Then, the migrations can be executed by running the following commands in the drmaatic container:

```bash
docker exec -it drmaatic /opt/venv/bin/python3.8 manage.py makemigrations
docker exec -it drmaatic /opt/venv/bin/python3.8 manage.py migrate
```

You should also create a superuser for the Django admin interface:

```bash
docker exec -it drmaatic /opt/venv/bin/python3.8 manage.py createsuperuser
```

### Configurations
The configuration files for DRMAAtic are `.env` files located in the settings folder, and are loaded by
the `docker-compose.yml` file.


## User permissions

This is a tough one. In the Dockerfile, a new user called `myuser` is created. This user has the same UID as the
`myuser` user from our LDAP server (1'000'042). Moreover, a new group called `users` is created, which has
the GID of the `users` group from our LDAP server (1'000'001) **minus 3000**, so **997'001**.
Why is this needed?
* On the Docker host, where the Docker daemon is installed, user remapping is enabled, shifting the UIDs and GIDs of
  users and groups by 3000 (for example).
* The `myuser` user is the one used to run the Django application via the `mod_wsgi` module of Apache. This user
  is also the one that connects to the `slurmctld` daemon to interact with the cluster, and at the same time it
  writes the job execution directory and its input files to the filesystem.
* When sending a job to the cluster, the controller must recognize the UID of the submitting user as a valid user.
  The UID used when sending the job is not shifted by 3000, so the `myuser` user in the container has to have
  the same UID as the `myuser` user in the LDAP server.
* When writing to the filesystem, the `myuser` user in the container has to have the same GID as the `users` group in the
  LDAP server, so that the files written by the container are accessible by the `myuser` user in the cluster, even though
  they effectively have different UIDs. This is made possible by having the same GID, one shifted by 3000 and the other not.
* To make it possible for the `myuser` LDAP user to write in the job execution directory, the directory has
  to have the correct permissions. This is done in two ways:
  * By setting the setgid bit on the root job directory with `chmod g+s`
  * By setting `umask 0002` in the `/etc/profile` file of all the worker nodes, and also in the Apache
    config file, under the `WSGIDaemonProcess` directive. This makes sure that all the files created by the `myuser`
    user inside the container have the correct permissions to be read by the `myuser` user in LDAP, via the
    `users` group.
If this doesn't make sense, it is because it doesn't. It is a mess, but it works.
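The GID arithmetic and the setgid/umask mechanism described above can be demonstrated locally. The paths under `/tmp` and the remapping offset are illustrative values from this deployment's example, not fixed constants.

```shell
# 1) The user-remapping arithmetic: container GID = LDAP GID minus the daemon's remap offset
LDAP_USERS_GID=1000001   # 'users' GID on the LDAP server
REMAP_OFFSET=3000        # example offset configured for the Docker daemon
echo $((LDAP_USERS_GID - REMAP_OFFSET))   # 997001, the GID used in the Dockerfile

# 2) The permission mechanism: umask 0002 + setgid on the job root means new
#    job directories come out group-writable and keep the parent's group
umask 0002                       # as set on worker nodes and in WSGIDaemonProcess
mkdir -p /tmp/jobroot-demo
chmod g+s /tmp/jobroot-demo      # subdirectories inherit the group from here on
mkdir -p /tmp/jobroot-demo/job1
stat -c '%a' /tmp/jobroot-demo/job1   # 2775: setgid + group-writable
```

Any file the container writes under such a directory stays readable and writable by other members of the shared group, which is exactly what lets the LDAP-side `myuser` work with files created inside the container.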
16 changes: 16 additions & 0 deletions docker/deploy-example/drmaa-entrypoint.sh
@@ -0,0 +1,16 @@
#!/bin/bash
set -e

if [ "$1" = "runserver" ]; then
    # Collect static files
    echo "---> Collecting static files ..."
    gosu myuser /opt/venv/bin/python3.8 manage.py collectstatic --noinput

    echo "---> Starting the MUNGE Authentication service (munged) ..."
    gosu munge /usr/sbin/munged

    echo "---> Starting Apache service ..."
    exec apachectl -D FOREGROUND
fi

exec "$@"
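The entrypoint implements a common pattern: one well-known command (`runserver`) triggers the full service startup, while anything else is passed through unchanged. A minimal reproduction of that dispatch logic, with the services stubbed out by `echo` so it runs anywhere:

```shell
# Sketch of the entrypoint's dispatch pattern; the real script starts
# collectstatic, munged, and Apache instead of echoing
dispatch() {
  if [ "$1" = "runserver" ]; then
    echo "collectstatic"
    echo "munged"
    echo "apache (foreground)"
    return 0
  fi
  "$@"    # stands in for the entrypoint's final 'exec "$@"'
}

dispatch runserver
dispatch echo "any other command runs unchanged"
```

This is why `docker run <image> bash` drops you into a shell for debugging, while the compose file's `command: [ "runserver" ]` brings up the full stack.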
2 changes: 2 additions & 0 deletions docker/deploy-example/munge.key
@@ -0,0 +1,2 @@
Here you should place the munge key from your /etc/munge directory, which is shared with all the compute nodes.

33 changes: 33 additions & 0 deletions docker/deploy-example/prod/.prod.env
@@ -0,0 +1,33 @@
### DRMAAtic settings ###
DRMAATIC_TASK_SCRIPT_DIR=/scriptDirectory

# WARNING: This path should be accessible outside of the container, as the job output files will be written there.
# You should mount the same path in the container as a volume,
# e.g. outside: /mnt/shared/DRMAAtic/jobsDirectory, inside: /mnt/shared/DRMAAtic/jobsDirectory,
# so that when the job runs outside the container it writes the output files to the same path.
DRMAATIC_JOB_OUTPUT_DIR=/mnt/shared/DRMAAtic/jobsDirectory
DRMAATIC_LOGGER_FILE_PTH=/drmaatic.log

DRMAA_LIBRARY_PATH="/usr/lib/slurm-drmaa/lib/libdrmaa.so"

DRMAATIC_WS_URL=https://urlToYourWS.com

ORCID_AUTH_URL=https://orcid.org/oauth/userinfo

DATABASE_ENGINE='django.db.backends.mysql'
DATABASE_NAME=drmaatic
DATABASE_USER=user
DATABASE_PASSWORD=password
DATABASE_HOST=machineName
DATABASE_PORT=3306

### Django settings ###
DJANGO_DEBUG=False
ONLY_JSON_APIS=True
DJANGO_SECRET_KEY=aVerySecretKey
CORS_ALLOW_ALL_ORIGINS=True
CSRF_COOKIE_SECURE=True
SESSION_COOKIE_SECURE=True
CSRF_TRUSTED_ORIGINS='https://urlToYourWS.com'
SECURE_BROWSER_XSS_FILTER=True
INTERNAL_IPS=''
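Since `.prod.env` ships with placeholder credentials (`aVerySecretKey`, `password`), a deploy script can refuse to start while they are still in place. A sketch, with the file path and placeholder list taken from the example above:

```shell
# Recreate the placeholder values from the example file, then scan for them
cat > /tmp/prod-env-check.env <<'EOF'
DJANGO_SECRET_KEY=aVerySecretKey
DATABASE_PASSWORD=password
EOF

if grep -Eq 'aVerySecretKey|DATABASE_PASSWORD=password' /tmp/prod-env-check.env; then
  echo "placeholder secrets found, refusing to deploy"
fi
```

In a real `start-prod.sh` the `echo` would be `exit 1`, so a forgotten secret never reaches production.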
36 changes: 36 additions & 0 deletions docker/deploy-example/prod/apache-config.conf
@@ -0,0 +1,36 @@
Define server_name drmaatic
Define app_home /app
Define env_root /opt/venv
Define logdir /logs

WSGIPythonHome ${env_root}
WSGIPythonPath ${app_home}

<VirtualHost *:80>
## serve static files if needed
Alias /static/ ${app_home}/static/
<Directory ${app_home}/static>
Require all granted
</Directory>

# wsgi app
WSGIPassAuthorization On
WSGIScriptAlias / ${app_home}/server/wsgi.py
WSGIDaemonProcess ${server_name} python-home=${env_root} python-path=/app:/opt/venv/lib/python3.8/site-packages home=/ user=myuser group=users umask=0002 lang='en_US.UTF-8' locale='en_US.UTF-8'
WSGIProcessGroup ${server_name}

# Grant access to the WSGI application script
<Directory ${app_home}/server>
<Files wsgi.py>
Require all granted
</Files>
</Directory>

# default logging (server_name)
LogLevel notice
ErrorLog /logs/apache2_error.log
CustomLog /logs/apache2_access.log combined

</VirtualHost>

# vim: syntax=apache ts=4 sw=4 sts=4 sr noet
44 changes: 44 additions & 0 deletions docker/deploy-example/prod/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
version: '3.5'
name: drmaatic-prod

services:
drmaatic:
image: registry.yourRegistry.com/drmaatic/drmaatic:latest
command: [ "runserver" ]
container_name: drmaatic
restart: unless-stopped
env_file:
- ../.env
- .prod.env
ports:
- "8302:80"
volumes:
- type: bind
source: ./apache-config.conf
target: /etc/apache2/sites-available/000-default.conf
# Mount the directories for the scripts, jobs and logs
- scriptdir:/scripts
- jobdir:/mnt/shared/DRMAAtic/jobsDirectory
- loggerdir:/logs
extra_hosts: # Add the controller machine to the hosts file so we can access it by name from the container
- "machineName:172.10.2.90"

volumes:
scriptdir:
driver: local
driver_opts:
type: none
o: bind
device: /scriptDirectory
jobdir:
driver: local
driver_opts:
type: none
o: bind
device: /mnt/shared/DRMAAtic/jobsDirectory
loggerdir:
driver: local
driver_opts:
type: none
o: bind
device: /logsDirectory
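The named volumes above use `type: none` with `o: bind`, which means the host-side `device` paths must already exist or `docker compose up` will fail. A pre-creation sketch; the `/tmp/compose-demo` prefix is for demonstration only, the real paths are the ones in the compose file:

```shell
# Pre-create the bind-mount source directories a compose file like the one
# above expects on the host (demo prefix, illustrative directory names)
for d in /tmp/compose-demo/scriptDirectory \
         /tmp/compose-demo/jobsDirectory \
         /tmp/compose-demo/logsDirectory; do
  mkdir -p "$d"
done

ls /tmp/compose-demo   # scriptDirectory  jobsDirectory  logsDirectory
```

Putting this loop in `start-prod.sh` before invoking compose avoids the "no such file or directory" failure on a fresh host.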
77 changes: 77 additions & 0 deletions docker/deploy-example/slurm.conf
@@ -0,0 +1,77 @@
# This is an example configuration of a SLURM cluster. Please refer to the SLURM documentation for more information.
# I've pointed out all the important settings that you need so that DRMAAtic can work properly.

ClusterName=cluster
ControlMachine=machineName.local # hostname of the machine where slurmctld is running; it needs the .local suffix to work properly
ControlAddr=172.10.2.90 # this is the IP address of the machine where the slurmctld is running
SlurmUser=root # this is the user that runs the slurmctld and slurmd daemons

SlurmctldPort=6817 # this is the port where the slurmctld listens, important for DRMAAtic communications
SlurmdPort=6818

# AUTHENTICATION - All nodes must have the same munge.key file (in /etc/munge)
AuthType=auth/munge # this is the authentication method used by the cluster

MailProg=/opt/slurm-mail/bin/slurm-spool-mail.py
MpiDefault=pmi2
#MpiParams=ports=#-#
ProctrackType=proctrack/cgroup
ReturnToService=2
SlurmctldPidFile=/run/slurmctld.pid
SlurmdPidFile=/run/slurmd.pid
SlurmdSpoolDir=/var/lib/slurm-llnl/slurmd
StateSaveLocation=/var/lib/slurm-llnl/slurmctld
SwitchType=switch/none
TaskPlugin=task/affinity
#
# SCHEDULING
FastSchedule=1
SchedulerType=sched/backfill
SchedulerParameters=bf_continue,bf_max_job_test=5000,kill_invalid_depend
# from 20+ use DependencyParameters instead of SchedulerParameters
#DependencyParameters=kill_invalid_depend
SelectType=select/cons_res
#SelectTypeParameters=CR_CPU
SelectTypeParameters=CR_CPU_Memory
#
# Activate the Multifactor Job Priority Plugin with decay
PriorityType=priority/multifactor
# 2 week half-life
PriorityDecayHalfLife=14-0
# The larger the job, the greater its job size priority.
PriorityFavorSmall=NO
# The job's age factor reaches 1.0 after waiting in the
# queue for 2 weeks.
PriorityMaxAge=14-0
# This next group determines the weighting of each of the
# components of the Multifactor Job Priority Plugin.
# The default value for each of the following is 1.
PriorityWeightAge=1000
PriorityWeightFairshare=10000
PriorityWeightJobSize=1000
PriorityWeightPartition=1000
PriorityWeightQOS=0 # don't use the qos factor

# LOGGING
# SlurmctldDebug=3
# SlurmdDebug=info
SlurmctldLogFile=/var/log/slurm-llnl/slurmctld.log
SlurmdLogFile=/var/log/slurm-llnl/slurmd.log

# ACCOUNTING - the accounting storage is the database where the accounting data is stored, where the slurmdbd daemon is running
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=machineName
AccountingStoragePort=6819 # this is the port where the slurmdbd listens
AccountingStorageLoc=slurm_acct_db
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/cgroup
#
MaxJobCount=50000
MaxArraySize=100000
DefMemPerCPU=1536
#
# COMPUTE NODES
# man slurm.conf says not to use CoresPerSocket/ThreadsPerCore if ThreadsPerCore>1 and you want per thread allocation
NodeName=c1,c2,c3 CPUs=16 TmpDisk=512000 RealMemory=32073
PartitionName=test Nodes=c1,c2,c3 Default=YES MaxTime=INFINITE State=UP

25 changes: 25 additions & 0 deletions docker/deploy-example/slurmdbd.conf
@@ -0,0 +1,25 @@
# Example slurmdbd.conf file.
#
# Archive info
ArchiveJobs=yes
ArchiveDir="/tmp"
#
# Authentication info
AuthType=auth/munge
#
# slurmDBD info
DbdAddr=machineName
DbdHost=machineName
DbdPort=6819
SlurmUser=slurm
#
LogFile=/var/log/slurm-llnl/slurmdbd.log
PidFile=/var/run/slurmdbd.pid
#
# Database info
StorageType=accounting_storage/mysql
StorageHost=localhost
StoragePort=3306
StoragePass=secretpassword
StorageUser=slurm
StorageLoc=slurm_acct_db
