From 2a534a8c72e604cf20db32d37490a5bb4e9fbcca Mon Sep 17 00:00:00 2001
From: Jason Kincl
Date: Thu, 9 Nov 2023 16:36:13 -0500
Subject: [PATCH] Updating Slurm config

---
Reviewer notes (everything between the `---` line above and the first
`diff --git` line is discarded by `git am`):

* priorityclass.yaml adds a non-default PriorityClass named
  `high-priority-batch` (value 1000000); statefulset-compute.yaml opts the
  compute pods into it through `priorityClassName`.
* The required pod anti-affinity matches pods labelled app=slurm and
  component=compute with topologyKey `kubernetes.io/hostname`, i.e. at most
  one compute pod is scheduled per node.
* The shell snippet in the last hunk (its parent key lies outside the hunk)
  now runs `scontrol delete nodename=$HOSTNAME` instead of marking the node
  down. Presumably the compute nodes register dynamically with slurmctld;
  confirm before merging.
* The same hunk adds a CPU request and limit of 3 (request == limit).
* priorityclass.yaml now ends with a newline. The abbreviated blob id on its
  `index` line predates that change.
* YAML indentation in the hunks was reconstructed from the Kubernetes schema;
  verify the context lines against the target files before applying.

 slurm/manifests/kustomization.yaml       |  1 +
 slurm/manifests/priorityclass.yaml       |  7 +++++++
 slurm/manifests/statefulset-compute.yaml | 22 +++++++++++++++++++++-
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 slurm/manifests/priorityclass.yaml

diff --git a/slurm/manifests/kustomization.yaml b/slurm/manifests/kustomization.yaml
index 74be77a..aad6fed 100644
--- a/slurm/manifests/kustomization.yaml
+++ b/slurm/manifests/kustomization.yaml
@@ -11,6 +11,7 @@ resources:
   - serviceaccount.yaml
   - clusterrolebinding.yaml
   - claim.yaml
+  - priorityclass.yaml
   - servicemonitor.yaml
   - scaledobject.yaml
 
diff --git a/slurm/manifests/priorityclass.yaml b/slurm/manifests/priorityclass.yaml
new file mode 100644
index 0000000..613dd73
--- /dev/null
+++ b/slurm/manifests/priorityclass.yaml
@@ -0,0 +1,7 @@
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: high-priority-batch
+value: 1000000
+globalDefault: false
+description: "This priority class should be used for high priority batch pods only."
diff --git a/slurm/manifests/statefulset-compute.yaml b/slurm/manifests/statefulset-compute.yaml
index 360b385..c9e3e8c 100644
--- a/slurm/manifests/statefulset-compute.yaml
+++ b/slurm/manifests/statefulset-compute.yaml
@@ -35,6 +35,7 @@ spec:
         app: slurm
         component: compute
     spec:
+      priorityClassName: high-priority-batch
       automountServiceAccountToken: false
       shareProcessNamespace: true
       dnsConfig:
@@ -43,6 +44,20 @@ spec:
       enableServiceLinks: false
       terminationGracePeriodSeconds: 15
       serviceAccountName: slurm
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - slurm
+                  - key: component
+                    operator: In
+                    values:
+                      - compute
+              topologyKey: "kubernetes.io/hostname"
       initContainers:
         - name: fix-munge-paths
           image: ghcr.io/naps-product-sa/openshift-batch/munge:latest
@@ -74,7 +89,12 @@ spec:
                   - /bin/bash
                   - -c
                   - |
-                    scontrol update nodename=$HOSTNAME state=down reason=shutdown
+                    scontrol delete nodename=$HOSTNAME
+          resources:
+            requests:
+              cpu: 3
+            limits:
+              cpu: 3
           securityContext:
             runAsUser: 0
             runAsGroup: 0