From 93dee99f40d74bc8889e8f72e8d82ce7276a8145 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Apr 2024 21:21:41 +0200 Subject: [PATCH 1/2] Bump version to v0.15.0 This bumps the version in: * version.mk * the device plugin helm chart * the device plugin and gfd static deployments Signed-off-by: Evan Lezar --- deployments/helm/nvidia-device-plugin/Chart.yaml | 4 ++-- .../gpu-feature-discovery-daemonset-with-mig-mixed.yaml | 6 +++--- .../gpu-feature-discovery-daemonset-with-mig-single.yaml | 6 +++--- deployments/static/gpu-feature-discovery-daemonset.yaml | 6 +++--- deployments/static/gpu-feature-discovery-job.yaml.template | 6 +++--- .../static/nvidia-device-plugin-compat-with-cpumanager.yml | 2 +- ...nvidia-device-plugin-privileged-with-service-account.yml | 2 +- deployments/static/nvidia-device-plugin.yml | 2 +- versions.mk | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/deployments/helm/nvidia-device-plugin/Chart.yaml b/deployments/helm/nvidia-device-plugin/Chart.yaml index bef4df79a..4e3d3f820 100644 --- a/deployments/helm/nvidia-device-plugin/Chart.yaml +++ b/deployments/helm/nvidia-device-plugin/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: nvidia-device-plugin type: application description: A Helm chart for the nvidia-device-plugin on Kubernetes -version: "0.15.0-rc.2" -appVersion: "0.15.0-rc.2" +version: "0.15.0" +appVersion: "0.15.0" kubeVersion: ">= 1.10.0-0" home: https://github.com/NVIDIA/k8s-device-plugin diff --git a/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml b/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml index de0e50ae2..a7c75bf2e 100644 --- a/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 
0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml b/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml index c6a52ab03..c6dd2b477 100644 --- a/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-daemonset.yaml b/deployments/static/gpu-feature-discovery-daemonset.yaml index 8eb54e94b..7f796af68 100644 --- a/deployments/static/gpu-feature-discovery-daemonset.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: 
selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-job.yaml.template b/deployments/static/gpu-feature-discovery-job.yaml.template index 43ece7671..25afdeb6b 100644 --- a/deployments/static/gpu-feature-discovery-job.yaml.template +++ b/deployments/static/gpu-feature-discovery-job.yaml.template @@ -4,19 +4,19 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: template: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.15.0-rc.2 + app.kubernetes.io/version: 0.15.0 app.kubernetes.io/part-of: nvidia-gpu spec: nodeName: NODE_NAME containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] args: diff --git a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml index 25c320fe2..8ff9ddf2b 100644 --- a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml +++ b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: 
nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git a/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml b/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml index 9ecd71c2f..0d8820295 100644 --- a/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml +++ b/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml @@ -124,7 +124,7 @@ spec: - env: - name: PASS_DEVICE_SPECS value: "true" - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: nvidia-device-plugin-ctr securityContext: privileged: true diff --git a/deployments/static/nvidia-device-plugin.yml b/deployments/static/nvidia-device-plugin.yml index f750d79e4..f262e35ce 100644 --- a/deployments/static/nvidia-device-plugin.yml +++ b/deployments/static/nvidia-device-plugin.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0-rc.2 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git a/versions.mk b/versions.mk index a4394e1b5..ed60f480e 100644 --- a/versions.mk +++ b/versions.mk @@ -17,7 +17,7 @@ MODULE := github.com/NVIDIA/$(DRIVER_NAME) REGISTRY ?= nvcr.io/nvidia -VERSION ?= v0.15.0-rc.2 +VERSION ?= v0.15.0 # vVERSION represents the version with a guaranteed v-prefix vVERSION := v$(VERSION:v%=%) From 16e9eb50f668d3d049edb2cc07ee1f7a427d2cf2 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Apr 2024 21:38:23 +0200 Subject: [PATCH 2/2] Bump version in READMEs Signed-off-by: Evan Lezar --- CHANGELOG.md | 8 ++++++++ README.md | 2 +- RELEASE.md | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89da1f331..b219ede8f 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -1,5 +1,13 @@ ## Changelog +### Version v0.15.0 +- Moved `nvidia-device-plugin.yml` static deployment at the root of the repository to `deployments/static/nvidia-device-plugin.yml`. +- Simplify PCI device classes in NFD worker configuration. +- Update CUDA base image version to 12.4.1. +- Switch to Ubuntu22.04-based CUDA image for default image. +- Add new CUDA driver and runtime version labels to align with other NFD version labels. +- Update NFD dependency to v0.15.3. + ### Version v0.15.0-rc.2 - Bump CUDA base image version to 12.3.2 - Add `cdi-cri` device list strategy. This uses the CDIDevices CRI field to request CDI devices instead of annotations. diff --git a/README.md b/README.md index 788c5b262..eb48088c2 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ Once you have configured the options above on all the GPU nodes in your cluster, you can enable GPU support by deploying the following Daemonset: ```shell -$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.5/deployments/static/nvidia-device-plugin.yml +$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml ``` **Note:** This is a simple static daemonset meant to demonstrate the basic diff --git a/RELEASE.md b/RELEASE.md index 502f16a9f..5d830071e 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,7 +9,7 @@ Publishing the helm chart is currently manual, and we should move to an automate # Release Process Checklist - [ ] Update the README changelog -- [ ] Update the README to change occurances of the old version (e.g: `v0.14.5`) with the new version +- [ ] Update the README to change occurrences of the old version (e.g: `v0.15.0`) with the new version - [ ] Commit, Tag and Push to Gitlab - [ ] Build a new helm package with `./hack/package-helm-charts.sh` - [ ] Switch to the `gh-pages` branch and move the newly generated package to the `stable` helm repo