fix: rename roles to use underscores instead of hyphens which was causing the roles not to be found
This commit is contained in:
68
roles/nvidia_drivers/README.md
Normal file
68
roles/nvidia_drivers/README.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# NVIDIA Drivers Role
|
||||
|
||||
Installs NVIDIA drivers and CUDA runtime on Debian/Ubuntu systems with proper idempotency and error handling.
|
||||
|
||||
## Features
|
||||
|
||||
- Automatic OS detection (Debian/Ubuntu)
|
||||
- Idempotent repository management
|
||||
- Support for both proprietary and open-source drivers
|
||||
- Post-installation verification
|
||||
- Automatic cleanup on failures
|
||||
- Architecture detection (x86_64/arm64)
|
||||
|
||||
## Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `nvidia_install_kernel_modules` | `false` | Install kernel modules (full drivers) vs userspace only |
|
||||
| `nvidia_open_source_drivers` | `false` | Use open-source nvidia-open instead of proprietary drivers |
|
||||
| `nvidia_cuda_runtime_version` | `"13-1"` | CUDA runtime version to install |
|
||||
| `nvidia_optional_packages` | `[]` | Additional NVIDIA packages to install |
|
||||
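| `kernel_module_type` | `linux` | Prefix of the kernel headers package, which is installed as `<prefix>-headers-<running kernel release>` (e.g. `linux` yields `linux-headers-...`) |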
|
||||
|
||||
## Usage
|
||||
|
||||
### Full Driver Installation (with kernel modules)
|
||||
```yaml
|
||||
- name: Install NVIDIA drivers with kernel modules
|
||||
include_role:
|
||||
name: nvidia_drivers
|
||||
vars:
|
||||
nvidia_install_kernel_modules: true
|
||||
nvidia_open_source_drivers: false
|
||||
```
|
||||
|
||||
### Userspace Only (containers/existing drivers)
|
||||
```yaml
|
||||
- name: Install NVIDIA userspace libraries
|
||||
include_role:
|
||||
name: nvidia_drivers
|
||||
vars:
|
||||
nvidia_install_kernel_modules: false
|
||||
```
|
||||
|
||||
### With Optional Packages
|
||||
```yaml
|
||||
- name: Install NVIDIA with extras
|
||||
include_role:
|
||||
name: nvidia_drivers
|
||||
vars:
|
||||
nvidia_install_kernel_modules: true
|
||||
nvidia_optional_packages:
|
||||
- nvidia-settings
|
||||
- nvidia-modprobe
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- Debian 11+ or Ubuntu 20.04+
|
||||
- Root privileges
|
||||
- Internet connectivity
|
||||
|
||||
## Notes
|
||||
|
||||
- Role is idempotent - safe to run multiple times
|
||||
- Automatically detects if drivers are already installed
|
||||
- Cleans up on installation failures
|
||||
- Supports both x86_64 and arm64 architectures
|
||||
6
roles/nvidia_drivers/defaults/main.yml
Normal file
6
roles/nvidia_drivers/defaults/main.yml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
# Default variables for the nvidia_drivers role.

# true  -> install kernel headers plus a driver package (nvidia-driver or
#          nvidia-open, see nvidia_open_source_drivers).
# false -> install nvidia-driver-cuda and the CUDA runtime only.
nvidia_install_kernel_modules: false

# Selects nvidia-open instead of nvidia-driver when kernel modules are installed.
nvidia_open_source_drivers: false

# Suffix of the apt package name: cuda-runtime-<version>.
# Kept quoted so it is always handled as a string.
nvidia_cuda_runtime_version: '13-1'

# Extra apt packages to install; nothing is installed while the list is empty.
nvidia_optional_packages: []

# Prefix of the kernel headers package: <prefix>-headers-<ansible_kernel>.
kernel_module_type: linux
|
||||
109
roles/nvidia_drivers/tasks/main.yml
Normal file
109
roles/nvidia_drivers/tasks/main.yml
Normal file
@@ -0,0 +1,109 @@
|
||||
---
|
||||
# Populates ansible_facts['packages'], which the install tasks below consult
# to decide whether a driver package is already present.
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: apt
|
||||
|
||||
# Derives the two path components of the NVIDIA repository URL used by the
# keyring download task.
- name: Set OS-specific variables
  ansible.builtin.set_fact:
    # "ubuntu" + release with the dot removed on Ubuntu, otherwise
    # "debian" + major release number.
    cuda_repo_os: >-
      {{ ('ubuntu' + ansible_distribution_version.replace('.', ''))
      if ansible_distribution == 'Ubuntu'
      else ('debian' + ansible_distribution_major_version) }}
    # aarch64 is published as "arm64"; every other architecture string
    # (including x86_64) is passed through unchanged.
    cuda_arch: >-
      {{ 'arm64' if ansible_architecture == 'aarch64'
      else ansible_architecture }}
|
||||
|
||||
# CUDA repository keyring setup.
#
# The keyring file on disk is the single source of truth for every step:
# download, install and cleanup all run whenever the keyring is missing.
# Gating install/cleanup on "download changed" would skip the install when a
# .deb left in /tmp by an interrupted run makes get_url report "ok".
- name: Check if CUDA keyring is already installed
  ansible.builtin.stat:
    path: /usr/share/keyrings/cuda-archive-keyring.gpg
  register: cuda_keyring_installed

- name: Download CUDA keyring package
  ansible.builtin.get_url:
    url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_os }}/{{ cuda_arch }}/cuda-keyring_1.1-1_all.deb"
    dest: /tmp/cuda-keyring_1.1-1_all.deb
    mode: "0644"
  when: not cuda_keyring_installed.stat.exists
  register: keyring_downloaded

- name: Install CUDA keyring package
  ansible.builtin.apt:
    deb: /tmp/cuda-keyring_1.1-1_all.deb
    state: present
  when: not cuda_keyring_installed.stat.exists
  register: keyring_installed

- name: Clean up downloaded keyring package
  ansible.builtin.file:
    path: /tmp/cuda-keyring_1.1-1_all.deb
    state: absent
  when: not cuda_keyring_installed.stat.exists

# No cache_valid_time here: the repository was just added, so the package
# index must be refreshed even if the cache was updated moments ago.
- name: Update apt cache after keyring installation
  ansible.builtin.apt:
    update_cache: true
  when: keyring_installed is changed
|
||||
|
||||
# Read-only probe: the return code is stored in nvidia_check and inspected by
# the install tasks, so a failing nvidia-smi must neither fail the play nor
# be reported as a change.
- name: Check if NVIDIA drivers are already installed
  ansible.builtin.command:
    cmd: nvidia-smi
  changed_when: false
  failed_when: false
  register: nvidia_check
|
||||
|
||||
# Full driver install: headers for the running kernel plus the selected
# driver package. Runs when nvidia-smi did not succeed or the selected
# package is not in the gathered package facts.
- name: Install Kernel Headers and NVIDIA drivers (with kernel modules)
  vars:
    # Resolved once so the installed package and the "already installed"
    # lookup always refer to the same package (nvidia-open vs nvidia-driver).
    _nvidia_driver_package: "{{ 'nvidia-open' if nvidia_open_source_drivers else 'nvidia-driver' }}"
  ansible.builtin.apt:
    name:
      - "{{ kernel_module_type }}-headers-{{ ansible_kernel }}"
      - "{{ _nvidia_driver_package }}"
    state: present
  when:
    - nvidia_install_kernel_modules
    - nvidia_check.rc != 0 or ansible_facts['packages'][_nvidia_driver_package] is not defined
  register: kernel_driver_install
|
||||
|
||||
# Userspace-only path, taken when kernel modules are not requested.
# Runs when nvidia-smi did not succeed or nvidia-driver-cuda is absent from
# the gathered package facts.
- name: Install CUDA drivers and userspace libraries (userspace only)
  ansible.builtin.apt:
    state: present
    name:
      - nvidia-driver-cuda
      - "cuda-runtime-{{ nvidia_cuda_runtime_version }}"
  register: cuda_driver_install
  when:
    - not nvidia_install_kernel_modules
    - nvidia_check.rc != 0 or ansible_facts['packages']['nvidia-driver-cuda'] is not defined
|
||||
|
||||
# Skipped entirely while nvidia_optional_packages is an empty list.
- name: Install optional NVIDIA packages
  ansible.builtin.apt:
    state: present
    name: "{{ nvidia_optional_packages }}"
  when: nvidia_optional_packages | length > 0
|
||||
|
||||
# Post-install verification with cleanup on failure.
#
# The verification only runs when one of the install tasks changed something.
# A non-zero nvidia-smi exit fails that task and transfers control to the
# rescue section, which removes the driver packages and the keyring and then
# fails the play.
- name: Verify and handle NVIDIA driver installation
  block:
    - name: Verify NVIDIA driver installation
      ansible.builtin.command:
        cmd: nvidia-smi
      register: nvidia_verification
      changed_when: false
      when: kernel_driver_install is changed or cuda_driver_install is changed

    # A skipped task still registers its variable, but without "rc".
    # Testing "is defined" is therefore not enough: evaluating ".rc" on a
    # skipped result raises an error inside the block, which would trigger
    # the rescue and uninstall working drivers on every no-change run.
    - name: Display NVIDIA driver information
      ansible.builtin.debug:
        msg: "{{ nvidia_verification.stdout_lines }}"
      when: nvidia_verification is not skipped and nvidia_verification.rc | default(1) == 0

  rescue:
    # NOTE(review): nvidia-smi may also fail until the new kernel module is
    # loaded (e.g. before a reboot) - confirm that purging is wanted then.
    - name: Remove partially installed packages on failure
      ansible.builtin.apt:
        name:
          - nvidia-driver
          - nvidia-open
          - nvidia-driver-cuda
        state: absent
        autoremove: true

    - name: Remove CUDA keyring on failure
      ansible.builtin.apt:
        name: cuda-keyring
        state: absent

    - name: Fail with helpful message
      ansible.builtin.fail:
        msg: "NVIDIA driver installation failed. Packages have been cleaned up."
|
||||
Reference in New Issue
Block a user