fix: rename roles to use underscores instead of hyphens which was causing the roles not to be found

This commit is contained in:
2026-04-14 15:19:21 -04:00
parent b2379e597e
commit 62f4a3ea73
31 changed files with 31 additions and 31 deletions

View File

@@ -0,0 +1,68 @@
# NVIDIA Drivers Role
Installs NVIDIA drivers and CUDA runtime on Debian/Ubuntu systems with proper idempotency and error handling.
## Features
- Automatic OS detection (Debian/Ubuntu)
- Idempotent repository management
- Support for both proprietary and open-source drivers
- Post-installation verification
- Automatic cleanup on failures
- Architecture detection (x86_64/arm64)
## Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `nvidia_install_kernel_modules` | `false` | Install kernel modules (full drivers) vs userspace only |
| `nvidia_open_source_drivers` | `false` | Use open-source nvidia-open instead of proprietary drivers |
| `nvidia_cuda_runtime_version` | `"13-1"` | CUDA runtime version to install, hyphenated as in the apt package name (`"13-1"` installs `cuda-runtime-13-1`) |
| `nvidia_optional_packages` | `[]` | Additional NVIDIA packages to install |
| `kernel_module_type` | `linux` | Prefix of the kernel headers package; the role installs `<prefix>-headers-<running kernel release>` (e.g. `linux-headers-...`) |
## Usage
### Full Driver Installation (with kernel modules)
```yaml
# Installs kernel headers plus the proprietary driver package.
- name: Install NVIDIA drivers with kernel modules
  ansible.builtin.include_role:
    name: nvidia_drivers
  vars:
    nvidia_install_kernel_modules: true
    nvidia_open_source_drivers: false
```
### Userspace Only (containers/existing drivers)
```yaml
# Installs only the userspace CUDA driver libraries and runtime.
- name: Install NVIDIA userspace libraries
  ansible.builtin.include_role:
    name: nvidia_drivers
  vars:
    nvidia_install_kernel_modules: false
```
### With Optional Packages
```yaml
# Full driver install plus extra apt packages listed by the caller.
- name: Install NVIDIA with extras
  ansible.builtin.include_role:
    name: nvidia_drivers
  vars:
    nvidia_install_kernel_modules: true
    nvidia_optional_packages:
      - nvidia-settings
      - nvidia-modprobe
```
## Requirements
- Debian 11+ or Ubuntu 20.04+
- Root privileges
- Internet connectivity
## Notes
- Role is idempotent - safe to run multiple times
- Automatically detects if drivers are already installed
- Cleans up on installation failures
- Supports both x86_64 and arm64 architectures

View File

@@ -0,0 +1,6 @@
---
# true: install kernel headers plus the driver package (kernel modules).
# false: install only nvidia-driver-cuda and the CUDA runtime (userspace).
nvidia_install_kernel_modules: false
# true selects the nvidia-open package, false selects nvidia-driver.
# Only consulted when nvidia_install_kernel_modules is true.
nvidia_open_source_drivers: false
# Suffix of the cuda-runtime-<version> apt package; kept quoted so it stays a string.
nvidia_cuda_runtime_version: "13-1"
# Extra apt packages to install; nothing extra is installed while the list is empty.
nvidia_optional_packages: []
# Prefix of the kernel headers package: <prefix>-headers-<running kernel release>.
kernel_module_type: linux

View File

@@ -0,0 +1,109 @@
---
# Collect the installed apt packages into ansible_facts['packages'] so that
# later tasks can test whether a given package is already present.
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: apt
# Derive the two path components of the NVIDIA CUDA apt repository URL
# (.../repos/<cuda_repo_os>/<cuda_arch>/) from gathered facts.
# Facts are read through ansible_facts[...] so the task does not depend on
# fact injection into the top-level variable namespace.
- name: Set OS-specific variables
  ansible.builtin.set_fact:
    # Ubuntu: "ubuntu" + distribution version with the dots removed.
    # Anything else: "debian" + major version.
    cuda_repo_os: >-
      {{
        ('ubuntu' ~ (ansible_facts['distribution_version'] | replace('.', '')))
        if ansible_facts['distribution'] == 'Ubuntu'
        else ('debian' ~ ansible_facts['distribution_major_version'])
      }}
    # aarch64 is mapped to "arm64"; every other architecture is passed through.
    # NOTE(review): confirm the NVIDIA repo publishes an "arm64" directory for
    # every targeted distribution (server ARM builds may live elsewhere).
    cuda_arch: >-
      {{
        'arm64' if ansible_facts['architecture'] == 'aarch64'
        else ansible_facts['architecture']
      }}
# CUDA apt repository bootstrap: download and install the cuda-keyring .deb
# only when the keyring file is not yet on disk, then refresh the apt cache.
- name: Check if CUDA keyring is already installed
  ansible.builtin.stat:
    path: /usr/share/keyrings/cuda-archive-keyring.gpg
  register: cuda_keyring_installed

- name: Download CUDA keyring package
  ansible.builtin.get_url:
    url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_os }}/{{ cuda_arch }}/cuda-keyring_1.1-1_all.deb"
    dest: /tmp/cuda-keyring_1.1-1_all.deb
    mode: "0644"
  when: not cuda_keyring_installed.stat.exists
  register: keyring_downloaded

# Gated on the missing keyring, not on "download changed": a .deb left in /tmp
# by an interrupted run makes get_url report "ok" (no re-download), which
# would otherwise skip the install on every later run.
- name: Install CUDA keyring package
  ansible.builtin.apt:
    deb: /tmp/cuda-keyring_1.1-1_all.deb
    state: present
  when: not cuda_keyring_installed.stat.exists
  register: keyring_installed

# Same gate as the install so a stale download is removed as well.
- name: Clean up downloaded keyring package
  ansible.builtin.file:
    path: /tmp/cuda-keyring_1.1-1_all.deb
    state: absent
  when: not cuda_keyring_installed.stat.exists

# Deliberately no cache_valid_time: the repository was just added, so the
# cache must be refreshed even if it was updated moments ago.
- name: Update apt cache after keyring installation
  ansible.builtin.apt:
    update_cache: true
  when: keyring_installed is changed
# Probe for an existing driver by running nvidia-smi. The task never fails
# and never reports a change; later tasks only look at nvidia_check.rc.
- name: Check if NVIDIA drivers are already installed
  ansible.builtin.command:
    argv:
      - nvidia-smi
  changed_when: false
  failed_when: false
  register: nvidia_check
# Full install path: kernel headers for the running kernel plus the selected
# driver package. Runs when nvidia-smi did not succeed or the selected
# package is not in the gathered package facts.
- name: Install Kernel Headers and NVIDIA drivers (with kernel modules)
  vars:
    # Resolved once so the package list and the "already installed" check
    # always refer to the same package (nvidia-open or nvidia-driver).
    _nvidia_driver_package: "{{ 'nvidia-open' if (nvidia_open_source_drivers | bool) else 'nvidia-driver' }}"
  ansible.builtin.apt:
    name:
      - "{{ kernel_module_type }}-headers-{{ ansible_facts['kernel'] }}"
      - "{{ _nvidia_driver_package }}"
    state: present
  when:
    # "| bool" keeps string values such as "false" from -e working as flags.
    - nvidia_install_kernel_modules | bool
    - nvidia_check.rc != 0 or ansible_facts['packages'][_nvidia_driver_package] is not defined
  register: kernel_driver_install
# Userspace-only path: CUDA driver libraries and the CUDA runtime, without
# kernel headers or kernel-module driver packages.
- name: Install CUDA drivers and userspace libraries (userspace only)
  ansible.builtin.apt:
    name:
      - nvidia-driver-cuda
      - "cuda-runtime-{{ nvidia_cuda_runtime_version }}"
    state: present
  when:
    # Coerce first: a bare `not "false"` (string from -e) evaluates to false
    # and would skip this task even though userspace mode was requested.
    - not (nvidia_install_kernel_modules | bool)
    - nvidia_check.rc != 0 or ansible_facts['packages']['nvidia-driver-cuda'] is not defined
  register: cuda_driver_install
# Caller-supplied extra packages; skipped entirely when the list is empty.
- name: Install optional NVIDIA packages
  when: nvidia_optional_packages | length > 0
  ansible.builtin.apt:
    state: present
    name: "{{ nvidia_optional_packages }}"
# Post-install verification. nvidia-smi is run only when one of the install
# tasks changed something; any failure inside the block triggers the rescue,
# which removes the driver packages and the keyring and then fails the play.
# NOTE(review): after a fresh kernel-module install nvidia-smi may need the
# module loaded (possibly a reboot) before it succeeds — confirm that an
# automatic uninstall is the desired reaction in that case.
- name: Verify and handle NVIDIA driver installation
  block:
    - name: Verify NVIDIA driver installation
      ansible.builtin.command:
        cmd: nvidia-smi
      register: nvidia_verification
      changed_when: false
      when: kernel_driver_install is changed or cuda_driver_install is changed

    # A skipped verification still registers nvidia_verification, but without
    # an rc. Testing .rc directly made this condition error out on no-change
    # runs, which fired the rescue and uninstalled working drivers.
    - name: Display NVIDIA driver information
      ansible.builtin.debug:
        msg: "{{ nvidia_verification.stdout_lines }}"
      when:
        - nvidia_verification is not skipped
        - nvidia_verification.rc | default(1) == 0
  rescue:
    - name: Remove partially installed packages on failure
      ansible.builtin.apt:
        name:
          - nvidia-driver
          - nvidia-open
          - nvidia-driver-cuda
        state: absent
        autoremove: true

    - name: Remove CUDA keyring on failure
      ansible.builtin.apt:
        name: cuda-keyring
        state: absent

    - name: Fail with helpful message
      ansible.builtin.fail:
        msg: "NVIDIA driver installation failed. Packages have been cleaned up."