chore: rename .yaml extensions to .yml for consistency
This commit is contained in:
109
roles/nvidia-drivers/tasks/main.yml
Normal file
109
roles/nvidia-drivers/tasks/main.yml
Normal file
@@ -0,0 +1,109 @@
|
||||
---
|
||||
# Collect the apt package inventory into ansible_facts.packages so the
# driver-install tasks can test whether a package is already present.
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: apt
|
||||
|
||||
# Derive the two path components of the CUDA repository URL.
#   cuda_repo_os: 'ubuntu' + distribution version with the dots removed on
#                 Ubuntu (a version of 22.04 yields ubuntu2204); otherwise
#                 'debian' + the distribution major version.
#   cuda_arch:    x86_64 stays x86_64, aarch64 maps to arm64, and any other
#                 architecture string is passed through unchanged.
# The folded scalars (>-) join their lines with single spaces, so the
# rendered Jinja expressions are identical to the one-line form.
- name: Set OS-specific variables
  ansible.builtin.set_fact:
    cuda_repo_os: >-
      {{
      'ubuntu' + ansible_distribution_version.replace('.', '')
      if ansible_distribution == 'Ubuntu'
      else 'debian' + ansible_distribution_major_version
      }}
    cuda_arch: >-
      {{
      'x86_64' if ansible_architecture == 'x86_64'
      else 'arm64' if ansible_architecture == 'aarch64'
      else ansible_architecture
      }}
|
||||
|
||||
# Probe for the keyring file; the result is registered as
# cuda_keyring_installed and consulted before downloading the package.
- name: Check if CUDA keyring is already installed
  register: cuda_keyring_installed
  ansible.builtin.stat:
    path: /usr/share/keyrings/cuda-archive-keyring.gpg
|
||||
|
||||
# Fetch the cuda-keyring .deb for the detected OS/architecture into /tmp.
# Runs only when the keyring file was not found by the preceding stat.
- name: Download CUDA keyring package
  when: not cuda_keyring_installed.stat.exists
  register: keyring_downloaded
  ansible.builtin.get_url:
    url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_os }}/{{ cuda_arch }}/cuda-keyring_1.1-1_all.deb"
    dest: /tmp/cuda-keyring_1.1-1_all.deb
    mode: "0644"
|
||||
|
||||
# Install the downloaded cuda-keyring .deb.
#
# Gate on the keyring being absent rather than on the download having
# "changed": if a previous run died after the download, the .deb is
# already in /tmp, get_url reports "ok" (not changed), and a
# changed-based guard would skip the install forever.
- name: Install CUDA keyring package
  ansible.builtin.apt:
    deb: /tmp/cuda-keyring_1.1-1_all.deb
    state: present
  when: not cuda_keyring_installed.stat.exists
  register: keyring_installed
|
||||
|
||||
# Remove the temporary .deb once it is no longer needed.
#
# Gate on the keyring having been absent (i.e. a download was attempted)
# rather than on the download having "changed", so a stale .deb left by
# an earlier interrupted run is also removed.
- name: Clean up downloaded keyring package
  ansible.builtin.file:
    path: /tmp/cuda-keyring_1.1-1_all.deb
    state: absent
  when: not cuda_keyring_installed.stat.exists
|
||||
|
||||
# Refresh the apt package index right after the keyring package was
# installed, so packages from the newly added source become visible.
#
# No cache_valid_time here on purpose: with it, apt skips the refresh
# when the cache is younger than the given age, which would leave the
# new repository unindexed and make the driver install fail.
- name: Update apt cache after keyring installation
  ansible.builtin.apt:
    update_cache: true
  when: keyring_installed is changed
|
||||
|
||||
# Read-only probe: run nvidia-smi and record the result in nvidia_check.
# The task never fails and never reports a change; the install tasks
# inspect nvidia_check.rc to decide whether drivers are needed.
- name: Check if NVIDIA drivers are already installed
  register: nvidia_check
  changed_when: false
  failed_when: false
  ansible.builtin.command:
    cmd: nvidia-smi
|
||||
|
||||
# Install headers for the running kernel plus the NVIDIA driver
# meta-package (open or proprietary, chosen by nvidia_open_source_drivers).
#
# Runs only when kernel modules are requested AND either nvidia-smi did
# not succeed or the selected driver package is missing from the gathered
# package facts. The presence test uses the same package name that is
# installed, so it is also meaningful when 'nvidia-open' is selected
# (previously it always looked for 'nvidia-driver').
- name: Install Kernel Headers and NVIDIA drivers (with kernel modules)
  vars:
    _nvidia_driver_package: "{{ 'nvidia-open' if nvidia_open_source_drivers else 'nvidia-driver' }}"
  ansible.builtin.apt:
    name:
      - "{{ kernel_module_type }}-headers-{{ ansible_kernel }}"
      - "{{ _nvidia_driver_package }}"
    state: present
  when:
    - nvidia_install_kernel_modules
    - nvidia_check.rc != 0 or ansible_facts['packages'][_nvidia_driver_package] is not defined
  register: kernel_driver_install
|
||||
|
||||
# Userspace-only path: install nvidia-driver-cuda and the CUDA runtime of
# the configured version. Taken when kernel modules are NOT requested and
# either nvidia-smi did not succeed or nvidia-driver-cuda is missing from
# the gathered package facts.
- name: Install CUDA drivers and userspace libraries (userspace only)
  when:
    - not nvidia_install_kernel_modules
    - nvidia_check.rc != 0 or ansible_facts['packages']['nvidia-driver-cuda'] is not defined
  register: cuda_driver_install
  ansible.builtin.apt:
    state: present
    name:
      - nvidia-driver-cuda
      - "cuda-runtime-{{ nvidia_cuda_runtime_version }}"
|
||||
|
||||
# Install any extra packages listed in nvidia_optional_packages; skipped
# when that list is empty.
- name: Install optional NVIDIA packages
  when: nvidia_optional_packages | length > 0
  ansible.builtin.apt:
    state: present
    name: "{{ nvidia_optional_packages }}"
|
||||
|
||||
# Post-install verification with rollback.
#
# block:  run nvidia-smi if either install task changed something, then
#         print its output.
# rescue: if anything in the block fails, remove the driver packages and
#         the CUDA keyring, then fail the play with an explanatory message.
- name: Verify and handle NVIDIA driver installation
  block:
    - name: Verify NVIDIA driver installation
      ansible.builtin.command:
        cmd: nvidia-smi
      register: nvidia_verification
      changed_when: false
      when: kernel_driver_install is changed or cuda_driver_install is changed

    # A skipped task still registers its variable, but without 'rc'.
    # Dereferencing nvidia_verification.rc unguarded would raise inside
    # the block and trigger the rescue section, uninstalling working
    # drivers on every run where nothing changed. Check for the skip
    # first and default the return code.
    - name: Display NVIDIA driver information
      ansible.builtin.debug:
        msg: "{{ nvidia_verification.stdout_lines }}"
      when:
        - nvidia_verification is not skipped
        - nvidia_verification.rc | default(1) == 0

  rescue:
    - name: Remove partially installed packages on failure
      ansible.builtin.apt:
        name:
          - nvidia-driver
          - nvidia-open
          - nvidia-driver-cuda
        state: absent
        autoremove: true

    - name: Remove CUDA keyring on failure
      ansible.builtin.apt:
        name: cuda-keyring
        state: absent

    - name: Fail with helpful message
      ansible.builtin.fail:
        msg: "NVIDIA driver installation failed. Packages have been cleaned up."
|
||||
Reference in New Issue
Block a user