Files
ansible-collection-infrastr…/roles/nvidia_drivers/tasks/main.yml

109 lines
3.6 KiB
YAML

---
# Collect the installed-package inventory from apt. Later tasks in this file
# look packages up under ansible_facts['packages'].
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: "apt"
# Derive the two path components used to build the CUDA repository URL.
#   cuda_repo_os: "ubuntu" + distribution version with the dots removed on
#                 Ubuntu, otherwise "debian" + the major version.
#   cuda_arch:    "aarch64" is rewritten to "arm64"; every other architecture
#                 value (including "x86_64") is passed through unchanged.
- name: Set OS-specific variables
  ansible.builtin.set_fact:
    cuda_repo_os: >-
      {{
      ('ubuntu' + ansible_distribution_version.replace('.', ''))
      if ansible_distribution == 'Ubuntu'
      else ('debian' + ansible_distribution_major_version)
      }}
    cuda_arch: >-
      {{ {'aarch64': 'arm64'}.get(ansible_architecture, ansible_architecture) }}
# Probe for the keyring file; the result is registered as
# cuda_keyring_installed and consulted by the download task that follows.
- name: Check if CUDA keyring is already installed
  register: cuda_keyring_installed
  ansible.builtin.stat:
    path: "/usr/share/keyrings/cuda-archive-keyring.gpg"
# Fetch the cuda-keyring .deb into /tmp, but only when the keyring file was
# not found by the preceding stat task. The URL is assembled from the
# cuda_repo_os / cuda_arch facts set earlier in this file.
- name: Download CUDA keyring package
  when: not cuda_keyring_installed.stat.exists
  ansible.builtin.get_url:
    dest: "/tmp/cuda-keyring_1.1-1_all.deb"
    mode: "0644"
    url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_os }}/{{ cuda_arch }}/cuda-keyring_1.1-1_all.deb"
  register: keyring_downloaded
# Install the downloaded cuda-keyring .deb.
#
# The condition is the keyring's absence rather than "the download task
# reported a change": if the .deb is already sitting in /tmp (for example
# after an interrupted earlier run), the download task reports no change, and
# gating on that would skip the install even though the keyring is still
# missing. The download task uses the same absence check, so the file is
# present whenever this task runs.
- name: Install CUDA keyring package
  ansible.builtin.apt:
    deb: /tmp/cuda-keyring_1.1-1_all.deb
    state: present
  when: not cuda_keyring_installed.stat.exists
  register: keyring_installed
# Remove the temporary .deb from /tmp.
#
# This runs unconditionally: `state: absent` is a no-op when the file does not
# exist, and an unconditional removal also clears a stale copy left behind by
# an earlier run that downloaded the file but never reached this step.
- name: Clean up downloaded keyring package
  ansible.builtin.file:
    path: /tmp/cuda-keyring_1.1-1_all.deb
    state: absent
# Refresh the apt package index right after the keyring package was installed
# so that packages from the newly configured repository become visible.
#
# No cache_valid_time here on purpose: with it, apt skips the refresh whenever
# the cache is younger than the given age, which would leave the new
# repository unindexed on hosts whose cache was updated recently.
- name: Update apt cache after keyring installation
  ansible.builtin.apt:
    update_cache: true
  when: keyring_installed is changed
# Run nvidia-smi purely as a probe. The task never fails and never reports a
# change; later tasks in this file read the exit status from nvidia_check.rc.
- name: Check if NVIDIA drivers are already installed
  ansible.builtin.command:
    argv:
      - nvidia-smi
  changed_when: false
  failed_when: false
  register: nvidia_check
# Kernel-module path: install headers matching the running kernel together
# with either the "nvidia-open" or the "nvidia-driver" package.
# Runs only when nvidia_install_kernel_modules is true AND either the
# nvidia-smi probe exited non-zero or "nvidia-driver" is missing from the
# gathered package facts.
# NOTE(review): kernel_module_type, nvidia_open_source_drivers and
# nvidia_install_kernel_modules are not set in this file — presumably role
# defaults; confirm.
- name: Install Kernel Headers and NVIDIA drivers (with kernel modules)
  when:
    - nvidia_install_kernel_modules
    - >-
      nvidia_check.rc != 0
      or ansible_facts.packages['nvidia-driver'] is not defined
  ansible.builtin.apt:
    state: present
    name:
      - "{{ kernel_module_type }}-headers-{{ ansible_kernel }}"
      - "{{ 'nvidia-open' if nvidia_open_source_drivers else 'nvidia-driver' }}"
  register: kernel_driver_install
# Userspace-only path, taken when nvidia_install_kernel_modules is false:
# install nvidia-driver-cuda plus the CUDA runtime package for the configured
# version. Like the kernel-module variant, it runs only if the nvidia-smi
# probe exited non-zero or "nvidia-driver-cuda" is missing from the gathered
# package facts.
# NOTE(review): nvidia_cuda_runtime_version is not set in this file —
# presumably a role default; confirm.
- name: Install CUDA drivers and userspace libraries (userspace only)
  when:
    - not nvidia_install_kernel_modules
    - >-
      nvidia_check.rc != 0
      or ansible_facts.packages['nvidia-driver-cuda'] is not defined
  ansible.builtin.apt:
    state: present
    name:
      - "nvidia-driver-cuda"
      - "cuda-runtime-{{ nvidia_cuda_runtime_version }}"
  register: cuda_driver_install
# Install whatever extra packages are listed in nvidia_optional_packages;
# skipped entirely when that list is empty.
- name: Install optional NVIDIA packages
  when:
    - nvidia_optional_packages | length > 0
  ansible.builtin.apt:
    state: present
    name: "{{ nvidia_optional_packages }}"
# Smoke-test the driver after an install performed in this run. If the test
# fails, the rescue section uninstalls the driver packages and the CUDA
# keyring package, then fails the play with an explanatory message.
- name: Verify and handle NVIDIA driver installation
  block:
    # Only runs when one of the two driver install tasks reported a change.
    - name: Verify NVIDIA driver installation
      ansible.builtin.command:
        cmd: nvidia-smi
      register: nvidia_verification
      changed_when: false
      when: kernel_driver_install is changed or cuda_driver_install is changed

    # A skipped command still registers a result, but that result has no
    # `rc`. Reading `rc` unguarded would raise an undefined-attribute error
    # on runs where nothing was installed; inside this block that error would
    # trigger the rescue section and remove a working driver. Check the skip
    # status first so `rc` is only read when the command actually ran.
    - name: Display NVIDIA driver information
      ansible.builtin.debug:
        msg: "{{ nvidia_verification.stdout_lines }}"
      when: nvidia_verification is not skipped and nvidia_verification.rc == 0

  rescue:
    # Roll back every driver flavour this file may have installed.
    - name: Remove partially installed packages on failure
      ansible.builtin.apt:
        name:
          - nvidia-driver
          - nvidia-open
          - nvidia-driver-cuda
        state: absent
        autoremove: true

    - name: Remove CUDA keyring on failure
      ansible.builtin.apt:
        name: cuda-keyring
        state: absent

    # Surface the failure after cleanup so the play does not continue.
    - name: Fail with helpful message
      ansible.builtin.fail:
        msg: "NVIDIA driver installation failed. Packages have been cleaned up."