# Listing metadata: 109 lines, 3.6 KiB, YAML
---
# Collect the installed-package inventory through apt; the result is
# exposed as ansible_facts['packages'], which later driver-install
# conditions in this file consult.
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: apt

# Derive the two path components used in the CUDA repository URL.
- name: Set OS-specific variables
  ansible.builtin.set_fact:
    # Ubuntu: "ubuntu" + distribution version with the dots removed;
    # anything else: "debian" + distribution major version.
    cuda_repo_os: >-
      {{ 'ubuntu' + ansible_distribution_version.replace('.', '')
      if ansible_distribution == 'Ubuntu'
      else 'debian' + ansible_distribution_major_version }}
    # x86_64 stays as-is, aarch64 is mapped to "arm64", and any other
    # architecture string is passed through unchanged.
    cuda_arch: >-
      {{ 'x86_64' if ansible_architecture == 'x86_64'
      else 'arm64' if ansible_architecture == 'aarch64'
      else ansible_architecture }}

# Probe for the keyring file; the registered result is what the download
# task uses to decide whether any work is needed.
- name: Check if CUDA keyring is already installed
  register: cuda_keyring_installed
  ansible.builtin.stat:
    path: /usr/share/keyrings/cuda-archive-keyring.gpg

# Fetch the cuda-keyring .deb into /tmp, but only when the keyring file
# was not found by the preceding stat task.
- name: Download CUDA keyring package
  when: not cuda_keyring_installed.stat.exists
  ansible.builtin.get_url:
    dest: /tmp/cuda-keyring_1.1-1_all.deb
    url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_os }}/{{ cuda_arch }}/cuda-keyring_1.1-1_all.deb"
    mode: '0644'
  register: keyring_downloaded

# Install the downloaded cuda-keyring .deb.
#
# The condition is the stat result rather than "keyring_downloaded is
# changed": if the .deb is already sitting in /tmp (for example after an
# interrupted earlier run) the download task reports "ok" instead of
# "changed", and gating on "changed" would then skip the install forever
# even though the keyring is still missing.
- name: Install CUDA keyring package
  ansible.builtin.apt:
    deb: /tmp/cuda-keyring_1.1-1_all.deb
    state: present
  when: not cuda_keyring_installed.stat.exists
  # Consumed by the apt cache refresh task further down.
  register: keyring_installed

# Remove the temporary .deb from /tmp.
#
# Runs whenever the keyring was missing at the start of the play, not only
# when the download reported "changed"; otherwise a .deb left behind by an
# interrupted run would never be removed (its presence makes the download
# task report "ok"). state: absent is a no-op when the file is not there.
- name: Clean up downloaded keyring package
  ansible.builtin.file:
    path: /tmp/cuda-keyring_1.1-1_all.deb
    state: absent
  when: not cuda_keyring_installed.stat.exists

# Refresh the apt package index right after the keyring package was
# installed so that subsequent installs see the current repository state.
#
# No cache_valid_time here on purpose: with it, apt would skip the refresh
# whenever the cache is younger than the given window, which is exactly
# the situation in which a refresh is still required after the keyring
# package has just changed the system.
- name: Update apt cache after keyring installation
  ansible.builtin.apt:
    update_cache: true
  when: keyring_installed is changed

# Run nvidia-smi purely as a probe: the task never reports "changed" and
# never fails, so its return code (nvidia_check.rc) can be inspected by
# the install tasks that follow.
- name: Check if NVIDIA drivers are already installed
  ansible.builtin.command:
    cmd: nvidia-smi
  changed_when: false
  failed_when: false
  register: nvidia_check

# Install the headers for the running kernel plus the NVIDIA driver
# package (open or proprietary flavour, chosen by
# nvidia_open_source_drivers). Only runs when kernel modules are wanted.
- name: Install Kernel Headers and NVIDIA drivers (with kernel modules)
  vars:
    # Single source of truth for the driver package, so the "already
    # installed?" check below looks at the same package that is installed.
    # Previously the check always looked for 'nvidia-driver', even when
    # 'nvidia-open' was the package being installed.
    __nvidia_driver_pkg: "{{ 'nvidia-open' if nvidia_open_source_drivers else 'nvidia-driver' }}"
  ansible.builtin.apt:
    name:
      - "{{ kernel_module_type }}-headers-{{ ansible_kernel }}"
      - "{{ __nvidia_driver_pkg }}"
    state: present
  when:
    - nvidia_install_kernel_modules
    # Install if nvidia-smi did not succeed or the selected driver package
    # is absent from the gathered package facts.
    - nvidia_check.rc != 0 or __nvidia_driver_pkg not in ansible_facts['packages']
  # Consumed by the verification block to decide whether to re-run nvidia-smi.
  register: kernel_driver_install

# Userspace-only path: taken when kernel modules are NOT requested.
# Installs nvidia-driver-cuda together with the versioned cuda-runtime
# package.
- name: Install CUDA drivers and userspace libraries (userspace only)
  when:
    - not nvidia_install_kernel_modules
    # Proceed if nvidia-smi did not succeed or nvidia-driver-cuda is not
    # present in the gathered package facts.
    - nvidia_check.rc != 0 or ansible_facts['packages']['nvidia-driver-cuda'] is not defined
  ansible.builtin.apt:
    state: present
    name:
      - nvidia-driver-cuda
      - "cuda-runtime-{{ nvidia_cuda_runtime_version }}"
  register: cuda_driver_install

# Install whatever extra packages are listed in nvidia_optional_packages;
# the task is skipped entirely when that list is empty.
- name: Install optional NVIDIA packages
  when: nvidia_optional_packages | length > 0
  ansible.builtin.apt:
    state: present
    name: "{{ nvidia_optional_packages }}"

# Post-install verification with rollback.
#
# block:  run nvidia-smi (only if one of the driver install tasks changed
#         something) and print its output.
# rescue: on ANY failure inside the block, remove the driver packages and
#         the cuda-keyring package, then fail the play with a message.
- name: Verify and handle NVIDIA driver installation
  block:
    - name: Verify NVIDIA driver installation
      ansible.builtin.command:
        cmd: nvidia-smi
      register: nvidia_verification
      changed_when: false
      when: kernel_driver_install is changed or cuda_driver_install is changed

    - name: Display NVIDIA driver information
      ansible.builtin.debug:
        msg: "{{ nvidia_verification.stdout_lines }}"
      # A skipped task still registers its variable, but without an "rc"
      # key. The previous condition ("is defined and .rc == 0") therefore
      # raised an undefined-attribute error on every run where nothing was
      # installed, which triggered the rescue section and uninstalled a
      # perfectly healthy driver. Test for "not skipped" first, and default
      # rc defensively so the expression can never error.
      when: >-
        nvidia_verification is not skipped
        and (nvidia_verification.rc | default(1)) == 0

  rescue:
    - name: Remove partially installed packages on failure
      ansible.builtin.apt:
        name:
          - nvidia-driver
          - nvidia-open
          - nvidia-driver-cuda
        state: absent
        autoremove: true

    - name: Remove CUDA keyring on failure
      ansible.builtin.apt:
        name: cuda-keyring
        state: absent

    - name: Fail with helpful message
      ansible.builtin.fail:
        msg: "NVIDIA driver installation failed. Packages have been cleaned up."