commit a28fcbd942ce10eac9f5cdbcdb08c0c8a25ce58e Author: hiperman Date: Fri Jan 30 20:13:58 2026 -0500 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..e38df29 --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# Infrastructure Collection + +Ansible collection for infrastructure management, system configuration, and virtualization. + +## Roles + +### proxmox-lxc-provision +Manages Proxmox LXC containers with comprehensive lifecycle operations. + +- Create, clone, start, stop, and delete containers +- Template conversion and configuration editing +- Post-provisioning tasks and health checks + +### system-setup +Performs initial system configuration for new servers. + +- Creates admin user with SSH key authentication +- Hardens SSH configuration +- Installs essential packages +- Optional passwordless sudo configuration + +### system-maintenance +Updates packages and handles maintenance across multiple distributions. + +- Multi-distribution support (Debian/Ubuntu/Alpine) +- Package updates and cleanup +- Automatic reboot handling when required + +### nvidia-drivers +Installs NVIDIA drivers and CUDA runtime with proper idempotency and error handling. 
+ +- Supports Debian/Ubuntu with automatic OS detection +- Choice between proprietary and open-source drivers +- Post-installation verification and cleanup +- Architecture detection (x86_64/arm64) + + +## Installation + +```bash +ansible-galaxy collection install patrickj.infrastructure +``` + +## Quick Start + +### Basic System Setup +```yaml +- name: Initial system setup + hosts: all + become: yes + tasks: + - include_role: + name: patrickj.infrastructure.system_setup + vars: + password: "{{ admin_password }}" + ssh_pubkey_file: "~/.ssh/id_rsa.pub" +``` + +### Proxmox Container Management +```yaml +- name: Create and start an LXC container + hosts: localhost + connection: local + vars: + - container_hostname: new-debian-container + - container_template: "local:vztmpl/debian-12_amd64.tar.zst" + - container_ipv4: "10.0.0.99" + roles: + - role: proxmox-lxc-provision + +- name: Configure the LXC container + hosts: new-debian-container + become: yes + tasks: + - include_role: + name: patrickj.infrastructure.system_setup + vars: + password: "{{ admin_password }}" + ssh_pubkey_file: "~/.ssh/id_rsa.pub" + + - include_role: + name: patrickj.infrastructure.nvidia_drivers + vars: + nvidia_install_kernel_modules: true + when: gpu_server | default(false) +``` + +## Requirements + +- Ansible >= 2.12 +- Root/sudo privileges on target hosts +- community.general collection +- ansible.posix collection + +## License + +MIT + +## Author + +Patrick Jaroszewski \ No newline at end of file diff --git a/galaxy.yml b/galaxy.yml new file mode 100644 index 0000000..b05a448 --- /dev/null +++ b/galaxy.yml @@ -0,0 +1,25 @@ +--- +namespace: patrickj +name: infrastructure +version: 1.0.0 +readme: README.md +authors: + - Patrick Jaroszewski +description: Ansible collection for infrastructure management and system configuration +license: + - MIT +tags: + - infrastructure + - system + - nvidia + - proxmox + - lxc + - containers + - maintenance +dependencies: + "community.general": ">=1.0.0" + "ansible.posix": ">=1.0.0" +repository: https://git.jaroszew.ski/ansible/ansible-collection-infrastructure +documentation: https://git.jaroszew.ski/ansible/ansible-collection-infrastructure/README.md +homepage: 
https://git.jaroszew.ski/ansible/ansible-collection-infrastructure +issues: https://git.jaroszew.ski/ansible/ansible-collection-infrastructure/issues \ No newline at end of file diff --git a/roles/nvidia-drivers/README.md b/roles/nvidia-drivers/README.md new file mode 100644 index 0000000..440a5fa --- /dev/null +++ b/roles/nvidia-drivers/README.md @@ -0,0 +1,68 @@ +# NVIDIA Drivers Role + +Installs NVIDIA drivers and CUDA runtime on Debian/Ubuntu systems with proper idempotency and error handling. + +## Features + +- Automatic OS detection (Debian/Ubuntu) +- Idempotent repository management +- Support for both proprietary and open-source drivers +- Post-installation verification +- Automatic cleanup on failures +- Architecture detection (x86_64/arm64) + +## Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `nvidia_install_kernel_modules` | `false` | Install kernel modules (full drivers) vs userspace only | +| `nvidia_open_source_drivers` | `false` | Use open-source nvidia-open instead of proprietary drivers | +| `nvidia_cuda_runtime_version` | `"13-1"` | CUDA runtime version to install | +| `nvidia_optional_packages` | `[]` | Additional NVIDIA packages to install | +| `kernel_module_type` | `linux` | Kernel module type (linux/linux-image) | + +## Usage + +### Full Driver Installation (with kernel modules) +```yaml +- name: Install NVIDIA drivers with kernel modules + include_role: + name: nvidia-drivers + vars: + nvidia_install_kernel_modules: true + nvidia_open_source_drivers: false +``` + +### Userspace Only (containers/existing drivers) +```yaml +- name: Install NVIDIA userspace libraries + include_role: + name: nvidia-drivers + vars: + nvidia_install_kernel_modules: false +``` + +### With Optional Packages +```yaml +- name: Install NVIDIA with extras + include_role: + name: nvidia-drivers + vars: + nvidia_install_kernel_modules: true + nvidia_optional_packages: + - nvidia-settings + - nvidia-modprobe +``` + +## 
Requirements + +- Debian 11+ or Ubuntu 20.04+ +- Root privileges +- Internet connectivity + +## Notes + +- Role is idempotent - safe to run multiple times +- Automatically detects if drivers are already installed +- Cleans up on installation failures +- Supports both x86_64 and arm64 architectures \ No newline at end of file diff --git a/roles/nvidia-drivers/defaults/main.yaml b/roles/nvidia-drivers/defaults/main.yaml new file mode 100644 index 0000000..a98c812 --- /dev/null +++ b/roles/nvidia-drivers/defaults/main.yaml @@ -0,0 +1,6 @@ +--- +nvidia_install_kernel_modules: false +nvidia_open_source_drivers: false +nvidia_cuda_runtime_version: "13-1" +nvidia_optional_packages: [] +kernel_module_type: linux diff --git a/roles/nvidia-drivers/tasks/main.yaml b/roles/nvidia-drivers/tasks/main.yaml new file mode 100644 index 0000000..259f0e7 --- /dev/null +++ b/roles/nvidia-drivers/tasks/main.yaml @@ -0,0 +1,109 @@ +--- +- name: Gather package facts + ansible.builtin.package_facts: + manager: apt + +- name: Set OS-specific variables + ansible.builtin.set_fact: + cuda_repo_os: "{{ 'ubuntu' + ansible_distribution_version.replace('.', '') if ansible_distribution == 'Ubuntu' else 'debian' + ansible_distribution_major_version }}" + cuda_arch: "{{ 'x86_64' if ansible_architecture == 'x86_64' else 'arm64' if ansible_architecture == 'aarch64' else ansible_architecture }}" + +- name: Check if CUDA keyring is already installed + ansible.builtin.stat: + path: /usr/share/keyrings/cuda-archive-keyring.gpg + register: cuda_keyring_installed + +- name: Download CUDA keyring package + ansible.builtin.get_url: + url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_os }}/{{ cuda_arch }}/cuda-keyring_1.1-1_all.deb" + dest: /tmp/cuda-keyring_1.1-1_all.deb + mode: '0644' + when: not cuda_keyring_installed.stat.exists + register: keyring_downloaded + +- name: Install CUDA keyring package + ansible.builtin.apt: + deb: /tmp/cuda-keyring_1.1-1_all.deb + state: present + 
when: keyring_downloaded is changed + register: keyring_installed + +- name: Clean up downloaded keyring package + ansible.builtin.file: + path: /tmp/cuda-keyring_1.1-1_all.deb + state: absent + when: keyring_downloaded is changed + +- name: Update apt cache after keyring installation + ansible.builtin.apt: + update_cache: yes + cache_valid_time: 3600 + when: keyring_installed is changed + +- name: Check if NVIDIA drivers are already installed + ansible.builtin.command: + cmd: nvidia-smi + register: nvidia_check + failed_when: false + changed_when: false + +- name: Install Kernel Headers and NVIDIA drivers (with kernel modules) + ansible.builtin.apt: + name: + - "{{ kernel_module_type }}-headers-{{ ansible_kernel }}" + - "{{ 'nvidia-open' if nvidia_open_source_drivers else 'nvidia-driver' }}" + state: present + when: + - nvidia_install_kernel_modules + - nvidia_check.rc != 0 or ansible_facts['packages']['nvidia-driver'] is not defined + register: kernel_driver_install + +- name: Install CUDA drivers and userspace libraries (userspace only) + ansible.builtin.apt: + name: + - nvidia-driver-cuda + - cuda-runtime-{{ nvidia_cuda_runtime_version }} + state: present + when: + - not nvidia_install_kernel_modules + - nvidia_check.rc != 0 or ansible_facts['packages']['nvidia-driver-cuda'] is not defined + register: cuda_driver_install + +- name: Install optional NVIDIA packages + ansible.builtin.apt: + name: "{{ nvidia_optional_packages }}" + state: present + when: nvidia_optional_packages | length > 0 + +- name: Verify and handle NVIDIA driver installation + block: + - name: Verify NVIDIA driver installation + ansible.builtin.command: + cmd: nvidia-smi + register: nvidia_verification + changed_when: false + when: kernel_driver_install is changed or cuda_driver_install is changed + + - name: Display NVIDIA driver information + ansible.builtin.debug: + msg: "{{ nvidia_verification.stdout_lines }}" + when: nvidia_verification is defined and nvidia_verification.rc == 0 + + 
rescue: + - name: Remove partially installed packages on failure + ansible.builtin.apt: + name: + - nvidia-driver + - nvidia-open + - nvidia-driver-cuda + state: absent + autoremove: yes + + - name: Remove CUDA keyring on failure + ansible.builtin.apt: + name: cuda-keyring + state: absent + + - name: Fail with helpful message + ansible.builtin.fail: + msg: "NVIDIA driver installation failed. Packages have been cleaned up." \ No newline at end of file diff --git a/roles/proxmox-lxc-provision/README.md b/roles/proxmox-lxc-provision/README.md new file mode 100755 index 0000000..326e29a --- /dev/null +++ b/roles/proxmox-lxc-provision/README.md @@ -0,0 +1,118 @@ +# Ansible Role: proxmox-lxc-provision + +## Description + +This Ansible role manages the provision of LXC containers and templates on a Proxmox host. By default the role will create and start an LXC container. It allows creating a new container from a [container image](https://pve.proxmox.com/wiki/Linux_Container#pct_container_images) or cloning an existing container or template container. + +It also includes tasks which may be used individually: +- `clone.yaml`: Creates a new LXC container by cloning another container or template +- `convert.yaml`: Converts an LXC container to a template +- `create.yaml`: Creates a new LXC container +- `delete.yaml`: Deletes an LXC container given its vmid or hostname +- `start.yaml`: Starts an LXC container +- `stop.yaml`: Stops an LXC container +- `update.yaml`: Updates an existing LXC container +- `wait.yaml`: Waits for SSH to be available on the container + +## Requirements + +- Ansible 2.9 or higher +- Python 3.6 or higher +- requests +- proxmoxer +- passlib + +## Role Variables + +### Required Variables + + +| Variable | Description | Example | +|----------|-------------|---------| +| `os_template` | The OS template to create the LXC from. 
Mutually exclusive with `ct_id`| `local:vztmpl/debian-12_amd64.tar.zst` | +| `ct_id` | The vmid of the container or template container to clone the LXC from. Mutually exclusive with `os_template` | `201` | + +### Required Proxmox API Authentication Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `proxmox_api_user` | The username for Proxmox authentication, typically in format `username@realm` | `ansible@pve` | +| `proxmox_api_token_id` | The API token ID used for authentication | `token` | +| `proxmox_api_token_secret` | The secret key associated with the API token | `xxx-yyy-zzz` (should be stored securely) | +| `proxmox_api_host` | The IP address or hostname of the Proxmox server | `192.168.1.10` | +| `proxmox_api_port` | The port on which the Proxmox API is listening | `8006` | +| `proxmox_node` | The name of the Proxmox node to target | `server1` | +| `proxmox_api_validate_certs` | Whether to validate SSL certificates (set to false for self-signed certs) | `false` | + + +### Optional Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `clone_type` | Only use with `ct_id`. Supports `full` and `linked` clones. 
| `full` | +| `container_storage` | Target storage for the container | `local-zfs` | +| `container_disk` | The target storage and storage size | `local-zfs:16` | +| `container_password` | The password for the root account | `password123` | +| `container_cores` | The number of CPU cores | `4` | +| `container_memory` | Memory size in MB for container | `2048` | +| `container_swap` | Swap memory size in MB | `2048` | +| `container_ipv4` | The IPv4 address | `dhcp` | +| `container_ipv6` | The IPv6 address | `auto` | +| `container_pubkey_file` | The SSH public key for authentication to root user | Creates a temp key in `/tmp` | +| `container_features` | List of additional container features | `- nesting=1` | + + +## Example Playbook +*Assuming Proxmox authentication variables are set* +#### Creating a new LXC +```yaml +- name: Create and start an LXC container + hosts: localhost + connection: local + vars: + - container_hostname: new-debian-container + - container_template: "local:vztmpl/debian-12_amd64.tar.zst" + - container_ipv4: "10.0.0.99" + roles: + - role: proxmox-lxc-provision +``` + +#### Creating a new LXC by cloning an existing container with vmid 200 +```yaml +- name: Create and start an LXC container + hosts: localhost + connection: local + vars: + - container_hostname: new-debian-container + - clone_from: 200 + - container_ipv4: "10.0.0.99" + roles: + - role: proxmox-lxc-provision +``` + + +#### Creating an LXC Container and Converting it to a Template +```yaml +--- +- name: Create and start an LXC container + hosts: localhost + connection: local + vars: + - container_hostname: "{{ container_hostname }}" + - container_template: "local:vztmpl/debian-12_amd64.tar.zst" + - container_ipv4: "10.0.0.99" + roles: + - role: proxmox-lxc-provision + +# Run configuration tasks on the container +# ... 
+ +- name: Convert the created container to a template + hosts: localhost + vars: + container_hostname: "{{ container_hostname }}" + tasks: + - include_role: + name: proxmox-lxc-provision + tasks_from: convert +``` \ No newline at end of file diff --git a/roles/proxmox-lxc-provision/defaults/main.yaml b/roles/proxmox-lxc-provision/defaults/main.yaml new file mode 100755 index 0000000..c989f8f --- /dev/null +++ b/roles/proxmox-lxc-provision/defaults/main.yaml @@ -0,0 +1,20 @@ +--- +container_template: "local:vztmpl/debian-12-standard_12.12-1_amd64.tar.zst" +container_cores: 4 +container_memory: 2048 +container_swap: 2048 +container_storage: local-zfs +container_size: 16 +container_disk: "{{ container_storage }}:{{ container_size }}" +container_ipv4: dhcp +container_ipv6: auto +container_nameserver: 10.0.0.7 +container_gateway: 10.0.0.1 +container_pubkey_file: "~/.ssh/id_ed25519.pub" +container_unprivileged: true +container_features: + - nesting=1 +container_nvidia_gpu_mount: false +container_tags: ["ansible-managed"] +clone_type: full +container_start: true diff --git a/roles/proxmox-lxc-provision/tasks/clone.yaml b/roles/proxmox-lxc-provision/tasks/clone.yaml new file mode 100755 index 0000000..7fbe4dc --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/clone.yaml @@ -0,0 +1,42 @@ +--- +- name: Create a full clone of the container + community.general.proxmox: + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + api_host: "{{ proxmox_api_host }}" + node: "{{ proxmox_node }}" + + vmid: "{{ container_vmid | default(0) }}" + clone: "{{ clone_from }}" + clone_type: "{{ clone_type }}" + hostname: "{{ container_hostname }}" + storage: "{{ container_storage }}" + register: clone_result + +- name: Debug container_mounts + ansible.builtin.debug: + msg: + - "container_mounts: {{ container_mounts }}" + - "Type: {{ container_mounts | type_debug }}" + - "Defined: {{ container_mounts is defined 
}}" + - "Length: {{ container_mounts | length }}" + +- name: Add bind mounts via pct + become: yes + ansible.builtin.shell: | + pct set {{ clone_result.vmid | default(container_vmid) }} {% for key, value in container_mounts.items() %}-{{ key }} {{ value }} {% endfor %} + delegate_to: "{{ proxmox_api_host }}" + when: container_mounts is defined + +- name: Resize rootfs after clone + ansible.builtin.command: + cmd: "pct resize {{ clone_result.vmid }} rootfs {{ container_size }}G" + delegate_to: "{{ proxmox_api_host }}" + become: yes + register: resize_result + changed_when: resize_result.rc == 0 and 'already at specified size' not in resize_result.stderr + failed_when: + - resize_result.rc != 0 + - "'already at specified size' not in resize_result.stderr" + when: container_size is defined diff --git a/roles/proxmox-lxc-provision/tasks/convert.yaml b/roles/proxmox-lxc-provision/tasks/convert.yaml new file mode 100755 index 0000000..43620b4 --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/convert.yaml @@ -0,0 +1,12 @@ +--- +- ansible.builtin.include_tasks: stop.yaml +- name: Convert container to template + community.general.proxmox: + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + api_host: "{{ proxmox_api_host }}" + node: "{{ proxmox_node }}" + + state: template + hostname: "{{ container_hostname }}" diff --git a/roles/proxmox-lxc-provision/tasks/create.yaml b/roles/proxmox-lxc-provision/tasks/create.yaml new file mode 100755 index 0000000..fbfc30c --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/create.yaml @@ -0,0 +1,32 @@ +--- +- name: Create an LXC container + community.general.proxmox: + api_host: "{{ proxmox_api_host }}" + api_port: "{{ proxmox_api_port }}" + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + validate_certs: "{{ proxmox_api_validate_certs }}" + + node: "{{ proxmox_node }}" + vmid: 
"{{ container_vmid | default(omit) }}" + hostname: "{{ container_hostname }}" + password: "{{ container_password }}" + ostemplate: "{{ container_template }}" + cores: "{{ container_cores }}" + memory: "{{ container_memory }}" + swap: "{{ container_swap }}" + disk: "{{ container_disk }}" + mounts: "{{ container_mounts | default(omit) }}" + netif: > + {"net0": "name=eth0,gw={{ container_gateway }},ip={{ container_ipv4 }},ip6={{ container_ipv6 | default(omit) }},bridge=vmbr0"} + pubkey: "{{ lookup('file', container_pubkey_file) | default(omit) }}" + onboot: "{{ container_onboot | default(false) }}" + startup: "{{ container_startup | default(omit) }}" + unprivileged: "{{ container_unprivileged | default(true) }}" + features: "{{ container_features | default(omit) }}" + timezone: "{{ container_timezone | default(omit) }}" + nameserver: "{{ container_nameserver | default(omit) }}" + state: present + tags: "{{ container_tags | default(omit) }}" + register: container_result diff --git a/roles/proxmox-lxc-provision/tasks/delete.yaml b/roles/proxmox-lxc-provision/tasks/delete.yaml new file mode 100755 index 0000000..25e495c --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/delete.yaml @@ -0,0 +1,18 @@ +--- +- ansible.builtin.include_tasks: stop.yaml +- name: Delete a container + community.general.proxmox: + api_host: "{{ proxmox_api_host }}" + api_port: "{{ proxmox_api_port }}" + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + validate_certs: "{{ proxmox_api_validate_certs }}" + + vmid: "{{ container_vmid | default(omit) }}" + hostname: "{{ container_hostname | default(omit) }}" + state: absent + register: delete_result + failed_when: | + delete_result.failed and + ('does not exist' not in delete_result.msg) diff --git a/roles/proxmox-lxc-provision/tasks/edit-config.yaml b/roles/proxmox-lxc-provision/tasks/edit-config.yaml new file mode 100755 index 0000000..ab5ac73 --- /dev/null +++ 
b/roles/proxmox-lxc-provision/tasks/edit-config.yaml @@ -0,0 +1,43 @@ +--- + +- name: Remove all existing ID mappings + lineinfile: + path: "/etc/pve/lxc/{{ container_vmid }}.conf" + regexp: '^lxc\.idmap:' + state: absent + when: container_id_mappings is defined + +- name: Add ID mappings + blockinfile: + path: "/etc/pve/lxc/{{ container_vmid }}.conf" + block: "{{ container_id_mappings }}" + insertafter: EOF + when: container_id_mappings is defined + +- name: Remove existing GPU configuration + lineinfile: + path: "/etc/pve/lxc/{{ container_vmid }}.conf" + regexp: "{{ item }}" + state: absent + loop: + - '^lxc\.cgroup2\.devices\.allow: c {{ gpu_device_id }}:\* rwm' + - '^lxc\.cgroup2\.devices\.allow: c {{ uvm_device_id }}:\* rwm' + - '^lxc\.mount\.entry: /dev/nvidia0' + - '^lxc\.mount\.entry: /dev/nvidiactl' + - '^lxc\.mount\.entry: /dev/nvidia-uvm ' + - '^lxc\.mount\.entry: /dev/nvidia-uvm-tools' + when: container_nvidia_gpu_mount + +- name: Add GPU device for passthrough + blockinfile: + path: /etc/pve/lxc/{{ container_vmid }}.conf + block: | + lxc.cgroup2.devices.allow: c {{ gpu_device_id }}:* rwm + lxc.cgroup2.devices.allow: c {{ uvm_device_id }}:* rwm + lxc.mount.entry: /dev/nvidia0 dev/nvidia0 none bind,optional,create=file + lxc.mount.entry: /dev/nvidiactl dev/nvidiactl none bind,optional,create=file + lxc.mount.entry: /dev/nvidia-uvm dev/nvidia-uvm none bind,optional,create=file + lxc.mount.entry: /dev/nvidia-uvm-tools dev/nvidia-uvm-tools none bind,optional,create=file + when: container_nvidia_gpu_mount + + \ No newline at end of file diff --git a/roles/proxmox-lxc-provision/tasks/main.yaml b/roles/proxmox-lxc-provision/tasks/main.yaml new file mode 100755 index 0000000..05a54f3 --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/main.yaml @@ -0,0 +1,38 @@ +--- +- name: Container source must be defined (clone_from or container_template) + ansible.builtin.fail: + msg: "Neither clone_from or container_template are defined" + when: clone_from is undefined 
and container_template is undefined + +- name: Clone container from another container or template, then update + when: clone_from is defined + block: + - name: Clone from template + ansible.builtin.include_tasks: clone.yaml + register: clone_result + + - name: Update container + ansible.builtin.include_tasks: update.yaml + vars: + container_vmid: "{{ clone_result.vmid }}" + register: container_result + +- name: Create the new container + ansible.builtin.include_tasks: create.yaml + when: container_template is defined and clone_from is undefined + +- name: Start the created container and wait for ssh + vars: + container_vmid: "{{ container_result.vmid }}" + ansible.builtin.include_tasks: "{{ item }}" + loop: + - start.yaml + - wait.yaml + when: container_start + +- name: Post clone updates + when: clone_from is defined + delegate_to: "{{ container_hostname }}" + block: + - name: Include post-clone tasks + ansible.builtin.include_tasks: post-clone.yaml diff --git a/roles/proxmox-lxc-provision/tasks/post-clone.yaml b/roles/proxmox-lxc-provision/tasks/post-clone.yaml new file mode 100755 index 0000000..9412535 --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/post-clone.yaml @@ -0,0 +1,27 @@ +--- +- name: Change root password + ansible.builtin.user: + name: root + password: "{{ container_password | password_hash('sha512') }}" + update_password: always + when: container_password is defined + +- name: Change admin password + ansible.builtin.user: + name: admin + password: "{{ password | password_hash('sha512') }}" + update_password: always + when: password is defined + +- name: Regenerate SSH host keys + ansible.builtin.include_role: + name: system-setup + tasks_from: ssh + vars: + regenerate_ssh_host_keys: true + +- name: Remove previous entry from known hosts + ansible.builtin.known_hosts: + name: "{{ hostvars[container_hostname]['ansible_host'] }}" + state: absent + delegate_to: localhost diff --git a/roles/proxmox-lxc-provision/tasks/start.yaml 
b/roles/proxmox-lxc-provision/tasks/start.yaml new file mode 100755 index 0000000..512f948 --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/start.yaml @@ -0,0 +1,17 @@ +--- +- name: Start the LXC container + community.general.proxmox: + api_host: "{{ proxmox_api_host }}" + api_port: "{{ proxmox_api_port }}" + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + validate_certs: "{{ proxmox_api_validate_certs }}" + + vmid: "{{ container_vmid | default(container_result.vmid) }}" + state: started + register: start_result + retries: 3 + delay: 5 + until: start_result is success + failed_when: start_result.failed and ('already running' not in start_result.msg) diff --git a/roles/proxmox-lxc-provision/tasks/stop.yaml b/roles/proxmox-lxc-provision/tasks/stop.yaml new file mode 100755 index 0000000..bac188b --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/stop.yaml @@ -0,0 +1,16 @@ +--- +- name: Stop container if it is running + community.general.proxmox: + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + api_host: "{{ proxmox_api_host }}" + + vmid: "{{ container_vmid | default(omit) }}" + hostname: "{{ container_hostname | default(omit) }}" + state: "stopped" + register: stop_result + failed_when: |- + stop_result.failed and + ('not running' not in stop_result.msg) and + ('does not exist' not in stop_result.msg) diff --git a/roles/proxmox-lxc-provision/tasks/update.yaml b/roles/proxmox-lxc-provision/tasks/update.yaml new file mode 100755 index 0000000..f0daab7 --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/update.yaml @@ -0,0 +1,29 @@ +--- +- name: Update an LXC container + community.general.proxmox: + api_host: "{{ proxmox_api_host }}" + api_port: "{{ proxmox_api_port }}" + api_user: "{{ proxmox_api_user }}" + api_token_id: "{{ proxmox_api_token_id }}" + api_token_secret: "{{ proxmox_api_token_secret }}" + 
validate_certs: "{{ proxmox_api_validate_certs }}" + node: "{{ proxmox_node }}" + + vmid: "{{ container_vmid }}" + hostname: "{{ container_hostname }}" + password: "{{ container_password | default(omit) }}" # Updating password does not work + cores: "{{ container_cores }}" + memory: "{{ container_memory }}" + swap: "{{ container_swap }}" + disk: "{{ container_disk }}" + netif: '{"net0": "name=eth0,gw={{ container_gateway }},ip={{ container_ipv4 }},ip6={{ container_ipv6 | default(omit) }},bridge=vmbr0"}' + pubkey: "{{ lookup('file', container_pubkey_file) | default(omit) }}" + onboot: "{{ container_onboot | default(false) }}" + startup: "{{ container_startup | default(omit) }}" + features: "{{ container_features | default(omit) }}" + timezone: "{{ container_timezone | default(omit) }}" + nameserver: "{{ container_nameserver | default(omit) }}" + state: present + tags: "{{ container_tags | default(omit) }}" + update: true + register: container_result diff --git a/roles/proxmox-lxc-provision/tasks/wait.yaml b/roles/proxmox-lxc-provision/tasks/wait.yaml new file mode 100755 index 0000000..917f8d9 --- /dev/null +++ b/roles/proxmox-lxc-provision/tasks/wait.yaml @@ -0,0 +1,20 @@ +--- +- ansible.builtin.meta: refresh_inventory + +- name: Wait a moment for container to be available in inventory + ansible.builtin.pause: + seconds: 2 + +- name: Debug - Check if container is in inventory + ansible.builtin.debug: + msg: "Container {{ container_hostname }} found with IP: {{ hostvars[container_hostname]['ansible_host'] | default('NOT FOUND') }}" + + +- name: Wait for SSH to become available + ansible.builtin.wait_for: + host: "{{ hostvars[container_hostname]['ansible_host'] }}" + port: 22 + delay: 3 + state: started + register: ssh_wait_result + until: ssh_wait_result is not failed diff --git a/roles/system-maintenance/README.md b/roles/system-maintenance/README.md new file mode 100644 index 0000000..1b0e2f1 --- /dev/null +++ b/roles/system-maintenance/README.md @@ -0,0 +1,48 @@ 
+# System Maintenance Role + +Updates packages and handles system maintenance tasks across multiple distributions. + +## Features + +- Multi-distribution support (Debian/Ubuntu/Alpine) +- Package cache updates and upgrades +- Automatic cleanup of unused packages +- Reboot handling when required +- Distribution detection and validation + +## Supported Distributions + +- **Debian/Ubuntu** - Updates via apt, removes unused packages, checks for reboot requirements +- **Alpine** - Updates via apk package manager + +## Usage + +```yaml +- name: Perform system maintenance + include_role: + name: system-maintenance +``` + +## What It Does + +### Debian/Ubuntu Systems +1. Updates package cache (6-hour validity) +2. Upgrades all packages +3. Removes unused packages and purges configs +4. Checks if reboot is required +5. Triggers reboot handler if needed + +### Alpine Systems +1. Updates package cache +2. Upgrades all packages + +## Requirements + +- Root privileges +- Supported distribution (Debian/Ubuntu/Alpine) + +## Notes + +- Role automatically detects distribution and uses appropriate tasks +- Fails gracefully on unsupported distributions +- Reboot is handled via handler (only when required on Debian/Ubuntu) \ No newline at end of file diff --git a/roles/system-maintenance/handlers/main.yaml b/roles/system-maintenance/handlers/main.yaml new file mode 100755 index 0000000..e1c8af2 --- /dev/null +++ b/roles/system-maintenance/handlers/main.yaml @@ -0,0 +1,4 @@ +--- +- name: Reboot system + ansible.builtin.reboot: + msg: Rebooting as a reboot is required after upgrade diff --git a/roles/system-maintenance/tasks/alpine.yaml b/roles/system-maintenance/tasks/alpine.yaml new file mode 100755 index 0000000..40cc12f --- /dev/null +++ b/roles/system-maintenance/tasks/alpine.yaml @@ -0,0 +1,6 @@ +--- +- name: Update and upgrade apk packages (Alpine) + community.general.apk: + upgrade: true + update_cache: true + when: ansible_os_family == 'Alpine' diff --git 
a/roles/system-maintenance/tasks/debian.yaml b/roles/system-maintenance/tasks/debian.yaml new file mode 100755 index 0000000..eb798a6 --- /dev/null +++ b/roles/system-maintenance/tasks/debian.yaml @@ -0,0 +1,23 @@ +--- +- name: Update and upgrade apt packages (Debian/Ubuntu) + ansible.builtin.apt: + upgrade: true + update_cache: true + cache_valid_time: 21600 # 6 hours + +- name: Remove unused packages + ansible.builtin.apt: + autoremove: true + purge: true + +- name: Check if reboot required + ansible.builtin.stat: + path: /var/run/reboot-required + register: reboot_required_file + +- name: Call reboot handler if reboot required + ansible.builtin.debug: + msg: "Reboot is required" + when: reboot_required_file.stat.exists + changed_when: true # debug never reports changed on its own; without this the handler cannot fire + notify: Reboot system diff --git a/roles/system-maintenance/tasks/main.yaml b/roles/system-maintenance/tasks/main.yaml new file mode 100755 index 0000000..05efa35 --- /dev/null +++ b/roles/system-maintenance/tasks/main.yaml @@ -0,0 +1,19 @@ +--- +- name: Use appropriate tasks for given distribution + ansible.builtin.set_fact: + task_distro_file: "{{ ansible_os_family | lower }}.yaml" + +- name: Verify that the distribution is supported + become: false + ansible.builtin.stat: + path: "{{ role_path }}/tasks/{{ task_distro_file }}" + register: distro_stat_result + delegate_to: localhost + +- name: Fail if the distribution is not supported + ansible.builtin.fail: + msg: "Unsupported distribution: {{ ansible_os_family }}" + when: not distro_stat_result.stat.exists + +- name: Include distribution specific update tasks + ansible.builtin.include_tasks: "{{ task_distro_file }}" diff --git a/roles/system-setup/README.md b/roles/system-setup/README.md new file mode 100644 index 0000000..85799fb --- /dev/null +++ b/roles/system-setup/README.md @@ -0,0 +1,86 @@ +# System Setup Role + +Performs initial system configuration including user creation, SSH hardening, and package installation. 
+
+## Features
+
+- Creates admin user with SSH key authentication
+- Hardens SSH configuration
+- Installs essential packages
+- Optional passwordless sudo
+- Optional SSH host key regeneration
+- System package updates (via system-maintenance role)
+
+## Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `username` | `admin` | Username for the admin account |
+| `password` | *required* | Password for the admin user |
+| `shell` | `/bin/bash` | Default shell for the user |
+| `passwordless_sudo` | `true` | Allow sudo without password |
+| `ssh_pubkey_file` | *required* | Path to SSH public key file |
+| `regenerate_ssh_host_keys` | `false` | Generate new SSH host keys |
+| `extra_packages` | `[sudo, vim]` | Additional packages to install |
+
+## Required Variables
+
+You must provide these variables when using this role:
+
+```yaml
+password: "your_secure_password"
+ssh_pubkey_file: "/path/to/your/public/key.pub"
+```
+
+## Usage
+
+### Basic Setup
+```yaml
+- name: Initial system setup
+  include_role:
+    name: system-setup
+  vars:
+    password: "{{ admin_password }}"
+    ssh_pubkey_file: "~/.ssh/id_rsa.pub"
+```
+
+### Custom Configuration
+```yaml
+- name: System setup with custom user
+  include_role:
+    name: system-setup
+  vars:
+    username: myuser
+    password: "{{ user_password }}"
+    shell: /bin/zsh
+    ssh_pubkey_file: "keys/mykey.pub"
+    passwordless_sudo: false
+    regenerate_ssh_host_keys: true
+    extra_packages:
+      - sudo
+      - vim
+      - htop
+      - git
+```
+
+## What It Does
+
+1. **System Updates** - Calls system-maintenance role for package updates
+2. **Package Installation** - Installs packages from `extra_packages` list
+3. **User Creation** - Creates user with password and sudo access
+4. **SSH Key Setup** - Configures authorized keys for the user
+5. **SSH Hardening** - Applies secure SSH configuration
+6. **Host Keys** - Optionally regenerates SSH host keys
+
+## Requirements
+
+- Root privileges
+- SSH public key file accessible to Ansible
+- system-maintenance role (dependency)
+
+## Security Notes
+
+- SSH configuration is hardened by default
+- Password authentication can be disabled via SSH config
+- User is added to sudo group
+- SSH host key regeneration removes old keys completely
\ No newline at end of file
diff --git a/roles/system-setup/defaults/main.yaml b/roles/system-setup/defaults/main.yaml
new file mode 100755
index 0000000..42758ce
--- /dev/null
+++ b/roles/system-setup/defaults/main.yaml
@@ -0,0 +1,9 @@
+---
+username: admin
+shell: /bin/bash
+passwordless_sudo: true
+extra_packages:
+  - sudo
+  - vim
+
+regenerate_ssh_host_keys: false
\ No newline at end of file
diff --git a/roles/system-setup/handlers/main.yaml b/roles/system-setup/handlers/main.yaml
new file mode 100755
index 0000000..62f48c4
--- /dev/null
+++ b/roles/system-setup/handlers/main.yaml
@@ -0,0 +1,6 @@
+---
+- name: Restart sshd daemon
+  become: true
+  ansible.builtin.service:
+    name: sshd
+    state: restarted
\ No newline at end of file
diff --git a/roles/system-setup/tasks/extra-packages.yaml b/roles/system-setup/tasks/extra-packages.yaml
new file mode 100755
index 0000000..ec7004b
--- /dev/null
+++ b/roles/system-setup/tasks/extra-packages.yaml
@@ -0,0 +1,5 @@
+---
+- name: Install extra packages
+  ansible.builtin.package:
+    name: "{{ extra_packages }}"
+    state: present
diff --git a/roles/system-setup/tasks/main.yaml b/roles/system-setup/tasks/main.yaml
new file mode 100755
index 0000000..864eb79
--- /dev/null
+++ b/roles/system-setup/tasks/main.yaml
@@ -0,0 +1,14 @@
+---
+
+# Upgrade and update packages
+- ansible.builtin.include_role:
+    name: system-maintenance
+
+# Install extra packages
+- ansible.builtin.include_tasks: extra-packages.yaml
+
+# Create a user admin account
+- ansible.builtin.include_tasks: user.yaml
+
+# Harden SSH configuration
+- ansible.builtin.include_tasks: ssh.yaml
\ No newline at end of file
diff --git a/roles/system-setup/tasks/ssh.yaml b/roles/system-setup/tasks/ssh.yaml
new file mode 100755
index 0000000..5d34e7a
--- /dev/null
+++ b/roles/system-setup/tasks/ssh.yaml
@@ -0,0 +1,33 @@
+---
+- name: Update SSH configuration for better security
+  become: true
+  ansible.builtin.template:
+    # bare filename resolves to this role's templates/ dir; no '../' needed
+    src: 'sshd_config.j2'
+    dest: '/etc/ssh/sshd_config'
+    owner: root
+    mode: '0600'
+    validate: '/usr/sbin/sshd -t -f %s'
+  notify: Restart sshd daemon
+
+- name: Find all existing SSH host keys
+  become: true
+  ansible.builtin.find:
+    paths: '/etc/ssh'
+    patterns: "ssh_host_*_key*"
+  register: existing_ssh_host_keys
+  when: regenerate_ssh_host_keys | bool
+
+- name: Delete previous existing SSH host keys
+  become: true
+  ansible.builtin.file:
+    path: "{{ item.path }}"
+    state: absent
+  loop: "{{ existing_ssh_host_keys.files }}"
+  when: regenerate_ssh_host_keys | bool
+
+- name: Generate new SSH host keys
+  become: true
+  ansible.builtin.command: ssh-keygen -A
+  changed_when: true
+  when: regenerate_ssh_host_keys | bool
+  notify: Restart sshd daemon
diff --git a/roles/system-setup/tasks/user.yaml b/roles/system-setup/tasks/user.yaml
new file mode 100755
index 0000000..39b4b54
--- /dev/null
+++ b/roles/system-setup/tasks/user.yaml
@@ -0,0 +1,26 @@
+---
+- name: "Create a new user {{ username }}"
+  ansible.builtin.user:
+    name: "{{ username }}"
+    # seeded salt keeps the hash stable across runs so the task is idempotent
+    password: "{{ password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}"
+    groups:
+      - sudo
+    shell: "{{ shell }}"
+    state: present
+    append: true
+
+- name: Allow sudo to be used without a password
+  ansible.builtin.lineinfile:
+    path: /etc/sudoers
+    state: present
+    regexp: '^%sudo'
+    line: '%sudo ALL=(ALL) NOPASSWD: ALL'
+    validate: 'visudo -cf %s'
+  when: passwordless_sudo | bool
+
+- name: Copy over the public SSH key
+  ansible.posix.authorized_key:
+    user: "{{ username }}"
+    state: present
+    key: "{{ lookup('file', ssh_pubkey_file) }}"
diff --git a/roles/system-setup/templates/sshd_config.j2 b/roles/system-setup/templates/sshd_config.j2
new file mode 100755
index 0000000..64fd7fe
--- /dev/null
+++ b/roles/system-setup/templates/sshd_config.j2
@@ -0,0 +1,13 @@
+{{ ansible_managed | comment }}
+
+PermitRootLogin no
+PasswordAuthentication no
+PubkeyAuthentication yes
+AllowUsers {{ username }}
+
+ClientAliveInterval 300
+ClientAliveCountMax 2
+
+Protocol 2
+
+X11Forwarding no