diff --git a/accelerator/accelerator.yml b/accelerator/accelerator.yml index 2812067fa..6364a6566 100644 --- a/accelerator/accelerator.yml +++ b/accelerator/accelerator.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,9 +13,8 @@ # limitations under the License. --- -- name: Check if virtual environment is active - ansible.builtin.import_playbook: ../utils/check_venv.yml - when: not ( check_venv_executed | default(false) | bool ) +- name: Include input project directory + ansible.builtin.import_playbook: ../utils/include_input_dir.yml - name: Update Inventory with ansible_host information ansible.builtin.import_playbook: ../utils/servicetag_host_mapping.yml diff --git a/accelerator/roles/accelerator_validation/tasks/validate_amd.yml b/accelerator/roles/accelerator_validation/tasks/validate_amd.yml index b8d28720f..d2c943d78 100644 --- a/accelerator/roles/accelerator_validation/tasks/validate_amd.yml +++ b/accelerator/roles/accelerator_validation/tasks/validate_amd.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -25,8 +25,9 @@ file: "{{ software_config_json_file }}" name: software_config -- name: Include vars for {{ oim_os }} - ansible.builtin.include_vars: "{{ role_path }}/vars/{{ oim_os }}.yml" +- name: Set fact provision_os + ansible.builtin.set_fact: + provision_os: "{{ software_config.cluster_os_type }}" - name: Get amdgpu status ansible.builtin.set_fact: @@ -47,12 +48,6 @@ loop_control: loop_var: item -- name: Check if the rocm offline repo exists - ansible.builtin.stat: - path: "{{ offline_rocm_directory }}/rocm/" - register: check_rocm_repo - when: rocm_input_status - - name: Set amdgpu_config_status when: amdgpu_input_status block: @@ -60,22 +55,21 @@ ansible.builtin.set_fact: amdgpu_version: "{{ software_config.softwares | selectattr('name', 'equalto', 'amdgpu') | map(attribute='version') | first }}" - - name: Set amdgpu_version - ansible.builtin.set_fact: - amdgpu_directory: "{{ offline_rocm_directory }}/amdgpu/{{ amdgpu_version }}/" - - - name: Check amdgpu version directory exists or not - ansible.builtin.stat: - path: "{{ amdgpu_directory }}" - register: check_amdgpu_dir + - name: Get amdgpu repository details from Pulp + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[provision_os] }} distribution list --name amdgpu_{{ amdgpu_version }}" + delegate_to: localhost + register: pulp_amdgpu_output + changed_when: false # read-only query; stdout is expected to be a JSON list, "[]" when no distribution matches + no_log: true - - name: Set amdgpu_config_status based on directory existence + - name: Set amdgpu_config_status based on pulp rpm distribution ansible.builtin.set_fact: - amdgpu_config_status: "{{ check_amdgpu_dir.stat.exists | ternary(true, false) }}" + amdgpu_config_status: true + when: (pulp_amdgpu_output.stdout | default('[]', true) | from_json) | length > 0 rescue: - name: Log an error message ansible.builtin.debug: - msg: " {{ amdgpu_fail_msg }} " + msg: " {{ amdgpu_repo_fail_msg }} " - name: Set amdgpu_config_status to false ansible.builtin.set_fact: @@ -84,53 +78,50 @@ - name: Set rocm_config_status when: - rocm_input_status - - 
software_config.repo_config == 'always' or software_config.repo_config == 'partial' - - check_rocm_repo.stat.exists block: - name: Fetch rocm_version ansible.builtin.set_fact: rocm_version: "{{ software_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" - - name: Set rocm_directory - ansible.builtin.set_fact: - rocm_directory: "{{ offline_rocm_directory }}/rocm/{{ rocm_version }}/" - - - name: Check rocm_directory exists or not - ansible.builtin.stat: - path: "{{ rocm_directory }}" - register: check_rocm_dir + - name: Get ROCm repository details from Pulp + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[provision_os] }} distribution list --name rocm_{{ rocm_version }}" + delegate_to: localhost + register: check_rocm_repo + changed_when: false # read-only query; stdout is expected to be a JSON list, "[]" when no distribution matches + no_log: true - - name: Set rocm_config_status based on directory existence - ansible.builtin.set_fact: - rocm_config_status: "{{ check_rocm_dir.stat.exists | ternary(true, false) }}" - - rescue: - - name: Log an error message - ansible.builtin.debug: - msg: " {{ amdgpu_fail_msg }} " - - - name: Set rocm_config_status to false - ansible.builtin.set_fact: - rocm_config_status: false - -- name: Set rocm_config_status - when: - - rocm_input_status - - software_config.repo_config == 'never' or software_config.repo_config == 'partial' - - not check_rocm_repo.stat.exists - block: - - name: Fetch rocm_version - ansible.builtin.set_fact: - rocm_version: "{{ software_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" - - - name: Set rocm_config_status to true + - name: Set rocm_config_status based on pulp rpm distribution ansible.builtin.set_fact: rocm_config_status: true + when: (check_rocm_repo.stdout | default('[]', true) | from_json) | length > 0 rescue: - name: Log an error message ansible.builtin.debug: - msg: " {{ amdgpu_fail_msg }} " + msg: " {{ rocm_repo_fail_msg }} " - name: Set rocm_config_status to false ansible.builtin.set_fact: rocm_config_status: false + 
+# This will be used in future +# - name: Set rocm_config_status +# when: +# - rocm_input_status +# - software_config.repo_config == 'never' or software_config.repo_config == 'partial' +# - not check_rocm_repo.stat.exists +# block: +# - name: Fetch rocm_version +# ansible.builtin.set_fact: +# rocm_version: "{{ software_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" + +# - name: Set rocm_config_status to true +# ansible.builtin.set_fact: +# rocm_config_status: true +# rescue: +# - name: Log an error message +# ansible.builtin.debug: +# msg: " {{ amdgpu_fail_msg }} " + +# - name: Set rocm_config_status to false +# ansible.builtin.set_fact: +# rocm_config_status: false diff --git a/accelerator/roles/accelerator_validation/tasks/validate_intel_gaudi.yml b/accelerator/roles/accelerator_validation/tasks/validate_intel_gaudi.yml index 154c60c4b..fa038dd6b 100644 --- a/accelerator/roles/accelerator_validation/tasks/validate_intel_gaudi.yml +++ b/accelerator/roles/accelerator_validation/tasks/validate_intel_gaudi.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Intel Corporation. +# Copyright 2025 Intel Corporation. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,9 +25,6 @@ file: "{{ software_config_json_file }}" name: software_config -- name: Include vars for {{ oim_os }} - ansible.builtin.include_vars: "{{ role_path }}/vars/{{ oim_os }}.yml" - - name: Get Intel Gaudi status ansible.builtin.set_fact: intel_gaudi_input_status: true diff --git a/accelerator/roles/accelerator_validation/vars/main.yml b/accelerator/roles/accelerator_validation/vars/main.yml index 9d6cb034a..f19e32559 100644 --- a/accelerator/roles/accelerator_validation/vars/main.yml +++ b/accelerator/roles/accelerator_validation/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,13 @@ # limitations under the License. --- -# Usage: amd_validation.yml -software_config_json_file: "{{ role_path }}/../../../input/software_config.json" - -# Usage: validate_amdgpu_rocm_repo.yml +# Usage: validate_amd.yml +software_config_json_file: "{{ input_project_dir }}/software_config.json" +pulp_bin_path: /usr/local/bin/pulp +os_package_map: + rhel: rpm + rocky: rpm + ubuntu: deb amdgpu_input_fail_msg: "Failed, software_config.json does not have the amdgpu software stack." amdgpu_version_fail_msg: "Failed, software_config.json does not have the version for AMDGPU." amdgpu_repo_fail_msg: "Failed, local_repo.yml is not executed for downloading AMDGPU packages." @@ -25,10 +28,13 @@ rocm_repo_fail_msg: "Failed, local_repo.yml is not executed for downloading ROCM amdgpu_fail_msg: "An error occurred while setting the rocm_config_status." # Usage: include_local_repo_config.yml -local_repo_config_file: "{{ role_path }}/../../../input/local_repo_config.yml" +local_repo_config_file: "{{ input_project_dir }}/local_repo_config.yml" local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." # Usage: validate_intel_gaudi.yml +offline_intelgaudi_directory: "{{ repo_store_path }}/cluster/apt" +offline_gaudi_directory: "{{ repo_store_path }}/cluster/{{ oim_os }}/{{ oim_os_version }}/deb" +gaudi_search_pattern: "habanalabs*.deb" intel_gaudi_input_fail_msg: "Failed, software_config.json does not have the intelgaudi software stack." intel_gaudi_repo_fail_msg: "Failed, local_repo.yml is not executed for downloading Intel Gaudi driver packages." 
diff --git a/accelerator/roles/accelerator_validation/vars/rocky.yml b/accelerator/roles/accelerator_validation/vars/rocky.yml deleted file mode 120000 index ba2f905fb..000000000 --- a/accelerator/roles/accelerator_validation/vars/rocky.yml +++ /dev/null @@ -1 +0,0 @@ -redhat.yml \ No newline at end of file diff --git a/discovery/roles/discovery_validations/common/tasks/main.yml b/discovery/roles/discovery_validations/common/tasks/main.yml index 5036b78ad..a36ad01e8 100644 --- a/discovery/roles/discovery_validations/common/tasks/main.yml +++ b/discovery/roles/discovery_validations/common/tasks/main.yml @@ -77,8 +77,8 @@ # - name: Validate OFED and CUDA repo # ansible.builtin.include_tasks: validate_ofed_cuda_repo.yml -# - name: Validate AMDGPU and ROCm repo -# ansible.builtin.include_tasks: validate_amdgpu_rocm_repo.yml +- name: Validate AMDGPU and ROCm repo + ansible.builtin.include_tasks: validate_amdgpu_rocm_repo.yml # - name: Validate Broadcom repo # ansible.builtin.include_tasks: validate_broadcom_repo.yml diff --git a/discovery/roles/discovery_validations/common/tasks/validate_amdgpu_rocm_repo.yml b/discovery/roles/discovery_validations/common/tasks/validate_amdgpu_rocm_repo.yml index f91d5d7c4..36e21470f 100644 --- a/discovery/roles/discovery_validations/common/tasks/validate_amdgpu_rocm_repo.yml +++ b/discovery/roles/discovery_validations/common/tasks/validate_amdgpu_rocm_repo.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -34,25 +34,30 @@ ansible.builtin.set_fact: amdgpu_version: "{{ user_config.softwares | selectattr('name', 'equalto', 'amdgpu') | map(attribute='version') | first }}" - - name: Set amdgpu_directory + - name: Get amdgpu repository details from Pulp + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[provision_os] }} distribution list --name amdgpu_{{ amdgpu_version }}" + delegate_to: localhost + register: pulp_amdgpu_output + changed_when: false # read-only query; stdout is parsed as JSON below and is "[]" when no distribution matches + + - name: Parse amdgpu repository details ansible.builtin.set_fact: - amdgpu_directory: "{{ offline_rocm_directory }}/amdgpu/{{ amdgpu_version }}/" + amdgpu_repo_list: "{{ pulp_amdgpu_output.stdout | from_json }}" + when: pulp_amdgpu_output.stdout | length > 0 - - name: Check amdgpu_directory exists or not - ansible.builtin.stat: - path: "{{ amdgpu_directory }}" - register: check_amdgpu_dir + - name: Set amdgpu_base_url and amdgpu_name if repository exists + ansible.builtin.set_fact: + amdgpu_base_url: "{{ amdgpu_repo_list[0]['base_url'] | default('') }}" + amdgpu_name: "{{ amdgpu_repo_list[0]['name'] | default('') }}" + amdgpu_config_status: true + when: amdgpu_repo_list | default([]) | length > 0 - name: Warning - Please wait, This task will take few seconds ansible.builtin.pause: seconds: "{{ warning_wait_time }}" prompt: "{{ amdgpu_repo_warning_msg }}" - when: not check_amdgpu_dir.stat.exists + when: amdgpu_repo_list | default([]) | length == 0 - - name: Set amdgpu_config_status to true - ansible.builtin.set_fact: - amdgpu_config_status: true - when: check_amdgpu_dir.stat.exists rescue: - name: Warning - Please wait, This task will take few seconds ansible.builtin.pause: diff --git a/discovery/roles/discovery_validations/common/vars/main.yml b/discovery/roles/discovery_validations/common/vars/main.yml index a067811f2..279161de9 100644 --- a/discovery/roles/discovery_validations/common/vars/main.yml +++ b/discovery/roles/discovery_validations/common/vars/main.yml @@ -218,6 +218,7 @@ CUDA will not be installed on the nodes 
post provisioning." offline_iso_directory: "{{ repo_store_path }}/cluster/{{ provision_os }}/{{ provision_os_version }}/iso" # Usage: validate_amdgpu_rocm_repo.yml +pulp_bin_path: /usr/local/bin/pulp amdgpu_input_warning_msg: "[WARNING] software_config.json does not have the amdgpu software stack. Hence ROCm will not be installed on the nodes post provisioning." amdgpu_version_warning_msg: "[WARNING] software_config.json does not have the version for AMDGPU. @@ -228,7 +229,10 @@ rocm_version_warning_msg: "[WARNING] software_config.json does not have the vers Hence ROCm will not be installed on the nodes post provisioning." rocm_repo_warning_msg: "[WARNING] local_repo.yml is not executed for downloading ROCM packages. ROCm will not be installed on the nodes post provisioning." - +os_package_map: + rhel: rpm + rocky: rpm + ubuntu: deb # Usage: validate_intelgaudi_repo.yml intelgaudi_version_warning_msg: "[WARNING] software_config.json does not have the version for 'intelgaudi'. Hence Habana stack will not be installed on the nodes post provisioning." 
diff --git a/discovery/roles/postscripts/common/tasks/configure_postscripts.yml b/discovery/roles/postscripts/common/tasks/configure_postscripts.yml index 633d30d47..c9a7d5219 100644 --- a/discovery/roles/postscripts/common/tasks/configure_postscripts.yml +++ b/discovery/roles/postscripts/common/tasks/configure_postscripts.yml @@ -18,7 +18,7 @@ changed_when: true - name: Configure ubuntu postscripts - ansible.builtin.include_tasks: "{{ role_path }}/../{{ oim_os }}/tasks/configure_postscripts.yml" + ansible.builtin.include_tasks: "{{ role_path }}/../{{ provision_os }}/tasks/configure_postscripts.yml" - name: Configure ntp postscripts ansible.builtin.include_tasks: configure_postscripts_ntp.yml @@ -73,10 +73,10 @@ # - cuda_config_status # - cuda_repo_stat.stat.exists # -# - name: Configure postscripts for ROCm -# ansible.builtin.command: "{{ xcat_path }}/chdef all -p postscripts=omnia_rocm" -# changed_when: true -# when: amdgpu_config_status +- name: Configure postscripts for ROCm + ansible.builtin.command: "{{ xcat_path }}/chdef all -p postscripts=omnia_rocm" + changed_when: true + when: amdgpu_config_status # # - name: Configure postscripts for Intel Gaudi # ansible.builtin.command: "{{ xcat_path }}/chdef all -p postscripts=omnia_intelgaudi" diff --git a/discovery/roles/postscripts/common/tasks/main.yml b/discovery/roles/postscripts/common/tasks/main.yml index 3bfbe7121..407da2ae0 100644 --- a/discovery/roles/postscripts/common/tasks/main.yml +++ b/discovery/roles/postscripts/common/tasks/main.yml @@ -20,18 +20,17 @@ MANPATH: "{{ xcat_manpath_env }}" PERL_BADLANG: "{{ perl_badlang_env }}" block: - - name: Include vars for {{ oim_os }} - ansible.builtin.include_vars: "{{ role_path }}/../{{ oim_os }}/vars/main.yml" + - name: Include vars for {{ provision_os }} + ansible.builtin.include_vars: "{{ role_path }}/../{{ provision_os }}/vars/main.yml" -# These tasks will be updated after local repo changes -# - name: Configure postscripts on {{ oim_os }} -# 
ansible.builtin.include_tasks: "{{ role_path }}/../{{ oim_os }}/tasks/main.yml" + - name: Configure postscripts on {{ provision_os }} + ansible.builtin.include_tasks: "{{ role_path }}/../{{ provision_os }}/tasks/main.yml" - name: Check all node group status ansible.builtin.include_tasks: check_nodes_all.yml - name: Configuration of postbootscripts - ansible.builtin.include_tasks: "{{ role_path }}/../{{ oim_os }}/tasks/configure_postbootscripts.yml" + ansible.builtin.include_tasks: "{{ role_path }}/../{{ provision_os }}/tasks/configure_postbootscripts.yml" when: all_node_status - name: Configuration of postscripts diff --git a/discovery/roles/postscripts/common/templates/omnia_rocm.j2 b/discovery/roles/postscripts/common/templates/omnia_rocm.j2 index 524921c68..66f2762a1 100644 --- a/discovery/roles/postscripts/common/templates/omnia_rocm.j2 +++ b/discovery/roles/postscripts/common/templates/omnia_rocm.j2 @@ -14,12 +14,12 @@ if [[ $amd_check_display_ctrlr == *"Advanced Micro Devices"* || $amd_check_proce if [[ $validate_ubuntu_os == "1" ]] then - echo "deb [trusted=yes] http://{{ admin_nic_ip }}:80/install{{ repo_store_path }}/cluster/apt/amdgpu/{{ amdgpu_version }} ./" >> /etc/apt/sources.list.d/amdgpu.list + echo "deb [trusted=yes] http://{{ amdgpu_base_url }} ./" >> /etc/apt/sources.list.d/amdgpu.list sudo apt-get update sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)" -y sudo apt install amdgpu-dkms -y - + rm /etc/apt/sources.list.d/amdgpu.list apt-get update @@ -28,12 +28,12 @@ if [[ $amd_check_display_ctrlr == *"Advanced Micro Devices"* || $amd_check_proce else sudo tee /etc/yum.repos.d/amdgpu.repo <<EOF -[amdgpu] -name=amdgpu -baseurl=http://{{ admin_nic_ip }}:80/install{{ repo_store_path }}/cluster/yum/amdgpu/{{ amdgpu_version }} +[{{ amdgpu_name }}] +name={{ amdgpu_name }} +baseurl={{ amdgpu_base_url }} enabled=1 gpgcheck=0 -{% if proxy_status %} +{% if proxy_status is defined and proxy_status %} proxy=_none_ {% endif %} EOF 
diff --git a/discovery/roles/postscripts/fedora/files/ib.rhels8.x86_64.pkglist b/discovery/roles/postscripts/rhel/files/ib.rhels8.x86_64.pkglist similarity index 100% rename from discovery/roles/postscripts/fedora/files/ib.rhels8.x86_64.pkglist rename to discovery/roles/postscripts/rhel/files/ib.rhels8.x86_64.pkglist diff --git a/discovery/roles/postscripts/fedora/files/omnia_disable_repo b/discovery/roles/postscripts/rhel/files/omnia_disable_repo similarity index 100% rename from discovery/roles/postscripts/fedora/files/omnia_disable_repo rename to discovery/roles/postscripts/rhel/files/omnia_disable_repo diff --git a/discovery/roles/postscripts/fedora/tasks/configure_cuda.yml b/discovery/roles/postscripts/rhel/tasks/configure_cuda.yml similarity index 100% rename from discovery/roles/postscripts/fedora/tasks/configure_cuda.yml rename to discovery/roles/postscripts/rhel/tasks/configure_cuda.yml diff --git a/discovery/roles/postscripts/fedora/tasks/configure_ofed.yml b/discovery/roles/postscripts/rhel/tasks/configure_ofed.yml similarity index 100% rename from discovery/roles/postscripts/fedora/tasks/configure_ofed.yml rename to discovery/roles/postscripts/rhel/tasks/configure_ofed.yml diff --git a/discovery/roles/postscripts/fedora/tasks/configure_postbootscripts.yml b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml similarity index 100% rename from discovery/roles/postscripts/fedora/tasks/configure_postbootscripts.yml rename to discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml diff --git a/discovery/roles/postscripts/fedora/tasks/configure_postscripts.yml b/discovery/roles/postscripts/rhel/tasks/configure_postscripts.yml similarity index 100% rename from discovery/roles/postscripts/fedora/tasks/configure_postscripts.yml rename to discovery/roles/postscripts/rhel/tasks/configure_postscripts.yml diff --git a/discovery/roles/postscripts/fedora/tasks/configure_rocm.yml b/discovery/roles/postscripts/rhel/tasks/configure_rocm.yml 
similarity index 100% rename from discovery/roles/postscripts/fedora/tasks/configure_rocm.yml rename to discovery/roles/postscripts/rhel/tasks/configure_rocm.yml diff --git a/discovery/roles/postscripts/fedora/tasks/main.yml b/discovery/roles/postscripts/rhel/tasks/main.yml similarity index 76% rename from discovery/roles/postscripts/fedora/tasks/main.yml rename to discovery/roles/postscripts/rhel/tasks/main.yml index e433c776e..aeac4bb11 100644 --- a/discovery/roles/postscripts/fedora/tasks/main.yml +++ b/discovery/roles/postscripts/rhel/tasks/main.yml @@ -13,8 +13,9 @@ # limitations under the License. --- -- name: Pre-requisities for postscripts - ansible.builtin.include_tasks: pre_requisite.yml +# These tasks will be updated after local repo changes for cuda +# - name: Pre-requisities for postscripts +# ansible.builtin.include_tasks: pre_requisite.yml # These tasks will be updated after local repo changes # - name: Configure OFED postscripts @@ -25,6 +26,6 @@ # ansible.builtin.include_tasks: configure_cuda.yml # when: cuda_config_status -# - name: Configure ROCm postscripts -# ansible.builtin.include_tasks: configure_rocm.yml -# when: amdgpu_config_status +- name: Configure ROCm postscripts + ansible.builtin.include_tasks: configure_rocm.yml + when: amdgpu_config_status diff --git a/discovery/roles/postscripts/fedora/tasks/pre_requisite.yml b/discovery/roles/postscripts/rhel/tasks/pre_requisite.yml similarity index 100% rename from discovery/roles/postscripts/fedora/tasks/pre_requisite.yml rename to discovery/roles/postscripts/rhel/tasks/pre_requisite.yml diff --git a/discovery/roles/postscripts/fedora/templates/omnia_cuda.j2 b/discovery/roles/postscripts/rhel/templates/omnia_cuda.j2 similarity index 100% rename from discovery/roles/postscripts/fedora/templates/omnia_cuda.j2 rename to discovery/roles/postscripts/rhel/templates/omnia_cuda.j2 diff --git a/discovery/roles/postscripts/fedora/templates/omnia_ofed.j2 
b/discovery/roles/postscripts/rhel/templates/omnia_ofed.j2 similarity index 100% rename from discovery/roles/postscripts/fedora/templates/omnia_ofed.j2 rename to discovery/roles/postscripts/rhel/templates/omnia_ofed.j2 diff --git a/discovery/roles/postscripts/fedora/vars/main.yml b/discovery/roles/postscripts/rhel/vars/main.yml similarity index 75% rename from discovery/roles/postscripts/fedora/vars/main.yml rename to discovery/roles/postscripts/rhel/vars/main.yml index 26f1ab0c2..1c298e187 100644 --- a/discovery/roles/postscripts/fedora/vars/main.yml +++ b/discovery/roles/postscripts/rhel/vars/main.yml @@ -18,7 +18,7 @@ cuda_search_key: "cuda" # Usage: configure_ofed.yml ofed_postscripts_path: - - { src: "{{ role_path }}/../fedora/templates/omnia_ofed.j2", dest: "/install/postscripts/omnia_ofed", mode: "755" } + - { src: "{{ role_path }}/../rhel/templates/omnia_ofed.j2", dest: "/install/postscripts/omnia_ofed", mode: "755" } ofed_install_script_path: - { src: "/opt/xcat/share/xcat/ib/scripts/Mellanox/mlnxofed_ib_install", dest: "/install/postscripts/mlnxofed_ib_install", mode: "755" } rhel8_package_path: "/opt/xcat/share/xcat/install/rh/compute.rhels8.pkglist" @@ -26,7 +26,7 @@ mlnx_ofed_repo: /install/ofed repo_permission: "755" ofed_permission: "0644" ofed_package_path: - - { src: "{{ role_path }}/../fedora/files/ib.rhels8.x86_64.pkglist", dest: "/opt/xcat/share/xcat/install/rh/ib.rhels8.x86_64.pkglist", mode: "644" } + - { src: "{{ role_path }}/../rhel/files/ib.rhels8.x86_64.pkglist", dest: "/opt/xcat/share/xcat/install/rh/ib.rhels8.x86_64.pkglist", mode: "644" } # Usage: configure_cuda.yml cuda_tmp_path: /tmp/cuda @@ -34,13 +34,14 @@ cuda_core_path: /install/cuda/x86_64/cuda-core invalid_cuda_rpm_fail_msg: "Failed. Invalid cuda_toolkit_path: {{ cuda_toolkit_path }} provided in provision_config.yml. Make sure cuda rpm file is downloaded completely." 
cuda_postscripts_path: - - { src: "{{ role_path }}/../fedora/templates/omnia_cuda.j2", dest: "/install/postscripts/omnia_cuda", mode: "755" } + - { src: "{{ role_path }}/../rhel/templates/omnia_cuda.j2", dest: "/install/postscripts/omnia_cuda", mode: "755" } xcat_path: /opt/xcat/bin # Usage: configure_rocm.yml +pulp_port: 2225 rocm_postscripts_path: - { src: "{{ role_path }}/templates/omnia_rocm.j2", dest: "/install/postscripts/omnia_rocm", mode: "755" } # Usage: configure_postscripts.yml omnia_disable_repo_postscripts_path: - - { src: "{{ role_path }}/../fedora/files/omnia_disable_repo", dest: "/install/postscripts/omnia_disable_repo", mode: "755" } + - { src: "{{ role_path }}/../rhel/files/omnia_disable_repo", dest: "/install/postscripts/omnia_disable_repo", mode: "755" } diff --git a/input/config/rhel/9.4/amdgpu.json b/input/config/rhel/9.4/amdgpu.json new file mode 100644 index 000000000..1cf28d26d --- /dev/null +++ b/input/config/rhel/9.4/amdgpu.json @@ -0,0 +1,14 @@ +{ + "amdgpu": { + "cluster": [ + {"package": "kernel-devel", "type": "rpm", "repo_name": "baseos"}, + {"package": "kernel-headers", "type": "rpm", "repo_name": "baseos"}, + {"package": "amdgpu-dkms", "type": "rpm", "repo_name": "amdgpu"} + ] + }, + "rocm": { + "cluster": [ + {"package": "rocm", "type": "rpm", "repo_name": "rocm"} + ] + } +} \ No newline at end of file diff --git a/input/software_config.json b/input/software_config.json index 162e4452f..d6e135324 100644 --- a/input/software_config.json +++ b/input/software_config.json @@ -3,7 +3,7 @@ "cluster_os_version": "9.4", "repo_config": "always", "softwares": [ - {"name": "amdgpu", "version": "6.2.2"}, + {"name": "amdgpu", "version": "6.3.1"}, {"name": "openldap"}, {"name": "nfs"}, {"name": "k8s", "version":"1.29.5"}, @@ -13,7 +13,7 @@ ], "amdgpu": [ - {"name": "rocm", "version": "6.2.2" } + {"name": "rocm", "version": "6.3.1" } ], "pytorch": [ {"name": "pytorch_cpu"}, diff --git a/utils/performance_profile/performance_profile.yml 
b/utils/performance_profile/performance_profile.yml index 2eedc4f21..782656020 100644 --- a/utils/performance_profile/performance_profile.yml +++ b/utils/performance_profile/performance_profile.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +13,10 @@ # limitations under the License. --- +- name: Include input project directory + when: not project_dir_status | default(false) | bool + ansible.builtin.import_playbook: ../include_input_dir.yml + - name: Update Inventory with ansible_host information ansible.builtin.import_playbook: ../servicetag_host_mapping.yml when: not ( hostvars['127.0.0.1']['update_inventory_executed'] | default(false) | bool ) diff --git a/utils/performance_profile/roles/performance_profile/tasks/validate_input.yml b/utils/performance_profile/roles/performance_profile/tasks/validate_input.yml index d98dd9d28..ee352582d 100644 --- a/utils/performance_profile/roles/performance_profile/tasks/validate_input.yml +++ b/utils/performance_profile/roles/performance_profile/tasks/validate_input.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,10 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- + +- name: Load software_config.json + ansible.builtin.include_vars: + file: "{{ software_config_json_file }}" + name: software_config + - name: Saving distribution of OS ansible.builtin.set_fact: - compute_os: "{{ ansible_facts['distribution'] | lower }}" - compute_os_version: "{{ ansible_distribution_version }}" + compute_os: "{{ software_config.cluster_os_type }}" + compute_os_version: "{{ software_config.cluster_os_version }}" - name: Validate inputs when: diff --git a/utils/performance_profile/roles/performance_profile/vars/main.yml b/utils/performance_profile/roles/performance_profile/vars/main.yml index 2cb0d7faf..9b4a99b30 100644 --- a/utils/performance_profile/roles/performance_profile/vars/main.yml +++ b/utils/performance_profile/roles/performance_profile/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ setup_performance_profile_path: "{{ role_path }}/tasks/setup_performance_profile.yml" # Usage: validate_inputs.yml - +software_config_json_file: "{{ input_project_dir }}/software_config.json" empty_inventory_fail_msg: "Failed. inventory not provided. Re-run playbook with inventory providing -i inventory." 
python_version: "{{ ansible_python_interpreter }}" validate_input_py: "{{ role_path }}/files/validate_input.py" diff --git a/utils/roles/update_user_repo/tasks/check_os_versions.yml b/utils/roles/update_user_repo/common/tasks/check_os_versions.yml similarity index 78% rename from utils/roles/update_user_repo/tasks/check_os_versions.yml rename to utils/roles/update_user_repo/common/tasks/check_os_versions.yml index 90d5b8197..06ca278ae 100644 --- a/utils/roles/update_user_repo/tasks/check_os_versions.yml +++ b/utils/roles/update_user_repo/common/tasks/check_os_versions.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,14 +31,14 @@ ansible.builtin.set_fact: cluster_os_version: "{{ software_config.cluster_os_version }}" -- name: Set fact for Omnia Infrastructure Manager OS version - ansible.builtin.set_fact: - oim_os_version: "{{ ansible_distribution_version | lower }}" +# - name: Set fact for Omnia Infrastructure Manager OS version +# ansible.builtin.set_fact: +# oim_os_version: "{{ ansible_distribution_version | lower }}" -- name: Set flag if os versions donot match - ansible.builtin.set_fact: - os_no_match: true - when: cluster_os_version != oim_os_version +# - name: Set flag if os versions donot match +# ansible.builtin.set_fact: +# os_no_match: true +# when: cluster_os_version != oim_os_version - name: Set distro to jammy (Ubuntu 22 OS) ansible.builtin.set_fact: diff --git a/utils/roles/update_user_repo/tasks/find_new_repo.yml b/utils/roles/update_user_repo/common/tasks/find_new_repo.yml similarity index 93% rename from utils/roles/update_user_repo/tasks/find_new_repo.yml rename to utils/roles/update_user_repo/common/tasks/find_new_repo.yml index 52217dc10..6b7e63ccd 100644 --- 
a/utils/roles/update_user_repo/tasks/find_new_repo.yml +++ b/utils/roles/update_user_repo/common/tasks/find_new_repo.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/utils/roles/update_user_repo/tasks/main.yml b/utils/roles/update_user_repo/common/tasks/main.yml similarity index 71% rename from utils/roles/update_user_repo/tasks/main.yml rename to utils/roles/update_user_repo/common/tasks/main.yml index e669ca3d0..9cc13b23d 100644 --- a/utils/roles/update_user_repo/tasks/main.yml +++ b/utils/roles/update_user_repo/common/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,9 +17,6 @@ ansible.builtin.set_fact: cluster_os: "{{ ansible_distribution | lower }}" -- name: Include vars for {{ cluster_os }} - ansible.builtin.include_vars: "{{ role_path }}/vars/{{ cluster_os }}.yml" - - name: Validate the input files and values ansible.builtin.include_tasks: validation.yml @@ -29,7 +26,5 @@ - name: Update repos on cluster for {{ cluster_os }} block: - name: Update user repos for nodes on {{ cluster_os }} - ansible.builtin.include_tasks: update_user_repo_{{ cluster_os }}.yml - - - name: Update software repo for nodes on {{ cluster_os }} - ansible.builtin.include_tasks: update_software_repo_{{ cluster_os }}.yml + ansible.builtin.include_role: + name: "{{ role_path }}/../{{ cluster_os }}" diff --git a/utils/roles/update_user_repo/common/tasks/validate_repo.yml b/utils/roles/update_user_repo/common/tasks/validate_repo.yml new file mode 100644 index 000000000..65eff6668 --- /dev/null +++ b/utils/roles/update_user_repo/common/tasks/validate_repo.yml @@ -0,0 +1,54 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +# - name: Check omnia cluster repo exist +# ansible.builtin.stat: +# path: "{{ omnia_cluster_repo_path }}" +# register: cluster_repo_path_check + +# - name: Fail if omnia cluster repo does not exist +# ansible.builtin.fail: +# msg: "{{ omnia_cluster_repo_fail_msg }}" +# when: +# - (software_config.repo_config == 'always' or software_config.repo_config == 'partial') +# - not cluster_repo_path_check.stat.exists + +# - name: Check pip.conf exist +# ansible.builtin.stat: +# path: "{{ omnia_pip_conf_path }}" +# register: pip_conf_check + +# - name: Fail if pip.conf does not exist +# ansible.builtin.fail: +# msg: "{{ pip_conf_fail_msg }}" +# when: not pip_conf_check.stat.exists + +- name: Validate repo configuration + block: + - name: Check pulp distribution command + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[cluster_os] }} distribution list" + delegate_to: localhost + register: check_pulp_repo + changed_when: false + + - name: Fail if pulp repo is not configured + ansible.builtin.fail: + msg: " {{ repo_fail_msg }} " + when: check_pulp_repo.stdout | length == 0 + + rescue: + - name: Failed - pulp repo is configured + ansible.builtin.fail: + msg: " {{ repo_fail_msg }} " diff --git a/utils/roles/update_user_repo/tasks/validation.yml b/utils/roles/update_user_repo/common/tasks/validation.yml similarity index 88% rename from utils/roles/update_user_repo/tasks/validation.yml rename to utils/roles/update_user_repo/common/tasks/validation.yml index 6736b4121..e905f487c 100644 --- a/utils/roles/update_user_repo/tasks/validation.yml +++ b/utils/roles/update_user_repo/common/tasks/validation.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -31,9 +31,9 @@ - software_config.softwares is defined fail_msg: "{{ software_config_parameters_fail_msg }}" -- name: Assert the oim_os_type is {{ os_type }} +- name: Assert the oim_os_type is {{ cluster_os }} ansible.builtin.assert: - that: software_config.cluster_os_type == os_type + that: software_config.cluster_os_type == os_type_map[cluster_os] fail_msg: "{{ cluster_os_type_fail_msg }}" - name: Validate user_repo_url is defined correctly when not none diff --git a/utils/roles/update_user_repo/vars/main.yml b/utils/roles/update_user_repo/common/vars/main.yml similarity index 59% rename from utils/roles/update_user_repo/vars/main.yml rename to utils/roles/update_user_repo/common/vars/main.yml index 2d3991014..970161286 100644 --- a/utils/roles/update_user_repo/vars/main.yml +++ b/utils/roles/update_user_repo/common/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,19 +13,28 @@ # limitations under the License. 
--- -# Usage:update_software_repo_redhat.yml, update_user_repo_redhat.yml +# Usage:<os_type>/update_software_repo.yml software_version_default: "omnia_default" - -# Usage:update_software_repo_redhat.yml, update_user_repo_redhat.yml,create_software_repo_redhat.yml file_permission: "644" +local_repo_access_path: "/opt/omnia/provision/local_repo_access.yml" +version_invalid_fail_msg: "Ensure valid version is mentioned in software_config.json for " # Usage:validation.yml -software_config_json_file: "{{ role_path }}/../../../input/software_config.json" -local_repo_config_file: "{{ role_path }}/../../../input/local_repo_config.yml" +os_type_map: + redhat: rhel + rocky: rocky + ubuntu: ubuntu +input_project_dir: "{{ hostvars['localhost']['input_project_dir'] }}" +software_config_json_file: "{{ input_project_dir }}/software_config.json" +local_repo_config_file: "{{ input_project_dir }}/local_repo_config.yml" software_config_parameters_fail_msg: "Failed. Please ensure cluster_os_type, cluster_os_verion, repo_config, softwares are defined in software_config.json" -cluster_os_type_fail_msg: "Failed.The cluster_os_type should be defined as {{ os_type }} in software_config.json" +cluster_os_type_fail_msg: "Failed.The cluster_os_type should be defined as {{ os_type_map[cluster_os] }} in software_config.json" usr_repo_url_fail_msg: "Failed.Please ensure user_repo_url is valid with no jinja variables" -# Usage:update_software_repo_redhat.yml -local_repo_access_path: "/opt/omnia/offline/local_repo_access.yml" -version_invalid_fail_msg: "Ensure valid version is mentioned in software_config.json for " +# Usage:validate_repo.yml +pulp_bin_path: /usr/local/bin/pulp +os_package_map: + redhat: rpm + rocky: rpm + ubuntu: deb +repo_fail_msg: "Failed. 
Please execute local_repo.yml to create pulp repositories" \ No newline at end of file diff --git a/utils/roles/update_user_repo/tasks/create_software_repo_redhat.yml b/utils/roles/update_user_repo/redhat/tasks/create_software_repo.yml similarity index 51% rename from utils/roles/update_user_repo/tasks/create_software_repo_redhat.yml rename to utils/roles/update_user_repo/redhat/tasks/create_software_repo.yml index 6868382ba..72cdb3ed4 100644 --- a/utils/roles/update_user_repo/tasks/create_software_repo_redhat.yml +++ b/utils/roles/update_user_repo/redhat/tasks/create_software_repo.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,19 +17,19 @@ ansible.builtin.assert: that: - "item.version is defined" - - item.version | default("",true) | length>0 + - item.version | default("",true) | length > 0 fail_msg: "{{ version_invalid_fail_msg }} {{ item.name }}" -- name: Check software repo exists in omnia repo on Omnia Infrastructure Manager - ansible.builtin.stat: - path: "{{ cluster_software_path }}/{{ item.name }}/{{ item.version }}" - register: check_path_stat +- name: Check software repo exists in pulp distribution + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[cluster_os] }} distribution list --name {{ item.name }}_{{ item.version }} --field name,base_url" + register: check_repo_stat delegate_to: localhost connection: local - name: Set variable ansible.builtin.set_fact: - software_path_url: "http://{{ admin_nic_ip }}:80/install{{ cluster_software_path }}/{{ item.name }}/{{ item.version }}" + software_path_url: "{{ (check_repo_stat.stdout | from_json)[0]['base_url'] }}" + when: check_repo_stat | length > 0 - name: Check software repo present in {{ repo_path }} ansible.builtin.command: grep -r "{{ 
software_path_url }}" "{{ repo_path }}" @@ -37,9 +37,15 @@ changed_when: false failed_when: check_repo_url.stderr!='' -- name: Create repo file if software version exists on OIM - ansible.builtin.template: - src: "{{ repo_config_template_src }}" - dest: "{{ repo_path }}/{{ item.name }}-{{ item.version }}-repo.repo" - mode: "{{ file_permission }}" - when: check_path_stat.stat.exists and software_path_url not in check_repo_url.stdout +- name: Create repo file and set repos_update_status + when: check_repo_stat | length > 0 and software_path_url not in check_repo_url.stdout + block: + - name: Create repo file if software version exists on OIM + ansible.builtin.template: + src: "{{ repo_config_template_src }}" + dest: "{{ repo_path }}/{{ item.name }}-{{ item.version }}-repo.repo" + mode: "{{ file_permission }}" + + - name: Set repos_update_status + ansible.builtin.set_fact: + repos_update_status: true diff --git a/accelerator/roles/accelerator_validation/vars/redhat.yml b/utils/roles/update_user_repo/redhat/tasks/main.yml similarity index 65% rename from accelerator/roles/accelerator_validation/vars/redhat.yml rename to utils/roles/update_user_repo/redhat/tasks/main.yml index 7427888af..70f17a261 100644 --- a/accelerator/roles/accelerator_validation/vars/redhat.yml +++ b/utils/roles/update_user_repo/redhat/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright 2023 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,8 @@ # limitations under the License. 
--- -# Usage: validate_amd.yml -offline_rocm_directory: "{{ repo_store_path }}/cluster/yum" +- name: Update software repo for nodes on {{ cluster_os }} + ansible.builtin.include_tasks: "update_user_repo.yml" + +- name: Update software repo for nodes on {{ cluster_os }} + ansible.builtin.include_tasks: "update_software_repo.yml" \ No newline at end of file diff --git a/utils/roles/update_user_repo/tasks/update_software_repo_redhat.yml b/utils/roles/update_user_repo/redhat/tasks/update_software_repo.yml similarity index 90% rename from utils/roles/update_user_repo/tasks/update_software_repo_redhat.yml rename to utils/roles/update_user_repo/redhat/tasks/update_software_repo.yml index b20bdca30..e8245f314 100644 --- a/utils/roles/update_user_repo/tasks/update_software_repo_redhat.yml +++ b/utils/roles/update_user_repo/redhat/tasks/update_software_repo.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ ansible.builtin.include_vars: "{{ local_repo_access_path }}" - name: Generate software repository configurations - ansible.builtin.include_tasks: create_software_repo_redhat.yml + ansible.builtin.include_tasks: create_software_repo.yml loop: "{{ software_config.softwares + software_config.amdgpu | default([]) }}" when: "'beegfs' in item.name or 'amdgpu' in item.name or 'rocm' in item.name" loop_control: @@ -51,7 +51,7 @@ - (software_config['repo_config']|lower == "never") block: - name: Find newly added omnia repos - ansible.builtin.include_tasks: find_new_repo.yml + ansible.builtin.include_tasks: "{{ role_path }}/../common/tasks/find_new_repo.yml" loop: "{{ omnia_repo_url | default([], true) }}" - name: Block to execute when new omnia repos are to be added @@ -72,11 +72,16 @@ with_indexed_items: "{{ omnia_repo_url }}" when: item.1.url in new_repos and software_version_default not in item.1.url + - name: Set repos_update_status + ansible.builtin.set_fact: + repos_update_status: true + - name: Clean the yum repos cache ansible.builtin.command: dnf clean all changed_when: true - name: Execute update repos + when: repos_update_status | default(false) block: - name: Update yum repos cache ansible.builtin.dnf: diff --git a/utils/roles/update_user_repo/tasks/update_user_repo_redhat.yml b/utils/roles/update_user_repo/redhat/tasks/update_user_repo.yml similarity index 88% rename from utils/roles/update_user_repo/tasks/update_user_repo_redhat.yml rename to utils/roles/update_user_repo/redhat/tasks/update_user_repo.yml index b0e976c9d..fcee17959 100644 --- a/utils/roles/update_user_repo/tasks/update_user_repo_redhat.yml +++ b/utils/roles/update_user_repo/redhat/tasks/update_user_repo.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,11 +23,11 @@ ansible.builtin.set_fact: cluster_os_type: "{{ software_config.cluster_os_type }}" cluster_os_version: "{{ software_config.cluster_os_version }}" - omnia_repo_path: "{{ repo_store_path }}" - name: Initialise new_repos fact ansible.builtin.set_fact: new_repos: [] + repos_update_status: false - name: Block to execute when usr repos are configured when: @@ -37,7 +37,7 @@ - (software_config['repo_config'] == "partial") or (software_config['repo_config'] == "never") block: - name: Find newly added usr repos - ansible.builtin.include_tasks: find_new_repo.yml + ansible.builtin.include_tasks: "{{ role_path }}/../common/tasks/find_new_repo.yml" loop: "{{ user_repo_url | default([], true) }}" - name: Block to execute when new usr repos are added @@ -57,3 +57,7 @@ mode: "{{ file_permission }}" with_indexed_items: "{{ user_repo_url }}" when: item.1.url in new_repos and software_version_default not in item.1.url + + - name: Set repos_update_status + ansible.builtin.set_fact: + repos_update_status: true diff --git a/utils/roles/update_user_repo/templates/omnia_repo_config.j2 b/utils/roles/update_user_repo/redhat/templates/omnia_repo_config.j2 similarity index 66% rename from utils/roles/update_user_repo/templates/omnia_repo_config.j2 rename to utils/roles/update_user_repo/redhat/templates/omnia_repo_config.j2 index 0a7c571a3..3001ac174 100644 --- a/utils/roles/update_user_repo/templates/omnia_repo_config.j2 +++ b/utils/roles/update_user_repo/redhat/templates/omnia_repo_config.j2 @@ -1,6 +1,6 @@ {# For Omnia Repositories #} -[omnia_repo{{ item.0 + 1 }}_{{ansible_date_time.iso8601_basic}}] -name=Omnia Repository {{ item.0 + 1 }} {{ansible_date_time.iso8601_basic}} +[omnia_repo{{ item.0 + 1 }}_{{ ansible_date_time.iso8601_basic }}] +name=Omnia Repository {{ item.0 + 1 }} {{ ansible_date_time.iso8601_basic }} baseurl={{ item.1.url 
}} {% if item.1.gpgkey is defined %} {% if item.1.gpgkey | default("", true) | length > 1 %} diff --git a/utils/roles/update_user_repo/redhat/templates/repo_config_template.j2 b/utils/roles/update_user_repo/redhat/templates/repo_config_template.j2 new file mode 100644 index 000000000..b5f85de36 --- /dev/null +++ b/utils/roles/update_user_repo/redhat/templates/repo_config_template.j2 @@ -0,0 +1,9 @@ +[{{ item.name }}-{{ item.version }}-repo] +name={{ item.name }}-{{ item.version }}-repo +baseurl={{ software_path_url }} +enabled=1 +gpgcheck=0 +skip_if_unavailable=True +{% if proxy_status is defined and proxy_status %} +proxy=_none_ +{% endif %} diff --git a/utils/roles/update_user_repo/templates/user_repo_config.j2 b/utils/roles/update_user_repo/redhat/templates/user_repo_config.j2 similarity index 66% rename from utils/roles/update_user_repo/templates/user_repo_config.j2 rename to utils/roles/update_user_repo/redhat/templates/user_repo_config.j2 index d46253019..dd6ae6ba7 100644 --- a/utils/roles/update_user_repo/templates/user_repo_config.j2 +++ b/utils/roles/update_user_repo/redhat/templates/user_repo_config.j2 @@ -1,6 +1,6 @@ {# For User Repositories #} -[user_repo{{ item.0 + 1 }}_{{ansible_date_time.iso8601_basic}}] -name=User Repository {{ item.0 + 1 }} {{ansible_date_time.iso8601_basic}} +[user_repo{{ item.0 + 1 }}_{{ ansible_date_time.iso8601_basic }}] +name=User Repository {{ item.0 + 1 }} {{ ansible_date_time.iso8601_basic }} baseurl={{ item.1.url }} {% if item.1.gpgkey is defined %} {% if item.1.gpgkey | default("", true) | length > 1 %} diff --git a/utils/roles/update_user_repo/redhat/vars/main.yml b/utils/roles/update_user_repo/redhat/vars/main.yml new file mode 100644 index 000000000..bc6069e7c --- /dev/null +++ b/utils/roles/update_user_repo/redhat/vars/main.yml @@ -0,0 +1,33 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Usage:create_software_repo.yml, find_new_repo.yml +repo_path: "/etc/yum.repos.d" + +# Usage:create_software_repo.yml +repo_config_template_src: "repo_config_template.j2" + +# Usage:update_user_repo.yml +usr_repo_config_template_src: "user_repo_config.j2" + +# Usage:update_software_repo.yml +omnia_repo_config_template_src: "omnia_repo_config.j2" +repo_retries: 5 +repo_delay: 10 +repo_update_failure_msg: "The playbook failed due to an error while running dnf makecache. 
+ Please check user_repo_url and omnia_repo_url_redhat in local_repo_config.yml and ensure they are correct and reachable" + +# Usage:update_user_repo.yml,update_software_repo.yml +os_type: "rhel" diff --git a/utils/roles/update_user_repo/rocky b/utils/roles/update_user_repo/rocky new file mode 120000 index 000000000..4d0827986 --- /dev/null +++ b/utils/roles/update_user_repo/rocky @@ -0,0 +1 @@ +redhat \ No newline at end of file diff --git a/utils/roles/update_user_repo/tasks/update_software_repo_rocky.yml b/utils/roles/update_user_repo/tasks/update_software_repo_rocky.yml deleted file mode 120000 index cc3698018..000000000 --- a/utils/roles/update_user_repo/tasks/update_software_repo_rocky.yml +++ /dev/null @@ -1 +0,0 @@ -update_software_repo_redhat.yml \ No newline at end of file diff --git a/utils/roles/update_user_repo/tasks/update_user_repo_rocky.yml b/utils/roles/update_user_repo/tasks/update_user_repo_rocky.yml deleted file mode 120000 index 694feb885..000000000 --- a/utils/roles/update_user_repo/tasks/update_user_repo_rocky.yml +++ /dev/null @@ -1 +0,0 @@ -update_user_repo_redhat.yml \ No newline at end of file diff --git a/utils/roles/update_user_repo/tasks/validate_repo.yml b/utils/roles/update_user_repo/tasks/validate_repo.yml deleted file mode 100644 index 493bd6dc5..000000000 --- a/utils/roles/update_user_repo/tasks/validate_repo.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Check omnia cluster repo exist - ansible.builtin.stat: - path: "{{ omnia_cluster_repo_path }}" - register: cluster_repo_path_check - -- name: Fail if omnia cluster repo does not exist - ansible.builtin.fail: - msg: "{{ omnia_cluster_repo_fail_msg }}" - when: - - (software_config.repo_config == 'always' or software_config.repo_config == 'partial') - - not cluster_repo_path_check.stat.exists - -- name: Check pip.conf exist - ansible.builtin.stat: - path: "{{ omnia_pip_conf_path }}" - register: pip_conf_check - -- name: Fail if pip.conf does not exist - ansible.builtin.fail: - msg: "{{ pip_conf_fail_msg }}" - when: not pip_conf_check.stat.exists diff --git a/utils/roles/update_user_repo/templates/repo_config_template.j2 b/utils/roles/update_user_repo/templates/repo_config_template.j2 deleted file mode 100644 index e300a8cb8..000000000 --- a/utils/roles/update_user_repo/templates/repo_config_template.j2 +++ /dev/null @@ -1,9 +0,0 @@ -[{{ item.name }}-{{ item.version }}-repo] -name={{ item.name }}-{{item.version}}-repo -baseurl=http://{{ admin_nic_ip }}:80/install{{ omnia_repo_path }}/cluster/yum/{{ item.name }}/{{ item.version | default('') }} -enabled=1 -gpgcheck=0 -skip_if_unavailable=True -{% if proxy_status %} -proxy=_none_ -{% endif %} diff --git a/utils/roles/update_user_repo/tasks/create_software_repo_ubuntu.yml b/utils/roles/update_user_repo/ubuntu/tasks/create_software_repo.yml similarity index 97% rename from utils/roles/update_user_repo/tasks/create_software_repo_ubuntu.yml rename to utils/roles/update_user_repo/ubuntu/tasks/create_software_repo.yml index 3e4938a57..1311c2395 100644 --- a/utils/roles/update_user_repo/tasks/create_software_repo_ubuntu.yml +++ b/utils/roles/update_user_repo/ubuntu/tasks/create_software_repo.yml @@ -1,45 +1,45 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Check version variable - ansible.builtin.assert: - that: - - "item.version is defined" - - item.version | default("",true) | length>0 - fail_msg: "{{ version_invalid_fail_msg }} {{ item.name }}" - -- name: Check software repo exists in omnia repo on Omnia Infrastructure Manager - ansible.builtin.stat: - path: "{{ cluster_software_path }}/{{ item.name }}/{{ item.version }}" - register: check_path_stat - delegate_to: localhost - connection: local - -- name: Set variable - ansible.builtin.set_fact: - software_path_url: "http://{{ admin_nic_ip }}:80/install{{ cluster_software_path }}/{{ item.name }}/{{ item.version }}" - -- name: Check software repo present in {{ repo_path }} - ansible.builtin.command: grep -r "{{ software_path_url }}" "{{ repo_path }}" - register: check_repo_url - changed_when: false - failed_when: check_repo_url.stderr!='' - -- name: Create repo file if software version exists on OIM - ansible.builtin.template: - src: "{{ repo_config_template_src }}" - dest: "{{ repo_path }}/{{ item.name }}-{{ item.version }}.list" - mode: "{{ file_permission }}" - when: check_path_stat.stat.exists and software_path_url not in check_repo_url.stdout +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Check version variable + ansible.builtin.assert: + that: + - "item.version is defined" + - item.version | default("",true) | length>0 + fail_msg: "{{ version_invalid_fail_msg }} {{ item.name }}" + +- name: Check software repo exists in omnia repo on Omnia Infrastructure Manager + ansible.builtin.stat: + path: "{{ cluster_software_path }}/{{ item.name }}/{{ item.version }}" + register: check_path_stat + delegate_to: localhost + connection: local + +- name: Set variable + ansible.builtin.set_fact: + software_path_url: "http://{{ admin_nic_ip }}:80/install{{ cluster_software_path }}/{{ item.name }}/{{ item.version }}" + +- name: Check software repo present in {{ repo_path }} + ansible.builtin.command: grep -r "{{ software_path_url }}" "{{ repo_path }}" + register: check_repo_url + changed_when: false + failed_when: check_repo_url.stderr!='' + +- name: Create repo file if software version exists on OIM + ansible.builtin.template: + src: "{{ repo_config_template_src }}" + dest: "{{ repo_path }}/{{ item.name }}-{{ item.version }}.list" + mode: "{{ file_permission }}" + when: check_path_stat.stat.exists and software_path_url not in check_repo_url.stdout diff --git a/accelerator/roles/accelerator_validation/vars/ubuntu.yml b/utils/roles/update_user_repo/ubuntu/tasks/main.yml similarity index 59% rename from accelerator/roles/accelerator_validation/vars/ubuntu.yml rename to utils/roles/update_user_repo/ubuntu/tasks/main.yml index bb7759fc5..02de75a18 100644 --- a/accelerator/roles/accelerator_validation/vars/ubuntu.yml +++ 
b/utils/roles/update_user_repo/ubuntu/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright 2023 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,10 +13,9 @@ # limitations under the License. --- -# Usage: validate_amd.yml -offline_rocm_directory: "{{ repo_store_path }}/cluster/apt" +# This need to be updated when we support for ubuntu +# - name: Update software repo for nodes on {{ cluster_os }} +# ansible.builtin.include_tasks: "update_user_repo.yml" -# Usage: validate_intel_gaudi.yml -offline_intelgaudi_directory: "{{ repo_store_path }}/cluster/apt" -offline_gaudi_directory: "{{ repo_store_path }}/cluster/{{ oim_os }}/{{ oim_os_version }}/deb" -gaudi_search_pattern: "habanalabs*.deb" +# - name: Update software repo for nodes on {{ cluster_os }} +# ansible.builtin.include_tasks: "update_software_repo.yml" \ No newline at end of file diff --git a/utils/roles/update_user_repo/tasks/update_software_repo_ubuntu.yml b/utils/roles/update_user_repo/ubuntu/tasks/update_software_repo.yml similarity index 97% rename from utils/roles/update_user_repo/tasks/update_software_repo_ubuntu.yml rename to utils/roles/update_user_repo/ubuntu/tasks/update_software_repo.yml index 5a80dbf7d..3966e496a 100644 --- a/utils/roles/update_user_repo/tasks/update_software_repo_ubuntu.yml +++ b/utils/roles/update_user_repo/ubuntu/tasks/update_software_repo.yml @@ -1,178 +1,178 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Set default intel_config_status - ansible.builtin.set_fact: - intel_config_status: false - intelgaudi_config_status: false - -- name: Update beegfs,amdgpu,rocm repo when repo_config=partial,always - when: (software_config['repo_config']|lower == "partial") or (software_config['repo_config']|lower == "always") - block: - - name: Local local_repo_access.yml file - ansible.builtin.include_vars: "{{ local_repo_access_path }}" - - - name: Generate software repository configurations - ansible.builtin.include_tasks: create_software_repo_ubuntu.yml - loop: "{{ software_config.softwares + software_config.amdgpu | default([]) }}" - when: "'beegfs' in item.name or 'amdgpu' in item.name or 'rocm' in item.name or 'intelgaudi' in item.name" - loop_control: - loop_var: item - - - name: Set intelgaudi config status - ansible.builtin.set_fact: - intelgaudi_config_status: true - intelgaudi_version: "{{ item.version }}" - loop: "{{ software_config.softwares | default([]) }}" - when: "'intelgaudi' in item.name" - loop_control: - loop_var: item - - - name: Set intel config status - ansible.builtin.set_fact: - intel_config_status: true - intel_version: "{{ intelgaudi_version }}" - loop: "{{ software_config.intelgaudi | default([]) }}" - when: "intelgaudi_config_status and 'intel' in item.name" - loop_control: - loop_var: item - - - name: Generate software repository configurations for intelgaudi - ansible.builtin.include_tasks: create_software_repo_ubuntu.yml - loop: - - { name: "intel", version: "{{ intel_version }}" } - when: intel_config_status - loop_control: 
- loop_var: item - -- name: Set fact for software version from software_config json - ansible.builtin.set_fact: - "{{ item.name }}_version": "{{ item.version }}" - loop: "{{ software_config.softwares + software_config.amdgpu | default([]) }}" - when: "('beegfs' in item.name or 'amdgpu' in item.name or 'rocm' in item.name) and item.version is defined" - loop_control: - loop_var: item - # noqa: var-naming[no-jinja] - -- name: Set fact for omnia repo url - ansible.builtin.set_fact: - omnia_repo_url: "{{ lookup('ansible.builtin.vars', 'omnia_repo_url_' + os_type) }}" - -- name: Initialise the variable - ansible.builtin.set_fact: - new_repos: [] - -- name: Block to update omnia repos when repo_config is never - when: - - "omnia_repo_url is defined" - - "omnia_repo_url | type_debug == 'list'" - - "omnia_repo_url | length > 0" - - (software_config['repo_config']|lower == "never") - block: - - name: Find newly added omnia repos - ansible.builtin.include_tasks: find_new_repo.yml - loop: "{{ omnia_repo_url | default([], true) }}" - -- name: Block to execute when new omnia repos are to be added - when: - - "new_repos is defined" - - "new_repos | type_debug == 'list'" - - "new_repos | length > 0" - block: - - name: Print the repo urls for which repo files will be created - ansible.builtin.debug: - var: new_repos - - - name: Create directory {{ tmp_omnia_keyring_file_path }} - ansible.builtin.file: - path: "{{ tmp_omnia_keyring_file_path }}" - state: directory - mode: "{{ file_permission }}" - become: true - delegate_to: localhost - connection: local - run_once: true - - - name: Create directory {{ tmp_omnia_repo_path }} - ansible.builtin.file: - path: "{{ tmp_omnia_repo_path }}" - state: directory - mode: "{{ file_permission }}" - become: true - delegate_to: localhost - connection: local - run_once: true - - - name: Download GPG key for each omnia repository - ansible.builtin.shell: # noqa command-instead-of-module - cmd: "wget -q {{ item.1.gpgkey }} -O - | gpg --dearmor | 
sudo tee {{ tmp_omnia_keyring_file_path }}/omnia_repo{{ item.0 + 1 }}{{ansible_date_time.iso8601_basic}}.gpg" # noqa: yaml[line-length] risky-shell-pipe - with_indexed_items: "{{ omnia_repo_url }}" - when: - - item.1.gpgkey is defined and item.1.gpgkey != '' - - item.1.url in new_repos and software_version_default not in item.1.url - changed_when: false - delegate_to: localhost - connection: local - run_once: true - - - name: Download Public key for each omnia repository - ansible.builtin.shell: # noqa command-instead-of-module - cmd: "wget -q {{ item.1.publickey }} -O - | gpg --dearmor | sudo tee {{ tmp_omnia_keyring_file_path }}/omnia_repo{{ item.0 + 1 }}{{ansible_date_time.iso8601_basic}}.gpg" # noqa: yaml[line-length] risky-shell-pipe - with_indexed_items: "{{ omnia_repo_url }}" - when: - - item.1.publickey is defined and item.1.publickey != '' - - item.1.url in new_repos and software_version_default not in item.1.url - changed_when: false - delegate_to: localhost - connection: local - run_once: true - - - name: Configure omnia repositories - ansible.builtin.template: - src: "{{ omnia_repo_config_template_src }}" - dest: "{{ tmp_omnia_repo_path }}/omnia_repo{{ item.0 + 1 }}{{ansible_date_time.iso8601_basic}}.list" - mode: "{{ file_permission }}" - with_indexed_items: "{{ omnia_repo_url }}" - when: item.1.url in new_repos and software_version_default not in item.1.url - delegate_to: localhost - connection: local - run_once: true - - - name: Copy gpgkey to compute - ansible.builtin.copy: - src: "{{ tmp_omnia_keyring_file_path }}/" - dest: "{{ repo_path }}/../keyrings/" - mode: "{{ file_permission }}" - - - name: Copy omnia repo files to compute - ansible.builtin.copy: - src: "{{ tmp_omnia_repo_path }}/" - dest: "{{ repo_path }}/" - mode: "{{ file_permission }}" - -- name: Execute apt update on {{ repo_path }} - block: - - name: Update packages - ansible.builtin.apt: - update_cache: true - register: update_repos - until: update_repos is not failed - retries: "{{ 
repo_retries }}" - delay: "{{ repo_delay }}" - - rescue: - - name: Update cache failure - ansible.builtin.fail: - msg: "{{ apt_update_failure_msg }}" +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Set default intel_config_status + ansible.builtin.set_fact: + intel_config_status: false + intelgaudi_config_status: false + +- name: Update beegfs,amdgpu,rocm repo when repo_config=partial,always + when: (software_config['repo_config']|lower == "partial") or (software_config['repo_config']|lower == "always") + block: + - name: Load local_repo_access.yml file + ansible.builtin.include_vars: "{{ local_repo_access_path }}" + + - name: Generate software repository configurations + ansible.builtin.include_tasks: create_software_repo_ubuntu.yml + loop: "{{ software_config.softwares + software_config.amdgpu | default([]) }}" + when: "'beegfs' in item.name or 'amdgpu' in item.name or 'rocm' in item.name or 'intelgaudi' in item.name" + loop_control: + loop_var: item + + - name: Set intelgaudi config status + ansible.builtin.set_fact: + intelgaudi_config_status: true + intelgaudi_version: "{{ item.version }}" + loop: "{{ software_config.softwares | default([]) }}" + when: "'intelgaudi' in item.name" + loop_control: + loop_var: item + + - name: Set intel config status + ansible.builtin.set_fact: + intel_config_status: true + intel_version: "{{ intelgaudi_version }}" + loop: "{{
software_config.intelgaudi | default([]) }}" + when: "intelgaudi_config_status and 'intel' in item.name" + loop_control: + loop_var: item + + - name: Generate software repository configurations for intelgaudi + ansible.builtin.include_tasks: create_software_repo_ubuntu.yml + loop: + - { name: "intel", version: "{{ intel_version }}" } + when: intel_config_status + loop_control: + loop_var: item + +- name: Set fact for software version from software_config json + ansible.builtin.set_fact: + "{{ item.name }}_version": "{{ item.version }}" + loop: "{{ software_config.softwares + software_config.amdgpu | default([]) }}" + when: "('beegfs' in item.name or 'amdgpu' in item.name or 'rocm' in item.name) and item.version is defined" + loop_control: + loop_var: item + # noqa: var-naming[no-jinja] + +- name: Set fact for omnia repo url + ansible.builtin.set_fact: + omnia_repo_url: "{{ lookup('ansible.builtin.vars', 'omnia_repo_url_' + os_type) }}" + +- name: Initialise the variable + ansible.builtin.set_fact: + new_repos: [] + +- name: Block to update omnia repos when repo_config is never + when: + - "omnia_repo_url is defined" + - "omnia_repo_url | type_debug == 'list'" + - "omnia_repo_url | length > 0" + - (software_config['repo_config']|lower == "never") + block: + - name: Find newly added omnia repos + ansible.builtin.include_tasks: find_new_repo.yml + loop: "{{ omnia_repo_url | default([], true) }}" + +- name: Block to execute when new omnia repos are to be added + when: + - "new_repos is defined" + - "new_repos | type_debug == 'list'" + - "new_repos | length > 0" + block: + - name: Print the repo urls for which repo files will be created + ansible.builtin.debug: + var: new_repos + + - name: Create directory {{ tmp_omnia_keyring_file_path }} + ansible.builtin.file: + path: "{{ tmp_omnia_keyring_file_path }}" + state: directory + mode: "{{ file_permission }}" + become: true + delegate_to: localhost + connection: local + run_once: true + + - name: Create directory {{
tmp_omnia_repo_path }} + ansible.builtin.file: + path: "{{ tmp_omnia_repo_path }}" + state: directory + mode: "{{ file_permission }}" + become: true + delegate_to: localhost + connection: local + run_once: true + + - name: Download GPG key for each omnia repository + ansible.builtin.shell: # noqa command-instead-of-module + cmd: "wget -q {{ item.1.gpgkey }} -O - | gpg --dearmor | sudo tee {{ tmp_omnia_keyring_file_path }}/omnia_repo{{ item.0 + 1 }}{{ansible_date_time.iso8601_basic}}.gpg" # noqa: yaml[line-length] risky-shell-pipe + with_indexed_items: "{{ omnia_repo_url }}" + when: + - item.1.gpgkey is defined and item.1.gpgkey != '' + - item.1.url in new_repos and software_version_default not in item.1.url + changed_when: false + delegate_to: localhost + connection: local + run_once: true + + - name: Download Public key for each omnia repository + ansible.builtin.shell: # noqa command-instead-of-module + cmd: "wget -q {{ item.1.publickey }} -O - | gpg --dearmor | sudo tee {{ tmp_omnia_keyring_file_path }}/omnia_repo{{ item.0 + 1 }}{{ansible_date_time.iso8601_basic}}.gpg" # noqa: yaml[line-length] risky-shell-pipe + with_indexed_items: "{{ omnia_repo_url }}" + when: + - item.1.publickey is defined and item.1.publickey != '' + - item.1.url in new_repos and software_version_default not in item.1.url + changed_when: false + delegate_to: localhost + connection: local + run_once: true + + - name: Configure omnia repositories + ansible.builtin.template: + src: "{{ omnia_repo_config_template_src }}" + dest: "{{ tmp_omnia_repo_path }}/omnia_repo{{ item.0 + 1 }}{{ansible_date_time.iso8601_basic}}.list" + mode: "{{ file_permission }}" + with_indexed_items: "{{ omnia_repo_url }}" + when: item.1.url in new_repos and software_version_default not in item.1.url + delegate_to: localhost + connection: local + run_once: true + + - name: Copy gpgkey to compute + ansible.builtin.copy: + src: "{{ tmp_omnia_keyring_file_path }}/" + dest: "{{ repo_path }}/../keyrings/" + mode: "{{
file_permission }}" + + - name: Copy omnia repo files to compute + ansible.builtin.copy: + src: "{{ tmp_omnia_repo_path }}/" + dest: "{{ repo_path }}/" + mode: "{{ file_permission }}" + +- name: Execute apt update on {{ repo_path }} + block: + - name: Update packages + ansible.builtin.apt: + update_cache: true + register: update_repos + until: update_repos is not failed + retries: "{{ repo_retries }}" + delay: "{{ repo_delay }}" + + rescue: + - name: Update cache failure + ansible.builtin.fail: + msg: "{{ apt_update_failure_msg }}" diff --git a/utils/roles/update_user_repo/tasks/update_user_repo_ubuntu.yml b/utils/roles/update_user_repo/ubuntu/tasks/update_user_repo.yml similarity index 100% rename from utils/roles/update_user_repo/tasks/update_user_repo_ubuntu.yml rename to utils/roles/update_user_repo/ubuntu/tasks/update_user_repo.yml diff --git a/utils/roles/update_user_repo/templates/omnia_repo_config_ubuntu.j2 b/utils/roles/update_user_repo/ubuntu/templates/omnia_repo_config_ubuntu.j2 similarity index 100% rename from utils/roles/update_user_repo/templates/omnia_repo_config_ubuntu.j2 rename to utils/roles/update_user_repo/ubuntu/templates/omnia_repo_config_ubuntu.j2 diff --git a/utils/roles/update_user_repo/templates/repo_config_template_ubuntu.j2 b/utils/roles/update_user_repo/ubuntu/templates/repo_config_template_ubuntu.j2 similarity index 100% rename from utils/roles/update_user_repo/templates/repo_config_template_ubuntu.j2 rename to utils/roles/update_user_repo/ubuntu/templates/repo_config_template_ubuntu.j2 diff --git a/utils/roles/update_user_repo/templates/user_repo_config_ubuntu.j2 b/utils/roles/update_user_repo/ubuntu/templates/user_repo_config_ubuntu.j2 similarity index 100% rename from utils/roles/update_user_repo/templates/user_repo_config_ubuntu.j2 rename to utils/roles/update_user_repo/ubuntu/templates/user_repo_config_ubuntu.j2 diff --git a/utils/roles/update_user_repo/vars/ubuntu.yml b/utils/roles/update_user_repo/ubuntu/vars/main.yml
similarity index 100% rename from utils/roles/update_user_repo/vars/ubuntu.yml rename to utils/roles/update_user_repo/ubuntu/vars/main.yml diff --git a/utils/roles/update_user_repo/vars/redhat.yml b/utils/roles/update_user_repo/vars/redhat.yml deleted file mode 100644 index 9dc6e71c9..000000000 --- a/utils/roles/update_user_repo/vars/redhat.yml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -# Usage:create_software_repo_redhat.yml, find_new_repo.yml -repo_path: "/etc/yum.repos.d" - -# Usage:create_software_repo_redhat.yml -repo_config_template_src: "{{ role_path }}/templates/repo_config_template.j2" -cluster_software_path: "{{ omnia_repo_path }}/cluster/yum" - -# Usage:update_user_repo_redhat.yml -usr_repo_config_template_src: "{{ role_path }}/templates/user_repo_config.j2" - -# Usage:update_software_repo_redhat.yml -omnia_repo_config_template_src: "{{ role_path }}/templates/omnia_repo_config.j2" -repo_retries: 5 -repo_delay: 10 -repo_update_failure_msg: "The playbook failed due to an error while running dnf makecache. 
- Please check user_repo_url and omnia_repo_url_redhat in local_repo_config.yml and ensure they are correct and reachable" - -# Usage:update_user_repo_redhat.yml,update_software_repo_redhat.yml -os_type: "rhel" - -# Usage: validate_repo.yml -omnia_cluster_repo_path: /etc/yum.repos.d/cluster-rpm-repo.repo -omnia_pip_conf_path: /etc/pip.conf -omnia_cluster_repo_fail_msg: "Failed. The playbook failed because the Omnia cluster repository file {{ omnia_cluster_repo_path }} is missing from the node. -This could be due to an issue during the OS installation. To resolve this, please reprovision the node and re-run the playbook." -pip_conf_fail_msg: "Failed. The playbook failed because the Omnia pip config file {{ omnia_pip_conf_path }} is missing from the node. -This could be due to an issue during the OS installation. To resolve this, please reprovision the node and re-run the playbook." diff --git a/utils/roles/update_user_repo/vars/rocky.yml b/utils/roles/update_user_repo/vars/rocky.yml deleted file mode 100644 index 8e4ae93fe..000000000 --- a/utils/roles/update_user_repo/vars/rocky.yml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- - -# Usage:create_software_repo_redhat.yml, find_new_repo.yml -repo_path: "/etc/yum.repos.d" - -# Usage:create_software_repo_redhat.yml -repo_config_template_src: "{{ role_path }}/templates/repo_config_template.j2" -cluster_software_path: "{{ omnia_repo_path }}/cluster/yum" - -# Usage:update_user_repo_redhat.yml -usr_repo_config_template_src: "{{ role_path }}/templates/user_repo_config.j2" - -# Usage:update_software_repo_redhat.yml -omnia_repo_config_template_src: "{{ role_path }}/templates/omnia_repo_config.j2" -repo_retries: 5 -repo_delay: 10 - -# Usage:update_user_repo_redhat.yml,update_software_repo_redhat.yml -os_type: "rocky" - -# Usage: validate_repo.yml -omnia_cluster_repo_path: /etc/yum.repos.d/cluster-rpm-repo.repo -omnia_pip_conf_path: /etc/pip.conf -omnia_cluster_repo_fail_msg: "Failed. The playbook failed because the Omnia cluster repository file {{ omnia_cluster_repo_path }} is missing from the node. -This could be due to an issue during the OS installation. To resolve this, please reprovision the node and re-run the playbook." -pip_conf_fail_msg: "Failed. The playbook failed because the Omnia pip config file {{ omnia_pip_conf_path }} is missing from the node. -This could be due to an issue during the OS installation. To resolve this, please reprovision the node and re-run the playbook." diff --git a/utils/update_user_repo.yml b/utils/update_user_repo.yml index 9ef708627..c4fdc1cd8 100644 --- a/utils/update_user_repo.yml +++ b/utils/update_user_repo.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,9 +13,9 @@ # limitations under the License. 
--- -- name: Check if virtual environment is active - ansible.builtin.import_playbook: check_venv.yml - when: not ( check_venv_executed | default(false) | bool ) +- name: Include input project directory + when: not project_dir_status | default(false) | bool + ansible.builtin.import_playbook: include_input_dir.yml - name: Validate whether cross-os is present hosts: localhost @@ -24,11 +24,11 @@ tasks: - name: Validate cluster and Omnia Infrastructure Manager OS versions ansible.builtin.include_role: - name: update_user_repo + name: update_user_repo/common tasks_from: check_os_versions.yml - name: Update repo and registry hosts: slurm_control_node, slurm_node, kube_control_plane, kube_node, auth_server, login, etcd roles: - - update_user_repo - - update_user_registry + - update_user_repo/common + # - update_user_registry # Needs to be updated