diff --git a/docs/dictionary/en-custom.txt b/docs/dictionary/en-custom.txt index be0f304407..0eef11538f 100644 --- a/docs/dictionary/en-custom.txt +++ b/docs/dictionary/en-custom.txt @@ -1,9 +1,28 @@ +APIs +Amartya +AssignedTeam +CP +CPython +ClusterServiceVersion +FreeIPA +IDM +IMVHO +IdP +Idempotency +LDAP +LLM +MachineConfig +NodeHealthCheck +RHCOS +SNO +Sinha +UTF +ZipFile aaabbcc abcdefghij addr afuscoar alertmanager -Amartya amartyasinha ansible ansibleee @@ -11,7 +30,6 @@ ansibletest ansibletests ansibleuser ansiblevars -APIs apiversion apivips appcreds @@ -20,12 +38,12 @@ aqc args arx arxcruz -AssignedTeam auth authfile autoconfiguration autohold autoholds +autologin autoscale autostart awk @@ -89,7 +107,6 @@ cli client clusterimageset clusterpool -ClusterServiceVersion cmd cn cni @@ -104,9 +121,7 @@ containerfile controlplane coredns coreos -CP cpus -CPython crashloopbackoff crb crc @@ -143,6 +158,7 @@ deprovision deps dest dev +devscript devscripts devsetup dfb @@ -184,8 +200,8 @@ epel epyc etcd eth -extraimages extraRPMs +extraimages ezzmy favorit fbqufbqkfbzxrja @@ -200,7 +216,6 @@ flbxutz fmw fqdn freefonts -FreeIPA frmo fsid fultonj @@ -237,27 +252,25 @@ hotfix href hsm hsms +httpd https ic icjbuue icokicagy -IDM -IdP -Idempotency idrac -imagecontentsourcepolicy iface igfsbg igmp igogicbjyxbzig ihbyb +imagecontentsourcepolicy img -IMVHO ingressvips ini init installplan installyamls +internalapi ipaddr ipam ipi @@ -305,18 +318,16 @@ kuttl kvm lacp lajly -LDAP ldp libguestfs libvirt +libvirt's libvirtd libvirterror -libvirt's ljaumtawojy ljaumtaxojy ljaumtayojy lkid -LLM lmxpynzpcnrdcmfkbwluihnvy localhost localnet @@ -371,9 +382,9 @@ netconfig netmask networkattachmentdefinition networkconfig +networker networkmanager networktype -networker nfs nftables nhc @@ -385,7 +396,6 @@ nmstate nncp nobuild nodeexporter -NodeHealthCheck nodenetworkconfigurationpolicy nodepool nodeps @@ -402,12 +412,12 @@ nwy nzgdh oauth observability -oidc oc ocp ocpbm ocppr 
odkvmf +oidc okd ol olm @@ -537,7 +547,6 @@ sha shiftstack shiftstackclient sig -Sinha sizepercent skbg skiplist @@ -604,7 +613,6 @@ uoyt uri usermod usr -UTF utils uuid vbibob @@ -661,7 +669,6 @@ ytm yxivcnvul yyoje yyyy -ZipFile zlcbwcm zm zpbgugcmjkihbvb diff --git a/roles/devscripts/README.md b/roles/devscripts/README.md index 85e55bf8f3..26f83a123d 100644 --- a/roles/devscripts/README.md +++ b/roles/devscripts/README.md @@ -50,9 +50,13 @@ networks. * `cifmw_devscripts_external_net` (dict) Key/value pair containing information about the network infrastructure. Refer [section](#supported-keys-in-cifmw_devscripts_external_net). +* `cifmw_devscripts_ext_iface` (str) The network interface name used in the + NMState static IP template for OCP nodes. Defaults to `enp2s0`. Override for + bare metal hosts whose RHCOS interface name differs (e.g. `eno12399`). * `cifmw_devscripts_cinder_volume_pvs` (list) a list of physical disks to be used for creating cinder-volumes volume-group. By default, the list contains `/dev/vda`. +* `cifmw_devscripts_sno` (bool) Enable Single Node OpenShift. Defaults to `false`. 
### Secrets management diff --git a/roles/devscripts/defaults/main.yml b/roles/devscripts/defaults/main.yml index a8a675a9c0..818fa17d0f 100644 --- a/roles/devscripts/defaults/main.yml +++ b/roles/devscripts/defaults/main.yml @@ -68,6 +68,9 @@ cifmw_devscripts_create_logical_volume: false cifmw_devscripts_cinder_volume_pvs: - /dev/vda +cifmw_devscripts_ext_iface: "enp2s0" + cifmw_devscripts_config_overrides: {} cifmw_devscripts_installer_timeout: 7200 # 2 hours +cifmw_devscripts_sno: false cifmw_devscripts_etcd_slow_profile: true diff --git a/roles/devscripts/tasks/build_config.yml b/roles/devscripts/tasks/build_config.yml index 7667cc7d52..5611f5a845 100644 --- a/roles/devscripts/tasks/build_config.yml +++ b/roles/devscripts/tasks/build_config.yml @@ -40,6 +40,13 @@ devscripts_config_patches }} +- name: Check if num_masters is equal to 1 when cifmw_devscripts_sno + when: cifmw_devscripts_sno and cifmw_devscripts_config['num_masters'] | int != 1 + ansible.builtin.fail: + msg: >- + Make sure, vms.ocp.amount (num_masters) is set to 1, + when SNO scenario is set. + - name: Replace OCP version if "stable-" alias used when: - cifmw_devscripts_config.openshift_version.startswith("stable-") diff --git a/roles/devscripts/tasks/main.yml b/roles/devscripts/tasks/main.yml index 6a87bf0237..5a8c4d11dc 100644 --- a/roles/devscripts/tasks/main.yml +++ b/roles/devscripts/tasks/main.yml @@ -47,11 +47,122 @@ (cifmw_devscripts_repo_dir, 'logs' ) | path_join }} block: +# - name: Do workaround when SNO set +# when: cifmw_devscripts_sno +# block: +# - name: Copy workaround script +# ansible.builtin.copy: +# content: | +# TARGET="192.168.111.2" +# MAX_FAILURES=10 +# FAILURES=0 +# while true; do +# if ping -c1 -W1 "$TARGET" > /dev/null 2>&1; then +# echo "bootstrap in progress..." +# sleep 1 +# else +# FAILURES=$FAILURES+1 +# fi +# +# if [[ $FAILURES -ge $MAX_FAILURES ]]; then +# echo "Bootstrap probably is completed. Continue..." 
+# break +# fi +# +# sleep 1 +# done +# +# if [ -f /etc/NetworkManager/dnsmasq.d/openshift-ocp.conf ]; then +# if grep -q '192.168.111.2' /etc/NetworkManager/dnsmasq.d/openshift-ocp.conf ; then +# sudo sed -i 's/192.168.111.2/192.168.111.10/g' /etc/NetworkManager/dnsmasq.d/openshift-ocp.conf; +# fi; +# if grep -q '192.168.111.3' /etc/NetworkManager/dnsmasq.d/openshift-ocp.conf ; then +# sudo sed -i 's/192.168.111.3/192.168.111.10/g' /etc/NetworkManager/dnsmasq.d/openshift-ocp.conf; +# fi; +# fi +# +# # hypervisor - cifmw-dnsmasq +# sudo find /etc/cifmw-dnsmasq.d -type f -exec sed -i 's/192.168.111.2/192.168.111.10/g' {} \; +# sudo find /etc/cifmw-dnsmasq.d -type f -exec sed -i 's/192.168.111.3/192.168.111.10/g' {} \; +# +# # dig apps.ocp.openstack.lab => 192.168.111.10 +# # dig api.ocp.openstack.lab => 192.168.111.10 +# +# ssh-keyscan -H 192.168.111.10 >> {{ ansible_user_dir }}/.ssh/known_hosts +# +# # /etc/hosts +# ssh -i {{ ansible_user_dir }}/ci-framework-data/artifacts/cifmw_ocp_access_key \ +# core@192.168.111.10 \ +# bash -c "echo '192.168.111.10 api-int.ocp.openstack.lab api.ocp.openstack.lab' | sudo tee -a /etc/hosts" +# +# ssh -i {{ ansible_user_dir }}/ci-framework-data/artifacts/cifmw_ocp_access_key \ +# core@192.168.111.10 \ +# bash -c "echo '192.168.111.10 apps.ocp.openstack.lab' | sudo tee -a /etc/hosts" +# +# # Coredns +# ssh -i {{ ansible_user_dir }}/ci-framework-data/artifacts/cifmw_ocp_access_key \ +# core@192.168.111.10 sudo sed -i 's/192.168.111.2/192.168.111.10/g' /etc/coredns/Corefile +# +# ssh -i {{ ansible_user_dir }}/ci-framework-data/artifacts/cifmw_ocp_access_key \ +# core@192.168.111.10 sudo sed -i 's/192.168.111.3/192.168.111.10/g' /etc/coredns/Corefile +# +# # workaround for missing docker dir +# ssh -i {{ ansible_user_dir }}/ci-framework-data/artifacts/cifmw_ocp_access_key \ +# core@192.168.111.10 sudo mkdir -p /etc/docker +# +# sudo systemctl restart cifmw-dnsmasq +# sleep 5 +# +# export KUBECONFIG={{ ansible_user_dir 
}}/src/github.com/openshift-metal3/dev-scripts/ocp/ocp/auth/kubeconfig +# +# # Add missing puzzle for ingress. Without that, openshift-ingress-operator +# # would be crashing (it is waiting for openshift-ingress router) +# oc apply -f https://raw.githubusercontent.com/openshift/router/refs/heads/master/deploy/route_crd.yaml +# oc apply -f https://raw.githubusercontent.com/openshift/router/refs/heads/master/deploy/router_rbac.yaml +# oc apply -f https://raw.githubusercontent.com/openshift/router/refs/heads/master/deploy/router.yaml +# +# # new +# # oc patch ingresscontroller default -n openshift-ingress-operator --type=merge -p '{"spec":{"endpointPublishingStrategy":{"type":"HostNetwork"},"replicas":1}}' +# +# # oc patch deployment apiserver -n openshift-apiserver --type=merge -p '{"spec":{"template":{"metadata":{"labels":{"openshift-apiserver-anti-affinity":"false"}}}}}' +# # oc -n openshift-apiserver rollout restart deployment apiserver +# +# oc -n openshift-dns rollout restart daemonsets dns-default +# oc -n openshift-dns rollout restart daemonsets node-resolver +# +# sleep 120 +# +# dest: "{{ cifmw_devscripts_repo_dir }}/07-sno-workaround.sh" +# +# - name: Add the workaround script execution into 06_create_cluster.sh script +# ansible.builtin.replace: +# path: "{{ cifmw_devscripts_repo_dir }}/06_create_cluster.sh" +# regexp: '^create_cluster \${OCP_DIR}' +# replace: | +# # +# # NOTE(dpawlik): Right now, quit 06_create_cluster.sh, otherwise +# # it will get timeout from devscript role. +# # The value is set to 25 min and should be enough to deploy, +# # but if not, you can increase to more. 
+# # +# timeout {{ cifmw_devscripts_sno_bootstrap_timeout | default(1800) }} bash -c "source utils.sh && create_cluster ${OCP_DIR}" || true +# bash -x 07-sno-workaround.sh +# exit 0 + - name: Run devscripts make all + when: not cifmw_devscripts_sno | default(false) cifmw.general.ci_script: chdir: "{{ cifmw_devscripts_repo_dir }}" output_dir: "{{ cifmw_devscripts_artifacts_dir }}" script: "timeout {{ cifmw_devscripts_installer_timeout }} make all" + + - name: Run devscripts make agent when SNO + when: cifmw_devscripts_sno | default(false) + cifmw.general.ci_script: + chdir: "{{ cifmw_devscripts_repo_dir }}" + output_dir: "{{ cifmw_devscripts_artifacts_dir }}" + script: "timeout {{ cifmw_devscripts_installer_timeout }} make agent" + always: - name: Gather logs register: _deploy_logs diff --git a/roles/devscripts/templates/conf_ciuser.j2 b/roles/devscripts/templates/conf_ciuser.j2 index f143b68dca..01f75454cf 100644 --- a/roles/devscripts/templates/conf_ciuser.j2 +++ b/roles/devscripts/templates/conf_ciuser.j2 @@ -7,6 +7,32 @@ set +x export CI_TOKEN=$(cat {{ cifmw_devscripts_repo_dir }}/ci_token) set -x +{% if not cifmw_devscripts_sno %} {% for item in cifmw_devscripts_config %} export {{ item.upper() }}="{{ cifmw_devscripts_config[item] }}" {% endfor %} +{% endif %} + +{% if cifmw_devscripts_sno %} + +{% for item in ['working_dir', 'assets_extra_folder', 'openshift_release_type', 'openshift_version', 'cluster_name', 'base_domain', 'ntp_servers', 'external_bootstrap_mac', 'vm_extradisks', 'vm_extradisks_list', 'ssh_pub_key'] %} +export {{ item.upper() }}="{{ cifmw_devscripts_config[item] }}" +{% endfor %} +# https://github.com/openshift-metal3/dev-scripts/blob/master/agent/docs/none-and-external-platform.md +# https://github.com/openshift-metal3/dev-scripts/blob/master/common.sh#L470 +# SNO Configuration +export IP_STACK="v4" +export AGENT_E2E_TEST_SCENARIO="SNO_IPV4" +export AGENT_PLATFORM_TYPE="none" +export NUM_MASTERS=1 +export NUM_WORKERS=0 + +# export
API_VIP="192.168.111.10" +# export INGRESS_VIP="192.168.111.4" +# export PROVISIONING_HOST_IP="192.168.111.1" +# export CLUSTER_DHCP_RANGE="192.168.111.10,192.168.111.100" +# export DNS_IP="{{ ansible_default_ipv4.address }}" +{% endif %} diff --git a/roles/devscripts/templates/nmstate.j2 b/roles/devscripts/templates/nmstate.j2 index e84640afe3..b6001a2fbc 100644 --- a/roles/devscripts/templates/nmstate.j2 +++ b/roles/devscripts/templates/nmstate.j2 @@ -1,6 +1,6 @@ networkConfig: interfaces: - - name: enp2s0 + - name: {{ cifmw_devscripts_ext_iface }} type: ethernet state: up {% if cifmw_devscripts_config.ip_stack != 'v6' %} @@ -25,4 +25,4 @@ networkConfig: config: - destination: 0.0.0.0/0 next-hop-address: "{{ net_gateway }}" - next-hop-interface: enp2s0 + next-hop-interface: {{ cifmw_devscripts_ext_iface }} diff --git a/roles/dnsmasq/tasks/manage_host_record.yml b/roles/dnsmasq/tasks/manage_host_record.yml index 6b1ec17447..e29413eb83 100644 --- a/roles/dnsmasq/tasks/manage_host_record.yml +++ b/roles/dnsmasq/tasks/manage_host_record.yml @@ -35,6 +35,15 @@ validate: "/usr/sbin/dnsmasq -C %s --test" loop: "{{ cifmw_dnsmasq_host_record }}" +- name: Remove crc.testing from host_records + when: cifmw_devscripts_sno | default(false) + become: true + ansible.builtin.lineinfile: + path: "{{ cifmw_dnsmasq_basedir }}/host_records.conf" + regexp: 'crc\.testing' + state: absent + notify: Restart dnsmasq + # NOTE(dpawlik): Add short name like controller-0 # into the /etc/hosts file, to avoid errors when all the configuration # is only stored in ssh config files. diff --git a/roles/reproducer/README.md b/roles/reproducer/README.md index 084b4999dc..44f1a36bd6 100644 --- a/roles/reproducer/README.md +++ b/roles/reproducer/README.md @@ -1,8 +1,22 @@ # reproducer -Role to deploy close to CI layout on a hypervisor. + +Role to deploy close to CI layout on a hypervisor. 
Supports both libvirt/KVM +virtual machine deployments (via dev-scripts) and agent-based bare metal +Single Node OpenShift (SNO) deployments on physical hardware managed through +iDRAC Redfish APIs. ## Privilege escalation -None + +Bare metal deployment requires privilege escalation for `/etc/hosts` +management and running the ISO HTTP server via podman. + +## Exposed tags + +* `bootstrap`: All reproducer bootstrap tasks. +* `bootstrap_layout`: Layout-related bootstrap tasks. +* `devscripts_layout`: Dev-scripts OCP layout tasks (libvirt path). +* `ocp_layout`: OCP layout tasks (libvirt path). +* `bm_ocp_layout`: Agent-based bare metal OCP layout tasks. ## Parameters @@ -24,7 +38,11 @@ None * `cifmw_reproducer_skip_fetch_repositories`: (Bool) Skip fetching repositories from zuul var and simply copy the code from the ansible controller. Defaults to `false`. * `cifmw_reproducer_supported_hypervisor_os`: (List) List of supported hypervisor operating systems and their minimum version. * `cifmw_reproducer_minimum_hardware_requirements`: (Dict) Define minimum hardware requirements for specific scenarios. Example below +* `cifmw_reproducer_allow_one_ocp` (Bool) Allow to deploy OpenShift cluster just with one master node. + NOTE: When using devscript, remember to set `cifmw_devscripts_sno` to `true`, otherwise it would fail. + If you don't set `cifmw_devscripts_sno` to `true`, minimum value for `ocp` nodes is `2`. * `cifmw_reproducer_computes_rhos_release_args`: (String) Arguments to use when installing rhos-release repos on compute nodes. Not defined by default, and `cifmw_repo_setup_rhos_release_args` is used instead. +* `cifmw_reproducer_bm_ocp`: (Bool) Enable agent-based bare metal OCP SNO deployment instead of libvirt/dev-scripts. Defaults to `false`. ### Advanced parameters Those parameters shouldn't be used, unless the user is able to understand potential issues in their environment. 
@@ -44,6 +62,154 @@ Those parameters shouldn't be used, unless the user is able to understand potent - If a job with content-provider is launched a **first** time with `cifmw_reproducer_run_content_provider: false`, it will NOT RUN the content-provider, **leading to a crash of the job run**. +## Agent-based bare metal OCP SNO deployment + +When `cifmw_reproducer_bm_ocp: true`, the role performs an agent-based +installation on a physical bare metal host managed +via iDRAC Redfish APIs. The workflow generates a self-contained agent ISO on +the Zuul controller, pushes it to the target host's iDRAC via Redfish +VirtualMedia, and waits for the host to self-install. + +### Network architecture + +Three routed isolated networks (no shared L2 domain required): + +| Network | Purpose | +| --- | --- | +| BMC management | iDRAC interfaces; controller reaches iDRAC via routing | +| BMO provision | Node's 1st NIC, OS interface IP; VirtualMedia boot | +| Controller | Zuul controller; serves the agent ISO to iDRAC | + +A 2nd NIC on the node carries isolated MetalLB networks for RHOSO EDPM +services (ctlplane, internalapi, storage, tenant) via VLANs. + +The `api` and `*.apps` DNS names resolve directly to the node's BMO +provision IP via `/etc/hosts` entries managed by the role. 
+ +### Bare metal parameters + +#### Required (typically set in the scenario's vars.yaml) + +| Parameter | Type | Description | +| --- | --- | --- | +| `cifmw_bm_agent_cluster_name` | str | OpenShift cluster name | +| `cifmw_bm_agent_base_domain` | str | Base domain for the cluster | +| `cifmw_bm_agent_machine_network` | str | BMO provision network CIDR | +| `cifmw_bm_agent_node_ip` | str | Node IP on the BMO provision network | +| `cifmw_bm_agent_node_iface` | str | RHCOS interface name on the BMO provision network | +| `cifmw_bm_agent_bmc_host` | str | iDRAC hostname or IP on the BMC management network | +| `cifmw_devscripts_bm_nodes` | list | Single-element list with `mac` and `root_device` keys | +| `cifmw_bm_agent_openshift_version` | str | OCP version (e.g. `"4.18.3"`); or set `cifmw_bm_agent_release_image` instead | + +#### Optional (have defaults or are auto-discovered) + +| Parameter | Type | Default | Description | +| --- | --- | --- | --- | +| `cifmw_bm_agent_release_image` | str | `$OPENSHIFT_RELEASE_IMAGE` | Alternative to version: extract `openshift-install` from a release image | +| `cifmw_bm_agent_iso_http_port` | int | `80` | Port for the podman HTTP server that serves the agent ISO (only a privileged port may accept external traffic on Zuul controllers) | +| `cifmw_bm_agent_installer_timeout` | int | `7200` | Total seconds before the installer times out (split between bootstrap and install phases) | +| `cifmw_manage_secrets_pullsecret_file` | str | `~/pull-secret` | Path to the pull secret JSON file | +| `cifmw_bmc_credentials_file` | str | `~/secrets/idrac_access.yaml` | Path to a YAML file with `username` and `password` keys for iDRAC | +| `cifmw_bm_agent_enable_usb_boot` | bool | `false` | Allow the role to automatically enable `GenericUsbBoot` in BIOS (requires a power cycle) | +| `cifmw_bm_agent_vmedia_uefi_path` | str | auto-discovered | UEFI device path for the Virtual Optical Drive; auto-discovered from UEFI boot options if omitted | +| 
`cifmw_bm_agent_core_password` | str | — | Set a `core` user password post-install via MachineConfig | +| `cifmw_bm_agent_live_debug` | bool | `false` | Patch the agent ISO with password, autologin, and systemd debug shell on `tty6` for discovery-phase console access (requires `cifmw_bm_agent_core_password`) | + +### Secrets management + +The bare metal path requires two secret files: + +#### BMC credentials + +A YAML file at `cifmw_bmc_credentials_file` (default `~/secrets/idrac_access.yaml`) +with the following structure: + +```yaml +username: root +password: +``` + +#### Pull secret + +The OCP pull secret JSON at `cifmw_manage_secrets_pullsecret_file` +(default `~/pull-secret`). + +### Bare metal task files + +The agent-based deployment is composed of reusable task files under +`tasks/bm_*.yml`: + +| Task file | Description | +| --- | --- | +| `bm_ocp_layout.yml` | Main orchestrator: validates variables, generates ISO, serves it via HTTP, manages VirtualMedia, waits for install completion | +| `bm_power_on.yml` | Idempotent power-on via Redfish with POST wait (retries 30x at 10s intervals) | +| `bm_power_off.yml` | Idempotent force power-off via Redfish with confirmation wait | +| `bm_check_usb_boot.yml` | Reads `GenericUsbBoot` BIOS attribute and fails if disabled | +| `bm_ensure_usb_boot.yml` | Wraps `bm_check_usb_boot.yml`; if disabled and `cifmw_bm_agent_enable_usb_boot` is true, sets the BIOS attribute, creates a config job, and power-cycles to apply | +| `bm_eject_vmedia.yml` | Ejects VirtualMedia from the iDRAC Virtual Optical Drive | +| `bm_discover_vmedia_target.yml` | Discovers or validates the UEFI device path for VirtualMedia, clears pending iDRAC config jobs, and sets a one-time boot override | +| `bm_patch_agent_iso.yml` | Patches the agent ISO ignition with core password, autologin, and debug shell (used when `cifmw_bm_agent_live_debug` is true) | +| `bm_core_password_machineconfig.yml` | Generates a MachineConfig manifest to set the core user 
password hash post-install | + +### openshift-install acquisition + +The `openshift-install` binary is obtained automatically via one of two +methods, depending on which variable is set: + +* **By version** (`cifmw_bm_agent_openshift_version`): downloads the tarball + from `https://mirror.openshift.com/pub/openshift-v4/clients/ocp/<version>/openshift-install-linux.tar.gz` + and extracts it. +* **By release image** (`cifmw_bm_agent_release_image` or + `OPENSHIFT_RELEASE_IMAGE` env var): runs + `oc adm release extract --command=openshift-install` against the image. + +If the binary already exists in the working directory it is reused. + +### Deployment workflow + +1. Validate required variables +2. Ensure `GenericUsbBoot` is enabled in BIOS (auto-enable with power cycle if allowed) +3. Power off the host +4. Generate SSH keys, template `install-config.yaml` and `agent-config.yaml` +5. Acquire `openshift-install` binary (see above) and run `openshift-install agent create image` to build the agent ISO +6. Optionally patch the ISO for discovery-phase console access +7. Serve the ISO via a root podman httpd container (rootless podman cannot use privileged ports) +8. Eject any existing VirtualMedia, then insert the agent ISO +9. Discover the Virtual Optical Drive UEFI path and set a one-time boot override +10. Power on the host +11. Verify BIOS `GenericUsbBoot` is enabled after POST +12. Add `/etc/hosts` entries for `api`/`api-int` and `*.apps` domains +13. Wait for bootstrap and install to complete +14. Copy kubeconfig and kubeadmin-password to the dev-scripts-compatible auth directory +15. Eject VirtualMedia and stop the HTTP server + +## Molecule tests + +### bm_redfish scenario + +The `bm_redfish` Molecule scenario validates the bare metal Redfish task files +(`bm_power_on`, `bm_power_off`, `bm_check_usb_boot`, `bm_ensure_usb_boot`, +`bm_eject_vmedia`, `bm_discover_vmedia_target`) against a stateful Python +mock iDRAC server that simulates Redfish API responses over HTTPS.
+ +The mock server (`molecule/bm_redfish/files/mock_idrac.py`) provides: + +* Stateful GET/POST/PATCH handlers for power, BIOS, VirtualMedia, boot + override, and job queue Redfish endpoints +* A `/test/reset` admin endpoint to set mock state between test cases +* A `/test/state` endpoint to query current mock state for assertions +* Self-signed TLS certificates generated during `prepare.yml` + +Test coverage: + +| Test file | Scenarios | +| --- | --- | +| `test_power_off.yml` | Already off (idempotent), On -> Off | +| `test_power_on.yml` | Already on (idempotent), Off -> On | +| `test_check_usb_boot.yml` | Enabled (succeeds), Disabled (expected failure) | +| `test_ensure_usb_boot.yml` | Already enabled (no cycle), Disabled + auto-enable (BIOS change + cycle), Disabled + no auto-enable (expected failure) | +| `test_eject_vmedia.yml` | Inserted (ejects), Not inserted (idempotent) | +| `test_discover_vmedia.yml` | Auto-discover, user-provided valid path, user-provided invalid path (expected failure) | ## Warning This role isn't intended to be called outside of the `reproducer.yml` playbook. 
@@ -85,6 +251,26 @@ cifmw_reproducer_repositories: dest: "{{ remote_base_dir }}/openstack-operators" ``` +### Agent-based bare metal SNO + +Minimal vars.yaml for a bare metal SNO deployment: + +```YAML +cifmw_reproducer_bm_ocp: true +cifmw_bm_agent_cluster_name: ocp +cifmw_bm_agent_base_domain: example.com +cifmw_bm_agent_machine_network: "192.168.10.0/24" +cifmw_bm_agent_node_ip: "192.168.10.50" +cifmw_bm_agent_node_iface: eno12399np0 +cifmw_bm_agent_bmc_host: idrac.mgmt.example.com +cifmw_bm_agent_openshift_version: "4.18.3" +cifmw_bm_agent_enable_usb_boot: true + +cifmw_devscripts_bm_nodes: + - mac: "b0:7b:25:xx:yy:zz" + root_device: /dev/sda +``` + #### Example `cifmw_reproducer_minimum_hardware_requirements`: ```YAML cifmw_reproducer_minimum_hardware_requirements: @@ -92,3 +278,10 @@ cifmw_reproducer_minimum_hardware_requirements: memory: "32 GB" disk: "200 GB" ``` + +## References + +* [ci-framework reproducer documentation](https://ci-framework.readthedocs.io/en/latest/roles/reproducer.html) +* [dev-scripts](https://github.com/openshift-metal3/dev-scripts) +* [Redfish API specification](https://www.dmtf.org/standards/redfish) +* [Dell iDRAC Redfish API Guide](https://developer.dell.com/apis/2978/versions/6.xx/reference) diff --git a/roles/reproducer/defaults/main.yml b/roles/reproducer/defaults/main.yml index 63f059552d..24bc4782d9 100644 --- a/roles/reproducer/defaults/main.yml +++ b/roles/reproducer/defaults/main.yml @@ -49,6 +49,8 @@ cifmw_reproducer_validate_ocp_layout: true # This parameter is a string, the other value in the CI is "ironic" cifmw_reproducer_ironic_node_name_prefix: +cifmw_reproducer_allow_one_ocp: false +cifmw_reproducer_bm_ocp: false # Optional: Define local_link_connection information for Ironic node ports. # This is used when generating ironic_nodes.yaml. 
The structure is a dictionary diff --git a/roles/reproducer/files/patch_ignition.py b/roles/reproducer/files/patch_ignition.py new file mode 100644 index 0000000000..8431f4578d --- /dev/null +++ b/roles/reproducer/files/patch_ignition.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +"""Patch an Ignition JSON file with a core user password and console autologin. + +Applies three mechanisms for discovery-phase access on the agent live ISO: +1. passwd.users[].passwordHash (standard ignition, may not work on live ISO) +2. set-core-password.service (oneshot that forces the hash into /etc/shadow) +3. getty@tty{1,2} autologin (console autologin drop-ins) +""" +import json +import sys + +ign_file = sys.argv[1] +pw_hash = sys.argv[2] + +with open(ign_file) as f: + ign = json.load(f) + +users = ign.setdefault("passwd", {}).setdefault("users", []) +core_user = next((u for u in users if u.get("name") == "core"), None) +if core_user is None: + core_user = {"name": "core"} + users.append(core_user) +core_user["passwordHash"] = pw_hash + +units = ign.setdefault("systemd", {}).setdefault("units", []) + +autologin_dropin = { + "name": "autologin.conf", + "contents": ( + "[Service]\nExecStart=\n" + "ExecStart=-/sbin/agetty --autologin core --noclear %I $TERM\n" + ), +} +for tty_svc in ["getty@tty1.service", "getty@tty2.service"]: + units.append( + { + "name": tty_svc, + "dropins": [autologin_dropin], + } + ) + +with open(ign_file, "w") as f: + json.dump(ign, f) diff --git a/roles/reproducer/files/redact_ignition.py b/roles/reproducer/files/redact_ignition.py new file mode 100644 index 0000000000..134392058e --- /dev/null +++ b/roles/reproducer/files/redact_ignition.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +"""Create a redacted copy of an Ignition JSON file (strip pull-secret sources).""" +import json +import sys + +src, dst = sys.argv[1], sys.argv[2] + +with open(src) as f: + d = json.load(f) + +for s in d.get("storage", {}).get("files", []): + if "pull" in s.get("path", "").lower(): 
+ s["contents"]["source"] = "data:,REDACTED" + +with open(dst, "w") as f: + json.dump(d, f, indent=2) diff --git a/roles/reproducer/molecule/bm_redfish/cleanup.yml b/roles/reproducer/molecule/bm_redfish/cleanup.yml new file mode 100644 index 0000000000..089bf53f9b --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/cleanup.yml @@ -0,0 +1,28 @@ +--- +- name: Cleanup mock iDRAC server + hosts: all + gather_facts: false + ignore_unreachable: true + vars: + _mock_dir: /tmp/mock_idrac + _mock_port: 8443 + tasks: + - name: Stop mock iDRAC server + ansible.builtin.shell: + cmd: "pkill -f '[m]ock_idrac.py.*--port {{ _mock_port }}' || true" + changed_when: false + failed_when: false + + - name: Show mock server log + ansible.builtin.command: + cmd: "cat {{ _mock_dir }}/mock_idrac.log" + register: _mock_log + changed_when: false + failed_when: false + + - name: Print mock server log + ansible.builtin.debug: + var: _mock_log.stdout_lines + when: + - _mock_log is not unreachable + - _mock_log.stdout_lines | default([]) | length > 0 diff --git a/roles/reproducer/molecule/bm_redfish/converge.yml b/roles/reproducer/molecule/bm_redfish/converge.yml new file mode 100644 index 0000000000..e20fb58996 --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/converge.yml @@ -0,0 +1,30 @@ +--- +- name: Converge -- test bm_* Redfish tasks against mock iDRAC + hosts: instance + gather_facts: false + vars_files: + - vars/common.yml + tasks: + - name: Test bm_power_off + ansible.builtin.include_tasks: + file: tasks/test_power_off.yml + + - name: Test bm_power_on + ansible.builtin.include_tasks: + file: tasks/test_power_on.yml + + - name: Test bm_check_usb_boot + ansible.builtin.include_tasks: + file: tasks/test_check_usb_boot.yml + + - name: Test bm_ensure_usb_boot + ansible.builtin.include_tasks: + file: tasks/test_ensure_usb_boot.yml + + - name: Test bm_eject_vmedia + ansible.builtin.include_tasks: + file: tasks/test_eject_vmedia.yml + + - name: Test bm_discover_vmedia_target + 
ansible.builtin.include_tasks: + file: tasks/test_discover_vmedia.yml diff --git a/roles/reproducer/molecule/bm_redfish/files/mock_idrac.py b/roles/reproducer/molecule/bm_redfish/files/mock_idrac.py new file mode 100644 index 0000000000..a97f4473ef --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/files/mock_idrac.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 +"""Mock iDRAC Redfish server for molecule testing of bm_* Ansible tasks. + +Implements stateful Redfish endpoints matching Dell iDRAC behavior: +- Power management (On/Off/ForceOff) +- BIOS settings (GenericUsbBoot) +- VirtualMedia insert/eject +- Boot override (UefiTarget, Once) +- Boot options enumeration +- Job queue management + +State can be reset between tests via POST /test/reset. +""" +import json +import socket +import ssl +import sys +import threading +from http.server import HTTPServer, BaseHTTPRequestHandler + + +class IDRACState: + """In-memory iDRAC state that simulates power cycle semantics.""" + + def __init__(self): + self.reset() + + def reset(self, overrides=None): + self.power_state = "Off" + self.usb_boot = "Disabled" + self.usb_boot_pending = None + self.vmedia_inserted = False + self.vmedia_image = "" + self.boot_override_target = "None" + self.boot_override_enabled = "Disabled" + self.uefi_target = None + self.pending_jobs = [] + if overrides: + for k, v in overrides.items(): + if hasattr(self, k): + setattr(self, k, v) + + def apply_pending_bios(self): + """Apply pending BIOS changes on power cycle (Off -> On).""" + if self.usb_boot_pending is not None: + self.usb_boot = self.usb_boot_pending + self.usb_boot_pending = None + self.pending_jobs = [j for j in self.pending_jobs if j != "bios_config"] + + +STATE = IDRACState() + +BOOT_OPTIONS = { + "Boot0001": { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions/Boot0001", + "Id": "Boot0001", + "Name": "PXE Device 1: Embedded NIC 1 Port 1 Partition 1", + "DisplayName": "PXE Device 1: Embedded NIC 1 Port 1 Partition 1", 
+ "UefiDevicePath": "VenHw(3A191845-5F86-4E78-8FCE-C4CFF59F9DAA)", + "BootOptionEnabled": True, + }, + "Boot0003": { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions/Boot0003", + "Id": "Boot0003", + "Name": "Virtual Floppy Drive", + "DisplayName": "Virtual Floppy Drive", + "UefiDevicePath": "PciRoot(0x0)/Pci(0x14,0x0)/USB(0xD,0x0)/USB(0x0,0x0)/USB(0x2,0x0)/Unit(0x1)", + "BootOptionEnabled": True, + }, + "Boot0004": { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions/Boot0004", + "Id": "Boot0004", + "Name": "Virtual Optical Drive", + "DisplayName": "Virtual Optical Drive", + "UefiDevicePath": "PciRoot(0x0)/Pci(0x14,0x0)/USB(0xD,0x0)/USB(0x0,0x0)/USB(0x2,0x0)/Unit(0x0)", + "BootOptionEnabled": True, + }, + "Boot0005": { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions/Boot0005", + "Id": "Boot0005", + "Name": "Integrated RAID Controller 1: Red Hat Enterprise Linux", + "DisplayName": "Integrated RAID Controller 1: Red Hat Enterprise Linux", + "UefiDevicePath": "HD(2,GPT,FF726BC2-263F-EE4A-BAE7-7CACE574EBD8,0x1000,0x3F800)/\\EFI\\redhat\\shimx64.efi", + "BootOptionEnabled": True, + }, + "Boot0006": { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions/Boot0006", + "Id": "Boot0006", + "Name": "Generic USB Boot", + "DisplayName": "Generic USB Boot", + "UefiDevicePath": "VenHw(0C8CB6CC-13AE-45F4-BBCD-6A25E98AC250)", + "BootOptionEnabled": True, + }, +} + +VIRTUAL_OPTICAL_PATH = BOOT_OPTIONS["Boot0004"]["UefiDevicePath"] + + +class RedfishHandler(BaseHTTPRequestHandler): + + def log_message(self, format, *args): + sys.stderr.write("[mock-idrac] %s\n" % (format % args)) + + def _send_json(self, data, status=200): + body = json.dumps(data).encode() + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def _read_body(self): + length = int(self.headers.get("Content-Length", 0)) + if 
length: + return json.loads(self.rfile.read(length)) + return {} + + # ---- routing ---- + + def do_GET(self): + path = self.path.rstrip("/") + + if path == "/redfish/v1/Systems/System.Embedded.1": + return self._get_system() + + if path == "/redfish/v1/Systems/System.Embedded.1/Bios": + return self._get_bios() + + if path == "/redfish/v1/Systems/System.Embedded.1/BootOptions": + return self._get_boot_options_collection() + + if path.startswith("/redfish/v1/Systems/System.Embedded.1/BootOptions/Boot"): + boot_id = path.rsplit("/", 1)[-1] + return self._get_boot_option(boot_id) + + if path == "/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD": + return self._get_vmedia() + + if path == "/test/state": + return self._get_test_state() + + self._send_json({"error": "not found", "path": path}, 404) + + def do_POST(self): + path = self.path.rstrip("/") + body = self._read_body() + + if path == "/test/reset": + return self._post_test_reset(body) + + if path.endswith("Actions/ComputerSystem.Reset"): + return self._post_reset(body) + + if path == "/redfish/v1/Managers/iDRAC.Embedded.1/Jobs": + return self._post_create_job(body) + + if path.endswith("DellJobService/Actions/DellJobService.DeleteJobQueue"): + return self._post_clear_jobs(body) + + if path.endswith("Actions/VirtualMedia.InsertMedia"): + return self._post_insert_media(body) + + if path.endswith("Actions/VirtualMedia.EjectMedia"): + return self._post_eject_media(body) + + self._send_json({"error": "not found", "path": path}, 404) + + def do_PATCH(self): + path = self.path.rstrip("/") + body = self._read_body() + + if path == "/redfish/v1/Systems/System.Embedded.1": + return self._patch_system(body) + + if path == "/redfish/v1/Systems/System.Embedded.1/Bios/Settings": + return self._patch_bios_settings(body) + + self._send_json({"error": "not found", "path": path}, 404) + + # ---- GET handlers ---- + + def _get_system(self): + self._send_json( + { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1", + 
"PowerState": STATE.power_state, + "Boot": { + "BootSourceOverrideTarget": STATE.boot_override_target, + "BootSourceOverrideEnabled": STATE.boot_override_enabled, + "UefiTargetBootSourceOverride": STATE.uefi_target, + "BootOptions": { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions" + }, + "BootSourceOverrideTarget@Redfish.AllowableValues": [ + "None", + "Pxe", + "Floppy", + "Cd", + "Hdd", + "BiosSetup", + "Utilities", + "UefiTarget", + "SDCard", + "UefiHttp", + ], + }, + } + ) + + def _get_bios(self): + self._send_json( + { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/Bios", + "Attributes": { + "GenericUsbBoot": STATE.usb_boot, + "BootMode": "Uefi", + }, + } + ) + + def _get_boot_options_collection(self): + self._send_json( + { + "@odata.id": "/redfish/v1/Systems/System.Embedded.1/BootOptions", + "Members": [ + {"@odata.id": opt["@odata.id"]} for opt in BOOT_OPTIONS.values() + ], + "Members@odata.count": len(BOOT_OPTIONS), + } + ) + + def _get_boot_option(self, boot_id): + if boot_id in BOOT_OPTIONS: + self._send_json(BOOT_OPTIONS[boot_id]) + else: + self._send_json({"error": "not found"}, 404) + + def _get_vmedia(self): + self._send_json( + { + "@odata.id": "/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD", + "Inserted": STATE.vmedia_inserted, + "Image": STATE.vmedia_image if STATE.vmedia_inserted else None, + "ImageName": ( + STATE.vmedia_image.rsplit("/", 1)[-1] + if STATE.vmedia_inserted and STATE.vmedia_image + else None + ), + "ConnectedVia": "URI" if STATE.vmedia_inserted else "NotConnected", + "WriteProtected": True, + } + ) + + def _get_test_state(self): + self._send_json( + { + "power_state": STATE.power_state, + "usb_boot": STATE.usb_boot, + "usb_boot_pending": STATE.usb_boot_pending, + "vmedia_inserted": STATE.vmedia_inserted, + "vmedia_image": STATE.vmedia_image, + "boot_override_target": STATE.boot_override_target, + "boot_override_enabled": STATE.boot_override_enabled, + "uefi_target": STATE.uefi_target, + 
"pending_jobs": STATE.pending_jobs, + } + ) + + # ---- POST handlers ---- + + def _post_test_reset(self, body): + STATE.reset(body) + self._send_json({"status": "reset"}) + + def _post_reset(self, body): + reset_type = body.get("ResetType", "") + if reset_type == "On": + if STATE.power_state == "On": + return self._send_json({"error": "already on"}, 409) + STATE.apply_pending_bios() + STATE.power_state = "On" + elif reset_type in ("ForceOff", "GracefulShutdown"): + STATE.power_state = "Off" + elif reset_type == "ForceRestart": + STATE.apply_pending_bios() + STATE.power_state = "On" + else: + return self._send_json({"error": f"unknown ResetType: {reset_type}"}, 400) + self._send_json({"status": "ok"}, 204) + + def _post_create_job(self, body): + target = body.get("TargetSettingsURI", "") + if "Bios" in target: + STATE.pending_jobs.append("bios_config") + self._send_json({"JobID": "JID_TEST_001"}, 200) + + def _post_clear_jobs(self, body): + STATE.pending_jobs.clear() + self._send_json({"status": "cleared"}, 200) + + def _post_insert_media(self, body): + STATE.vmedia_inserted = True + STATE.vmedia_image = body.get("Image", "http://test/test.iso") + self._send_json({"status": "inserted"}, 204) + + def _post_eject_media(self, body): + STATE.vmedia_inserted = False + STATE.vmedia_image = "" + self._send_json({"status": "ejected"}, 204) + + # ---- PATCH handlers ---- + + def _patch_system(self, body): + boot = body.get("Boot", {}) + if "BootSourceOverrideTarget" in boot: + STATE.boot_override_target = boot["BootSourceOverrideTarget"] + if "BootSourceOverrideEnabled" in boot: + STATE.boot_override_enabled = boot["BootSourceOverrideEnabled"] + if "UefiTargetBootSourceOverride" in boot: + STATE.uefi_target = boot["UefiTargetBootSourceOverride"] + self._send_json({"status": "ok"}, 200) + + def _patch_bios_settings(self, body): + attrs = body.get("Attributes", {}) + if "GenericUsbBoot" in attrs: + STATE.usb_boot_pending = attrs["GenericUsbBoot"] + self._send_json({"status": 
"ok"}, 200) + + +class DualStackHTTPServer(HTTPServer): + """HTTPServer that supports IPv4 and IPv6.""" + + address_family = socket.AF_INET6 + allow_reuse_address = True + + def server_bind(self): + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + super().server_bind() + + +def run(port=8443, certfile=None, keyfile=None): + try: + server = DualStackHTTPServer(("::", port), RedfishHandler) + except OSError: + server = HTTPServer(("0.0.0.0", port), RedfishHandler) + if certfile and keyfile: + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ctx.load_cert_chain(certfile, keyfile) + server.socket = ctx.wrap_socket(server.socket, server_side=True) + proto = "https" + else: + proto = "http" + print(f"Mock iDRAC listening on {proto}://0.0.0.0:{port}") + sys.stdout.flush() + server.serve_forever() + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--port", type=int, default=8443) + parser.add_argument("--cert", default=None) + parser.add_argument("--key", default=None) + args = parser.parse_args() + run(port=args.port, certfile=args.cert, keyfile=args.key) diff --git a/roles/reproducer/molecule/bm_redfish/molecule.yml b/roles/reproducer/molecule/bm_redfish/molecule.yml new file mode 100644 index 0000000000..b568695d75 --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/molecule.yml @@ -0,0 +1,11 @@ +--- +# Mainly used to override the defaults set in .config/molecule/ +# By default, it uses the "config_podman.yml" - in CI, it will use +# "config_local.yml". 
+log: true +provisioner: + name: ansible + log: true + env: + ANSIBLE_STDOUT_CALLBACK: default +prerun: false diff --git a/roles/reproducer/molecule/bm_redfish/prepare.yml b/roles/reproducer/molecule/bm_redfish/prepare.yml new file mode 100644 index 0000000000..7be0f034dd --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/prepare.yml @@ -0,0 +1,56 @@ +--- +- name: Prepare mock iDRAC server + hosts: all + gather_facts: false + vars: + _mock_dir: /tmp/mock_idrac + _mock_port: 8443 + tasks: + - name: Create mock server directory + ansible.builtin.file: + path: "{{ _mock_dir }}" + state: directory + mode: "0755" + + - name: Generate self-signed TLS certificate + ansible.builtin.command: + cmd: >- + openssl req -x509 -newkey rsa:2048 -nodes + -keyout {{ _mock_dir }}/server.key + -out {{ _mock_dir }}/server.crt + -days 1 -subj '/CN=localhost' + creates: "{{ _mock_dir }}/server.crt" + + - name: Copy mock iDRAC server script + ansible.builtin.copy: + src: mock_idrac.py + dest: "{{ _mock_dir }}/mock_idrac.py" + mode: "0755" + + - name: Start mock iDRAC server + ansible.builtin.shell: + cmd: >- + nohup python3 {{ _mock_dir }}/mock_idrac.py + --port {{ _mock_port }} + --cert {{ _mock_dir }}/server.crt + --key {{ _mock_dir }}/server.key + > {{ _mock_dir }}/mock_idrac.log 2>&1 & + creates: "{{ _mock_dir }}/mock_idrac.pid" + register: _mock_start + + - name: Record mock server PID + ansible.builtin.shell: + cmd: >- + pgrep -f 'mock_idrac.py.*--port {{ _mock_port }}' + > {{ _mock_dir }}/mock_idrac.pid + changed_when: false + + - name: Wait for mock iDRAC to respond + ansible.builtin.uri: + url: "https://localhost:{{ _mock_port }}/test/state" + validate_certs: false + status_code: [200] + retries: 10 + delay: 1 + register: _mock_health + until: _mock_health.status == 200 diff --git a/roles/reproducer/molecule/bm_redfish/tasks/test_check_usb_boot.yml b/roles/reproducer/molecule/bm_redfish/tasks/test_check_usb_boot.yml new file mode 100644 index 0000000000..ddb3c60eb9 --- 
/dev/null +++ b/roles/reproducer/molecule/bm_redfish/tasks/test_check_usb_boot.yml @@ -0,0 +1,52 @@ +--- +# Test bm_check_usb_boot.yml: succeeds when enabled, fails when disabled. + +- name: "Check_usb_boot case 1: GenericUsbBoot Enabled -- succeeds" + block: + - name: Reset mock with usb_boot Enabled + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + usb_boot: "Enabled" + validate_certs: false + status_code: [200] + + - name: Include bm_check_usb_boot + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_check_usb_boot.yml + + - name: Assert _usb_boot_enabled is true + ansible.builtin.assert: + that: + - _usb_boot_enabled | bool + fail_msg: "_usb_boot_enabled should be true when GenericUsbBoot is Enabled" + +- name: "Check_usb_boot case 2: GenericUsbBoot Disabled -- fails" + block: + - name: Reset mock with usb_boot Disabled + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + usb_boot: "Disabled" + validate_certs: false + status_code: [200] + + - name: Include bm_check_usb_boot (expect failure) + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_check_usb_boot.yml + + - name: Should not reach here + ansible.builtin.fail: + msg: "bm_check_usb_boot should have failed when USB boot is disabled" + rescue: + - name: Assert failure was about GenericUsbBoot + ansible.builtin.assert: + that: + - "'GenericUsbBoot' in ansible_failed_result.msg" + fail_msg: "Unexpected failure: {{ ansible_failed_result.msg }}" diff --git a/roles/reproducer/molecule/bm_redfish/tasks/test_discover_vmedia.yml b/roles/reproducer/molecule/bm_redfish/tasks/test_discover_vmedia.yml new file mode 100644 index 0000000000..819468491f --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/tasks/test_discover_vmedia.yml @@ -0,0 +1,107 @@ +--- +# Test bm_discover_vmedia_target.yml: auto-discover, validate, and invalid path.
+# The task file also clears jobs, sets boot override, and verifies VirtualMedia. + +- name: "Discover_vmedia case 1: auto-discover Virtual Optical Drive" + block: + - name: Reset mock with VirtualMedia inserted + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + vmedia_inserted: true + vmedia_image: "http://test/agent.iso" + power_state: "Off" + validate_certs: false + status_code: [200] + + - name: Include bm_discover_vmedia_target (auto-discover) + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_discover_vmedia_target.yml + + - name: Assert discovered path is Virtual Optical Drive + ansible.builtin.assert: + that: + - cifmw_bm_agent_vmedia_uefi_path is defined + - "'USB' in cifmw_bm_agent_vmedia_uefi_path" + - "'Unit(0x0)' in cifmw_bm_agent_vmedia_uefi_path" + fail_msg: >- + Expected Virtual Optical Drive path, got: + {{ cifmw_bm_agent_vmedia_uefi_path | default('undefined') }} + + - name: Query mock state after discover + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state + + - name: Assert boot override was set + ansible.builtin.assert: + that: + - _state.json.boot_override_target == "UefiTarget" + - _state.json.boot_override_enabled == "Once" + - _state.json.uefi_target is not none + +- name: "Discover_vmedia case 2: user-provided valid UEFI path" + block: + - name: Reset mock with VirtualMedia inserted + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + vmedia_inserted: true + vmedia_image: "http://test/agent.iso" + validate_certs: false + status_code: [200] + + - name: Include bm_discover_vmedia_target with valid explicit path + vars: + cifmw_bm_agent_vmedia_uefi_path: "PciRoot(0x0)/Pci(0x14,0x0)/USB(0xD,0x0)/USB(0x0,0x0)/USB(0x2,0x0)/Unit(0x0)" + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_discover_vmedia_target.yml + + - name: Query mock state + ansible.builtin.uri: + url: "{{ 
_mock_state_url }}" + validate_certs: false + register: _state + + - name: Assert boot override set with user-provided path + ansible.builtin.assert: + that: + - _state.json.boot_override_target == "UefiTarget" + - _state.json.uefi_target == "PciRoot(0x0)/Pci(0x14,0x0)/USB(0xD,0x0)/USB(0x0,0x0)/USB(0x2,0x0)/Unit(0x0)" + +- name: "Discover_vmedia case 3: user-provided invalid UEFI path -- fails" + block: + - name: Reset mock with VirtualMedia inserted + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + vmedia_inserted: true + validate_certs: false + status_code: [200] + + - name: Include bm_discover_vmedia_target with invalid path (expect failure) + vars: + cifmw_bm_agent_vmedia_uefi_path: "PciRoot(0x0)/INVALID/PATH" + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_discover_vmedia_target.yml + + - name: Should not reach here + ansible.builtin.fail: + msg: "bm_discover_vmedia_target should have failed with an invalid UEFI path" + rescue: + - name: Assert failure was about UEFI path + ansible.builtin.assert: + that: + - "'not listed in UEFI boot options' in ansible_failed_result.msg + or 'Assertion failed' in (ansible_failed_result.msg | default(''))" + fail_msg: "Unexpected failure: {{ ansible_failed_result.msg | default('unknown') }}" diff --git a/roles/reproducer/molecule/bm_redfish/tasks/test_eject_vmedia.yml b/roles/reproducer/molecule/bm_redfish/tasks/test_eject_vmedia.yml new file mode 100644 index 0000000000..c80890baca --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/tasks/test_eject_vmedia.yml @@ -0,0 +1,61 @@ +--- +# Test bm_eject_vmedia.yml: ejects inserted VirtualMedia. 
+ +- name: "Eject_vmedia case 1: VirtualMedia inserted -- ejects" + block: + - name: Reset mock with VirtualMedia inserted + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + vmedia_inserted: true + vmedia_image: "http://test/agent.iso" + validate_certs: false + status_code: [200] + + - name: Include bm_eject_vmedia + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_eject_vmedia.yml + + - name: Query mock state after eject + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state + + - name: Assert VirtualMedia is ejected + ansible.builtin.assert: + that: + - not (_state.json.vmedia_inserted | bool) + fail_msg: "VirtualMedia should be ejected" + +- name: "Eject_vmedia case 2: VirtualMedia not inserted -- no error" + block: + - name: Reset mock with VirtualMedia not inserted + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + vmedia_inserted: false + validate_certs: false + status_code: [200] + + - name: Include bm_eject_vmedia (idempotent) + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_eject_vmedia.yml + + - name: Query mock state + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state + + - name: Assert VirtualMedia is still not inserted + ansible.builtin.assert: + that: + - not (_state.json.vmedia_inserted | bool) + fail_msg: "VirtualMedia should remain not inserted" diff --git a/roles/reproducer/molecule/bm_redfish/tasks/test_ensure_usb_boot.yml b/roles/reproducer/molecule/bm_redfish/tasks/test_ensure_usb_boot.yml new file mode 100644 index 0000000000..4ffe05dcd5 --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/tasks/test_ensure_usb_boot.yml @@ -0,0 +1,102 @@ +--- +# Test bm_ensure_usb_boot.yml: skip if enabled, enable+cycle if allowed, fail if not. 
+ +- name: "Ensure_usb_boot case 1: already Enabled -- no power cycle" + block: + - name: Reset mock with usb_boot Enabled and host Off + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + usb_boot: "Enabled" + power_state: "Off" + validate_certs: false + status_code: [200] + + - name: Include bm_ensure_usb_boot + vars: + cifmw_bm_agent_enable_usb_boot: true + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_ensure_usb_boot.yml + + - name: Query mock state + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state + + - name: Assert host stayed Off (no power cycle needed) + ansible.builtin.assert: + that: + - _state.json.power_state == "Off" + - _state.json.usb_boot == "Enabled" + fail_msg: "No power cycle expected when already enabled" + +- name: "Ensure_usb_boot case 2: Disabled + auto-enable -- BIOS change + power cycle" + block: + - name: Reset mock with usb_boot Disabled and host Off + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + usb_boot: "Disabled" + power_state: "Off" + validate_certs: false + status_code: [200] + + - name: Include bm_ensure_usb_boot with auto-enable + vars: + cifmw_bm_agent_enable_usb_boot: true + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_ensure_usb_boot.yml + + - name: Query mock state after auto-enable + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state + + - name: Assert BIOS updated and host left Off + ansible.builtin.assert: + that: + - _state.json.usb_boot == "Enabled" + - _state.json.power_state == "Off" + fail_msg: >- + Expected usb_boot=Enabled and power_state=Off, + got usb_boot={{ _state.json.usb_boot }} + power_state={{ _state.json.power_state }} + +- name: "Ensure_usb_boot case 3: Disabled + auto-enable off -- fails" + block: + - name: Reset mock with usb_boot Disabled + ansible.builtin.uri: + url: "{{ 
_mock_reset_url }}" + method: POST + body_format: json + body: + usb_boot: "Disabled" + power_state: "Off" + validate_certs: false + status_code: [200] + + - name: Include bm_ensure_usb_boot without auto-enable (expect failure) + vars: + cifmw_bm_agent_enable_usb_boot: false + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_ensure_usb_boot.yml + + - name: Should not reach here + ansible.builtin.fail: + msg: "bm_ensure_usb_boot should have failed when auto-enable is off" + rescue: + - name: Assert failure mentions GenericUsbBoot or auto-enable + ansible.builtin.assert: + that: + - "'GenericUsbBoot' in ansible_failed_result.msg + or 'enable_usb_boot' in ansible_failed_result.msg" + fail_msg: "Unexpected failure: {{ ansible_failed_result.msg }}" diff --git a/roles/reproducer/molecule/bm_redfish/tasks/test_power_off.yml b/roles/reproducer/molecule/bm_redfish/tasks/test_power_off.yml new file mode 100644 index 0000000000..c606e64846 --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/tasks/test_power_off.yml @@ -0,0 +1,60 @@ +--- +# Test bm_power_off.yml: idempotent power-off via Redfish mock. 
+ +- name: "Power_off case 1: host already off -- no action taken" + block: + - name: Reset mock to power Off + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + power_state: "Off" + validate_certs: false + status_code: [200] + + - name: Include bm_power_off + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_power_off.yml + + - name: Query mock state after power_off (already off) + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state_after + + - name: Assert host is still Off + ansible.builtin.assert: + that: + - _state_after.json.power_state == "Off" + fail_msg: "Expected Off, got {{ _state_after.json.power_state }}" + +- name: "Power_off case 2: host On -- powers off" + block: + - name: Reset mock to power On + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + power_state: "On" + validate_certs: false + status_code: [200] + + - name: Include bm_power_off + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_power_off.yml + + - name: Query mock state after power_off (was on) + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state_after + + - name: Assert host is now Off + ansible.builtin.assert: + that: + - _state_after.json.power_state == "Off" + fail_msg: "Expected Off, got {{ _state_after.json.power_state }}" diff --git a/roles/reproducer/molecule/bm_redfish/tasks/test_power_on.yml b/roles/reproducer/molecule/bm_redfish/tasks/test_power_on.yml new file mode 100644 index 0000000000..7e45f922f5 --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/tasks/test_power_on.yml @@ -0,0 +1,60 @@ +--- +# Test bm_power_on.yml: idempotent power-on via Redfish mock. 
+ +- name: "Power_on case 1: host already On -- no action taken" + block: + - name: Reset mock to power On + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + power_state: "On" + validate_certs: false + status_code: [200] + + - name: Include bm_power_on + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_power_on.yml + + - name: Query mock state after power_on (already on) + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state_after + + - name: Assert host is still On + ansible.builtin.assert: + that: + - _state_after.json.power_state == "On" + fail_msg: "Expected On, got {{ _state_after.json.power_state }}" + +- name: "Power_on case 2: host Off -- powers on" + block: + - name: Reset mock to power Off + ansible.builtin.uri: + url: "{{ _mock_reset_url }}" + method: POST + body_format: json + body: + power_state: "Off" + validate_certs: false + status_code: [200] + + - name: Include bm_power_on + ansible.builtin.include_role: + name: reproducer + tasks_from: bm_power_on.yml + + - name: Query mock state after power_on (was off) + ansible.builtin.uri: + url: "{{ _mock_state_url }}" + validate_certs: false + register: _state_after + + - name: Assert host is now On + ansible.builtin.assert: + that: + - _state_after.json.power_state == "On" + fail_msg: "Expected On, got {{ _state_after.json.power_state }}" diff --git a/roles/reproducer/molecule/bm_redfish/vars/common.yml b/roles/reproducer/molecule/bm_redfish/vars/common.yml new file mode 100644 index 0000000000..b7e2890f8b --- /dev/null +++ b/roles/reproducer/molecule/bm_redfish/vars/common.yml @@ -0,0 +1,11 @@ +--- +_bmc_host: "localhost:8443" +_bmc_creds: + username: "testuser" + password: "testpass" +_redfish_headers: + Accept: "application/json" + Content-Type: "application/json" + OData-Version: "4.0" +_mock_reset_url: "https://localhost:8443/test/reset" +_mock_state_url: "https://localhost:8443/test/state" diff 
--git a/roles/reproducer/tasks/bm_check_usb_boot.yml b/roles/reproducer/tasks/bm_check_usb_boot.yml new file mode 100644 index 0000000000..4cd7c1d2d0 --- /dev/null +++ b/roles/reproducer/tasks/bm_check_usb_boot.yml @@ -0,0 +1,25 @@ +--- +# Check that GenericUsbBoot is enabled in BIOS. Fails if disabled. +# Requires: _bmc_host, _bmc_creds, _redfish_headers +- name: Read GenericUsbBoot BIOS attribute + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/Bios" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + register: _bios_attrs + +- name: Set GenericUsbBoot status fact + ansible.builtin.set_fact: + _usb_boot_enabled: >- + {{ _bios_attrs.json.Attributes.GenericUsbBoot | default('Disabled') == 'Enabled' }} + +- name: Fail if GenericUsbBoot is disabled + when: not (_usb_boot_enabled | bool) + ansible.builtin.fail: + msg: >- + GenericUsbBoot is Disabled in BIOS. VirtualMedia cannot boot host {{ _bmc_host }}. diff --git a/roles/reproducer/tasks/bm_core_password_machineconfig.yml b/roles/reproducer/tasks/bm_core_password_machineconfig.yml new file mode 100644 index 0000000000..a41fd22aa6 --- /dev/null +++ b/roles/reproducer/tasks/bm_core_password_machineconfig.yml @@ -0,0 +1,37 @@ +--- +# Configure core user console access via MachineConfig (post-install). +# Creates a MachineConfig that sets the core user password hash. 
+# Requires: _work_dir, cifmw_bm_agent_core_password (user input) +- name: Generate password hash + ansible.builtin.command: + cmd: "openssl passwd -6 -salt 16charsofsalt '{{ cifmw_bm_agent_core_password }}'" + register: _password_hash + changed_when: false + no_log: true + +- name: Create openshift manifests directory + ansible.builtin.file: + path: "{{ _work_dir }}/openshift" + state: directory + mode: "0755" + +- name: Write core password MachineConfig + no_log: true + ansible.builtin.copy: + dest: "{{ _work_dir }}/openshift/99-core-password.yaml" + mode: "0600" + content: | + apiVersion: machineconfiguration.openshift.io/v1 + kind: MachineConfig + metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 99-core-password + spec: + config: + ignition: + version: 3.2.0 + passwd: + users: + - name: core + passwordHash: {{ _password_hash.stdout }} diff --git a/roles/reproducer/tasks/bm_discover_vmedia_target.yml b/roles/reproducer/tasks/bm_discover_vmedia_target.yml new file mode 100644 index 0000000000..6b626b7613 --- /dev/null +++ b/roles/reproducer/tasks/bm_discover_vmedia_target.yml @@ -0,0 +1,158 @@ +--- +# Discover or validate the UEFI device path for the iDRAC Virtual Optical Drive, +# clear any pending iDRAC config jobs, and set a one-time boot override. 
+# Requires: _bmc_host, _bmc_creds, _redfish_headers +- name: Fetch UEFI boot option IDs + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/BootOptions" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + register: _boot_option_list + +- name: Fetch each UEFI boot option detail + ansible.builtin.uri: + url: "https://{{ _bmc_host }}{{ item['@odata.id'] }}" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + register: _boot_options + loop: "{{ _boot_option_list.json.Members }}" + loop_control: + label: "{{ item['@odata.id'] | basename }}" + +- name: Build list of known UEFI device paths + ansible.builtin.set_fact: + _known_uefi_paths: >- + {{ _boot_options.results | map(attribute='json') | + selectattr('UefiDevicePath', 'defined') | + map(attribute='UefiDevicePath') | list }} + +- name: Validate user-provided VirtualMedia UEFI path + when: cifmw_bm_agent_vmedia_uefi_path is defined + ansible.builtin.assert: + that: + - cifmw_bm_agent_vmedia_uefi_path in _known_uefi_paths + fail_msg: >- + cifmw_bm_agent_vmedia_uefi_path '{{ cifmw_bm_agent_vmedia_uefi_path }}' + is not listed in UEFI boot options. 
Available paths: + {{ _boot_options.results | map(attribute='json') | + map(attribute='DisplayName', default='?') | zip(_known_uefi_paths) | + map('join', ' -> ') | list | join(', ') }} + +- name: Auto-discover Virtual Optical Drive boot path + when: cifmw_bm_agent_vmedia_uefi_path is not defined + block: + - name: Find Virtual Optical Drive boot path + ansible.builtin.set_fact: + cifmw_bm_agent_vmedia_uefi_path: >- + {{ item.json.UefiDevicePath }} + when: "'Virtual Optical' in item.json.DisplayName | default('')" + loop: "{{ _boot_options.results }}" + loop_control: + label: "{{ item.json.DisplayName | default('unknown') }}" + + - name: Fail if no Virtual Optical Drive found + when: cifmw_bm_agent_vmedia_uefi_path is not defined + ansible.builtin.fail: + msg: >- + Could not find a Virtual Optical Drive in UEFI boot options. + Set cifmw_bm_agent_vmedia_uefi_path manually in vars.yaml. + +- name: Show VirtualMedia UEFI boot target + ansible.builtin.debug: + msg: "VirtualMedia UEFI path: {{ cifmw_bm_agent_vmedia_uefi_path }}" + +- name: Clear pending iDRAC config jobs that block boot override + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Dell/Managers/iDRAC.Embedded.1/DellJobService/Actions/DellJobService.DeleteJobQueue" + method: POST + headers: "{{ _redfish_headers }}" + body_format: json + body: + JobID: "JID_CLEARALL" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 202, 204] + failed_when: false + +- name: Wait for iDRAC to settle after clearing jobs + ansible.builtin.pause: + seconds: 10 + +- name: Set one-time boot from Virtual Optical Drive + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" + method: PATCH + headers: "{{ _redfish_headers }}" + body_format: json + body: + Boot: + BootSourceOverrideTarget: UefiTarget + UefiTargetBootSourceOverride: "{{ cifmw_bm_agent_vmedia_uefi_path }}" + 
BootSourceOverrideEnabled: Once + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204] + +- name: Verify boot override was applied + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + status_code: [200] + register: _boot_verify + +- name: Assert boot override is set correctly + ansible.builtin.assert: + that: + - _boot_verify.json.Boot.BootSourceOverrideTarget == 'UefiTarget' + - _boot_verify.json.Boot.BootSourceOverrideEnabled == 'Once' + - _boot_verify.json.Boot.UefiTargetBootSourceOverride == cifmw_bm_agent_vmedia_uefi_path + fail_msg: >- + Boot override not applied. + Target: {{ _boot_verify.json.Boot.BootSourceOverrideTarget }} + (expected UefiTarget), + Enabled: {{ _boot_verify.json.Boot.BootSourceOverrideEnabled }} + (expected Once), + UefiPath: {{ _boot_verify.json.Boot.UefiTargetBootSourceOverride }} + (expected {{ cifmw_bm_agent_vmedia_uefi_path }}) + +- name: Verify VirtualMedia is still inserted + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + return_content: true + status_code: [200] + register: _vmedia_check + +- name: Assert VirtualMedia ISO is mounted + ansible.builtin.assert: + that: + - _vmedia_check.json.Inserted | bool + fail_msg: >- + VirtualMedia is not inserted. Image: {{ _vmedia_check.json.Image | default('none') }}, + Inserted: {{ _vmedia_check.json.Inserted | default('unknown') }}. + The ISO may have been ejected during job queue clearing. 
diff --git a/roles/reproducer/tasks/bm_eject_vmedia.yml b/roles/reproducer/tasks/bm_eject_vmedia.yml new file mode 100644 index 0000000000..b21447313a --- /dev/null +++ b/roles/reproducer/tasks/bm_eject_vmedia.yml @@ -0,0 +1,21 @@ +--- +# Eject VirtualMedia from iDRAC. +# Requires: _bmc_host, _bmc_creds, _redfish_headers +- name: Eject VirtualMedia + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.EjectMedia" + method: POST + headers: "{{ _redfish_headers }}" + body_format: json + body: {} + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204, 400, 500] + register: _eject_result + failed_when: false + +- name: Wait for VirtualMedia eject to settle + ansible.builtin.pause: + seconds: 5 diff --git a/roles/reproducer/tasks/bm_ensure_usb_boot.yml b/roles/reproducer/tasks/bm_ensure_usb_boot.yml new file mode 100644 index 0000000000..987ca8ddaa --- /dev/null +++ b/roles/reproducer/tasks/bm_ensure_usb_boot.yml @@ -0,0 +1,59 @@ +--- +# Ensure GenericUsbBoot is enabled in BIOS so VirtualMedia can boot. +# When a change is needed: sets BIOS attribute, creates config job, +# power-cycles to apply the change, then leaves the host powered off. +# +# Requires: _bmc_host, _bmc_creds, _redfish_headers + +- name: Check and optionally enable GenericUsbBoot + block: + - name: Check current GenericUsbBoot state + ansible.builtin.include_tasks: bm_check_usb_boot.yml + + rescue: + - name: Fail if auto-enable is off + when: not (cifmw_bm_agent_enable_usb_boot | default(false)) | bool + ansible.builtin.fail: + msg: >- + GenericUsbBoot is Disabled in BIOS. VirtualMedia cannot boot + host {{ _bmc_host }}. Set cifmw_bm_agent_enable_usb_boot: true + in vars.yaml to allow this playbook to enable it automatically + (requires a reboot cycle). 
+ + - name: Set GenericUsbBoot BIOS attribute + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/Bios/Settings" + method: PATCH + headers: "{{ _redfish_headers }}" + body_format: json + body: + Attributes: + GenericUsbBoot: "Enabled" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204] + + - name: Create BIOS config job to schedule the change + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/Jobs" + method: POST + headers: "{{ _redfish_headers }}" + body_format: json + body: + TargetSettingsURI: "/redfish/v1/Systems/System.Embedded.1/Bios/Settings" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 202, 204] + + - name: Power off before applying BIOS change + ansible.builtin.include_tasks: bm_power_off.yml + + - name: Power on to apply BIOS config job during POST + ansible.builtin.include_tasks: bm_power_on.yml + + - name: Power off after BIOS change applied + ansible.builtin.include_tasks: bm_power_off.yml diff --git a/roles/reproducer/tasks/bm_ocp_layout.yml b/roles/reproducer/tasks/bm_ocp_layout.yml new file mode 100644 index 0000000000..4acda13b5c --- /dev/null +++ b/roles/reproducer/tasks/bm_ocp_layout.yml @@ -0,0 +1,383 @@ +--- +# Agent-based bare metal SNO deployment. +# +# Three isolated networks (no shared L2 domain): +# BMC mgmt network — iDRAC interfaces; controller reaches iDRAC via routing +# BMO provision net — node's 1st NIC, OS interface IP; virtual media boot, +# no PXE/DHCP +# Controller network — Zuul controller; serves the agent ISO to iDRAC +# +# A 2nd NIC on the node carries isolated MetalLB networks for RHOSO +# EDPM services (ctlplane, internalapi, storage, tenant) via VLANs. +# +# Bypasses dev-scripts entirely. 
Generates a self-contained ISO on the +# controller, pushes it to the target host's iDRAC (BMC mgmt network) +# via Redfish VirtualMedia, and waits for the host to self-install on +# the BMO provision network. +# +# The api and *.apps DNS names resolve directly to the node's BMO provision IP. +# +# Required variables (typically set in the scenario's vars.yaml): +# cifmw_bm_agent_cluster_name +# cifmw_bm_agent_base_domain +# cifmw_bm_agent_machine_network (BMO provision network CIDR) +# cifmw_bm_agent_node_ip (node IP on BMO provision net) +# cifmw_bm_agent_node_iface (RHCOS iface on BMO provision net) +# cifmw_bm_agent_bmc_host (iDRAC on BMC mgmt network) +# cifmw_devscripts_bm_nodes (list with mac / root_device) +# cifmw_bm_agent_openshift_version (e.g. "4.18.3"; or set +# cifmw_bm_agent_release_image (OPENSHIFT_RELEASE_IMAGE) instead) +# +# Optional (have defaults or are auto-discovered): +# cifmw_bm_agent_iso_http_port (default: 80) +# cifmw_bm_agent_installer_timeout (default: 7200) +# cifmw_manage_secrets_pullsecret_file (default: ~/pull-secret) +# cifmw_bm_agent_enable_usb_boot (set to true to allow auto-enabling GenericUsbBoot in BIOS) +# cifmw_bm_agent_vmedia_uefi_path (UEFI path of the Virtual Optical Drive; +# auto-discovered if omitted) +# cifmw_bm_agent_core_password (set core password post-install via MachineConfig) +# cifmw_bm_agent_live_debug (patch the agent ISO with password, autologin, +# and systemd debug shell on tty6 for +# discovery-phase access; requires +# cifmw_bm_agent_core_password) + +- name: Assert agent-based bare metal variables are set + ansible.builtin.assert: + that: + - cifmw_bm_agent_cluster_name is defined + - cifmw_bm_agent_base_domain is defined + - cifmw_bm_agent_machine_network is defined + - cifmw_bm_agent_node_ip is defined + - cifmw_bm_agent_node_iface is defined + - cifmw_bm_agent_bmc_host is defined + - cifmw_devscripts_bm_nodes is defined + - cifmw_devscripts_bm_nodes | length == 1 + fail_msg: >- + Missing agent-based 
bare metal variables. + Check vars.yaml for cifmw_bm_agent_* and cifmw_devscripts_bm_nodes. + +- name: Set internal facts + ansible.builtin.set_fact: + _cluster_name: "{{ cifmw_bm_agent_cluster_name }}" + _base_domain: "{{ cifmw_bm_agent_base_domain }}" + _machine_network: "{{ cifmw_bm_agent_machine_network }}" + _node_ip: "{{ cifmw_bm_agent_node_ip }}" + _node_iface: "{{ cifmw_bm_agent_node_iface }}" + _node_mac: "{{ cifmw_devscripts_bm_nodes[0].mac }}" + _bmc_host: "{{ cifmw_bm_agent_bmc_host }}" + _iso_http_port: "{{ cifmw_bm_agent_iso_http_port | default(80) }}" + _installer_timeout: "{{ cifmw_bm_agent_installer_timeout | default(7200) }}" + _work_dir: "{{ cifmw_reproducer_basedir }}/artifacts/agent-install" + _creds_file: >- + {{ cifmw_bmc_credentials_file | + default(ansible_user_dir ~ '/secrets/idrac_access.yaml') }} + _pull_secret_file: >- + {{ cifmw_manage_secrets_pullsecret_file | + default(ansible_user_dir ~ '/pull-secret') }} + _kubeconfig_dest: >- + {{ + (cifmw_devscripts_repo_dir | default( + ansible_user_dir ~ '/src/github.com/openshift-metal3/dev-scripts'), + 'ocp', cifmw_bm_agent_cluster_name, 'auth') + | path_join + }} + +- name: Read BMC credentials + ansible.builtin.include_vars: + file: "{{ _creds_file }}" + name: _bmc_creds + +- name: Set common Redfish request parameters + ansible.builtin.set_fact: + _redfish_headers: + Accept: application/json + OData-Version: "4.0" + +- name: Ensure BIOS allows VirtualMedia boot + ansible.builtin.include_tasks: bm_ensure_usb_boot.yml + +- name: Ensure bare metal host is powered off for agent install + ansible.builtin.include_tasks: bm_power_off.yml + +- name: Create agent-install working directory + ansible.builtin.file: + path: "{{ _work_dir }}" + state: directory + mode: "0755" + +- name: Read pull secret + no_log: true + ansible.builtin.slurp: + src: "{{ _pull_secret_file }}" + register: _pull_secret_raw + +- name: Set pull secret fact + no_log: true + ansible.builtin.set_fact: + _pull_secret: "{{ 
_pull_secret_raw.content | b64decode | trim }}" + +- name: Generate SSH key for cluster access + community.crypto.openssh_keypair: + path: "{{ _work_dir }}/agent_ssh_key" + type: ed25519 + force: false + register: _ssh_key + +- name: Set SSH public key fact + ansible.builtin.set_fact: + _ssh_pub_key: "{{ _ssh_key.public_key }}" + +- name: Resolve openshift-install acquisition method + ansible.builtin.set_fact: + _release_image: >- + {{ cifmw_bm_agent_release_image | + default(lookup('env', 'OPENSHIFT_RELEASE_IMAGE') | default('', true)) }} + _ocp_version: >- + {{ cifmw_bm_agent_openshift_version | default('') }} + +- name: Assert an OCP version or release image is provided + ansible.builtin.assert: + that: + - _ocp_version | length > 0 or _release_image | length > 0 + fail_msg: >- + No OCP version or release image provided. + Set cifmw_bm_agent_openshift_version (e.g. "4.18.3") in vars.yaml, + or cifmw_bm_agent_release_image / OPENSHIFT_RELEASE_IMAGE env var. + +- name: Check if openshift-install already exists (local debug case) + ansible.builtin.stat: + path: "{{ _work_dir }}/openshift-install" + register: _oi_bin + +- name: Download openshift-install from OCP mirror + when: + - not _oi_bin.stat.exists + - _release_image | length == 0 + - _ocp_version | length > 0 + block: + - name: Download openshift-install tarball + ansible.builtin.get_url: + url: "https://mirror.openshift.com/pub/openshift-v4/clients/ocp/{{ _ocp_version }}/openshift-install-linux.tar.gz" + dest: "{{ _work_dir }}/openshift-install-linux.tar.gz" + mode: "0644" + + - name: Extract openshift-install binary + ansible.builtin.unarchive: + src: "{{ _work_dir }}/openshift-install-linux.tar.gz" + dest: "{{ _work_dir }}" + remote_src: true + extra_opts: + - openshift-install + +- name: Extract openshift-install from release image + when: + - not _oi_bin.stat.exists + - _release_image | length > 0 + ansible.builtin.command: + cmd: >- + oc adm release extract + --command=openshift-install + --to={{ 
_work_dir }} + --registry-config={{ _pull_secret_file }} + {{ _release_image }} + creates: "{{ _work_dir }}/openshift-install" + +- name: Template install-config.yaml + no_log: true + ansible.builtin.template: + src: agent_install_config.yaml.j2 + dest: "{{ _work_dir }}/install-config.yaml" + mode: "0600" + +- name: Template agent-config.yaml + ansible.builtin.template: + src: agent_config.yaml.j2 + dest: "{{ _work_dir }}/agent-config.yaml" + mode: "0644" + +- name: Remove stale agent state from previous runs + ansible.builtin.file: + path: "{{ item }}" + state: absent + loop: + - "{{ _work_dir }}/.openshift_install_state.json" + - "{{ _work_dir }}/agent.x86_64.iso" + - "{{ _work_dir }}/auth" + - "{{ _work_dir }}/openshift" + +- name: Configure core user console access via MachineConfig + when: cifmw_bm_agent_core_password | default('') | length > 0 + ansible.builtin.include_tasks: bm_core_password_machineconfig.yml + +- name: Persist configs before openshift-install consumes them + ansible.builtin.copy: + src: "{{ _work_dir }}/{{ item }}" + dest: "{{ _work_dir }}/{{ item }}.bak" + remote_src: true + mode: "0600" + loop: + - install-config.yaml + - agent-config.yaml + +- name: Strip pullSecret from install-config backup + ansible.builtin.lineinfile: + path: "{{ _work_dir }}/install-config.yaml.bak" + regexp: '^pullSecret:' + line: "pullSecret: ''" + +- name: Generate agent ISO + ansible.builtin.command: + cmd: "{{ _work_dir }}/openshift-install agent create image --dir {{ _work_dir }}" + +- name: Patch agent ISO ignition for discovery-phase console access + when: (cifmw_bm_agent_live_debug | default(false)) | bool + ansible.builtin.include_tasks: bm_patch_agent_iso.yml + +- name: Set controller IP fact + ansible.builtin.set_fact: + _controller_ip: >- + {{ hostvars[inventory_hostname]['nodepool']['interface_ip'] | + default(ansible_default_ipv4.address | + default(ansible_host)) }} + +- name: Show ISO URL that iDRAC will fetch + ansible.builtin.debug: + msg: "ISO 
URL for iDRAC: http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" + +- name: Stop any existing agent-iso-server container + become: true + ansible.builtin.command: + cmd: podman rm -f agent-iso-server + failed_when: false + changed_when: false + +- name: Serve agent ISO via podman httpd + become: true + ansible.builtin.command: + cmd: >- + podman run -d --name agent-iso-server + -v {{ _work_dir }}:/var/www/html:ro,Z + -p {{ _controller_ip }}:{{ _iso_http_port }}:8080 + registry.access.redhat.com/ubi9/httpd-24:latest + register: _httpd_start + +- name: Check agent-iso-server container is running + become: true + ansible.builtin.command: + cmd: podman ps --filter name=agent-iso-server --format '{{ '{{' }}.Status{{ '}}' }}' + register: _httpd_status + changed_when: false + +- name: Show container status + ansible.builtin.debug: + msg: "agent-iso-server status: {{ _httpd_status.stdout }}" + +- name: Wait for HTTP server to respond + ansible.builtin.uri: + url: "http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" + method: HEAD + register: _http_check + retries: 10 + delay: 3 + until: _http_check.status == 200 + +- name: Eject any existing VirtualMedia before insert + ansible.builtin.include_tasks: bm_eject_vmedia.yml + +- name: Insert agent ISO via VirtualMedia + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.InsertMedia" + method: POST + headers: "{{ _redfish_headers }}" + body_format: json + body: + Image: "http://{{ _controller_ip }}:{{ _iso_http_port }}/agent.x86_64.iso" + Inserted: true + WriteProtected: true + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204] + register: _insert_media + retries: 3 + delay: 10 + until: _insert_media.status in [200, 204] + +- name: Discover VirtualMedia target and set one-time boot override + ansible.builtin.include_tasks: 
bm_discover_vmedia_target.yml + +- name: Power on bare metal host and wait for POST + ansible.builtin.include_tasks: bm_power_on.yml + +- name: Verify BIOS GenericUsbBoot is enabled after POST + ansible.builtin.include_tasks: bm_check_usb_boot.yml + +- name: Add api / api-int host entry before waiting for install + become: true + ansible.builtin.lineinfile: + path: /etc/hosts + regexp: '\sapi\.{{ _cluster_name }}\.{{ _base_domain }}\s' + line: >- + {{ _node_ip }} api.{{ _cluster_name }}.{{ _base_domain }} + api-int.{{ _cluster_name }}.{{ _base_domain }} + state: present + +- name: Add wildcard apps host entry before waiting for install + become: true + ansible.builtin.lineinfile: + path: /etc/hosts + regexp: '\sconsole-openshift-console\.apps\.{{ _cluster_name }}\.{{ _base_domain }}\s' + line: >- + {{ _node_ip }} console-openshift-console.apps.{{ _cluster_name }}.{{ _base_domain }} + oauth-openshift.apps.{{ _cluster_name }}.{{ _base_domain }} + canary-openshift-ingress-canary.apps.{{ _cluster_name }}.{{ _base_domain }} + state: present + +- name: Wait for bootstrap to complete + ansible.builtin.command: + cmd: >- + {{ _work_dir }}/openshift-install agent wait-for bootstrap-complete + --dir {{ _work_dir }} + --log-level info + register: _bootstrap_wait + timeout: "{{ _installer_timeout | int // 2 }}" + +- name: Wait for install to complete + ansible.builtin.command: + cmd: >- + {{ _work_dir }}/openshift-install agent wait-for install-complete + --dir {{ _work_dir }} + --log-level info + register: _install_wait + timeout: "{{ _installer_timeout | int // 2 }}" + +- name: Create kubeconfig destination directory + ansible.builtin.file: + path: "{{ _kubeconfig_dest }}" + state: directory + mode: "0755" + +- name: Copy kubeconfig + ansible.builtin.copy: + src: "{{ _work_dir }}/auth/kubeconfig" + dest: "{{ _kubeconfig_dest }}/kubeconfig" + remote_src: true + mode: "0600" + +- name: Copy kubeadmin-password + ansible.builtin.copy: + src: "{{ _work_dir 
}}/auth/kubeadmin-password" + dest: "{{ _kubeconfig_dest }}/kubeadmin-password" + remote_src: true + mode: "0600" + +- name: Eject VirtualMedia after install + ansible.builtin.include_tasks: bm_eject_vmedia.yml + +- name: Stop HTTP ISO server + become: true + ansible.builtin.command: + cmd: podman rm -f agent-iso-server + failed_when: false + changed_when: false diff --git a/roles/reproducer/tasks/bm_patch_agent_iso.yml b/roles/reproducer/tasks/bm_patch_agent_iso.yml new file mode 100644 index 0000000000..c909fb0db1 --- /dev/null +++ b/roles/reproducer/tasks/bm_patch_agent_iso.yml @@ -0,0 +1,94 @@ +--- +# Patch agent ISO ignition for discovery-phase console access. +# Injects core password, autologin on tty1/tty2, set-core-password.service, +# and enables systemd debug shell on tty6 via kernel arguments. +# Requires: _work_dir, cifmw_bm_agent_core_password (user input) +- name: Check if coreos-installer exists + ansible.builtin.stat: + path: "{{ _work_dir }}/coreos-installer" + register: _ci_bin + +- name: Download coreos-installer + when: not _ci_bin.stat.exists + ansible.builtin.get_url: + url: "https://mirror.openshift.com/pub/openshift-v4/clients/coreos-installer/latest/coreos-installer_amd64" + dest: "{{ _work_dir }}/coreos-installer" + mode: "0755" + +- name: Extract ignition from agent ISO + no_log: "{{ cifmw_nolog | default(true) | bool }}" + ansible.builtin.command: + cmd: >- + {{ _work_dir }}/coreos-installer iso ignition show + {{ _work_dir }}/agent.x86_64.iso + register: _agent_ign_extract + +- name: Write extracted ignition to file + no_log: "{{ cifmw_nolog | default(true) | bool }}" + ansible.builtin.copy: + dest: "{{ _work_dir }}/agent.ign" + content: "{{ _agent_ign_extract.stdout }}" + mode: "0600" + +- name: Generate ignition password hash + ansible.builtin.command: + cmd: "openssl passwd -6 -salt 16charsofsalt '{{ cifmw_bm_agent_core_password }}'" + register: _ign_password_hash + changed_when: false + no_log: true + +- name: Copy ignition patch 
script + ansible.builtin.copy: + src: patch_ignition.py + dest: "{{ _work_dir }}/patch_ignition.py" + mode: "0755" + +- name: Patch ignition with password and autologin + no_log: true + ansible.builtin.command: + cmd: >- + python3 {{ _work_dir }}/patch_ignition.py + {{ _work_dir }}/agent.ign + {{ _ign_password_hash.stdout }} + +- name: Copy ignition redact script + ansible.builtin.copy: + src: redact_ignition.py + dest: "{{ _work_dir }}/redact_ignition.py" + mode: "0755" + +- name: Save redacted ignition backup for inspection + ansible.builtin.command: + cmd: >- + python3 {{ _work_dir }}/redact_ignition.py + {{ _work_dir }}/agent.ign + {{ _work_dir }}/agent.ign.patched.bak + changed_when: false + +- name: Remove existing ignition from agent ISO + ansible.builtin.command: + cmd: >- + {{ _work_dir }}/coreos-installer iso ignition remove + {{ _work_dir }}/agent.x86_64.iso + failed_when: false + +- name: Embed patched ignition into agent ISO + no_log: true + ansible.builtin.command: + cmd: >- + {{ _work_dir }}/coreos-installer iso ignition embed + -i {{ _work_dir }}/agent.ign + {{ _work_dir }}/agent.x86_64.iso + +- name: Enable debug shell via kernel arguments + ansible.builtin.command: + cmd: >- + {{ _work_dir }}/coreos-installer iso kargs modify + -a systemd.debug_shell=/dev/tty6 + {{ _work_dir }}/agent.x86_64.iso + failed_when: false + +- name: Clean up raw ignition file + ansible.builtin.file: + path: "{{ _work_dir }}/agent.ign" + state: absent diff --git a/roles/reproducer/tasks/bm_power_off.yml b/roles/reproducer/tasks/bm_power_off.yml new file mode 100644 index 0000000000..775b31165b --- /dev/null +++ b/roles/reproducer/tasks/bm_power_off.yml @@ -0,0 +1,45 @@ +--- +# Ensure the bare metal host is powered off. 
+# Requires: _bmc_host, _bmc_creds, _redfish_headers +- name: Query current power state + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200] + register: _power_state_check + +- name: Force power off via Redfish + when: _power_state_check.json.PowerState != 'Off' + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset" + method: POST + headers: "{{ _redfish_headers }}" + body_format: json + body: + ResetType: ForceOff + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204] + +- name: Wait for host to power off + when: _power_state_check.json.PowerState != 'Off' + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200] + register: _power_off_wait + retries: 12 + delay: 10 + until: _power_off_wait.json.PowerState == 'Off' diff --git a/roles/reproducer/tasks/bm_power_on.yml b/roles/reproducer/tasks/bm_power_on.yml new file mode 100644 index 0000000000..9ffca8098f --- /dev/null +++ b/roles/reproducer/tasks/bm_power_on.yml @@ -0,0 +1,44 @@ +--- +# Power on the bare metal host and wait for POST to complete. 
+# Requires: _bmc_host, _bmc_creds, _redfish_headers +- name: Query current power state + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200] + register: _power_state + +- name: Power on bare metal host + when: _power_state.json.PowerState != 'On' + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset" + method: POST + headers: "{{ _redfish_headers }}" + body_format: json + body: + ResetType: "On" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 204] + +- name: Wait for host POST to complete + ansible.builtin.uri: + url: "https://{{ _bmc_host }}/redfish/v1/Systems/System.Embedded.1" + method: GET + headers: "{{ _redfish_headers }}" + user: "{{ _bmc_creds.username }}" + password: "{{ _bmc_creds.password }}" + validate_certs: false + force_basic_auth: true + status_code: [200] + register: _post_check + retries: 30 + delay: 10 + until: _post_check.json.PowerState == 'On' diff --git a/roles/reproducer/tasks/configure_bm_ocp_controller.yml b/roles/reproducer/tasks/configure_bm_ocp_controller.yml new file mode 100644 index 0000000000..76713a6ebe --- /dev/null +++ b/roles/reproducer/tasks/configure_bm_ocp_controller.yml @@ -0,0 +1,161 @@ +--- +# Simplified controller setup for bare metal OCP SNO. +# The current host IS controller-0 (registered via add_host). +# Provides the subset of configure_controller.yml needed when +# there are no VMs and no libvirt_manager layout. 
+- name: Configure controller for bare metal OCP + when: + - _use_bm_ocp | default(false) | bool + block: + - name: Ensure ci-framework-data directories exist + ansible.builtin.file: + path: "{{ cifmw_reproducer_controller_basedir }}/{{ item }}" + state: directory + mode: "0755" + loop: + - parameters + - artifacts + - logs + + - name: Install required packages + become: true + ansible.builtin.package: + name: + - bash-completion + - bind-utils + - git-core + - make + - podman + - python3-jmespath + - python3-netaddr + - python3-pip + - jq + + - name: Ensure /etc/ci/env is created + become: true + ansible.builtin.file: + path: /etc/ci/env + state: directory + mode: "0755" + + - name: Manage secrets on controller-0 + vars: + cifmw_manage_secrets_basedir: "{{ cifmw_reproducer_controller_basedir }}" + cifmw_manage_secrets_owner: "{{ cifmw_reproducer_controller_user }}" + block: + - name: Initialize secret manager + ansible.builtin.import_role: + name: manage_secrets + + - name: Inject secrets + ansible.builtin.import_role: + name: manage_secrets + tasks_from: reproducer.yml + + - name: Write reproducer-variables.yml + vars: + _filtered_vars: >- + {{ + hostvars[inventory_hostname] | default({}) | + dict2items | + selectattr('key', 'match', + '^(pre|post|cifmw)_(?!install_yamls|devscripts).*') | + rejectattr('key', 'equalto', 'cifmw_target_host') | + rejectattr('key', 'equalto', 'cifmw_basedir') | + rejectattr('key', 'equalto', 'cifmw_path') | + rejectattr('key', 'equalto', 'cifmw_extras') | + rejectattr('key', 'equalto', 'cifmw_openshift_kubeconfig') | + rejectattr('key', 'equalto', 'cifmw_openshift_token') | + rejectattr('key', 'equalto', 'cifmw_networking_env_definition') | + rejectattr('key', 'match', '^cifmw_use_(?!lvms).*') | + rejectattr('key', 'match', '^cifmw_reproducer.*') | + rejectattr('key', 'match', '^cifmw_rhol.*') | + rejectattr('key', 'match', '^cifmw_discover.*') | + rejectattr('key', 'match', '^cifmw_libvirt_manager.*') | + rejectattr('key', 'match', 
'^cifmw_manage_secrets_(pullsecret|citoken).*') | + items2dict + }} + ansible.builtin.copy: + mode: "0644" + dest: "{{ cifmw_reproducer_controller_basedir }}/parameters/reproducer-variables.yml" + content: "{{ _filtered_vars | to_nice_yaml }}" + + - name: Create reproducer-variables.yml symlink + ansible.builtin.file: + dest: "{{ cifmw_reproducer_controller_user_dir }}/reproducer-variables.yml" + src: "{{ cifmw_reproducer_controller_basedir }}/parameters/reproducer-variables.yml" + state: link + + - name: Write openshift-environment.yml + ansible.builtin.copy: + mode: "0644" + dest: "{{ cifmw_reproducer_controller_basedir }}/parameters/openshift-environment.yml" + content: |- + {% raw %} + --- + cifmw_basedir: "{{ ansible_user_dir }}/ci-framework-data" + cifmw_openshift_login_password_file: >- + {{ ansible_user_dir }}/.kube/kubeadmin-password + cifmw_openshift_login_kubeconfig: >- + {{ ansible_user_dir }}/.kube/config + cifmw_architecture_automation_file: >- + {{ + ( + cifmw_architecture_repo, + 'automation/vars', + cifmw_architecture_scenario~'.yaml' + ) | ansible.builtin.path_join + }} + {% endraw %} + + - name: Create openshift-environment.yml symlink + ansible.builtin.file: + dest: "{{ cifmw_reproducer_controller_user_dir }}/openshift-environment.yml" + src: "{{ cifmw_reproducer_controller_basedir }}/parameters/openshift-environment.yml" + state: link + + - name: Create minimal zuul_inventory.yml + ansible.builtin.copy: + mode: "0644" + dest: "{{ cifmw_reproducer_controller_basedir }}/artifacts/zuul_inventory.yml" + content: | + all: + hosts: + localhost: + ansible_connection: local + + - name: Install ansible dependencies + ansible.builtin.pip: + requirements: "{{ _reqs_file }}" + vars: + _reqs_check: >- + {{ + ( + cifmw_reproducer_controller_user_dir, + cifmw_repo_relative | default('src/github.com/openstack-k8s-operators/ci-framework'), + 'common-requirements.txt' + ) | path_join + }} + _reqs_file: >- + {{ + (_reqs_check is file) | + ternary( + _reqs_check, 
+ 'https://raw.githubusercontent.com/openstack-k8s-operators/ci-framework/main/common-requirements.txt' + ) + }} + failed_when: false + + - name: Generate networking definition + when: + - cifmw_networking_definition is defined or + cifmw_networking_env_definition is defined + vars: + cifmw_networking_mapper_basedir: "{{ cifmw_reproducer_controller_basedir }}" + cifmw_networking_mapper_gather_facts: false + ansible.builtin.import_role: + name: networking_mapper + + - name: Configure ntp service + ansible.builtin.include_role: + name: cifmw_ntp diff --git a/roles/reproducer/tasks/main.yml b/roles/reproducer/tasks/main.yml index deebf619d0..64ce17fa90 100644 --- a/roles/reproducer/tasks/main.yml +++ b/roles/reproducer/tasks/main.yml @@ -45,6 +45,8 @@ - name: Assert no conflicting parameters were passed tags: - always + when: + - not (cifmw_reproducer_bm_ocp | default(false) | bool) ansible.builtin.assert: that: - (_cifmw_libvirt_manager_layout.vms.crc is defined) or @@ -86,10 +88,36 @@ (_cifmw_libvirt_manager_layout.vms.ocp.amount is defined and _cifmw_libvirt_manager_layout.vms.ocp.amount|int > 0) }} + _use_bm_ocp: >- + {{ cifmw_reproducer_bm_ocp | default(false) | bool }} ansible.builtin.set_fact: _use_crc: "{{ _use_crc }}" _use_ocp: "{{ _use_ocp }}" - _has_openshift: "{{ _use_ocp or _use_crc }}" + _use_bm_ocp: "{{ _use_bm_ocp }}" + _has_openshift: "{{ _use_ocp or _use_crc or _use_bm_ocp }}" + +- name: Prepare bare metal OCP environment + when: + - _use_bm_ocp | bool + tags: + - always + block: + - name: Register current host as controller-0 + ansible.builtin.add_host: + name: controller-0 + ansible_connection: local + ansible_user: "{{ ansible_user_id }}" + ansible_ssh_user: "{{ ansible_user_id }}" + groups: + - controllers + + - name: Set minimal libvirt layout for bare metal OCP + when: + - _cifmw_libvirt_manager_layout is not defined + ansible.builtin.set_fact: + _cifmw_libvirt_manager_layout: + vms: {} + networks: {} - name: Ensure directories are present 
tags: @@ -182,6 +210,87 @@ - devscripts_layout - ocp_layout +- name: Consume dev-scripts for bare metal OCP SNO + when: + - _use_bm_ocp | default(false) | bool + tags: + - bootstrap + - bootstrap_layout + - devscripts_layout + - ocp_layout + - bm_ocp_layout + ansible.builtin.include_tasks: + file: bm_ocp_layout.yml + apply: + tags: + - bootstrap + - bootstrap_layout + - devscripts_layout + - ocp_layout + - bm_ocp_layout + +- name: Post-setup for bare metal OCP SNO + when: + - _use_bm_ocp | default(false) | bool + tags: + - bootstrap + - bootstrap_layout + - bm_ocp_layout + vars: + _auth_path: >- + {{ + ( + cifmw_devscripts_repo_dir | default( + ansible_user_dir ~ '/src/github.com/openshift-metal3/dev-scripts'), + 'ocp', + cifmw_bm_agent_cluster_name | + default(cifmw_devscripts_config.cluster_name | default('ocp')), + 'auth' + ) | ansible.builtin.path_join + }} + block: + - name: Slurp kubeconfig from dev-scripts + register: _devscripts_kubeconfig + ansible.builtin.slurp: + path: "{{ (_auth_path, 'kubeconfig') | path_join }}" + + - name: Slurp kubeadmin-password from dev-scripts + register: _devscripts_kubeadm + ansible.builtin.slurp: + path: "{{ (_auth_path, 'kubeadmin-password') | path_join }}" + + - name: Ensure .kube directory exists + ansible.builtin.file: + path: "{{ ansible_user_dir }}/.kube" + state: directory + mode: "0750" + + - name: Copy kubeconfig to local ~/.kube/config + ansible.builtin.copy: + dest: "{{ ansible_user_dir }}/.kube/config" + content: "{{ _devscripts_kubeconfig.content | b64decode }}" + mode: "0640" + + - name: Copy kubeadmin-password + ansible.builtin.copy: + dest: "{{ ansible_user_dir }}/.kube/kubeadmin-password" + content: "{{ _devscripts_kubeadm.content | b64decode }}" + mode: "0600" + + - name: Expose openshift_login related facts + ansible.builtin.import_role: + name: openshift_login + tasks_from: set_cluster_fact.yml + vars: + cifmw_openshift_login_load_kubeconfig: >- + {{ (_auth_path, 'kubeconfig') | path_join }} + 
cifmw_openshift_login_load_kubeadmin: >- + {{ (_auth_path, 'kubeadmin-password') | path_join }} + + - name: Set wait for OCP cluster flag + ansible.builtin.set_fact: + _wait_ocp_cluster: true + - name: Load the architecture local kustomize patches when: - cifmw_architecture_scenario is defined @@ -204,11 +313,27 @@ - bootstrap_layout - bootstrap_env +- name: Configure controller for bare metal OCP + when: + - _use_bm_ocp | default(false) | bool + tags: + - bootstrap + - bootstrap_layout + - bootstrap_env + ansible.builtin.include_tasks: + file: configure_bm_ocp_controller.yml + apply: + tags: + - bootstrap + - bootstrap_layout + - bootstrap_env + - name: Apply VLAN ids to TAP type interfaces. when: - _cifmw_libvirt_manager_layout.networks is defined - - _use_ocp - - ( + - _use_ocp or (_use_bm_ocp | default(false) | bool) + - (_use_bm_ocp | default(false) | bool) or + ( _cifmw_libvirt_manager_layout.vms.ocp.target is defined and _cifmw_libvirt_manager_layout.vms.ocp.target == inventory_hostname ) or @@ -261,6 +386,8 @@ _cifmw_libvirt_manager_layout.vms.controller.target is undefined block: - name: Push local code + when: + - not (_use_bm_ocp | default(false) | bool) tags: - bootstrap_repositories - bootstrap diff --git a/roles/reproducer/tasks/ocp_layout_assertions.yml b/roles/reproducer/tasks/ocp_layout_assertions.yml index d48736bc42..5629977e32 100644 --- a/roles/reproducer/tasks/ocp_layout_assertions.yml +++ b/roles/reproducer/tasks/ocp_layout_assertions.yml @@ -51,12 +51,13 @@ - _element.disksize is defined - _element.disksize | int > _disk - _element.amount is defined - - _element.amount >= _min_ocp + - _element.amount >= _min_ocp or cifmw_reproducer_allow_one_ocp quiet: true msg: >- Ensure you provide enough memory (>=16), cpus (>=10) and disksize (>50) to ocp nodes, set a correct amount (>=3) - and uefi is set to true in cifmw_libvirt_manager.vms.ocp + or set local setup (cifmw_reproducer_allow_one_ocp) and + uefi is set to true in 
cifmw_libvirt_manager.vms.ocp - name: Ensure we have needed data for ocp_worker if defined when: diff --git a/roles/reproducer/tasks/validations.yml b/roles/reproducer/tasks/validations.yml index 04cfdc9f1e..e9e822d8ae 100644 --- a/roles/reproducer/tasks/validations.yml +++ b/roles/reproducer/tasks/validations.yml @@ -1,5 +1,7 @@ --- - name: Ensure we pass needed parameter + when: + - not (cifmw_reproducer_bm_ocp | default(false) | bool) ansible.builtin.assert: that: - cifmw_use_libvirt is defined diff --git a/roles/reproducer/templates/agent_config.yaml.j2 b/roles/reproducer/templates/agent_config.yaml.j2 new file mode 100644 index 0000000000..b2ad62467e --- /dev/null +++ b/roles/reproducer/templates/agent_config.yaml.j2 @@ -0,0 +1,10 @@ +apiVersion: v1alpha1 +metadata: + name: {{ _cluster_name }} +rendezvousIP: {{ _node_ip }} +hosts: + - hostname: {{ _cluster_name }}-master-0 + role: master + interfaces: + - name: {{ _node_iface }} + macAddress: {{ _node_mac }} diff --git a/roles/reproducer/templates/agent_install_config.yaml.j2 b/roles/reproducer/templates/agent_install_config.yaml.j2 new file mode 100644 index 0000000000..b2822c7c79 --- /dev/null +++ b/roles/reproducer/templates/agent_install_config.yaml.j2 @@ -0,0 +1,27 @@ +apiVersion: v1 +metadata: + name: {{ _cluster_name }} +baseDomain: {{ _base_domain }} +compute: + - architecture: amd64 + hyperthreading: Enabled + name: worker + replicas: 0 +controlPlane: + architecture: amd64 + hyperthreading: Enabled + name: master + replicas: 1 +networking: + clusterNetwork: + - cidr: 10.128.0.0/14 + hostPrefix: 23 + machineNetwork: + - cidr: {{ _machine_network }} + networkType: OVNKubernetes + serviceNetwork: + - 172.30.0.0/16 +platform: + none: {} +pullSecret: '{{ _pull_secret | to_json }}' +sshKey: '{{ _ssh_pub_key }}' diff --git a/scenarios/reproducers/va-hci-minimal-sno.yml b/scenarios/reproducers/va-hci-minimal-sno.yml new file mode 100644 index 0000000000..f28ffeecc4 --- /dev/null +++ 
b/scenarios/reproducers/va-hci-minimal-sno.yml @@ -0,0 +1,131 @@ +--- +cifmw_parent_scenario: "scenarios/reproducers/va-hci-base.yml" + +cifmw_devscripts_sno: true +cifmw_reproducer_allow_one_ocp: true +# Time for "killing" metal3 bootstrap script, because +# it would not finish itself (workaround needed). +# Check 07-sno-workaround.sh provided by +# roles/devscripts/tasks/main.yml play. +cifmw_devscripts_sno_bootstrap_timeout: 1800 + +# HERE if you want to override kustomization, you can uncomment this parameter +# and push the data structure you want to apply. +# cifmw_architecture_user_kustomize: +# stage_0: +# 'network-values': +# data: +# starwars: Obiwan + +# HERE, if you want to stop the deployment loop at any stage, you can uncomment +# the following parameter and update the value to match the stage you want to +# reach. Known stages are: +# pre_kustomize_stage_INDEX +# pre_apply_stage_INDEX +# post_apply_stage_INDEX +# +# cifmw_deploy_architecture_stopper: + +cifmw_libvirt_manager_configuration: + networks: + osp_trunk: | + + osp_trunk + + + + + + + ocpbm: | + + ocpbm + + + + + + + ocppr: | + + ocppr + + + + vms: + # https://github.com/openshift-metal3/dev-scripts/blob/master/common.sh#L470 + ocp: + amount: 1 + admin_user: core + image_local_dir: "{{ cifmw_basedir }}/images/" + disk_file_name: "ocp_master" + disksize: "100" + extra_disks_num: 3 + extra_disks_size: "50G" + cpus: 16 + memory: 32 + root_part_id: 4 + uefi: true + nets: + - ocppr + - ocpbm + - osp_trunk + compute: + uefi: "{{ cifmw_use_uefi }}" + root_part_id: "{{ cifmw_root_partition_id }}" + amount: "{{ [cifmw_libvirt_manager_compute_amount|int, 3] | max }}" + image_url: "{{ cifmw_discovered_image_url }}" + sha256_image_name: "{{ cifmw_discovered_hash }}" + image_local_dir: "{{ cifmw_basedir }}/images/" + disk_file_name: "base-os.qcow2" + disksize: "{{ [cifmw_libvirt_manager_compute_disksize|int, 50] | max }}" + memory: "{{ [cifmw_libvirt_manager_compute_memory|int, 8] | max }}" + cpus: "{{ 
[cifmw_libvirt_manager_compute_cpus|int, 4] | max }}" + extra_disks_num: 3 + extra_disks_size: 30G + nets: + - ocpbm + - osp_trunk + controller: + uefi: "{{ cifmw_use_uefi }}" + root_part_id: "{{ cifmw_root_partition_id }}" + image_url: "{{ cifmw_discovered_image_url }}" + sha256_image_name: "{{ cifmw_discovered_hash }}" + image_local_dir: "{{ cifmw_basedir }}/images/" + disk_file_name: "base-os.qcow2" + disksize: 50 + memory: 8 + cpus: 4 + nets: + - ocpbm + - osp_trunk + +## devscript support for OCP deploy +cifmw_devscripts_config_overrides: + fips_mode: "{{ cifmw_fips_enabled | default(false) | bool }}" + +# Note: with that extra_network_names "osp_trunk", we instruct +# devscripts role to create a new network, and associate it to +# the OCP nodes. This one is a "private network", and will hold +# the VLANs used for network isolation. + +# Please create a custom env file to provide: +# cifmw_devscripts_ci_token: +# cifmw_devscripts_pull_secret: + +# Test Ceph file and object storage (block is enabled by default) +cifmw_ceph_daemons_layout: + rgw_enabled: true + dashboard_enabled: false + cephfs_enabled: true + ceph_nfs_enabled: false + +# Vars related to update_containers cinder volume and manila share +cifmw_update_containers_cindervolumes: + - ceph +cifmw_update_containers_manilashares: + - share1