containers · rhatdan · Sep 16, 2024 · Sep 10, 2024
@@ -1,4 +1,4 @@
-ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/nvidia-builder:latest"
+ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/intel-builder:latest"
 ARG BASEIMAGE="quay.io/centos-bootc/centos-bootc:stream9"
 
 FROM ${DRIVER_TOOLKIT_IMAGE} as builder
@@ -40,6 +40,10 @@ COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/i
 COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_cn/habanalabs_cn.ko /tmp/extra/habanalabs_cn.ko
 COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_en/habanalabs_en.ko /tmp/extra/habanalabs_en.ko
 COPY --from=builder /home/builder/lib/firmware/habanalabs /tmp/firmware/habanalabs
+COPY --from=builder /home/builder/usr/bin/hl-smi /usr/bin/hl-smi
+
+# Overlay the build context's duplicated/common/usr tree onto the image's /usr.
+COPY duplicated/common/usr /usr
 
 RUN . /etc/os-release \
     && export OS_VERSION_MAJOR=$(echo ${VERSION} | cut -d'.' -f 1) \
@@ -49,16 +53,15 @@ RUN . /etc/os-release \
     && mv /tmp/firmware/habanalabs /lib/firmware \
     && depmod -a ${KERNEL_VERSION}.${TARGET_ARCH}
 
-RUN dnf install -y ${EXTRA_RPM_PACKAGES} \
+RUN mv /etc/selinux /etc/selinux.tmp \
+    && dnf install -y ${EXTRA_RPM_PACKAGES} \
     cloud-init \
     skopeo \
     rsync \
     && dnf clean all \
+    && mv /etc/selinux.tmp /etc/selinux \
     && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants
 
-ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-intel:latest"
-
-ARG SSHPUBKEY
 
 # The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
 # public key into the image, allowing root access via ssh.
@@ -68,10 +71,33 @@ RUN if [ -n "${SSHPUBKEY}" ]; then \
 	    echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
 fi
 
-# Prepull the instructlab image
-RUN if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
+# Register /usr/lib/containers/storage as an additional image store.
+# Skipped when storage.conf already mentions the path, so a base image that
+# ships the entry is not given a duplicate. Drop once base images set it.
+RUN if ! grep -q /usr/lib/containers/storage /etc/containers/storage.conf; then \
+        sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' /etc/containers/storage.conf; \
+    fi
+
+# Install the ilab wrapper executable in one layer. NOTE(review): no step substituting its __REPLACE_*__ placeholders is visible in this diff - confirm.
+COPY --chmod=755 duplicated/ilab-wrapper/ilab /usr/bin/ilab
+
+# Image preloaded into /usr/lib/containers/storage below, and the build-secret id prefix holding its registry credentials.
+ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-intel:latest"
+ARG INSTRUCTLAB_IMAGE_PULL_SECRET="instructlab-intel-pull"
+
+# Declared so that running this image as an OCI container keeps /var/lib/containers on a volume, preventing overlay-on-overlay issues.
+VOLUME /var/lib/containers
+# Preload ${INSTRUCTLAB_IMAGE} into /usr/lib/containers/storage: import the OCI layout under /run/.input if present, else pull it (with the mounted secret as authfile when available).
+RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
+    if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
          IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-intel) && \
          podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE}; \
+    elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
+         IID=$(sudo podman --root /usr/lib/containers/storage pull --authfile /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson ${INSTRUCTLAB_IMAGE}); \
     else \
          IID=$(sudo podman --root /usr/lib/containers/storage pull ${INSTRUCTLAB_IMAGE}); \
     fi
+# Reset podman's default storage (no --root is given here); stderr is discarded.
+RUN podman system reset --force 2>/dev/null
+# Label read by upgrade-informer through skopeo. NOTE(review): no ARG IMAGE_VERSION_ID is visible in this diff - confirm it is declared.
+LABEL image_version_id="${IMAGE_VERSION_ID}"
@@ -0,0 +1 @@
+../upgrade-informer.service
@@ -0,0 +1 @@
+../upgrade-informer.timer
@@ -0,0 +1,16 @@
+[Unit]
+Description=Check for available RHEL AI upgrade
+ConditionPathExists=/run/ostree-booted
+# After= only orders this unit; Wants= is what pulls network-online.target
+# into the transaction so that the ordering actually has an effect.
+Wants=network-online.target
+After=network-online.target
+StartLimitIntervalSec=400
+StartLimitBurst=3
+
+[Service]
+Type=oneshot
+ExecStart=/usr/libexec/upgrade-informer
+# Retry a failed check after 90 s; the start limit above caps it at 3 starts per 400 s.
+Restart=on-failure
+RestartSec=90
@@ -0,0 +1,11 @@
+[Unit]
+Description=Runs upgrade informer periodically
+ConditionPathExists=/run/ostree-booted
+
+[Timer]
+OnBootSec=1h
+OnUnitInactiveSec=1day
+RandomizedDelaySec=2h
+
+[Install]
+WantedBy=timers.target
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Run the command and capture its output
+output=$(bootc upgrade --check | sed -e 1q)
+message_file="/etc/motd.d/upgrade-message"
+bootc_auth="/etc/ostree/auth.json"
+
+if [[ $output == Update\ available* ]]; then
+    if [[ ! -f $message_file ]]; then
+        echo "New version was found"
+        bootc_image=$(awk '{print $4}' <<< "$output")
+        # If auth file exists we should use it
+        auth_params=""
+        if [[ -f $bootc_auth ]]; then
+            auth_params="--authfile $bootc_auth"
+        fi
+
+        # Get image version
+        # shellcheck disable=SC2086
+        image_version_id=$(skopeo inspect --format json $auth_params "$bootc_image" | jq -r '.Labels | .["image_version_id"] // empty')
+
+        # If upgrade available, write the output to the file
+        cat > $message_file << EOF
+
+**   Attention!   **
+** A new $image_version_id version is available **
+** In order to apply it run: bootc upgrade --apply
+** Please note that the system will reboot after the upgrade **
+
+EOF
+    fi
+else
+    echo "No upgrade was found"
+    rm $message_file 2> /dev/null
+fi
+
+echo "Finished running upgrade informer"
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+echo-err() { echo "$@" >&2; }  # echo the arguments to stderr
+
+verify_range() {
+    subuid_range="$1"
+    username="$2"
+    NUMBER_OF_MATCHING_SUBUID_RANGES=$(if [[ -z "$subuid_range" ]]; then echo 0; else wc -l <<<"$subuid_range"; fi)
+
+    if [[ "$NUMBER_OF_MATCHING_SUBUID_RANGES" == 0 ]]; then
+        echo-err "No /etc/subuid range found for user $username ($UID)"
+        exit 1
+    elif [[ "$NUMBER_OF_MATCHING_SUBUID_RANGES" != 1 ]]; then
+        # TODO: Handle multiple subuid ranges. But for now, hard fail
+        echo-err "Multiple /etc/subuid ranges found for user $username ($UID), this is currently unsupported:"
+        echo-err "$subuid_range"
+        exit 1
+    fi
+}
+
+check_insights() {
+    # Hosts already registered with Insights, or explicitly opted out, pass.
+    if [[ -f /etc/insights-client/machine-id || -f /etc/ilab/insights-opt-out ]]; then
+        return
+    fi
+
+    # The reminder only applies to RHEL; ID is read from /etc/os-release.
+    local ID
+    eval "$(grep ^ID= /etc/os-release)"
+    if [[ "$ID" != "rhel" ]]; then return; fi
+
+    # Print the connection instructions, then abort the wrapper.
+    cat << EOF
+This host is not connected to Red Hat Insights.
+
+To connect this host to Red Hat Insights run the following command:
+sudo rhc connect --organization <org_id> --activation-key <your_activation_key>
+
+To generate an Activation Key:
+https://console.redhat.com/insights/connector/activation-keys (this page will also display your Organization ID).
+
+For more information on Red Hat Insights, please visit:
+https://docs.redhat.com/en/documentation/subscription_central/1-latest/html/getting_started_with_activation_keys_on_the_hybrid_cloud_console/assembly-creating-managing-activation-keys
+EOF
+    exit 1
+}
+
+check_insights
+
+# Template values replaced by container build
+CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__"
+IMAGE_NAME="__REPLACE_IMAGE_NAME__"
+
+ENTRYPOINT="ilab"
+PARAMS=("$@")
+
+if [[ -n "$ILAB_HOME" ]]; then
+    HOME="$ILAB_HOME"
+fi
+
+for dir in "$HOME/.cache" "$HOME/.config" "$HOME/.local"; do
+    mkdir -p "$dir"
+done
+
+if [[ "$1" = "shell" ]]; then
+    ENTRYPOINT=bash
+    PARAMS=()
+fi
+
+# If you need to mount additional volumes into the container, you can specify them
+# using the ILAB_ADDITIONAL_MOUNTS environment variable.
+#
+# Example ILAB_ADDITIONAL_MOUNTS usage:
+#
+# ILAB_ADDITIONAL_MOUNTS="/host/path:/container/path /host/path2:/container/path2"
+#
+# If your path contains spaces, you can use quotes:
+#
+# ILAB_ADDITIONAL_MOUNTS="/host/path:/container/path '/host/path with spaces':/container/path"
+ADDITIONAL_MOUNTS=()
+if [ -n "${ILAB_ADDITIONAL_MOUNTS}" ]; then
+    # eval re-parses the value as shell words so quoted paths with spaces survive; it also runs any shell code embedded in the variable.
+    eval "ADDITIONAL_MOUNTS=(${ILAB_ADDITIONAL_MOUNTS})"
+fi
+ADDITIONAL_MOUNT_OPTIONS=()
+for PODMAN_MOUNT in "${ADDITIONAL_MOUNTS[@]}"; do
+    ADDITIONAL_MOUNT_OPTIONS+=("-v" "$PODMAN_MOUNT")
+done
+
+# Add pull-secret to additional mounts: the first existing auth.json wins. Normal users keep it under
+# XDG_RUNTIME_DIR (/run/user/$UID, also used when the variable is unset), root under /run/containers.
+for authfile in \
+    "${XDG_RUNTIME_DIR:-/run/user/${UID}}/containers/auth.json" \
+    "/run/user/${UID}/containers/auth.json" \
+    "/run/containers/${UID}/auth.json"
+do
+    if [[ -f "$authfile" ]]; then
+        ADDITIONAL_MOUNT_OPTIONS+=("-v" "$authfile:/run/containers/0/auth.json")
+        break
+    fi
+done
+
+# We run the container as sudo in order to be able to access the root container
+# storage, which has the ilab image pre-pulled. But for security reasons we map
+# root UID 0 inside the container to the current user's UID (and all the other
+# subuids to the user's /etc/subuid range) so that we're effectively running
+# the container as the current user.
+#
+# In the future, we will run podman as the current user, once we figure a
+# reasonable way for the current user to access the root's user container
+# storage.
+if [[ "$UID" == 0 ]]; then
+    # If we're already running as root, we don't need to map any UIDs
+    IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=()
+else
+    CURRENT_USER_NAME=$(id --user --name)
+    CURRENT_USER_SUBUID_RANGE=$(awk \
+        --field-separator ':' \
+        --assign current_user="$CURRENT_USER_NAME" \
+        --assign current_uid="$UID" \
+        '$1 == current_user || $1 == current_uid {print $2 ":" $3}' \
+        /etc/subuid)
+
+    verify_range "$CURRENT_USER_SUBUID_RANGE" "$CURRENT_USER_NAME"
+
+    IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=("--uidmap" "0:$UID" "--uidmap" "1:$CURRENT_USER_SUBUID_RANGE")
+fi
+
+PRESERVE_ENV="VLLM_LOGGING_LEVEL,NCCL_DEBUG,HOME,HF_TOKEN"
+PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it"
+    "${IMPERSONATE_CURRENT_USER_PODMAN_FLAGS[@]}"
+    "--device" "${CONTAINER_DEVICE}"
+    "--security-opt" "label=disable" "--net" "host"
+    "--shm-size" "10G"
+    "--pids-limit" "-1"
+    "-v" "$HOME:$HOME"
+    "${ADDITIONAL_MOUNT_OPTIONS[@]}"
+    "--env" "VLLM_LOGGING_LEVEL"
+    "--env" "HOME"
+    "--env" "NCCL_DEBUG"
+    "--entrypoint" "$ENTRYPOINT"
+    "--env" "HF_TOKEN"
+    "${IMAGE_NAME}")
+
+exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}"