Skip to content

Commit

Permalink
Add workaround and update for intel-bootc
Browse files Browse the repository at this point in the history
Added workarounds for libdnf, the hl-smi binary, and the ilab wrapper.
Also added a duplicated directory of common files so that the build works with Konflux CI.

Signed-off-by: Enrique Belarte Luque <[email protected]>
  • Loading branch information
enriquebelarte committed Sep 10, 2024
1 parent 185a957 commit 1b55651
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 7 deletions.
40 changes: 33 additions & 7 deletions training/intel-bootc/Containerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/nvidia-builder:latest"
ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/intel-builder:latest"
ARG BASEIMAGE="quay.io/centos-bootc/centos-bootc:stream9"

FROM ${DRIVER_TOOLKIT_IMAGE} as builder
Expand Down Expand Up @@ -40,6 +40,10 @@ COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/i
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_cn/habanalabs_cn.ko /tmp/extra/habanalabs_cn.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_en/habanalabs_en.ko /tmp/extra/habanalabs_en.ko
COPY --from=builder /home/builder/lib/firmware/habanalabs /tmp/firmware/habanalabs
COPY --from=builder /home/builder/usr/bin/hl-smi /usr/bin/hl-smi


COPY duplicated/common/usr /usr

RUN . /etc/os-release \
&& export OS_VERSION_MAJOR=$(echo ${VERSION} | cut -d'.' -f 1) \
Expand All @@ -49,16 +53,15 @@ RUN . /etc/os-release \
&& mv /tmp/firmware/habanalabs /lib/firmware \
&& depmod -a ${KERNEL_VERSION}.${TARGET_ARCH}

RUN dnf install -y ${EXTRA_RPM_PACKAGES} \
# Install the extra packages with /etc/selinux moved out of the way (the libdnf
# workaround mentioned in the commit message), then put it back and enable
# cloud-init.target under default.target.
# Every step is chained with '&&'; without it after the first `mv`, the
# `dnf install ...` words would be passed to `mv` as extra arguments and the
# build step would fail.
RUN mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y ${EXTRA_RPM_PACKAGES} \
        cloud-init \
        skopeo \
        rsync \
    && dnf clean all \
    && mv /etc/selinux.tmp /etc/selinux \
    && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants

ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-intel:latest"

ARG SSHPUBKEY

# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
# public key into the image, allowing root access via ssh.
Expand All @@ -68,10 +71,33 @@ RUN if [ -n "${SSHPUBKEY}" ]; then \
echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
fi

# Prepull the instructlab image
RUN if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
# Setup /usr/lib/containers/storage as an additional store for images.
# Remove once the base images have this set by default.
# Also make sure not to duplicate if a base image already has it specified.
RUN grep -q /usr/lib/containers/storage /etc/containers/storage.conf || \
sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
/etc/containers/storage.conf

COPY duplicated/ilab-wrapper/ilab /usr/bin/ilab
RUN chmod +x /usr/bin/ilab

ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-intel:latest"
ARG INSTRUCTLAB_IMAGE_PULL_SECRET="instructlab-intel-pull"


# Added for running as an OCI Container to prevent Overlay on Overlay issues.
VOLUME /var/lib/containers

# Pre-pull the instructlab image into the additional image store at
# /usr/lib/containers/storage. Three sources are tried in order:
#   1. an OCI layout provided at /run/.input/instructlab-intel, which is pulled
#      and then tagged as ${INSTRUCTLAB_IMAGE};
#   2. the registry, authenticated with the .dockerconfigjson build secret
#      mounted under /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET};
#   3. the registry, without an auth file.
# NOTE(review): in branches 2 and 3 the captured IID is not used afterwards,
# and those branches call podman through sudo while branch 1 does not —
# confirm both are intended.
RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-intel) && \
podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE}; \
elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
IID=$(sudo podman --root /usr/lib/containers/storage pull --authfile /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson ${INSTRUCTLAB_IMAGE}); \
else \
IID=$(sudo podman --root /usr/lib/containers/storage pull ${INSTRUCTLAB_IMAGE}); \
fi
RUN podman system reset --force 2>/dev/null

LABEL image_version_id="${IMAGE_VERSION_ID}"

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# One-shot service that runs the upgrade informer script.
[Unit]
Description=Check for available RHEL AI upgrade
# Only run on hosts where /run/ostree-booted exists.
ConditionPathExists=/run/ostree-booted
# After= only orders this unit relative to network-online.target; Wants= is
# what actually pulls the target into the transaction, so the check does not
# start before the network is reported as online.
Wants=network-online.target
After=network-online.target
# Allow at most 3 start attempts within a 400 second window.
StartLimitIntervalSec=400
StartLimitBurst=3

[Service]
Type=oneshot
ExecStart=/usr/libexec/upgrade-informer
# Retry a failed run after 90 seconds, bounded by the start limit above.
Restart=on-failure
RestartSec=90
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Timer that periodically triggers the upgrade check.
# NOTE(review): no Unit= is set, so systemd activates the service that shares
# this timer's base name — presumably the upgrade informer service; the file
# names are not visible here, confirm they match.
[Unit]
Description=Runs upgrade informer periodically
# Only active on hosts where /run/ostree-booted exists.
ConditionPathExists=/run/ostree-booted

[Timer]
# First run one hour after boot.
OnBootSec=1h
# Subsequent runs one day after the activated unit last became inactive.
OnUnitInactiveSec=1day
# Add a random delay of up to two hours to each scheduled run.
RandomizedDelaySec=2h

[Install]
WantedBy=timers.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
#
# upgrade-informer: asks bootc whether a newer image is available and, if so,
# writes a notice to /etc/motd.d/upgrade-message. When no update is reported
# the notice is removed again.

# Only the first line of the check output is inspected.
output=$(bootc upgrade --check | sed -e 1q)
message_file="/etc/motd.d/upgrade-message"
bootc_auth="/etc/ostree/auth.json"

if [[ $output == Update\ available* ]]; then
  # An existing notice is left untouched, so the registry is only queried
  # the first time an update is seen.
  if [[ ! -f "$message_file" ]]; then
    echo "New version was found"
    # The image reference is the fourth whitespace-separated field of the line.
    bootc_image=$(awk '{print $4}' <<< "$output")

    # Pass the auth file to skopeo only when it exists. An array keeps the
    # option and its value as separate, correctly quoted words and expands to
    # nothing when empty.
    auth_params=()
    if [[ -f "$bootc_auth" ]]; then
      auth_params=(--authfile "$bootc_auth")
    fi

    # Read the image_version_id label; jq prints nothing when it is absent.
    image_version_id=$(skopeo inspect --format json "${auth_params[@]}" "$bootc_image" | jq -r '.Labels | .["image_version_id"] // empty')

    # Write the notice.
    cat > "$message_file" << EOF
** Attention! **
** A new $image_version_id version is available **
** In order to apply it run: bootc upgrade --apply
** Please note that the system will reboot after the upgrade **
EOF
  fi
else
  echo "No upgrade was found"
  # -f ignores a missing file, while real failures (e.g. permissions) are
  # still reported instead of being discarded.
  rm -f -- "$message_file"
fi

echo "Finished running upgrade informer"
145 changes: 145 additions & 0 deletions training/intel-bootc/duplicated/ilab-wrapper/ilab
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/bin/bash

# Print all arguments with echo, but on stderr instead of stdout.
echo-err() {
  >&2 echo "$@"
}

#######################################
# Ensure exactly one /etc/subuid range was found for the user.
# Arguments:
#   $1 - newline-separated subuid ranges matched for the user (may be empty)
#   $2 - user name, used only in the error messages
# Outputs:
#   Error text on stderr (through echo-err) when the check fails.
# Returns:
#   0 when exactly one range was given; otherwise the script exits with 1.
#######################################
verify_range() {
  # Keep the working variables local so they do not leak into the caller.
  local subuid_range="$1"
  local username="$2"
  local range_count=0

  # An empty string still counts as one line for `wc -l` on a here-string,
  # so it is handled separately.
  if [[ -n "$subuid_range" ]]; then
    range_count=$(wc -l <<<"$subuid_range")
  fi

  # Arithmetic comparison tolerates any whitespace padding in wc's output.
  if (( range_count == 0 )); then
    echo-err "No /etc/subuid range found for user $username ($UID)"
    exit 1
  elif (( range_count != 1 )); then
    # TODO: Handle multiple subuid ranges. But for now, hard fail
    echo-err "Multiple /etc/subuid ranges found for user $username ($UID), this is currently unsupported:"
    echo-err "$subuid_range"
    exit 1
  fi
}

#######################################
# Stop the wrapper on RHEL hosts that have neither an insights-client
# machine-id nor the ilab opt-out marker file.
# Outputs:
#   Connection instructions on stdout when the check fails.
# Returns:
#   0 when the check passes or the host's os-release ID is not "rhel";
#   otherwise the script exits with status 1.
#######################################
check_insights() {
  local ID

  # Either marker file is enough to skip the check.
  if [[ -f /etc/insights-client/machine-id || -f /etc/ilab/insights-opt-out ]]; then
    return 0
  fi

  # Load only the ID= assignment from os-release into the local variable.
  eval "$(grep ^ID= /etc/os-release)"
  case "$ID" in
    rhel) ;;
    *) return 0 ;;
  esac

  cat << EOF
This host is not connected to Red Hat Insights.
To connect this host to Red Hat Insights run the following command:
sudo rhc connect --organization <org_id> --activation-key <your_activation_key>
To generate an Activation Key:
https://console.redhat.com/insights/connector/activation-keys (this page will also display your Organization ID).
For more information on Red Hat Insights, please visit:
https://docs.redhat.com/en/documentation/subscription_central/1-latest/html/getting_started_with_activation_keys_on_the_hybrid_cloud_console/assembly-creating-managing-activation-keys
EOF
  exit 1
}

# Run the Insights check first; it exits the script when the check fails.
check_insights

# Placeholders substituted when the container image is built.
CONTAINER_DEVICE="__REPLACE_CONTAINER_DEVICE__"
IMAGE_NAME="__REPLACE_IMAGE_NAME__"

# A non-empty ILAB_HOME replaces HOME for the rest of the script.
[[ -z "$ILAB_HOME" ]] || HOME="$ILAB_HOME"

# Make sure the per-user directories exist under HOME.
for subdir in .cache .config .local; do
  mkdir -p "$HOME/$subdir"
done

# "shell" as the first argument starts bash with no arguments; anything else
# runs ilab with all arguments passed through.
case "$1" in
  shell)
    ENTRYPOINT=bash
    PARAMS=()
    ;;
  *)
    ENTRYPOINT="ilab"
    PARAMS=("$@")
    ;;
esac

# Extra volumes can be mounted into the container through the
# ILAB_ADDITIONAL_MOUNTS environment variable, a space-separated list of
# host:container pairs.
#
# Example:
#
#   ILAB_ADDITIONAL_MOUNTS="/host/path:/container/path /host/path2:/container/path2"
#
# Paths containing spaces can be quoted:
#
#   ILAB_ADDITIONAL_MOUNTS="/host/path:/container/path '/host/path with spaces':/container/path"
ADDITIONAL_MOUNTS=()
if [[ -n "${ILAB_ADDITIONAL_MOUNTS}" ]]; then
  # eval makes the shell honour the quoting the user put inside the variable,
  # which is what allows mounts with spaces in their paths.
  eval "ADDITIONAL_MOUNTS=(${ILAB_ADDITIONAL_MOUNTS})"
fi

# Turn every requested mount into a "-v <mount>" option pair.
ADDITIONAL_MOUNT_OPTIONS=()
for PODMAN_MOUNT in "${ADDITIONAL_MOUNTS[@]}"; do
  ADDITIONAL_MOUNT_OPTIONS+=("-v" "$PODMAN_MOUNT")
done

# Mount the pull secret too: the first auth.json found is used.
# A normal user has it under /run/user (XDG_RUNTIME_DIR), root under
# /run/containers.
AUTHFILE_CANDIDATES=(
  "${XDG_RUNTIME_DIR}/containers/auth.json"
  "/run/user/${UID}/containers/auth.json"
  "/run/containers/${UID}/auth.json"
)
for authfile in "${AUTHFILE_CANDIDATES[@]}"; do
  [[ -f "$authfile" ]] || continue
  ADDITIONAL_MOUNT_OPTIONS+=("-v" "$authfile:/run/containers/0/auth.json")
  break
done

# The container is started through sudo so that it can use root's container
# storage, where the ilab image has been pre-pulled. For security reasons
# UID 0 inside the container is mapped to the invoking user's UID, and the
# remaining UIDs to that user's /etc/subuid range, so the container
# effectively runs as the current user.
#
# Once there is a reasonable way for a regular user to access root's
# container storage, podman will be run as the current user instead.
IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=()
if [[ "$UID" != 0 ]]; then
  CURRENT_USER_NAME=$(id -un)

  # Collect "start:count" for every /etc/subuid entry whose first field is
  # the user's name or numeric UID.
  CURRENT_USER_SUBUID_RANGE=$(
    awk -F ':' \
      -v current_user="$CURRENT_USER_NAME" \
      -v current_uid="$UID" \
      '$1 == current_user || $1 == current_uid {print $2 ":" $3}' \
      /etc/subuid
  )

  verify_range "$CURRENT_USER_SUBUID_RANGE" "$CURRENT_USER_NAME"

  IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=("--uidmap" "0:$UID" "--uidmap" "1:$CURRENT_USER_SUBUID_RANGE")
fi
# When already running as root the array stays empty: no UID mapping needed.

# Environment variables that sudo must keep for podman.
PRESERVE_ENV="VLLM_LOGGING_LEVEL,NCCL_DEBUG,HOME,HF_TOKEN"

# Assemble the podman invocation piece by piece; the argument order is kept
# exactly as podman receives it.
PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it")
# UID mapping flags (empty when already running as root).
PODMAN_COMMAND+=("${IMPERSONATE_CURRENT_USER_PODMAN_FLAGS[@]}")
PODMAN_COMMAND+=("--device" "${CONTAINER_DEVICE}")
PODMAN_COMMAND+=("--security-opt" "label=disable" "--net" "host")
PODMAN_COMMAND+=("--shm-size" "10G")
PODMAN_COMMAND+=("--pids-limit" "-1")
# The user's home is mounted at the same path inside the container.
PODMAN_COMMAND+=("-v" "$HOME:$HOME")
PODMAN_COMMAND+=("${ADDITIONAL_MOUNT_OPTIONS[@]}")
PODMAN_COMMAND+=("--env" "VLLM_LOGGING_LEVEL")
PODMAN_COMMAND+=("--env" "HOME")
PODMAN_COMMAND+=("--env" "NCCL_DEBUG")
PODMAN_COMMAND+=("--entrypoint" "$ENTRYPOINT")
PODMAN_COMMAND+=("--env" "HF_TOKEN")
PODMAN_COMMAND+=("${IMAGE_NAME}")

# Replace this shell with the container process, passing the user's arguments.
exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}"

0 comments on commit 1b55651

Please sign in to comment.