Skip to content

[SOW MS3] Centos stream9 PyTorch image support #1090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .circleci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
fi
fi

# Choose which Dockerfile in the image directory to build from: image names
# containing "centos9" use the CentOS Stream variant, all others the default.
case "$image" in
  *centos9*)
    DOCKERFILE_NAME="Dockerfile.centos.stream"
    ;;
  *)
    DOCKERFILE_NAME="Dockerfile"
    ;;
esac

# Build image
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
# it's no longer needed.
Expand Down Expand Up @@ -330,7 +336,7 @@ docker build \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx900;gfx906}" \
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-f $(dirname ${DOCKERFILE})/${DOCKERFILE_NAME} \
-t "$tmp_tag" \
"$@" \
.
Expand Down
107 changes: 107 additions & 0 deletions .circleci/docker/centos-rocm/Dockerfile.centos.stream
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Image definition for the CentOS Stream ROCm build environment.
# CENTOS_VERSION selects the "stream<N>" tag of the base image.
ARG CENTOS_VERSION

FROM quay.io/centos/centos:stream${CENTOS_VERSION}


# Set AMD gpu targets to build for
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH}"

# Install required packages to build Caffe2

# Install common dependencies (so that this step can be cached separately)
ARG EC2
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install langpack (provides the en_US locale selected via LANG/LC_ALL below)
RUN yum install -y glibc-langpack-en

# Update CentOS git version
RUN yum -y remove git
# Quote the pattern so yum, not the shell, expands it against package names.
RUN yum -y remove 'git-*'
RUN yum install -y git

# Install rpmdevtools
RUN dnf install -y rpmdevtools
# Non-interactive bash shells source the file named by BASH_ENV at startup.
ENV BASH_ENV="/etc/profile"

# Install ninja (package comes from the crb repository)
RUN dnf --enablerepo=crb install -y ninja-build

# (optional) Install non-default glibc version
ARG GLIBC_VERSION
COPY ./common/install_glibc.sh install_glibc.sh
RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
RUN rm install_glibc.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ENV PATH=/opt/conda/bin:$PATH
ARG ANACONDA_PYTHON_VERSION
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
RUN rm /opt/conda/requirements-ci.txt

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF="${PROTOBUF}"

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB="${DB}"

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh install_vision.sh
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION="${VISION}"

# Install rocm
ARG ROCM_VERSION
COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
# Each ENV prepends one directory, so the last one listed is searched first.
ENV PATH=/opt/rocm/bin:$PATH
ENV PATH=/opt/rocm/hcc/bin:$PATH
ENV PATH=/opt/rocm/hip/bin:$PATH
ENV PATH=/opt/rocm/opencl/bin:$PATH
ENV PATH=/opt/rocm/llvm/bin:$PATH
ENV MAGMA_HOME=/opt/rocm/magma
ENV LANG=en_US.utf8
ENV LC_ALL=en_US.utf8

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH=/opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT="${BUILD_ENVIRONMENT}"

USER jenkins
CMD ["bash"]
27 changes: 22 additions & 5 deletions .circleci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,21 @@ install_ubuntu() {
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release
# extras repo is not there for CentOS 9 and epel-release is already part of repo list
if [[ $OS_VERSION == 9 ]]; then
yum install -y epel-release
ALLOW_ERASE="--allowerasing"
else
yum --enablerepo=extras install -y epel-release
ALLOW_ERASE=""
fi

ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
numpy_deps="gcc-gfortran"
# Note: protobuf-c-{compiler,devel} on CentOS are too old to be used
# for Caffe2. That said, we still install them to make sure the build
# system opts to build/use protoc and libprotobuf from third-party.
yum install -y \
yum install -y $ALLOW_ERASE \
$ccache_deps \
$numpy_deps \
autoconf \
Expand All @@ -91,22 +98,31 @@ install_centos() {
glog-devel \
hiredis-devel \
libstdc++-devel \
libsndfile-devel \
make \
opencv-devel \
sudo \
wget \
vim

if [[ $OS_VERSION == 9 ]]
then
dnf --enablerepo=crb -y install libsndfile-devel
else
yum install -y \
opencv-devel \
libsndfile-devel
fi

# Cleanup
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')

# Install base packages depending on the base OS
case "$ID" in
ubuntu)
install_ubuntu
Expand All @@ -121,6 +137,7 @@ case "$ID" in
esac

# Install Valgrind separately since the apt-get version is too old.
# WGET_FLAG holds an extra wget option: "--no-check-certificate" (skip TLS
# certificate verification) on CentOS 7 only, and an empty string otherwise.
# NOTE(review): the wget call below does not reference WGET_FLAG, so as far as
# this section shows the flag has no effect — confirm whether it was meant to
# be passed to wget or is consumed elsewhere.
if [[ $ID == centos && $OS_VERSION == 7 ]]; then WGET_FLAG="--no-check-certificate" ; else WGET_FLAG=""; fi
mkdir valgrind_build && cd valgrind_build
VALGRIND_VERSION=3.16.1
wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
Expand Down
20 changes: 16 additions & 4 deletions .circleci/docker/common/install_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,23 @@ install_ubuntu() {
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release
if [[ $OS_VERSION == 9 ]]; then
yum install -y epel-release
else
yum --enablerepo=extras install -y epel-release
fi

yum install -y \
hiredis-devel \
leveldb-devel \
lmdb-devel \
snappy-devel
leveldb-devel

if [[ $OS_VERSION == 9 ]]; then
dnf --enablerepo=crb -y install lmdb-devel snappy-devel
else
yum install -y \
lmdb-devel \
snappy-devel
fi

# Cleanup
yum clean all
Expand All @@ -33,6 +43,8 @@ install_centos() {
rm -rf /var/lib/yum/history
}

OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
Expand Down
45 changes: 37 additions & 8 deletions .circleci/docker/common/install_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ ver() {
}

# Map ROCm version to AMDGPU version
declare -A AMDGPU_VERSIONS=( ["4.5.2"]="21.40.2" ["5.0"]="21.50" ["5.1.1"]="22.10.1" )
declare -A AMDGPU_VERSIONS=( ["4.5.2"]="21.40.2" ["5.0"]="21.50" ["5.1.1"]="22.10.1" ["5.2"]="22.20" )

install_ubuntu() {
apt-get update
Expand Down Expand Up @@ -101,14 +101,24 @@ install_centos() {
yum update -y
yum install -y kmod
yum install -y wget
yum install -y openblas-devel

if [[ $OS_VERSION == 9 ]]; then
dnf install -y openblas-serial
dnf install -y dkms kernel-headers kernel-devel
else
yum install -y openblas-devel
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
fi

yum install -y epel-release
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`

if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
# Add amdgpu repository
local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
if [[ $OS_VERSION == 9 ]]; then
local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
else
local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
fi
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
Expand All @@ -117,23 +127,40 @@ install_centos() {
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
fi

local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
# Pick the base URL that is written into /etc/yum.repos.d/rocm.repo below.
# NOTE(review): for OS version 9 this is the literal string "invalid-url",
# which is not a usable repository location — it looks like a placeholder.
# Confirm the intended ROCm repository URL for this OS before relying on it.
if [[ $OS_VERSION == 9 ]]; then
local rocm_baseurl="invalid-url"
else
local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}/main"
fi
echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo

yum update -y

yum install -y \
if [[ $OS_VERSION == 9 ]]; then
yum update -y --nogpgcheck
dnf --enablerepo=crb install -y perl-File-BaseDir
yum install -y --nogpgcheck rocm-ml-sdk rocm-developer-tools
else
yum update -y
yum install -y \
rocm-dev \
rocm-utils \
rocm-libs \
rccl \
rocprofiler-dev \
roctracer-dev
fi

# if search fails it will abort this script; use true to avoid case where search fails
MIOPENKERNELS=$(yum -q search miopenkernels | grep miopenkernels- | awk '{print $1}'| grep -F kdb. || true)
if [[ "x${MIOPENKERNELS}" = x ]]; then
echo "miopenkernels package not available"
else
yum install -y ${MIOPENKERNELS}
fi

install_magma

Expand All @@ -144,6 +171,8 @@ install_centos() {
rm -rf /var/lib/yum/history
}

OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')

# Install Python packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
Expand Down
15 changes: 10 additions & 5 deletions .circleci/docker/common/install_vision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ install_ubuntu() {
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release

yum install -y \
opencv-devel \
ffmpeg-devel
if [[ $OS_VERSION == 9 ]]; then
yum install -y epel-release
else
yum --enablerepo=extras install -y epel-release
yum install -y \
opencv-devel \
ffmpeg-devel
fi

# Cleanup
yum clean all
Expand All @@ -29,6 +32,8 @@ install_centos() {
rm -rf /var/lib/yum/history
}

OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
Expand Down