Please follow the instructions in https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html to install Intel oneAPI Base Toolkit on the management node according to your own needs.
Please follow the instructions in https://software.intel.com/content/www/us/en/develop/tools/oneapi/hpc-toolkit.html to install Intel oneAPI HPC Toolkit on the management node according to your own needs.
Before installing the AI Analytics Toolkit, make sure the Intel oneAPI Base Toolkit is installed. Please follow the instructions in https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit.html to install Intel oneAPI AI Analytics Toolkit on the management node according to your own needs.
Run the following commands to initialize modulefiles for LiCO on the management node:
# where is the intel oneapi installed, /opt/intel/oneapi is the default path
ONEAPI_PATH="/opt/intel/oneapi"
source $ONEAPI_PATH/setvars.sh
bash $ONEAPI_PATH/modulefiles-setup.sh
Please create a shared directory based on the parent directory of the oneAPI installation path (taking /opt/intel as an example):
Step 1. Share /opt/intel from the management node for Intel oneAPI:
xxxxxxxxxx
echo "/opt/intel *(rw,no_subtree_check,no_root_squash)" >> /etc/exports
exportfs -a
Step 2. Configure the shared directory:
xxxxxxxxxx
# IP address of management node in the compute intranet
MANAGER_NODE_IP="192.168.0.1"
nodeshell all "echo '${MANAGER_NODE_IP}:/opt/intel /opt/intel nfs nfsvers=4.0,nodev,noatime \
0 0' >> /etc/fstab"
Step 3. Mount the shared directory:
xxxxxxxxxx
nodeshell all mkdir -p /opt/intel
nodeshell all mount /opt/intel
Step 4. Change the write permission for socwatch:
xxxxxxxxxx
cd /opt/intel/oneapi/vtune/latest/socwatch
chmod 777 x64
Add the LiCO module path on the login and compute nodes.
xxxxxxxxxx
# where is the intel oneapi installed, /opt/intel/oneapi is the default path
ONEAPI_PATH="/opt/intel/oneapi"
nodeshell all "sed -i s#/opt/ohpc/pub/modulefiles#/opt/ohpc/pub/modulefiles:$ONEAPI_PATH/modulefiles#g /etc/profile.d/lmod.sh"
nodeshell all "sed -i s#/opt/ohpc/pub/modulefiles#/opt/ohpc/pub/modulefiles:$ONEAPI_PATH/modulefiles#g /etc/profile.d/lmod.csh"
nodeshell all "source /etc/profile.d/lmod.sh"
Run the following commands on the management node:
xxxxxxxxxx
# where is the intel oneapi installed, /opt/intel/oneapi is the default path
ONEAPI_PATH="/opt/intel/oneapi"
sed -i s#/opt/ohpc/pub/modulefiles#/opt/ohpc/pub/modulefiles:$ONEAPI_PATH/modulefiles#g /etc/profile.d/lmod.sh
sed -i s#/opt/ohpc/pub/modulefiles#/opt/ohpc/pub/modulefiles:$ONEAPI_PATH/modulefiles#g /etc/profile.d/lmod.csh
source /etc/profile.d/lmod.sh
sed -i s#/opt/ohpc/pub/modulefiles#/opt/ohpc/pub/modulefiles:$ONEAPI_PATH/modulefiles#g /etc/lico/lico.ini.d/template.ini
lico lmod_sync
xxxxxxxxxx
vi /etc/lico/lico.ini.d/oneapi.ini
INTEL_MODULE_PATH = "<oneAPI install dir>"
ENABLE = true
xxxxxxxxxx
lico init
xxxxxxxxxx
# add two kernel parameters in /etc/sysctl.conf on the compute nodes
nodeshell compute "echo 'kernel.kptr_restrict=0' >> /etc/sysctl.conf"
nodeshell compute "echo 'kernel.perf_event_paranoid=0' >> /etc/sysctl.conf"
nodeshell compute sysctl -p /etc/sysctl.conf
xxxxxxxxxx
vim /opt/intel/oneapi/vtune/latest/backend/config.yml
# change the type from passphrase to reverse-proxy
#type:passphrase
type: reverse-proxy
header: Authorization
Run the following commands on the management node (nodeshell executes them on the compute nodes):
xxxxxxxxxx
# where is the intel oneapi installed, /opt/intel/oneapi is the default path
ONEAPI_PATH="/opt/intel/oneapi"
nodeshell compute "cd ${ONEAPI_PATH}/vtune/latest/sepdk/src && sudo ./rmmod-sep"
nodeshell -c 1 compute "cd ${ONEAPI_PATH}/vtune/latest/sepdk/src && sudo ./build-driver -ni"
# The following errors may be reported:
# c3: ERROR: kernel source directory "/usr/src/linux-4.18.0-305.3.1.el8.x86_64" either does not exist or not a valid kernel source directory.
# c3:
# c3: Please use the following command to install kernel header on CentOS:
# c3: yum install kernel-devel-4.18.0-305.3.1.el8.x86_64
#
# If an error occurs, install the corresponding program on the corresponding node as prompted:
# eg: nodeshell c3 "sudo yum install -y kernel-devel-4.18.0-305.3.1.el8.x86_64"
nodeshell compute "cd ${ONEAPI_PATH}/vtune/latest/sepdk/src && sudo ./insmod-sep"
nodeshell compute "cd ${ONEAPI_PATH}/vtune/latest/sepdk/src && sudo ./boot-script -i -g vtune"
nodeshell compute "sed -i 's#^After.*#& network.target\nRequiresMountsFor=${ONEAPI_PATH}#g' /usr/lib/systemd/system/sep5.service"
Run the following commands on the management node to install dstat on the compute nodes:
xxxxxxxxxx
nodeshell compute "dnf install -y dstat"
Run the following commands on the management node:
xxxxxxxxxx
dnf install sqlite
Run the following commands on the management node to check if the installation is successful:
xxxxxxxxxx
# The output may be /opt/intel/oneapi/intelpython/latest/bin/mpirun
which mpirun
# The output may be /opt/intel/oneapi/intelpython/latest/bin/mpitune
which mpitune
# The output may be /opt/intel/oneapi/mpi/2021.1.1/bin/mpiicc
which mpiicc
Run the following commands on the management node to check whether the module is successfully configured:
xxxxxxxxxx
# The output contains /opt/intel/oneapi/modulefiles information
module ava
Run the following commands on the management node to check whether Intel Python is installed:
xxxxxxxxxx
[root@head ~]# source /opt/intel/oneapi/setvars.sh
[root@head ~]# conda env list
# conda environments:
#
base * /opt/intel/oneapi/intelpython/latest
2022.1.0 /opt/intel/oneapi/intelpython/latest/envs/2022.1.0
modin /opt/intel/oneapi/intelpython/latest/envs/modin
modin-0.13.3 /opt/intel/oneapi/intelpython/latest/envs/modin-0.13.3
pytorch /opt/intel/oneapi/intelpython/latest/envs/pytorch
pytorch-1.10.0 /opt/intel/oneapi/intelpython/latest/envs/pytorch-1.10.0
tensorflow /opt/intel/oneapi/intelpython/latest/envs/tensorflow
tensorflow-2.8.0 /opt/intel/oneapi/intelpython/latest/envs/tensorflow-2.8.0
Run the following commands on the management node to check whether the Intel driver is installed.
# The output might be the following:
# c1: sep5 2793472 0
# c1: socperf3 602112 1 sep5
# c2: sep5 2793472 0
# c2: socperf3 602112 1 sep5
nodeshell compute "lsmod |grep sep"