Server: Intel Xeon E5-2680 v4
RAM: 128 GB
Motherboard: Supermicro X10DRU-i+
Hypervisor: Proxmox VE 8.4.8
Kernel: 6.8.12-13-pve
GPU: AMD MI50 (Vega 20)
PCI-Adresse: 08:00.0
VM: Ubuntu 22.04.5 Server, 10 Cores, 32 GB RAM, MI50, 200 GB NVMe
nano /etc/default/grub
GRUB_CMDLINE_LINUX_DEFAULT="quiet intel_iommu=on"
oder
GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on"
update-grub
nano /etc/kernel/cmdline
intel_iommu=on
oder
amd_iommu=on
oder mit ACS-Override (falls die IOMMU-Gruppen nicht sauber getrennt sind):
intel_iommu=on pcie_acs_override=downstream,multifunction
oder
amd_iommu=on pcie_acs_override=downstream,multifunction
proxmox-boot-tool refresh
nano /etc/modules
vfio
vfio_iommu_type1
vfio_pci
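# vfio_virqfd ist ab Kernel 6.2 in vfio integriert; die folgende Zeile ist nur für ältere Kernel nötig
vfio_virqfd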
lspci | grep -i amd | grep -i vga
lspci -n -s 08:00 -v
echo "options vfio_iommu_type1 allow_unsafe_interrupts=1" > /etc/modprobe.d/iommu_unsafe_interrupts.conf
echo "options kvm ignore_msrs=1" > /etc/modprobe.d/kvm.conf
echo "blacklist amdgpu" >> /etc/modprobe.d/blacklist.conf
echo "options vfio-pci ids=1002:66a3 disable_vga=1" > /etc/modprobe.d/vfio.conf
update-initramfs -u -k all
reboot
apt update
apt install -y git dkms build-essential
apt install -y pve-headers-$(uname -r)
cd /tmp
git clone https://github.com/gnif/vendor-reset.git
cd vendor-reset
sudo dkms remove vendor-reset/0.1.1 --all 2>/dev/null || true
sudo rm -rf /usr/src/vendor-reset-0.1.1
sudo rm -rf /var/lib/dkms/vendor-reset
sudo dkms install .
echo "vendor-reset" | sudo tee -a /etc/modules
sudo cp udev/99-vendor-reset.rules /etc/udev/rules.d/
sudo update-initramfs -u -k all
reboot
wget https://repo.radeon.com/amdgpu-install/6.4.3/ubuntu/jammy/amdgpu-install_6.4.60403-1_all.deb
sudo apt install ./amdgpu-install_6.4.60403-1_all.deb
sudo apt update
sudo apt install python3-setuptools python3-wheel
sudo usermod -a -G render,video $LOGNAME
sudo apt install rocm
rocm-smi
Erwartetes Ergebnis (Beispiel):
Device: MI50
Temp: ~50°C
Power: ~20-40W
VRAM: 16GB
Wenn alles korrekt läuft: