#!/bin/bash
# Mirror of https://github.com/kneutron/ansitest.git (synced 2025-01-16 04:42:55 +08:00)

# REF: https://github.com/kneutron/ansitest/blob/master/proxmox/HOWTO-make-a-file-backed-backup-of-proxmox-rpool.docx

# 2024.Jun kneutron

# Objective: Bare-metal restore of a Proxmox ZFS boot/root disk from a mirror file over sshfs, then reboot into it (Disaster Recovery)
# Useful for P2V or if both disks in the mirror failed

# NOTE - proxmox-backup-zfs-bootroot.sh should have been run beforehand!!

# Tested with "systemrescuecd + zfs" ISO
# https://github.com/nchevsky/systemrescue-zfs/releases

# NOTE - this is a PROOF OF CONCEPT script and is NOT meant to be run blindly!
# You need to EDIT IT before running!

# MAKE SURE you specify the correct destination disk and other info! Author takes NO RESPONSIBILITY for data loss!!

# This should hopefully get your full environment back online and running in a minimum amount of time :)

# IT IS HIGHLY RECOMMENDED to test this process in a VM first before you rely on it!
# NOTE if restoring to a VM, you should turn off Secure Boot in the VM BIOS

# TODO EDITME before running!

# Local directory on this (rescue) system where the remote share will be mounted
destdir="/mnt/macpro-sgtera2"

# Directory on the remote side - this is where the mirror backup file lives
# TODO add a subdir here for different systems
sshfsmountthis="/Volumes/sgtera2"

# Login used for the sshfs connection
loginid="dave"

# Backup server hostname or IP; IP is better for systemrescue environment
destserver="172.16.25.12" # macpro-static

# NOTE this **needs** to be the correct disk!
# Default is set to be a proxmox restore-VM with a SCSI disk; virtio destination disk would be vda
# Use /dev/disk/by-id to identify the right disk, but enter its short kernel
# name here (sda, vda, ...): the script builds /dev/${zfsroot}, /dev/${zfsroot}2
# and /dev/${zfsroot}3 from this value.
# ALL DATA ON THIS DISK WILL BE OVERWRITTEN FOR THE RESTORE PROCESS - YOU HAVE BEEN WARNED!
# TODO EDITME
zfsroot="sda"

# failexit.mrg
#
# Print a failure message and terminate the script.
# Arguments:
#   $1 - exit code (also shown in the message)
#   $2 - optional human-readable description
# Outputs: the failure message, on stderr
function failexit () {
  # ${2:-} keeps the optional description safe once `set -u` is active
  echo '! Something failed! Code: '"$1 ${2:-}" >&2
  exit "$1"
}

# From here on, expanding an undefined variable aborts the script
set -o nounset

# Takes only 1 arg, use in loop if multiple
# Install a package with apt-get unless a command of that name is already available.
# Arguments:
#   $1 - command name; the same string is used as the apt package name
# Returns: 0 when the command already exists, otherwise apt-get's exit status
function install-if-missing () {
  # `command -v` is a shell builtin, so the check does not depend on `which` being installed
  command -v "$1" >/dev/null 2>&1 || apt-get install -y "$1"
}

# NOTE this will not work on systemrescuecd, as its not Debian
# You can make your own rescue environment simply by installing Debian+ZFS to usb drive, and add any necessary recovery packages
install-if-missing sshfs

mkdir -pv "$destdir"

# Mount the remote backup directory over sshfs, unless it is already mounted.
# grep -F treats the path as a literal string rather than a regex.
if ! df -T | grep sshfs | grep -qF -- "$destdir"; then
  echo "Mounting $destdir on $destserver"
  # TODO change this to samba, nfs, whatever works for you
  # TODO ssh-copy-id for passwordless access
  sshfs -o Ciphers=chacha20-poly1305@openssh.com \
    "$loginid@$destserver:$sshfsmountthis" "$destdir"
fi

# Whatever the transport, something must be mounted at $destdir before we go on
if ! df -T | grep -qF -- "$destdir"; then
  failexit 40 "$destdir is not mounted, cannot proceed"
fi

# Everything below works relative to the backup directory ($PWD is later handed
# to `zpool import -d`), so a failed cd must stop the script.
cd "$destdir" || failexit 41 "Cannot change directory to $destdir"

echo "
Currently available restore mirror disks:"
ls -lh -- *disk

# TODO EDITME - this is the restore/mirror file
mirfile="proxmox-rpool-mirror-41.5G-zfs-efi.disk"

echo "
Selected restore disk:
$mirfile"
echo ''
echo "Hit Enter to proceed with this default, ^C to abort, or input another filename here to use:"
# -r: take the typed filename literally (no backslash processing)
read -r infile

[ -z "$infile" ] || mirfile=$infile
[ -e "$mirfile" ] || failexit 44 "Cannot find $PWD/$mirfile"


# Compressed dd image of the EFI partition that belongs to the mirror file above
efifile="dd-efi-part2-rpool-mirror-41.5G-zfs.dd.gz"
ls -lh -- dd*gz
echo ''
echo "Selected EFI restore disk:
$efifile"
echo "Hit Enter to proceed with this, or input another filename here to use:"
read -r infile

[ -z "$infile" ] || efifile=$infile
[ -e "$efifile" ] || failexit 45 "Cannot find $PWD/$efifile"

# Drop any earlier import of rpool so the script can be re-run; the result is
# deliberately not checked (presumably it just errors when rpool is not imported).
zpool export -f rpool # if $0 run >1

echo ''
echo "$(date) - Attempting to import rpool from $PWD - NOTE THIS MAY NOT WORK if there are multiple restore disks in the same dir"
zpool import -f -N -d "$PWD" rpool # without mounting any datasets

#Breakdown of options:
# -f # Force - likely necessary since we offlined this disk and it wasn't "exported properly"
# -N # Do not mount zfs datasets, just import this pool
# -d $PWD # Look in the current directory for the pool disk(s)

zpool status -v rpool | awk 'NF>0'

# Ask for rpool by name: comparing `zpool list` output against "no pools available"
# would miss a failed import whenever any other pool happens to be imported.
zpool list rpool >/dev/null 2>&1 || failexit 50 "Unable to find / import rpool! Cannot continue"

# First rpool device that zpool reports as UNAVAIL - presumably the original
# physical mirror member, which does not exist on this system (TODO confirm).
guessdisk=$(zpool status -v rpool | awk '/UNAVAIL/ {print $1; exit}')

# if no disk needs detaching, we skip this
if [ -n "$guessdisk" ]; then

  echo "Guessing disk to detach from rpool:
$guessdisk"

  echo "Hit Enter to proceed with this, or input another disk identifier here to use:"
  echo "PROTIP - run $0 from GNU screen or tmux so you can use the keyboard to copypasta"
  read -r indisk

  [ -z "$indisk" ] || guessdisk=$indisk

  # Literal (-F) match: the identifier is user-supplied text, not a regex
  zpool status -v rpool | grep -qF -- "$guessdisk" \
    || failexit 60 "$guessdisk does not appear to be in the rpool"

  echo "Detaching $guessdisk from rpool"
  zpool detach rpool "$guessdisk"
fi

# Remember, "rpool" is basically mounted over the network at this point
# We still need to mirror it to local disk and restore EFI partition so it (hopefully) boots

# NOTE nothing can be actively mounted on the destination disk - including swap
echo ''
echo "$(date) - Creating new partition table on /dev/$zfsroot - ALL EXISTING DATA WILL BE ERASED"

time parted -s "/dev/$zfsroot" mklabel gpt || failexit 99 "Failed to create fresh GPT partition table on /dev/$zfsroot"

echo ''
echo "Recreating boot/root partitions on /dev/$zfsroot"

# TODO change from 512 to 1G for "standard" proxmox efi partition
# The -n options are sizes, and the -t sets the filesystem type.
#-n 2:0:+1G \
sgdisk -g \
  -n 1:0:+1M \
  -n 2:0:+512M \
  -n 3:0:0 \
  -t 1:8300 \
  -t 2:EF00 \
  -t 3:BF01 \
  -p "/dev/$zfsroot" || failexit 100 "Failed to create partitions on /dev/$zfsroot"

time sync; sleep 1
# Give udev a chance to create the partition nodes, where udevadm is available
if command -v udevadm >/dev/null 2>&1; then
  udevadm settle
fi
gdisk -l "/dev/$zfsroot"

# If the partition node is missing, dd would create a regular file with that
# name instead of writing to the disk - stop before that can happen.
[ -b "/dev/${zfsroot}2" ] || failexit 100 "/dev/${zfsroot}2 does not exist as a block device after partitioning"

echo ''
echo "$(date) - Restoring EFI to /dev/${zfsroot}2"
# pipefail (scoped to this subshell) makes a gzip failure - e.g. a corrupt
# image - fail the restore, instead of only dd's exit status being checked.
( set -o pipefail
  time gzip -cd -- "$efifile" | dd of="/dev/${zfsroot}2" bs=1M status=progress
) || failexit 101 "Failed to restore EFI partition"

# NOTE I have not bothered to re-size the ZFS partition on the replacement disk
# to take advantage of any increased disk space

# Tips for resizing here:
# REF: https://sirlagz.net/2023/07/03/updated-live-resize-lvm-on-linux/

#zpool attach rpool /mnt/macpro-sgtera2/rpool-mirror-64gig-thumbdrive-zfs-efi.disk \
# ata-Samsung_Portable_SSD_T5_S49WNV0MC04217F-part3

echo ''
echo "$(date) - Beginning restore process"
zpool labelclear "/dev/${zfsroot}3" # shouldnt be necessary, but why not

# Attach the local ZFS partition as a mirror of the file-backed vdev
time zpool attach rpool "$destdir/$mirfile" \
  "/dev/${zfsroot}3" || failexit 102 "Failed to attach physical disk ${zfsroot}3 to rpool $mirfile"

# Poll a pool until `zpool status` no longer reports anything "in progress",
# redrawing the progress lines and per-vdev I/O statistics every 5 seconds,
# then print the final pool status and a start/completion summary.
# Arguments:
#   $1 - pool name (optional, defaults to rpool)
# Outputs: progress screens and the summary on stdout
function watchresilver () {
  local pool=${1:-rpool}
  local sdate ndate
  sdate=$(date)

  # do forever
  while :; do
    clear
    echo "Pool: $pool - NOW: $(date) -- Watchresilver started: $sdate"

    # grep prints the progress lines; when none are left, stop polling
    zpool status "$pool" | grep -A 2 'in progress' || break
    zpool iostat -v "$pool" #2 3 &
#    zpool iostat -T d -v "$pool" 2 3 & # with timestamp
    sleep 5
    date
  done

  ndate=$(date)

  zpool status -v "$pool" | awk 'NF>0' # skip blank lines
  echo "o Resilver watch $pool start: $sdate // Completed: $ndate"
}

# Block until the resilver onto the local disk has finished
watchresilver

echo "Now we need to take the file-backed copy offline, leaving it intact for later use if
needed - NOTE you do NOT want to issue a detach or a zpool split at this
point because this is an rpool, and a detach will not guarantee usable data
on the detached disk. We just offline it, and reboot to get out from under it
(so to speak.)"

# Subshell so that command tracing is switched on for this one command only
(set -x
zpool offline rpool "$destdir/$mirfile"
)

# reboot

# Post-reboot instructions for the operator. $destdir and $mirfile are expanded
# so the detach command can be copied verbatim.
# The first import command names the pool: `zpool import -f` with no pool name
# (and no -a) only lists importable pools and imports nothing.
echo ''
cat <<EOF

After rebooting, you will get dumped into the initramfs because ZFS has
temporarily lost its mind and can't import from the cache file properly.

Easy fix:

(initramfs) zpool import -f rpool
Then hit Control+D, boot process resumes and should survive further reboots.

If the boot process does NOT continue, hard reboot and issue:

(initramfs) rm -fv /etc/zfs/zpool.cache
(initramfs) zpool import -a -f -d /dev/disk/by-id

Then hit ^D to continue the boot process. Further reboots should now work OK.

Finally, without the target/backup $destdir being mounted, you should detach the file-backed copy
from the rpool to get it out of DEGRADED state:

# zpool detach rpool $destdir/$mirfile

And you should be back to a bootable single-disk Proxmox rpool.
(Its probably a good idea to write these instructions down.)

Refer to the online reference if you want to attach a mirror to a single-disk rpool:

https://pve.proxmox.com/pve-docs/pve-admin-guide.html#sysadmin_zfs_change_failed_dev

https://www.reddit.com/r/Proxmox/comments/spbdlw/how_to_add_to_proxmox_ve_bootmirrored_zfs_disks/


EOF

date
echo "$0 DONE"

exit