ansitest/ZFS/zfs-mk-draid-90drives.sh

927 lines
51 KiB
Bash
Raw Normal View History

2021-07-03 23:12:31 -05:00
#!/bin/bash
# Create a ZFS dRAID test pool, or (depending on arg1) reset it / simulate
# a disk failure:
#   (no args)        create the pool and its datasets
#   reset            destroy the test pool and clear the disk labels
#   fail <dev2fail>  take disk <dev2fail> offline to simulate a failure
echo "$0 - 2021 Dave Bechtel - make a ZFS DRAID pool"
echo "- pass arg1='reset' to destroy test pool"
echo "- pass arg1='fail' and arg2=dev2fail to simulate failure"

# Requires at least zfs 2.1.0

# Directory of persistent device names; used when reporting which disk is
# being failed and in the closing "zpool import" hint.
DBI=/dev/disk/by-id

# total disks for pool / children
# NOTE(review): the pooldisksNN lists in this file add up to 14 x 6 = 84
# devices, not 82 - confirm this value before using the single-vdev layout.
td=82 # 90 - spares and rootdisk

# raidz level (usually 2)
rzl=2

# spares
spr=1

# TODO EDITME
# Name of the pool to create / destroy.
zp=zdraidtest
# zps - show verbose pool status with empty lines removed.
# Outputs: the `zpool status -v` text on stdout, minus any line that has
#          no fields (blank or whitespace-only).
zps() {
  zpool status -v | awk 'NF'
}
# Device lists for the pool, in groups of 6 short /dev/sdX names.
# Each pooldisksNN is a space-separated string (later expanded unquoted on
# purpose so it word-splits into one argument per disk).
#
# Earlier, smaller layouts kept for reference:
#pooldisks=$(echo /dev/sd{b..y})
#pooldisks1=$(echo /dev/sd{b..m})
#pooldisks2=$(echo /dev/sd{n..y})
#pooldisks=$pooldisks1' '$pooldisks2 # need entire set for reset

pooldisks01=$(echo /dev/sd{b..g}) # a is rootdisk
pooldisks02=$(echo /dev/sd{h..m})
pooldisks03=$(echo /dev/sd{n..s})
pooldisks04=$(echo /dev/sd{t..y}) # z is spare
pooldisks05=$(echo /dev/sda{a..f}) #abcdef
pooldisks06=$(echo /dev/sda{g..l}) #ghijkl
pooldisks07=$(echo /dev/sda{m..r}) #mnopqr
pooldisks08=$(echo /dev/sda{s..x}) #stuvwx # yz are spares
pooldisks09=$(echo /dev/sdb{a..f}) #abcdef
pooldisks10=$(echo /dev/sdb{g..l}) #ghijkl
pooldisks11=$(echo /dev/sdb{m..r}) #mnopqr
pooldisks12=$(echo /dev/sdb{s..x}) #stuvwx # yz are spares
pooldisks13=$(echo /dev/sdc{a..f}) #abcdef
pooldisks14=$(echo /dev/sdc{g..l}) #ghijkl # no idle spares, only virtual

# Every pool disk in one list (14 groups x 6 = 84 devices).
# need entire set for reset
#pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3' '$pooldisks4 # need entire set for reset
pooldisks="$pooldisks01 $pooldisks02 $pooldisks03 $pooldisks04 $pooldisks05 $pooldisks06"
pooldisks="$pooldisks $pooldisks07 $pooldisks08 $pooldisks09 $pooldisks10 $pooldisks11"
pooldisks="$pooldisks $pooldisks12 $pooldisks13 $pooldisks14"

# sdb sdc sdd sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sds sdt sdu sdv sdw sdx sdy
# 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6
# D D D Z2 Z2 S
# extending to 32 disks
#pooldisks2=$(echo /dev/sda{a..h})
#sdaa sdab sdac sdad sdae sdaf sdag sdah
# failexit.mrg
# failexit - report a failure and terminate the current (sub)shell.
# Arguments: $1 - exit code (defaults to 1 when omitted, so a failure can
#                 never be reported as success)
#            $2 - optional description
# Outputs:   one line on stdout: "! Something failed! Code: <code> <desc>"
# Returns:   does not return; exits with the given code.
failexit() {
  local code="${1:-1}"
  local desc="${2:-}"
  echo '! Something failed! Code: '"$code $desc" # code # (and optional description)
  exit "$code"
}
# "reset" mode: destroy the test pool, clear the ZFS label on partition 1
# of every pool disk, show the resulting status and stop.
case "$1" in
  reset)
    zpool destroy $zp
    for disk in $pooldisks; do
      # trailing \r rewrites the same terminal line for each disk
      printf '%b' "o Clearing label for disk $disk \r"
      zpool labelclear -f "${disk}1"
    done
    printf '\n'
    zpool status -v
    exit # early
    ;;
esac
# "fail" mode: simulate the failure of one disk by writing "offline" to its
# sysfs device state, then force a write to the pool and show its status.
# arg2 is the disk to fail; either "sdX" or "/dev/sdX" is accepted.
if [ "$1" = "fail" ]; then
  dev2fail="${2##*/}" # strip any leading directory such as /dev/
  state_file="/sys/block/$dev2fail/device/state"

  # Refuse to continue without a usable target instead of writing to a
  # bogus sysfs path.
  if [ -z "$dev2fail" ] || [ ! -e "$state_file" ]; then
    echo "! arg2 must name an existing disk (e.g. sdd); got: '$2'" >&2
    exit 2
  fi

  echo "$(date) - Simulating disk failure for $(ls -l "$DBI" | grep -- "$dev2fail")"
  echo offline > "$state_file"
  cat "$state_file"

  # force a write; if not work, try scrub
  time dd if=/dev/urandom of="/$zp/^^tmpfileDELME" bs=1M count="$td"; sync

  zps
  exit # early
fi
# zpool create <pool> draid[<parity>][:<data>d][:<children>c][:<spares>s] <vdevs...>
# ex: draid2:4d:1s:11c
# SLOW writing to zstd-3
# draid$rzl:8d:12'c':$spr's' $pooldisks1 \
# draid$rzl:8d:12'c':$spr's' $pooldisks2 \
# Create the pool. "iteration" selects the layout:
#   1    - seven dRAID vdevs of 12 children each (two 6-disk groups per vdev)
#   else - one large dRAID vdev holding every disk in $pooldisks
# The create runs in a subshell so `set -x` tracing stays local; failexit
# therefore only ends the subshell, and the status is re-checked afterwards.
iteration=1
if [ "$iteration" = "1" ]; then
  # compression=zstd-3
  # -o ashift=12
  # $pooldisksNN are expanded unquoted on purpose: one argument per disk.
  # shellcheck disable=SC2086
  ( set -x
    time zpool create -o autoexpand=on -O atime=off -O compression=lz4 \
      "$zp" \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks01 $pooldisks02 \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks03 $pooldisks04 \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks05 $pooldisks06 \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks07 $pooldisks08 \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks09 $pooldisks10 \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks11 $pooldisks12 \
      "draid${rzl}:8d:12c:${spr}s" $pooldisks13 $pooldisks14 \
      || failexit 101 "Failed to create DRAID"
  )
else
  # One Big Mother
  # -o ashift=12
  # raidz level (usually 2)
  rzl=2
  # spares
  spr=4

  # zpool rejects the create when the declared child count differs from the
  # number of devices supplied, so derive it from the actual list.
  # shellcheck disable=SC2206
  alldisks=( $pooldisks )
  children=${#alldisks[@]}
  if [ "$children" -ne "$td" ]; then
    echo "NOTE - td=$td but $children disks are listed; using $children children" >&2
  fi

  ( set -x
    time zpool create -o autoexpand=on -O atime=off -O compression=lz4 \
      "$zp" \
      "draid${rzl}:6d:${children}c:${spr}s" "${alldisks[@]}" \
      || failexit 101 "Failed to create DRAID"
  )
fi

# Need this check because of subshell, will not exit early otherwise
rc=$?
if [ "$rc" -gt 0 ]; then
  exit "$rc"
fi
# Create the datasets, then print a summary of the new pool.
# Requires the external script zfs-newds.sh to be reachable via PATH.
# NOTE(review): the first argument (11 / 10 / 00) looks like share/compress
# flags matching the dataset names - confirm against zfs-newds.sh.
# going with lz4 so not limited by CPU for compression
zfs-newds.sh 11 "$zp" shrcompr
zfs-newds.sh 10 "$zp" notshrcompr
zfs-newds.sh 00 "$zp" notshrnotcompr

zps
zpool list
zfs list
# header line ("Filesystem") plus the zfs mounts only
df -hT | grep -E 'ilesystem|zfs'

echo "NOTE - best practice is to export the pool and # zpool import -a -d $DBI"
date
# Stop here: everything below this point is reference notes, not script.
exit
# REFS:
https://openzfs.github.io/openzfs-docs/Basic%20Concepts/dRAID%20Howto.html
https://klarasystems.com/articles/openzfs-draid-finally/
https://www.reddit.com/r/zfs/comments/lnoh7v/im_trying_to_understand_how_draid_works_but_im/
https://insider-voice.com/a-deep-dive-into-the-new-openzfs-2-1-distributed-raid-topology/
https://docs.google.com/presentation/d/1uo0nBfY84HIhEqGWEx-Tbm8fPbJKtIP3ICo4toOPcJo/edit#slide=id.g9d6b9fd59f_0_27
Group size must divide evenly into draid size
E.g., 30 drives can only support
3 drive group
5 drive group
10 drive group
15 drive group
Only need to specify group size at creation
Group Size - the number of pieces the data is partitioned into plus the amount of parity
o The amount of parity determines the redundancy
o The number of data pieces determines the overhead
dRAID Size - the number of drives used for data
(Does not include spare drives)
-----
Iteration 1:
# make a draid with raidz1, x14 VDEVs, 4 data disks, 6 children, 1 spare
# since we are using fairly small (4GB) disks this should not be an issue
+ zpool create -o autoexpand=on -O atime=off -O compression=lz4 zdraidtest \
draid1:4d:6c:1s /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg \
draid1:4d:6c:1s /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm \
draid1:4d:6c:1s /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds \
draid1:4d:6c:1s /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy \
draid1:4d:6c:1s /dev/sdaa /dev/sdab /dev/sdac /dev/sdad /dev/sdae /dev/sdaf \
draid1:4d:6c:1s /dev/sdag /dev/sdah /dev/sdai /dev/sdaj /dev/sdak /dev/sdal \
draid1:4d:6c:1s /dev/sdam /dev/sdan /dev/sdao /dev/sdap /dev/sdaq /dev/sdar \
draid1:4d:6c:1s /dev/sdas /dev/sdat /dev/sdau /dev/sdav /dev/sdaw /dev/sdax \
draid1:4d:6c:1s /dev/sdba /dev/sdbb /dev/sdbc /dev/sdbd /dev/sdbe /dev/sdbf \
draid1:4d:6c:1s /dev/sdbg /dev/sdbh /dev/sdbi /dev/sdbj /dev/sdbk /dev/sdbl \
draid1:4d:6c:1s /dev/sdbm /dev/sdbn /dev/sdbo /dev/sdbp /dev/sdbq /dev/sdbr \
draid1:4d:6c:1s /dev/sdbs /dev/sdbt /dev/sdbu /dev/sdbv /dev/sdbw /dev/sdbx \
draid1:4d:6c:1s /dev/sdca /dev/sdcb /dev/sdcc /dev/sdcd /dev/sdce /dev/sdcf \
draid1:4d:6c:1s /dev/sdcg /dev/sdch /dev/sdci /dev/sdcj /dev/sdck /dev/sdcl
real 0m15.619s
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
filesystem successfully created, but not shared
changed ownership of '/zdraidtest/shrcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/shrcompr zfs 196G 1.0M 196G 1% /zdraidtest/shrcompr
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
changed ownership of '/zdraidtest/notshrcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/notshrcompr zfs 196G 1.0M 196G 1% /zdraidtest/notshrcompr
pool: zdraidtest
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
zdraidtest ONLINE 0 0 0
draid1:4d:6c:1s-0 ONLINE 0 0 0
sdb ONLINE 0 0 0
sdc ONLINE 0 0 0
sdd ONLINE 0 0 0
sde ONLINE 0 0 0
sdf ONLINE 0 0 0
sdg ONLINE 0 0 0
draid1:4d:6c:1s-1 ONLINE 0 0 0
sdh ONLINE 0 0 0
sdi ONLINE 0 0 0
sdj ONLINE 0 0 0
sdk ONLINE 0 0 0
sdl ONLINE 0 0 0
sdm ONLINE 0 0 0
draid1:4d:6c:1s-2 ONLINE 0 0 0
sdn ONLINE 0 0 0
sdo ONLINE 0 0 0
sdp ONLINE 0 0 0
sdq ONLINE 0 0 0
sdr ONLINE 0 0 0
sds ONLINE 0 0 0
draid1:4d:6c:1s-3 ONLINE 0 0 0
sdt ONLINE 0 0 0
sdu ONLINE 0 0 0
sdv ONLINE 0 0 0
sdw ONLINE 0 0 0
sdx ONLINE 0 0 0
sdy ONLINE 0 0 0
draid1:4d:6c:1s-4 ONLINE 0 0 0
sdaa ONLINE 0 0 0
sdab ONLINE 0 0 0
sdac ONLINE 0 0 0
sdad ONLINE 0 0 0
sdae ONLINE 0 0 0
sdaf ONLINE 0 0 0
draid1:4d:6c:1s-5 ONLINE 0 0 0
sdag ONLINE 0 0 0
sdah ONLINE 0 0 0
sdai ONLINE 0 0 0
sdaj ONLINE 0 0 0
sdak ONLINE 0 0 0
sdal ONLINE 0 0 0
draid1:4d:6c:1s-6 ONLINE 0 0 0
sdam ONLINE 0 0 0
sdan ONLINE 0 0 0
sdao ONLINE 0 0 0
sdap ONLINE 0 0 0
sdaq ONLINE 0 0 0
sdar ONLINE 0 0 0
draid1:4d:6c:1s-7 ONLINE 0 0 0
sdas ONLINE 0 0 0
sdat ONLINE 0 0 0
sdau ONLINE 0 0 0
sdav ONLINE 0 0 0
sdaw ONLINE 0 0 0
sdax ONLINE 0 0 0
draid1:4d:6c:1s-8 ONLINE 0 0 0
sdba ONLINE 0 0 0
sdbb ONLINE 0 0 0
sdbc ONLINE 0 0 0
sdbd ONLINE 0 0 0
sdbe ONLINE 0 0 0
sdbf ONLINE 0 0 0
draid1:4d:6c:1s-9 ONLINE 0 0 0
sdbg ONLINE 0 0 0
sdbh ONLINE 0 0 0
sdbi ONLINE 0 0 0
sdbj ONLINE 0 0 0
sdbk ONLINE 0 0 0
sdbl ONLINE 0 0 0
draid1:4d:6c:1s-10 ONLINE 0 0 0
sdbm ONLINE 0 0 0
sdbn ONLINE 0 0 0
sdbo ONLINE 0 0 0
sdbp ONLINE 0 0 0
sdbq ONLINE 0 0 0
sdbr ONLINE 0 0 0
draid1:4d:6c:1s-11 ONLINE 0 0 0
sdbs ONLINE 0 0 0
sdbt ONLINE 0 0 0
sdbu ONLINE 0 0 0
sdbv ONLINE 0 0 0
sdbw ONLINE 0 0 0
sdbx ONLINE 0 0 0
draid1:4d:6c:1s-12 ONLINE 0 0 0
sdca ONLINE 0 0 0
sdcb ONLINE 0 0 0
sdcc ONLINE 0 0 0
sdcd ONLINE 0 0 0
sdce ONLINE 0 0 0
sdcf ONLINE 0 0 0
draid1:4d:6c:1s-13 ONLINE 0 0 0
sdcg ONLINE 0 0 0
sdch ONLINE 0 0 0
sdci ONLINE 0 0 0
sdcj ONLINE 0 0 0
sdck ONLINE 0 0 0
sdcl ONLINE 0 0 0
spares
draid1-0-0 AVAIL
draid1-1-0 AVAIL
draid1-2-0 AVAIL
draid1-3-0 AVAIL
draid1-4-0 AVAIL
draid1-5-0 AVAIL
draid1-6-0 AVAIL
draid1-7-0 AVAIL
draid1-8-0 AVAIL
draid1-9-0 AVAIL
draid1-10-0 AVAIL
draid1-11-0 AVAIL
draid1-12-0 AVAIL
draid1-13-0 AVAIL
errors: No known data errors
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
zdraidtest 252G 895K 252G - - 0% 0% 1.00x ONLINE -
NAME USED AVAIL REFER MOUNTPOINT
zdraidtest 497K 195G 51.9K /zdraidtest
zdraidtest/notshrcompr 51.9K 195G 51.9K /zdraidtest/notshrcompr
zdraidtest/shrcompr 51.9K 195G 51.9K /zdraidtest/shrcompr
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest zfs 196G 128K 196G 1% /zdraidtest
zdraidtest/shrcompr zfs 196G 1.0M 196G 1% /zdraidtest/shrcompr
zdraidtest/notshrcompr zfs 196G 1.0M 196G 1% /zdraidtest/notshrcompr
NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id
-----
draid$rzl:6d:$td'c':$spr's' $pooldisks \
Iteration 2 - make a DRAID raidz2 with 6 data disks, (84) children, 4 spares = more space available
Note that we are allocating more virtual spares (4) than the raidz2 parity level (2), and we also have
idle hotspare disks available. We can sustain 2 failures with no data loss, replace the failed disks
with virtual spares, and once the resilver finishes we should be able to sustain ANOTHER 2 failures.
HOWEVER - if we get 4 simultaneous failures, pool I/O gets suspended and we HAVE TO reboot,
because the zfs commands will hang.
+ zpool create -o autoexpand=on -O atime=off -O compression=lz4 zdraidtest \
draid2:6d:84c:4s \
/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg \
/dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm /dev/sdn /dev/sdo \
/dev/sdp /dev/sdq /dev/sdr /dev/sds /dev/sdt /dev/sdu /dev/sdv /dev/sdw \
/dev/sdx /dev/sdy /dev/sdaa /dev/sdab /dev/sdac /dev/sdad /dev/sdae \
/dev/sdaf /dev/sdag /dev/sdah /dev/sdai /dev/sdaj /dev/sdak /dev/sdal \
/dev/sdam /dev/sdan /dev/sdao /dev/sdap /dev/sdaq /dev/sdar /dev/sdas \
/dev/sdat /dev/sdau /dev/sdav /dev/sdaw /dev/sdax /dev/sdba /dev/sdbb \
/dev/sdbc /dev/sdbd /dev/sdbe /dev/sdbf /dev/sdbg /dev/sdbh /dev/sdbi \
/dev/sdbj /dev/sdbk /dev/sdbl /dev/sdbm /dev/sdbn /dev/sdbo /dev/sdbp \
/dev/sdbq /dev/sdbr /dev/sdbs /dev/sdbt /dev/sdbu /dev/sdbv /dev/sdbw \
/dev/sdbx /dev/sdca /dev/sdcb /dev/sdcc /dev/sdcd /dev/sdce /dev/sdcf \
/dev/sdcg /dev/sdch /dev/sdci /dev/sdcj /dev/sdck /dev/sdcl
real 0m11.846s
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
filesystem successfully created, but not shared
changed ownership of '/zdraidtest/shrcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/shrcompr zfs 211G 1.0M 211G 1% /zdraidtest/shrcompr
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
changed ownership of '/zdraidtest/notshrcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/notshrcompr zfs 211G 1.0M 211G 1% /zdraidtest/notshrcompr
pool: zdraidtest
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
zdraidtest ONLINE 0 0 0
draid2:6d:84c:4s-0 ONLINE 0 0 0
sdb ONLINE 0 0 0
sdc ONLINE 0 0 0
sdd ONLINE 0 0 0
sde ONLINE 0 0 0
sdf ONLINE 0 0 0
sdg ONLINE 0 0 0
sdh ONLINE 0 0 0
sdi ONLINE 0 0 0
sdj ONLINE 0 0 0
sdk ONLINE 0 0 0
sdl ONLINE 0 0 0
sdm ONLINE 0 0 0
sdn ONLINE 0 0 0
sdo ONLINE 0 0 0
sdp ONLINE 0 0 0
sdq ONLINE 0 0 0
sdr ONLINE 0 0 0
sds ONLINE 0 0 0
sdt ONLINE 0 0 0
sdu ONLINE 0 0 0
sdv ONLINE 0 0 0
sdw ONLINE 0 0 0
sdx ONLINE 0 0 0
sdy ONLINE 0 0 0
sdaa ONLINE 0 0 0
sdab ONLINE 0 0 0
sdac ONLINE 0 0 0
sdad ONLINE 0 0 0
sdae ONLINE 0 0 0
sdaf ONLINE 0 0 0
sdag ONLINE 0 0 0
sdah ONLINE 0 0 0
sdai ONLINE 0 0 0
sdaj ONLINE 0 0 0
sdak ONLINE 0 0 0
sdal ONLINE 0 0 0
sdam ONLINE 0 0 0
sdan ONLINE 0 0 0
sdao ONLINE 0 0 0
sdap ONLINE 0 0 0
sdaq ONLINE 0 0 0
sdar ONLINE 0 0 0
sdas ONLINE 0 0 0
sdat ONLINE 0 0 0
sdau ONLINE 0 0 0
sdav ONLINE 0 0 0
sdaw ONLINE 0 0 0
sdax ONLINE 0 0 0
sdba ONLINE 0 0 0
sdbb ONLINE 0 0 0
sdbc ONLINE 0 0 0
sdbd ONLINE 0 0 0
sdbe ONLINE 0 0 0
sdbf ONLINE 0 0 0
sdbg ONLINE 0 0 0
sdbh ONLINE 0 0 0
sdbi ONLINE 0 0 0
sdbj ONLINE 0 0 0
sdbk ONLINE 0 0 0
sdbl ONLINE 0 0 0
sdbm ONLINE 0 0 0
sdbn ONLINE 0 0 0
sdbo ONLINE 0 0 0
sdbp ONLINE 0 0 0
sdbq ONLINE 0 0 0
sdbr ONLINE 0 0 0
sdbs ONLINE 0 0 0
sdbt ONLINE 0 0 0
sdbu ONLINE 0 0 0
sdbv ONLINE 0 0 0
sdbw ONLINE 0 0 0
sdbx ONLINE 0 0 0
sdca ONLINE 0 0 0
sdcb ONLINE 0 0 0
sdcc ONLINE 0 0 0
sdcd ONLINE 0 0 0
sdce ONLINE 0 0 0
sdcf ONLINE 0 0 0
sdcg ONLINE 0 0 0
sdch ONLINE 0 0 0
sdci ONLINE 0 0 0
sdcj ONLINE 0 0 0
sdck ONLINE 0 0 0
sdcl ONLINE 0 0 0
spares
draid2-0-0 AVAIL
draid2-0-1 AVAIL
draid2-0-2 AVAIL
draid2-0-3 AVAIL
errors: No known data errors
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
zdraidtest 292G 1.21M 292G - - 0% 0% 1.00x ONLINE -
NAME USED AVAIL REFER MOUNTPOINT
zdraidtest 634K 211G 77.4K /zdraidtest
zdraidtest/notshrcompr 77.4K 211G 77.4K /zdraidtest/notshrcompr
zdraidtest/shrcompr 77.4K 211G 77.4K /zdraidtest/shrcompr
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest zfs 211G 128K 211G 1% /zdraidtest
zdraidtest/shrcompr zfs 211G 1.0M 211G 1% /zdraidtest/shrcompr
zdraidtest/notshrcompr zfs 211G 1.0M 211G 1% /zdraidtest/notshrcompr
NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id
-----
2021-07-04 02:23:26 -05:00
Here is a simulated severely degraded draidZ2 pool with multiple drive failures and spares in use:
zpool replace zdraidtest sdbf draid2-0-0
zpool replace zdraidtest sdbg draid2-0-1
pool: zdraidtest
state: DEGRADED
status: One or more devices could not be used because the label is missing or
invalid. Sufficient replicas exist for the pool to continue
functioning in a degraded state.
action: Replace the device using 'zpool replace'.
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
scan: resilvered 192K in 00:00:00 with 0 errors on Sat Jul 3 17:13:50 2021
config:
NAME STATE READ WRITE CKSUM
zdraidtest DEGRADED 0 0 0
draid2:6d:84c:4s-0 DEGRADED 0 0 0
sdb ONLINE 0 0 0
sdc ONLINE 0 0 0
sdd ONLINE 0 0 0
sde ONLINE 0 0 0
sdf ONLINE 0 0 0
sdg ONLINE 0 0 0
sdh ONLINE 0 0 0
sdi ONLINE 0 0 0
sdj ONLINE 0 0 0
sdk ONLINE 0 0 0
sdl ONLINE 0 0 0
sdm ONLINE 0 0 0
sdn ONLINE 0 0 0
sdo ONLINE 0 0 0
sdp ONLINE 0 0 0
sdq ONLINE 0 0 0
sdr ONLINE 0 0 0
sds ONLINE 0 0 0
sdt ONLINE 0 0 0
sdu ONLINE 0 0 0
sdv ONLINE 0 0 0
sdw ONLINE 0 0 0
sdx ONLINE 0 0 0
sdy ONLINE 0 0 0
sdaa ONLINE 0 0 0
sdab ONLINE 0 0 0
sdac ONLINE 0 0 0
sdad ONLINE 0 0 0
sdae ONLINE 0 0 0
sdaf ONLINE 0 0 0
sdag ONLINE 0 0 0
sdah ONLINE 0 0 0
sdai ONLINE 0 0 0
sdaj ONLINE 0 0 0
sdak ONLINE 0 0 0
sdal ONLINE 0 0 0
sdam ONLINE 0 0 0
sdan ONLINE 0 0 0
sdao ONLINE 0 0 0
sdap ONLINE 0 0 0
sdaq ONLINE 0 0 0
sdar ONLINE 0 0 0
sdas ONLINE 0 0 0
sdat ONLINE 0 0 0
sdau ONLINE 0 0 0
sdav ONLINE 0 0 0
sdaw ONLINE 0 0 0
sdax ONLINE 0 0 0
sdba ONLINE 0 0 0
sdbb ONLINE 0 0 0
sdbc ONLINE 0 0 0
sdbd ONLINE 0 0 0
sdbe ONLINE 0 0 0
spare-53 DEGRADED 0 0 0
sdbf UNAVAIL 0 0 0
draid2-0-0 ONLINE 0 0 0
spare-54 DEGRADED 0 0 0
sdbg UNAVAIL 0 0 0
draid2-0-1 ONLINE 0 0 0
sdbh ONLINE 0 0 0
sdbi ONLINE 0 0 0
sdbj ONLINE 0 0 0
sdbk ONLINE 0 0 0
sdbl ONLINE 0 0 0
sdbm ONLINE 0 0 0
sdbn ONLINE 0 0 0
sdbo ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBa5ac7f8f-8a80f392 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB3e6c2216-e4a84097 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBdf46700d-57ffca34 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB93e2157f-ddab1d56 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBa832bbfb-72039324 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBfaec32a1-3e70d636 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBf341e4d8-2d3ef1c1 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB6e562a6b-0a70e7a7 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB2b5edd5d-4a72ce8e ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB7ba0f6fc-682efc82 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB159ac173-a10e8776 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBd640cdea-57f6bda0 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB17592c2d-adb9e154 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB6fd46fec-854516be ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB5eb81d86-30bd0f13 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB104bd37f-4cfbff43 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB0482fe5a-9533bcec ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBf406b98e-63085d50 ONLINE 0 0 0
scsi-1ATA_VBOX_HARDDISK_VB2bb3428a-78b4c7f1 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB3ccc0544-8e178e88 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBb0b1ed03-fa8cfd99 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 INUSE currently in use
draid2-0-2 AVAIL
draid2-0-3 AVAIL
errors: No known data errors
2021-07-03 23:12:31 -05:00
2021-07-04 02:23:26 -05:00
NOTE that we still have 2x extra virtual spares available, and idle hotspare disks allocated;
We should be able to sustain 2 more disk failures but not 3.
At this point we will also simfail sdo and sdaq.
pool: zdraidtest
state: DEGRADED
status: One or more devices could not be used because the label is missing or
invalid. Sufficient replicas exist for the pool to continue
functioning in a degraded state.
action: Replace the device using 'zpool replace'.
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
scan: resilvered 192K in 00:00:00 with 0 errors on Sat Jul 3 17:13:50 2021
config:
NAME STATE READ WRITE CKSUM
zdraidtest DEGRADED 0 0 0
draid2:6d:84c:4s-0 DEGRADED 0 0 0
sdb ONLINE 0 0 0
sdc ONLINE 0 0 0
sdd ONLINE 0 0 0
sde ONLINE 0 0 0
sdf ONLINE 0 0 0
sdg ONLINE 0 0 0
sdh ONLINE 0 0 0
sdi ONLINE 0 0 0
sdj ONLINE 0 0 0
sdk ONLINE 0 0 0
sdl ONLINE 0 0 0
sdm ONLINE 0 0 0
sdn ONLINE 0 0 0
sdo UNAVAIL 0 0 0
sdp ONLINE 0 0 0
sdq ONLINE 0 0 0
sdr ONLINE 0 0 0
sds ONLINE 0 0 0
sdt ONLINE 0 0 0
sdu ONLINE 0 0 0
sdv ONLINE 0 0 0
sdw ONLINE 0 0 0
sdx ONLINE 0 0 0
sdy ONLINE 0 0 0
sdaa ONLINE 0 0 0
sdab ONLINE 0 0 0
sdac ONLINE 0 0 0
sdad ONLINE 0 0 0
sdae ONLINE 0 0 0
sdaf ONLINE 0 0 0
sdag ONLINE 0 0 0
sdah ONLINE 0 0 0
sdai ONLINE 0 0 0
sdaj ONLINE 0 0 0
sdak ONLINE 0 0 0
sdal ONLINE 0 0 0
sdam ONLINE 0 0 0
sdan ONLINE 0 0 0
sdao ONLINE 0 0 0
sdap ONLINE 0 0 0
sdaq UNAVAIL 0 0 0
sdar ONLINE 0 0 0
sdas ONLINE 0 0 0
sdat ONLINE 0 0 0
sdau ONLINE 0 0 0
sdav ONLINE 0 0 0
sdaw ONLINE 0 0 0
sdax ONLINE 0 0 0
sdba ONLINE 0 0 0
sdbb ONLINE 0 0 0
sdbc ONLINE 0 0 0
sdbd ONLINE 0 0 0
sdbe ONLINE 0 0 0
spare-53 DEGRADED 0 0 0
sdbf UNAVAIL 0 0 0
draid2-0-0 ONLINE 0 0 0
spare-54 DEGRADED 0 0 0
sdbg UNAVAIL 0 0 0
draid2-0-1 ONLINE 0 0 0
sdbh ONLINE 0 0 0
sdbi ONLINE 0 0 0
sdbj ONLINE 0 0 0
sdbk ONLINE 0 0 0
sdbl ONLINE 0 0 0
sdbm ONLINE 0 0 0
sdbn ONLINE 0 0 0
sdbo ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBa5ac7f8f-8a80f392 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB3e6c2216-e4a84097 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBdf46700d-57ffca34 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB93e2157f-ddab1d56 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBa832bbfb-72039324 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBfaec32a1-3e70d636 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBf341e4d8-2d3ef1c1 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB6e562a6b-0a70e7a7 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB2b5edd5d-4a72ce8e ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB7ba0f6fc-682efc82 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB159ac173-a10e8776 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBd640cdea-57f6bda0 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB17592c2d-adb9e154 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB6fd46fec-854516be ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB5eb81d86-30bd0f13 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB104bd37f-4cfbff43 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB0482fe5a-9533bcec ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBf406b98e-63085d50 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB2bb3428a-78b4c7f1 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB3ccc0544-8e178e88 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBb0b1ed03-fa8cfd99 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 INUSE currently in use
draid2-0-2 AVAIL
draid2-0-3 AVAIL
errors: No known data errors
As long as we replace the failed drives with either the available virtual spares or idling hotspare disks,
the pool will continue in a degraded state with no data loss.
Two virtual spares for draid2:6d:84c:4s-0 are still good to go.
If ANY other drive also fails at this point without a virtual or real spare put into use,
we will have a dead pool.
FIX: zpool replace zdraidtest sdbg sdbz
^ This puts the inuse spare back into an AVAIL state since the virtual spare was replaced with an actual drive
zpool replace zdraidtest sdaq sdaz
zpool replace zdraidtest sdo sday
zpool replace zdraidtest sdbf sdby
pool: zdraidtest
state: ONLINE
scan: resilvered 470K in 00:00:01 with 0 errors on Sat Jul 3 17:32:43 2021
config:
NAME STATE READ WRITE CKSUM
zdraidtest ONLINE 0 0 0
draid2:6d:84c:4s-0 ONLINE 0 0 0
sdb ONLINE 0 0 0
sdc ONLINE 0 0 0
sdd ONLINE 0 0 0
sde ONLINE 0 0 0
sdf ONLINE 0 0 0
sdg ONLINE 0 0 0
sdh ONLINE 0 0 0
sdi ONLINE 0 0 0
sdj ONLINE 0 0 0
sdk ONLINE 0 0 0
sdl ONLINE 0 0 0
sdm ONLINE 0 0 0
sdn ONLINE 0 0 0
sday ONLINE 0 0 0
sdp ONLINE 0 0 0
sdq ONLINE 0 0 0
sdr ONLINE 0 0 0
sds ONLINE 0 0 0
sdt ONLINE 0 0 0
sdu ONLINE 0 0 0
sdv ONLINE 0 0 0
sdw ONLINE 0 0 0
sdx ONLINE 0 0 0
sdy ONLINE 0 0 0
sdaa ONLINE 0 0 0
sdab ONLINE 0 0 0
sdac ONLINE 0 0 0
sdad ONLINE 0 0 0
sdae ONLINE 0 0 0
sdaf ONLINE 0 0 0
sdag ONLINE 0 0 0
sdah ONLINE 0 0 0
sdai ONLINE 0 0 0
sdaj ONLINE 0 0 0
sdak ONLINE 0 0 0
sdal ONLINE 0 0 0
sdam ONLINE 0 0 0
sdan ONLINE 0 0 0
sdao ONLINE 0 0 0
sdap ONLINE 0 0 0
sdaz ONLINE 0 0 0
sdar ONLINE 0 0 0
sdas ONLINE 0 0 0
sdat ONLINE 0 0 0
sdau ONLINE 0 0 0
sdav ONLINE 0 0 0
sdaw ONLINE 0 0 0
sdax ONLINE 0 0 0
sdba ONLINE 0 0 0
sdbb ONLINE 0 0 0
sdbc ONLINE 0 0 0
sdbd ONLINE 0 0 0
sdbe ONLINE 0 0 0
sdby ONLINE 0 0 0
sdbz ONLINE 0 0 0
sdbh ONLINE 0 0 0
sdbi ONLINE 0 0 0
sdbj ONLINE 0 0 0
sdbk ONLINE 0 0 0
sdbl ONLINE 0 0 0
sdbm ONLINE 0 0 0
sdbn ONLINE 0 0 0
sdbo ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBa5ac7f8f-8a80f392 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB3e6c2216-e4a84097 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBdf46700d-57ffca34 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB93e2157f-ddab1d56 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBa832bbfb-72039324 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBfaec32a1-3e70d636 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBf341e4d8-2d3ef1c1 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB6e562a6b-0a70e7a7 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB2b5edd5d-4a72ce8e ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB7ba0f6fc-682efc82 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB159ac173-a10e8776 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBd640cdea-57f6bda0 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB17592c2d-adb9e154 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB6fd46fec-854516be ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB5eb81d86-30bd0f13 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB104bd37f-4cfbff43 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB0482fe5a-9533bcec ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBf406b98e-63085d50 ONLINE 0 0 0
scsi-1ATA_VBOX_HARDDISK_VB2bb3428a-78b4c7f1 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VB3ccc0544-8e178e88 ONLINE 0 0 0
scsi-SATA_VBOX_HARDDISK_VBb0b1ed03-fa8cfd99 ONLINE 0 0 0
spares
draid2-0-0 AVAIL
draid2-0-1 AVAIL
draid2-0-2 AVAIL
draid2-0-3 AVAIL
errors: No known data errors
...and now the pool is whole again, all the virtual spares are back in place - things might be a little
disordered as far as tracking what drives are in use now but we had multiple drive failures even after
virtual spares were put in use and the pool is still going. ' zpool history ' can help.
2021-07-03 23:12:31 -05:00
-----
NOTE: if you simulate a failure by taking a drive offline, you can't just "echo online" to it later - that won't bring it back up!
Try rescan-scsi-bus.sh, or reboot.
2021-07-04 02:23:26 -05:00
FIX: if a drive is offline, replace it temporarily with a builtin virtual spare:
2021-07-03 23:12:31 -05:00
# zpool replace zdraidtest sdd draid2-0-0
# zps
pool: zdraidtest
state: DEGRADED
status: One or more devices could not be used because the label is missing or
invalid. Sufficient replicas exist for the pool to continue
functioning in a degraded state.
action: Replace the device using 'zpool replace'.
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
scan: resilvered 0B in 00:00:00 with 0 errors on Sat Jul 3 14:43:51 2021
config:
NAME STATE READ WRITE CKSUM
zdraidtest DEGRADED 0 0 0
draid2:5d:24c:2s-0 DEGRADED 0 0 0
sdb ONLINE 0 0 0
sdc ONLINE 0 0 0
spare-2 DEGRADED 0 0 0
sdd UNAVAIL 0 0 0
draid2-0-0 ONLINE 0 0 0
sde ONLINE 0 0 0
sdf ONLINE 0 0 0
sdg ONLINE 0 0 0
sdh ONLINE 0 0 0
sdi ONLINE 0 0 0
sdj ONLINE 0 0 0
sdk ONLINE 0 0 0
sdl ONLINE 0 0 0
sdm ONLINE 0 0 0
sdn ONLINE 0 0 0
sdo ONLINE 0 0 0
sdp ONLINE 0 0 0
sdq ONLINE 0 0 0
sdr ONLINE 0 0 0
sds ONLINE 0 0 0
sdt ONLINE 0 0 0
sdu ONLINE 0 0 0
sdv ONLINE 0 0 0
sdw ONLINE 0 0 0
sdx ONLINE 0 0 0
sdy ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
errors: No known data errors
HOWTO fix the above situation with the same disk (you rebooted / it came back online) and decouple the in-use spare:
zpool export -a
fdisk -l /dev/sdd # scsi-SATA_VBOX_HARDDISK_VBbcc6c97e-f68b8368
zpool labelclear /dev/sdd
zpool labelclear -f /dev/sdd1
zpool import -a
zpool status -v # This will show a degraded pool with a missing disk
# This won't work, but the error it returns gives useful info:
zpool replace zdraidtest spare-2 scsi-SATA_VBOX_HARDDISK_VBbcc6c97e-f68b8368 # got error, use detach
zpool detach zdraidtest 2582498653363374334 # this was listed as UNAVAIL with the spare in-use underneath it
zpool status -v # should now show only the spare where sdd was
# we labelcleared it so it should be ready for re-use;
# if you want to be really thorough you can DD zeros to the entire drive but not really necessary
zpool replace zdraidtest draid2-0-0 scsi-SATA_VBOX_HARDDISK_VBbcc6c97e-f68b8368 # same disk (sdd) but labelcleared
zpool status -v