mirror of https://github.com/kneutron/ansitest.git
synced 2025-01-16 04:42:55 +08:00

Add files via upload
This commit is contained in:
parent e989440ae5
commit d5d946eba8

668  ZFS/zfs-mk-draid-2-vdevs--24d.sh  (new file)

@@ -0,0 +1,668 @@
#!/bin/bash

echo "$0 - 2021 Dave Bechtel - make a ZFS DRAID pool"
echo "- pass arg1='reset' to destroy test pool"
echo "- pass arg1='fail' and arg2=dev2fail to simulate failure"
echo "Reboot to clear simulated device failures before issuing 'reset'"

# Requires at least zfs 2.1.0
DD=/dev/disk
DBI=/dev/disk/by-id

# total disks for pool / children
td=24

# raidz level (usually 2)
rzl=1

# spares
spr=2

# TODO EDITME
zp=zdraidtest

function zps () {
  zpool status -v |awk 'NF>0'
}

#pooldisks=$(echo /dev/sd{b..y})
pooldisks1=$(echo /dev/sd{b..m})
pooldisks2=$(echo /dev/sd{n..y})
pooldisks=$pooldisks1' '$pooldisks2 # need entire set for reset
# sdb sdc sdd sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sds sdt sdu sdv sdw sdx sdy

# extending to 32 disks
#pooldisks2=$(echo /dev/sda{a..h})
#sdaa sdab sdac sdad sdae sdaf sdag sdah

# failexit.mrg
function failexit () {
  echo '! Something failed! Code: '"$1 $2" # code # (and optional description)
  exit $1
}

# cre8 drive translation table - NOTE 32 disk config gets overridden vv
source ~/bin/boojum/draid-pooldisks-assoc.sh $td

# Flame the pool and start over from 0
if [ "$1" = "reset" ]; then
  logger "$(date) - $0 - RESET issued - destroying $zp"

  # no need to worry if it's not imported / already destroyed
  if [ $(zpool list |grep -c $zp) -gt 0 ]; then
    zpool destroy $zp || failexit 999 "Failed to destroy $zp"
  fi

  for d in $pooldisks; do
    echo -e -n "o Clearing label for disk $d \r"
    zpool labelclear -f "$d"1
  done
  echo ''

  # also reset hotspares
  # echo ${hotspares[@]}
  # zpool status -v |egrep 'sdz|sday|sdaz|sdby|sdbz|sdcy|sdcz'
  for d in ${hotspares[@]}; do
    #echo $d # DEBUG
    echo -e -n "o Clearing label for Hotspare disk $d \r"
    zpool labelclear -f "/dev/$d"1
  done
  echo ''

  zpool status -v

  exit; # early
fi

# Simulate a drive failure; if the zed daemon is running, a spare should auto kick in
if [ "$1" = "fail" ]; then
  # NOTE we do NO error checking here, so if you fail your ROOT DISK, THAT'S ON YOU!

  # also cp syslog
  echo "$(date) - $0 - Simulating disk failure for $2 $(ls -lR $DD |grep $2)" |tee |logger
  echo offline > /sys/block/$2/device/state
  cat /sys/block/$2/device/state |tee |logger

  time dd if=/dev/urandom of=/$zp/^^tmpfileDELME bs=1M count=$td; sync
  # force a write; if that does not work, try a scrub

  zps

  exit; # early
fi

# TODO EDITME
#iteration=OBM
iteration=2

if [ "$iteration" = "1" ]; then
  # compression=zstd-3
  # -o ashift=12
  # raidz level (usually 2)
  rzl=1
  # Vspares - this is a 96-drive pool, you DON'T want to skimp!
  spr=2
  ( set -x
    time zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 \
      $zp \
      draid$rzl:8d:12'c':$spr's' $pooldisks1 \
      draid$rzl:8d:12'c':$spr's' $pooldisks2 \
    || failexit 101 "Failed to create DRAID"
  )
elif [ "$iteration" = "2" ]; then
  # zpool create <pool> draid[<parity>][:<data>d][:<children>c][:<spares>s] <vdevs...>
  # ex: draid2:4d:1s:11c
  # raidz level (usually 2)
  rzl=2
  # Vspares - this is a 96-drive pool, you DON'T want to skimp!
  spr=2
  ( set -x
    time zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=zstd-3 \
      $zp \
      draid$rzl:8d:12'c':$spr's' $pooldisks1 \
      draid$rzl:8d:12'c':$spr's' $pooldisks2 \
    || failexit 101 "Failed to create DRAID"
  )
else
  # One Big Mother
  # -o ashift=12
  # raidz level (usually 2)
  rzl=2
  # spares - this is a 96-drive pool, you DON'T want to skimp!
  spr=2
  ( set -x
    time zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 \
      $zp \
      draid$rzl:8d:$td'c':$spr's' $pooldisks \
    || failexit 101 "Failed to create DRAID"
  )
fi

rc=$?
[ $rc -gt 0 ] && exit $rc
# ^ Need this check because of subshell, will not exit early otherwise

# [ $(zpool list |grep -c "no pools") -eq 0 ] && \
#   zpool add $zp spare ${hotspares[@]}

# The below will not work: gets error
# "requested number of dRAID data disks per group 10 is too high, at most 8 disks are available for data"
#( set -x
#time zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=zstd-3 \
# $zp \
# draid$rzl:10d:12'c':$spr's' $pooldisks1 \
# draid$rzl:10d:12'c':$spr's' $pooldisks2 \
#|| failexit 101 "Failed to create DRAID"
#)

# cre8 datasets
# requires external script in the same PATH
# going with lz4 so not limited by CPU for compression
zfs-newds.sh 11 $zp shrcompr
zfs-newds.sh 10 $zp notshrcompr
zfs-newds-zstd.sh 10 $zp notshrcompr-zstd
zfs-newds.sh 00 $zp notshrnotcompr

zps
zpool list
zfs list

df -hT |egrep 'ilesystem|zfs'

echo "NOTE - best practice is to export the pool and # zpool import -a -d $DBI"

date
exit;

# REFS:
https://openzfs.github.io/openzfs-docs/Basic%20Concepts/dRAID%20Howto.html

https://www.reddit.com/r/zfs/comments/lnoh7v/im_trying_to_understand_how_draid_works_but_im/

https://insider-voice.com/a-deep-dive-into-the-new-openzfs-2-1-distributed-raid-topology/

https://docs.google.com/presentation/d/1uo0nBfY84HIhEqGWEx-Tbm8fPbJKtIP3ICo4toOPcJo/edit#slide=id.g9d6b9fd59f_0_27

Group size must divide evenly into draid size (see the sketch after this list)
E.g., 30 drives can only support
  3 drive group
  5 drive group
 10 drive group
 15 drive group
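
A quick way to sanity-check the divide-evenly rule (an illustrative helper, not part of the original scripts;
the drive count and group size are just example inputs):

#!/bin/bash
# check that a chosen dRAID group size (data+parity) divides evenly
# into the number of non-spare drives
drives=${1:-30}
group=${2:-5}
if [ $((drives % group)) -eq 0 ]; then
  echo "OK: $group-drive groups fit evenly into $drives drives ($((drives / group)) groups)"
else
  echo "NO: $drives drives is not evenly divisible by a $group-drive group"
fi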

Only need to specify group size at creation.

Group Size - the number of pieces the data is partitioned into plus the amount of parity
  o The amount of parity determines the redundancy
  o The number of data pieces determines the overhead

dRAID Size - the number of drives used for data
  (Does not include spare drives)
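
To make those definitions concrete, here is a rough usable-capacity estimate for one draid<P>:<D>d:<C>c:<S>s vdev
(an approximation only, assuming equal-size disks and ignoring metadata/padding overhead; the parameters below
match the draid1:8d:12c:2s layout used later, and the 2TB disk size is just an example):

#!/bin/bash
# spares are carved out of the children first; each redundancy group then
# stores D data units per (D + parity) units written
parity=1; data=8; children=12; spares=2
disk_tb=2
usable=$(echo "($children - $spares) * $disk_tb * $data / ($data + $parity)" | bc -l)
printf "approx usable: %.1f TB out of %d TB raw\n" "$usable" "$((children * disk_tb))"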

-----

# make a draidz1 with x2 VDEVs, 8 data disks, 12 children, 2 spares (per vdev)

Defining for 24 disks in pool b4 hotspares (1)

+ zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 zdraidtest \
draid1:8d:12c:2s /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm \
draid1:8d:12c:2s /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy

real 0m3.304s

+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
changed ownership of '/zdraidtest/shrcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/shrcompr zfs 63G 1.0M 63G 1% /zdraidtest/shrcompr

+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
changed ownership of '/zdraidtest/notshrcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/notshrcompr zfs 63G 1.0M 63G 1% /zdraidtest/notshrcompr

+ zfs create -o atime=off -o compression=zstd-3 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr-zstd
changed ownership of '/zdraidtest/notshrcompr-zstd' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/notshrcompr-zstd zfs 63G 1.0M 63G 1% /zdraidtest/notshrcompr-zstd

+ zfs create -o atime=off -o compression=off -o sharesmb=off -o recordsize=1024k zdraidtest/notshrnotcompr
changed ownership of '/zdraidtest/notshrnotcompr' from root to user
Filesystem Type Size Used Avail Use% Mounted on
zdraidtest/notshrnotcompr zfs 63G 1.0M 63G 1% /zdraidtest/notshrnotcompr

pool: zdraidtest
|
||||||
|
state: ONLINE
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest ONLINE 0 0 0
|
||||||
|
draid1:8d:12c:2s-0 ONLINE 0 0 0
|
||||||
|
sdb ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
sdd ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdf ONLINE 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
sdh ONLINE 0 0 0
|
||||||
|
sdi ONLINE 0 0 0
|
||||||
|
sdj ONLINE 0 0 0
|
||||||
|
sdk ONLINE 0 0 0
|
||||||
|
sdl ONLINE 0 0 0
|
||||||
|
sdm ONLINE 0 0 0
|
||||||
|
draid1:8d:12c:2s-1 ONLINE 0 0 0
|
||||||
|
sdn ONLINE 0 0 0
|
||||||
|
sdo ONLINE 0 0 0
|
||||||
|
sdp ONLINE 0 0 0
|
||||||
|
sdq ONLINE 0 0 0
|
||||||
|
sdr ONLINE 0 0 0
|
||||||
|
sds ONLINE 0 0 0
|
||||||
|
sdt ONLINE 0 0 0
|
||||||
|
sdu ONLINE 0 0 0
|
||||||
|
sdv ONLINE 0 0 0
|
||||||
|
sdw ONLINE 0 0 0
|
||||||
|
sdx ONLINE 0 0 0
|
||||||
|
sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid1-0-0 AVAIL
|
||||||
|
draid1-0-1 AVAIL
|
||||||
|
draid1-1-0 AVAIL
|
||||||
|
draid1-1-1 AVAIL
|
||||||
|
errors: No known data errors
|
||||||
|
|
||||||
|
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
|
||||||
|
zdraidtest 73.0G 1.77M 73.0G - - 0% 0% 1.00x ONLINE -
|
||||||
|
|
||||||
|
NAME USED AVAIL REFER MOUNTPOINT
|
||||||
|
zdraidtest 1.15M 62.9G 112K /zdraidtest
|
||||||
|
zdraidtest/notshrcompr 96.0K 62.9G 96.0K /zdraidtest/notshrcompr
|
||||||
|
zdraidtest/notshrcompr-zstd 96.0K 62.9G 96.0K /zdraidtest/notshrcompr-zstd
|
||||||
|
zdraidtest/notshrnotcompr 96.0K 62.9G 96.0K /zdraidtest/notshrnotcompr
|
||||||
|
zdraidtest/shrcompr 96.0K 62.9G 96.0K /zdraidtest/shrcompr
|
||||||
|
Filesystem Type Size Used Avail Use% Mounted on
|
||||||
|
zdraidtest zfs 63G 128K 63G 1% /zdraidtest
|
||||||
|
zdraidtest/shrcompr zfs 63G 1.0M 63G 1% /zdraidtest/shrcompr
|
||||||
|
zdraidtest/notshrcompr zfs 63G 1.0M 63G 1% /zdraidtest/notshrcompr
|
||||||
|
zdraidtest/notshrcompr-zstd zfs 63G 1.0M 63G 1% /zdraidtest/notshrcompr-zstd
|
||||||
|
zdraidtest/notshrnotcompr zfs 63G 1.0M 63G 1% /zdraidtest/notshrnotcompr
|
||||||
|
NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id
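
For reference, the export / re-import-by-id dance that NOTE refers to looks like this (a short sketch using the
pool name from these tests):

zpool export zdraidtest
zpool import -a -d /dev/disk/by-id
zpool status -v zdraidtest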
|
||||||
|
|
||||||
|
-----

Here is a severely degraded pool with (6) drive failures and all vspares in use, still going strong with no data loss
despite being raidz1 -- NOTE if we had some pspares configured it could take even more damage:

pool: zdraidtest
|
||||||
|
state: DEGRADED
|
||||||
|
status: One or more devices could not be used because the label is missing or
|
||||||
|
invalid. Sufficient replicas exist for the pool to continue
|
||||||
|
functioning in a degraded state.
|
||||||
|
action: Replace the device using 'zpool replace'.
|
||||||
|
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
|
||||||
|
scan: scrub repaired 0B in 00:00:49 with 0 errors on Wed Jul 7 21:51:51 2021
|
||||||
|
scan: resilvered (draid1:8d:12c:2s-0) 273M in 00:00:34 with 0 errors on Wed Jul 7 21:38:06 2021
|
||||||
|
scan: resilvered (draid1:8d:12c:2s-1) 779M in 00:00:38 with 0 errors on Wed Jul 7 21:51:02 2021
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest DEGRADED 0 0 0
|
||||||
|
draid1:8d:12c:2s-0 DEGRADED 0 0 0
|
||||||
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdb UNAVAIL 0 0 0
|
||||||
|
draid1-0-0 ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdd UNAVAIL 0 0 0
|
||||||
|
draid1-0-1 ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdf UNAVAIL 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
sdh ONLINE 0 0 0
|
||||||
|
sdi ONLINE 0 0 0
|
||||||
|
sdj ONLINE 0 0 0
|
||||||
|
sdk ONLINE 0 0 0
|
||||||
|
sdl ONLINE 0 0 0
|
||||||
|
sdm ONLINE 0 0 0
|
||||||
|
draid1:8d:12c:2s-1 DEGRADED 0 0 0
|
||||||
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdn UNAVAIL 0 0 0
|
||||||
|
draid1-1-0 ONLINE 0 0 0
|
||||||
|
sdo ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdp UNAVAIL 0 0 0
|
||||||
|
draid1-1-1 ONLINE 0 0 0
|
||||||
|
sdq ONLINE 0 0 0
|
||||||
|
sdr UNAVAIL 0 0 0
|
||||||
|
sds ONLINE 0 0 0
|
||||||
|
sdt ONLINE 0 0 0
|
||||||
|
sdu ONLINE 0 0 0
|
||||||
|
sdv ONLINE 0 0 0
|
||||||
|
sdw ONLINE 0 0 0
|
||||||
|
sdx ONLINE 0 0 0
|
||||||
|
sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid1-0-0 INUSE currently in use
|
||||||
|
draid1-0-1 INUSE currently in use
|
||||||
|
draid1-1-0 INUSE currently in use
|
||||||
|
draid1-1-1 INUSE currently in use
|
||||||
|
errors: No known data errors
|
||||||
|
|
||||||
|
-----

# source draid-pooldisks-assoc.sh 24
Defining for 24 disks in pool b4 hotspares (1)
Dumping shortdisk == longdisk assoc array to /tmp/draid-pooldisks-assoc.log

# zpool add $zp spare ${hotspares[@]}

sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid1-0-0 INUSE currently in use
|
||||||
|
draid1-0-1 INUSE currently in use
|
||||||
|
draid1-1-0 INUSE currently in use
|
||||||
|
draid1-1-1 INUSE currently in use
|
||||||
|
sdz AVAIL
|
||||||
|
errors: No known data errors

We have added a hotspare on the fly, but the ZED daemon hasn't done anything with the already-unavailable disks;
we still need to do a manual replace.

# zpool replace $zp sdf sdz

pool: zdraidtest
|
||||||
|
state: DEGRADED
|
||||||
|
status: One or more devices could not be used because the label is missing or
|
||||||
|
invalid. Sufficient replicas exist for the pool to continue
|
||||||
|
functioning in a degraded state.
|
||||||
|
action: Replace the device using 'zpool replace'.
|
||||||
|
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
|
||||||
|
scan: resilvered 1.57G in 00:00:42 with 0 errors on Wed Jul 7 21:59:23 2021
|
||||||
|
|
||||||
|
draid1:8d:12c:2s-0 DEGRADED 0 0 0
|
||||||
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdb UNAVAIL 0 0 0
|
||||||
|
draid1-0-0 ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdd UNAVAIL 0 0 0
|
||||||
|
draid1-0-1 ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
spare-4 DEGRADED 0 0 0
|
||||||
|
sdf UNAVAIL 0 0 0
|
||||||
|
sdz ONLINE 0 0 0
|
||||||
|
sdg ONLINE 0 0 0

We still have a quandary, however: the pool will still show as DEGRADED because a hotspare was used.
To clear this condition we can reboot, bring sdf back online, and detach sdz to roll it back to the hotspares,
or replace sdf with a permanent replacement:

# zpool replace $zp sdf sdaa

pool: zdraidtest
|
||||||
|
state: DEGRADED
|
||||||
|
status: One or more devices is currently being resilvered. The pool will
|
||||||
|
continue to function, possibly in a degraded state.
|
||||||
|
action: Wait for the resilver to complete.
|
||||||
|
scan: resilver in progress since Wed Jul 7 22:03:14 2021
|
||||||
|
23.1G scanned at 987M/s, 14.1G issued at 604M/s, 23.1G total
|
||||||
|
806M resilvered, 61.15% done, 00:00:15 to go
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest DEGRADED 0 0 0
|
||||||
|
draid1:8d:12c:2s-0 DEGRADED 0 0 0
|
||||||
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdb UNAVAIL 0 0 0
|
||||||
|
draid1-0-0 ONLINE 0 0 0 (resilvering)
|
||||||
|
sdc ONLINE 0 0 0 (resilvering)
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdd UNAVAIL 0 0 0
|
||||||
|
draid1-0-1 ONLINE 0 0 0 (resilvering)
|
||||||
|
sde ONLINE 0 0 0 (resilvering)
|
||||||
|
spare-4 DEGRADED 0 0 0
|
||||||
|
replacing-0 DEGRADED 0 0 0
|
||||||
|
sdf UNAVAIL 0 0 0
|
||||||
|
sdaa ONLINE 0 0 0 (resilvering)
|
||||||
|
sdz ONLINE 0 0 0 (resilvering)
|
||||||
|
sdg ONLINE 0 0 0 (resilvering)
|
||||||
|
|
||||||
|
scan: resilvered 1.89G in 00:00:52 with 0 errors on Wed Jul 7 22:04:06 2021
|
||||||
|
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdaa ONLINE 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
|
||||||
|
spares
|
||||||
|
draid1-0-0 INUSE currently in use
|
||||||
|
draid1-0-1 INUSE currently in use
|
||||||
|
draid1-1-0 INUSE currently in use
|
||||||
|
draid1-1-1 INUSE currently in use
|
||||||
|
sdz AVAIL
|
||||||
|
errors: No known data errors
|
||||||
|
|
||||||
|
# zpool replace $zp sdr sdab
|
||||||
|
|
||||||
|
scan: resilvered 1.52G in 00:01:05 with 0 errors on Wed Jul 7 22:07:17 2021
|
||||||
|
|
||||||
|
-----

Another iteration - raidz2

# make a draidz2 with x2 VDEVs, 8 data disks, 12 children, 2 spares (per vdev)

pool: zdraidtest
|
||||||
|
state: ONLINE
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest ONLINE 0 0 0
|
||||||
|
draid2:8d:12c:2s-0 ONLINE 0 0 0
|
||||||
|
sdb ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
sdd ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdf ONLINE 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
sdh ONLINE 0 0 0
|
||||||
|
sdi ONLINE 0 0 0
|
||||||
|
sdj ONLINE 0 0 0
|
||||||
|
sdk ONLINE 0 0 0
|
||||||
|
sdl ONLINE 0 0 0
|
||||||
|
sdm ONLINE 0 0 0
|
||||||
|
draid2:8d:12c:2s-1 ONLINE 0 0 0
|
||||||
|
sdn ONLINE 0 0 0
|
||||||
|
sdo ONLINE 0 0 0
|
||||||
|
sdp ONLINE 0 0 0
|
||||||
|
sdq ONLINE 0 0 0
|
||||||
|
sdr ONLINE 0 0 0
|
||||||
|
sds ONLINE 0 0 0
|
||||||
|
sdt ONLINE 0 0 0
|
||||||
|
sdu ONLINE 0 0 0
|
||||||
|
sdv ONLINE 0 0 0
|
||||||
|
sdw ONLINE 0 0 0
|
||||||
|
sdx ONLINE 0 0 0
|
||||||
|
sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid2-0-0 AVAIL
|
||||||
|
draid2-0-1 AVAIL
|
||||||
|
draid2-1-0 AVAIL
|
||||||
|
draid2-1-1 AVAIL
|
||||||
|
errors: No known data errors
|
||||||
|
|
||||||
|
-----

Here is the above pool in a severely degraded condition; one more drive failure will kill it, but we have
sustained (8) drive failures despite it being a raidz2 - and still no data loss:

pool: zdraidtest
|
||||||
|
state: DEGRADED
|
||||||
|
status: One or more devices could not be used because the label is missing or
|
||||||
|
invalid. Sufficient replicas exist for the pool to continue
|
||||||
|
functioning in a degraded state.
|
||||||
|
action: Replace the device using 'zpool replace'.
|
||||||
|
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
|
||||||
|
scan: scrub repaired 0B in 00:00:36 with 0 errors on Wed Jul 7 22:30:03 2021
|
||||||
|
scan: resilvered (draid2:8d:12c:2s-0) 319M in 00:00:10 with 0 errors on Wed Jul 7 22:23:40 2021
|
||||||
|
scan: resilvered (draid2:8d:12c:2s-1) 598M in 00:00:30 with 0 errors on Wed Jul 7 22:29:27 2021
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest DEGRADED 0 0 0
|
||||||
|
draid2:8d:12c:2s-0 DEGRADED 0 0 0
|
||||||
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdb UNAVAIL 0 0 0
|
||||||
|
draid2-0-0 ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdd UNAVAIL 0 0 0
|
||||||
|
draid2-0-1 ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdf UNAVAIL 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
sdh UNAVAIL 0 0 0
|
||||||
|
sdi ONLINE 0 0 0
|
||||||
|
sdj ONLINE 0 0 0
|
||||||
|
sdk ONLINE 0 0 0
|
||||||
|
sdl ONLINE 0 0 0
|
||||||
|
sdm ONLINE 0 0 0
|
||||||
|
draid2:8d:12c:2s-1 DEGRADED 0 0 0
|
||||||
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdn UNAVAIL 0 0 0
|
||||||
|
draid2-1-0 ONLINE 0 0 0
|
||||||
|
sdo ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdp UNAVAIL 0 0 0
|
||||||
|
draid2-1-1 ONLINE 0 0 0
|
||||||
|
sdq ONLINE 0 0 0
|
||||||
|
sdr UNAVAIL 0 0 0
|
||||||
|
sds ONLINE 0 0 0
|
||||||
|
sdt UNAVAIL 0 0 0
|
||||||
|
sdu ONLINE 0 0 0
|
||||||
|
sdv ONLINE 0 0 0
|
||||||
|
sdw ONLINE 0 0 0
|
||||||
|
sdx ONLINE 0 0 0
|
||||||
|
sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid2-0-0 INUSE currently in use
|
||||||
|
draid2-0-1 INUSE currently in use
|
||||||
|
draid2-1-0 INUSE currently in use
|
||||||
|
draid2-1-1 INUSE currently in use
|
||||||
|
errors: No known data errors
|
||||||
|
|
||||||
|
|
||||||
|
-----

NOTE if you simulate/take a drive offline, you can't just "echo online" to it later; that won't bring it back up!
Try rescan-scsi-bus.sh or reboot (see the sketch below).
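
One thing that may also work without a full reboot (a sketch based on the generic Linux SCSI sysfs interface;
not verified in the original notes, and the device name is just an example) - put the device back into the
"running" state and then tell ZFS to re-open it:

echo running > /sys/block/sdd/device/state
zpool online zdraidtest sdd
zpool status -v zdraidtest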

FIX: if a drive is offline, replace it temporarily with a builtin spare:
# zpool replace zdraidtest sdd draid2-0-0

# zps
|
||||||
|
pool: zdraidtest
|
||||||
|
state: DEGRADED
|
||||||
|
status: One or more devices could not be used because the label is missing or
|
||||||
|
invalid. Sufficient replicas exist for the pool to continue
|
||||||
|
functioning in a degraded state.
|
||||||
|
action: Replace the device using 'zpool replace'.
|
||||||
|
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
|
||||||
|
scan: resilvered 0B in 00:00:00 with 0 errors on Sat Jul 3 14:43:51 2021
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest DEGRADED 0 0 0
|
||||||
|
draid2:5d:24c:2s-0 DEGRADED 0 0 0
|
||||||
|
sdb ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
|
sdd UNAVAIL 0 0 0
|
||||||
|
draid2-0-0 ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdf ONLINE 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
sdh ONLINE 0 0 0
|
||||||
|
sdi ONLINE 0 0 0
|
||||||
|
sdj ONLINE 0 0 0
|
||||||
|
sdk ONLINE 0 0 0
|
||||||
|
sdl ONLINE 0 0 0
|
||||||
|
sdm ONLINE 0 0 0
|
||||||
|
sdn ONLINE 0 0 0
|
||||||
|
sdo ONLINE 0 0 0
|
||||||
|
sdp ONLINE 0 0 0
|
||||||
|
sdq ONLINE 0 0 0
|
||||||
|
sdr ONLINE 0 0 0
|
||||||
|
sds ONLINE 0 0 0
|
||||||
|
sdt ONLINE 0 0 0
|
||||||
|
sdu ONLINE 0 0 0
|
||||||
|
sdv ONLINE 0 0 0
|
||||||
|
sdw ONLINE 0 0 0
|
||||||
|
sdx ONLINE 0 0 0
|
||||||
|
sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid2-0-0 INUSE currently in use
|
||||||
|
draid2-0-1 AVAIL
|
||||||
|
errors: No known data errors

HOWTO fix the above situation with the same disk (you rebooted and it came back online) and decouple the in-use spare:

pool: zdraidtest
|
||||||
|
state: ONLINE
|
||||||
|
scan: resilvered 4.42G in 00:01:14 with 0 errors on Wed Jul 7 22:12:23 2021
|
||||||
|
config:
|
||||||
|
NAME STATE READ WRITE CKSUM
|
||||||
|
zdraidtest ONLINE 0 0 0
|
||||||
|
draid1:8d:12c:2s-0 ONLINE 0 0 0
|
||||||
|
spare-0 ONLINE 0 0 0
|
||||||
|
sdb ONLINE 0 0 0
|
||||||
|
draid1-0-0 ONLINE 0 0 0
|
||||||
|
sdc ONLINE 0 0 0
|
||||||
|
spare-2 ONLINE 0 0 0
|
||||||
|
sdd ONLINE 0 0 0
|
||||||
|
draid1-0-1 ONLINE 0 0 0
|
||||||
|
sde ONLINE 0 0 0
|
||||||
|
sdaa ONLINE 0 0 0
|
||||||
|
sdg ONLINE 0 0 0
|
||||||
|
sdh ONLINE 0 0 0
|
||||||
|
sdi ONLINE 0 0 0
|
||||||
|
sdj ONLINE 0 0 0
|
||||||
|
sdk ONLINE 0 0 0
|
||||||
|
sdl ONLINE 0 0 0
|
||||||
|
sdm ONLINE 0 0 0
|
||||||
|
draid1:8d:12c:2s-1 ONLINE 0 0 0
|
||||||
|
spare-0 ONLINE 0 0 0
|
||||||
|
sdn ONLINE 0 0 0
|
||||||
|
draid1-1-0 ONLINE 0 0 0
|
||||||
|
sdo ONLINE 0 0 0
|
||||||
|
spare-2 ONLINE 0 0 0
|
||||||
|
sdp ONLINE 0 0 0
|
||||||
|
draid1-1-1 ONLINE 0 0 0
|
||||||
|
sdq ONLINE 0 0 0
|
||||||
|
sdab ONLINE 0 0 0
|
||||||
|
sds ONLINE 0 0 0
|
||||||
|
sdt ONLINE 0 0 0
|
||||||
|
sdu ONLINE 0 0 0
|
||||||
|
sdv ONLINE 0 0 0
|
||||||
|
sdw ONLINE 0 0 0
|
||||||
|
sdx ONLINE 0 0 0
|
||||||
|
sdy ONLINE 0 0 0
|
||||||
|
spares
|
||||||
|
draid1-0-0 INUSE currently in use
|
||||||
|
draid1-0-1 INUSE currently in use
|
||||||
|
draid1-1-0 INUSE currently in use
|
||||||
|
draid1-1-1 INUSE currently in use
|
||||||
|
sdz AVAIL
|
||||||
|
errors: No known data errors

The drives are all back, but the vspares are all in use.

FIX: # zpool detach $zp draid1-0-0
     # zpool detach $zp draid1-0-1
     # zpool detach $zp draid1-1-0
     # zpool detach $zp draid1-1-1

spares
|
||||||
|
draid1-0-0 AVAIL
|
||||||
|
draid1-0-1 AVAIL
|
||||||
|
draid1-1-0 AVAIL
|
||||||
|
draid1-1-1 AVAIL
|
||||||
|
sdz AVAIL
|
||||||
|
errors: No known data errors

NOTE if you get the following error after rebooting and bringing dead drives back, it should work OK after a scrub:

# zpool detach $zp draid2-0-0
cannot detach draid2-0-0: no valid replicas
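
A minimal sequence for that case (sketch only; 'zpool wait' needs a reasonably recent OpenZFS - otherwise just
re-check 'zpool status' until the scrub finishes):

zpool scrub $zp
zpool wait -t scrub $zp
zpool detach $zp draid2-0-0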
|
@ -3,6 +3,7 @@
|
|||||||
echo "$0 - 2021 Dave Bechtel - make a ZFS DRAID pool"
|
echo "$0 - 2021 Dave Bechtel - make a ZFS DRAID pool"
|
||||||
echo "- pass arg1='reset' to destroy test pool"
|
echo "- pass arg1='reset' to destroy test pool"
|
||||||
echo "- pass arg1='fail' and arg2=dev2fail to simulate failure"
|
echo "- pass arg1='fail' and arg2=dev2fail to simulate failure"
|
||||||
|
echo "Reboot to clear simulated device failures before issuing 'reset'"
|
||||||
|
|
||||||
# Requires at least zfs 2.1.0
|
# Requires at least zfs 2.1.0
|
||||||
DD=/dev/disk
|
DD=/dev/disk
|
||||||
@ -12,7 +13,7 @@ DBI=/dev/disk/by-id
|
|||||||
td=24 # 26 - root and 1 spare
|
td=24 # 26 - root and 1 spare
|
||||||
|
|
||||||
# raidz level (usually 2)
|
# raidz level (usually 2)
|
||||||
rzl=2
|
rzl=1
|
||||||
|
|
||||||
# spares - per vdev
|
# spares - per vdev
|
||||||
spr=1
|
spr=1
|
||||||
@ -30,14 +31,20 @@ function zps () {
|
|||||||
#pooldisks2=$(echo /dev/sd{n..y})
|
#pooldisks2=$(echo /dev/sd{n..y})
|
||||||
#pooldisks=$pooldisks1' '$pooldisks2 # need entire set for reset
|
#pooldisks=$pooldisks1' '$pooldisks2 # need entire set for reset
|
||||||
|
|
||||||
|
# 24 disks = groups of 8
|
||||||
pooldisks1=$(echo /dev/sd{b..i}) # bcdefghi
|
pooldisks1=$(echo /dev/sd{b..i}) # bcdefghi
|
||||||
pooldisks2=$(echo /dev/sd{j..q}) # jklmnopq
|
pooldisks2=$(echo /dev/sd{j..q}) # jklmnopq
|
||||||
pooldisks3=$(echo /dev/sd{r..y}) # rstuvwxy # z is phys spare
|
pooldisks3=$(echo /dev/sd{r..y}) # rstuvwxy # z is reserved phys spare
|
||||||
pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3 # need entire set for reset
|
pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3 # need entire set for reset
|
||||||
#pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3' '$pooldisks4' '$pooldisks5' '$pooldisks6 # need entire set for reset
|
#pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3' '$pooldisks4' '$pooldisks5' '$pooldisks6 # need entire set for reset
|
||||||
# sdb sdc sdd sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sds sdt sdu sdv sdw sdx sdy
|
|
||||||
|
# 24, groups of 6 drives = 4 vdevs
|
||||||
# 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6
|
# 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6
|
||||||
# D D D Z2 Z2 S
|
# sdb sdc sdd sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sds sdt sdu sdv sdw sdx sdy
|
||||||
|
# D D D Z2 Z2 S = draid2:3d:6'c':1's'
|
||||||
|
# D D D D Z1 S = draid1:4d:6'c':1's'
|
||||||
|
# D D D D D Z1 (no vspare) = draid1:5d:6'c':0's'
|
||||||
|
|
||||||
|
|
||||||
# extending to 32 disks
|
# extending to 32 disks
|
||||||
#pooldisks2=$(echo /dev/sda{a..h})
|
#pooldisks2=$(echo /dev/sda{a..h})
|
||||||
@ -49,26 +56,46 @@ function failexit () {
|
|||||||
exit $1
|
exit $1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# cre8 drive translation table - NOTE 32 disk config gets overridden vv
|
||||||
|
source ~/bin/boojum/draid-pooldisks-assoc.sh $td
|
||||||
|
|
||||||
|
# Flame the pool and start over from 0
|
||||||
if [ "$1" = "reset" ]; then
|
if [ "$1" = "reset" ]; then
|
||||||
zpool destroy $zp
|
|
||||||
|
# no need to worry if its not imported / already destroyed
|
||||||
|
if [ $(zpool list |grep -c $zp) -gt 0 ]; then
|
||||||
|
zpool destroy $zp || failexit 999 "Failed to destroy $zp"
|
||||||
|
fi
|
||||||
|
|
||||||
for d in $pooldisks; do
|
for d in $pooldisks; do
|
||||||
echo -e -n "o Clearing label for disk $d \r"
|
echo -e -n "o Clearing label for disk $d \r"
|
||||||
zpool labelclear -f "$d"1
|
zpool labelclear -f "$d"1
|
||||||
done
|
done
|
||||||
echo ''
|
echo ''
|
||||||
|
# also reset hotspares
|
||||||
|
# echo ${hotspares[@]}
|
||||||
|
# zpool status -v |egrep 'sdz|sday|sdaz|sdby|sdbz|sdcy|sdcz'
|
||||||
|
for d in ${hotspares[@]}; do
|
||||||
|
#echo $d # DEBUG
|
||||||
|
echo -e -n "o Clearing label for Hotspare disk $d \r"
|
||||||
|
zpool labelclear -f "/dev/$d"1
|
||||||
|
done
|
||||||
|
echo ''
|
||||||
|
|
||||||
zpool status -v
|
zpool status -v
|
||||||
|
|
||||||
exit; # early
|
exit; # early
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Simulate a drive failure; if zed daemon is running, a spare should auto kick in
|
||||||
if [ "$1" = "fail" ]; then
|
if [ "$1" = "fail" ]; then
|
||||||
# NOTE we do NO error checking here, so if you fail your ROOT DISK, THAT'S ON YOU!
|
# NOTE we do NO error checking here, so if you fail your ROOT DISK, THAT'S ON YOU!
|
||||||
|
|
||||||
echo "$(date) - Simulating disk failure for $2 $(ls -lR $DD |grep $2)" |tee |logger
|
echo "$(date) - $0 - Simulating disk failure for $2 $(ls -lR $DD |grep $2)" |tee |logger
|
||||||
echo offline > /sys/block/$2/device/state
|
echo offline > /sys/block/$2/device/state
|
||||||
cat /sys/block/$2/device/state |tee |logger
|
cat /sys/block/$2/device/state |tee |logger
|
||||||
|
|
||||||
time dd if=/dev/urandom of=/$zp/^^tmpfileDELME bs=1M count=1; sync
|
time dd if=/dev/urandom of=/$zp/^^tmpfileDELME bs=1M count=$td; sync
|
||||||
# force a write; if not work, try scrub
|
# force a write; if not work, try scrub
|
||||||
|
|
||||||
zps
|
zps
|
||||||
@ -79,27 +106,29 @@ fi
|
|||||||
# zpool create <pool> draid[<parity>][:<data>d][:<children>c][:<spares>s] <vdevs...>
|
# zpool create <pool> draid[<parity>][:<data>d][:<children>c][:<spares>s] <vdevs...>
|
||||||
# ex: draid2:4d:1s:11c
|
# ex: draid2:4d:1s:11c
|
||||||
|
|
||||||
|
# data - The number of data devices per redundancy group
|
||||||
|
# In general a smaller value of D will increase IOPS, improve the compression
|
||||||
|
# ratio, and speed up resilvering at the expense of total usable capacity.
|
||||||
|
|
||||||
# SLOW writing to zstd-3
|
# SLOW writing to zstd-3
|
||||||
# draid$rzl:8d:12'c':$spr's' $pooldisks1 \
|
# draid$rzl:8d:12'c':$spr's' $pooldisks1 \
|
||||||
# draid$rzl:8d:12'c':$spr's' $pooldisks2 \
|
# draid$rzl:8d:12'c':$spr's' $pooldisks2 \
|
||||||
|
|
||||||
iteration=3
|
# handy REF: https://arstechnica.com/gadgets/2021/07/a-deep-dive-into-openzfs-2-1s-new-distributed-raid-topology/
|
||||||
|
|
||||||
|
# groups of 8 drives = 3 vdevs (z is reserved for physical hotspare)
|
||||||
|
# sdb sdc sdd sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sds sdt sdu sdv sdw sdx sdy
|
||||||
|
# 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
|
||||||
|
# D D D D D Z2 Z2 S = draid2:5d:8'c':1's'
|
||||||
|
# D D D D D D Z1 S = draid1:6d:8'c':1's'
|
||||||
|
# D D D D D D D Z1 (no vspare) = draid1:7d:8'c':0's'
|
||||||
|
|
||||||
|
# TODO EDITME
|
||||||
|
iteration=1
|
||||||
if [ "$iteration" = "1" ]; then
|
if [ "$iteration" = "1" ]; then
|
||||||
# compression=zstd-3
|
# compression=zstd-3
|
||||||
# -o ashift=12
|
# -o ashift=12
|
||||||
( set -x
|
# NOTE will NOT do 7d
|
||||||
time zpool create -o autoexpand=on -o autoreplace=on -O atime=off -O compression=lz4 \
|
|
||||||
$zp \
|
|
||||||
draid$rzl:5d:8'c':$spr's' $pooldisks1 \
|
|
||||||
draid$rzl:5d:8'c':$spr's' $pooldisks2 \
|
|
||||||
draid$rzl:5d:8'c':$spr's' $pooldisks3 \
|
|
||||||
|| failexit 101 "Failed to create DRAID"
|
|
||||||
)
|
|
||||||
elif [ "$iteration" = "2" ]; then
|
|
||||||
# raidz level (usually 2)
|
|
||||||
rzl=1
|
|
||||||
# spares - per vdev
|
|
||||||
spr=1
|
|
||||||
( set -x
|
( set -x
|
||||||
time zpool create -o autoexpand=on -o autoreplace=on -O atime=off -O compression=lz4 \
|
time zpool create -o autoexpand=on -o autoreplace=on -O atime=off -O compression=lz4 \
|
||||||
$zp \
|
$zp \
|
||||||
@ -108,6 +137,19 @@ time zpool create -o autoexpand=on -o autoreplace=on -O atime=off -O compression
|
|||||||
draid$rzl:6d:8'c':$spr's' $pooldisks3 \
|
draid$rzl:6d:8'c':$spr's' $pooldisks3 \
|
||||||
|| failexit 101 "Failed to create DRAID"
|
|| failexit 101 "Failed to create DRAID"
|
||||||
)
|
)
|
||||||
|
elif [ "$iteration" = "2" ]; then
|
||||||
|
# raidz level (usually 2)
|
||||||
|
rzl=2
|
||||||
|
# spares - per vdev
|
||||||
|
spr=1
|
||||||
|
( set -x
|
||||||
|
time zpool create -o autoexpand=on -o autoreplace=on -O atime=off -O compression=lz4 \
|
||||||
|
$zp \
|
||||||
|
draid$rzl:5d:8'c':$spr's' $pooldisks1 \
|
||||||
|
draid$rzl:5d:8'c':$spr's' $pooldisks2 \
|
||||||
|
draid$rzl:5d:8'c':$spr's' $pooldisks3 \
|
||||||
|
|| failexit 101 "Failed to create DRAID"
|
||||||
|
)
|
||||||
elif [ "$iteration" = "3" ]; then
|
elif [ "$iteration" = "3" ]; then
|
||||||
# This appears to be a "useless" config, you gain nothing apparent by going down to 5D
|
# This appears to be a "useless" config, you gain nothing apparent by going down to 5D
|
||||||
# raidz level (usually 2)
|
# raidz level (usually 2)
|
||||||
@ -123,23 +165,29 @@ time zpool create -o autoexpand=on -o autoreplace=on -O atime=off -O compression
|
|||||||
|| failexit 101 "Failed to create DRAID"
|
|| failexit 101 "Failed to create DRAID"
|
||||||
)
|
)
|
||||||
else
|
else
|
||||||
# One Big Mother
|
# One Big Mother, 1 vspare, 0x pspares
|
||||||
# -o ashift=12
|
# -o ashift=12
|
||||||
# raidz level (usually 2)
|
# raidz level (usually 2)
|
||||||
rzl=2
|
rzl=1
|
||||||
# spares
|
# spares
|
||||||
spr=2
|
spr=1
|
||||||
( set -x
|
( set -x
|
||||||
time zpool create -o autoexpand=on -O atime=off -O compression=lz4 \
|
time zpool create -o autoexpand=on -O atime=off -O compression=lz4 \
|
||||||
$zp \
|
$zp \
|
||||||
draid$rzl:6d:$td'c':$spr's' $pooldisks \
|
draid$rzl:8d:$td'c':$spr's' $pooldisks \
|
||||||
|| failexit 101 "Failed to create DRAID"
|
|| failexit 101 "Failed to create DRAID"
|
||||||
)
|
)
|
||||||
|
#rc=$?
|
||||||
|
#[ $rc -gt 0 ] && exit $rc
|
||||||
fi
|
fi
|
||||||
|
|
||||||
rc=$?
|
rc=$?
|
||||||
[ $rc -gt 0 ] && exit $rc
|
[ $rc -gt 0 ] && exit $rc
|
||||||
# ^ Need this check because of subshell, will not exit early otherwise
|
# ^ Need this check because of subshell, will not exit early otherwise
|
||||||
|
|
||||||
|
#[ $(zpool list |grep -c "no pools") -eq 0 ] && \
|
||||||
|
# zpool add $zp spare ${hotspares[@]}
|
||||||
|
|
||||||
# The below will not work: gets error
|
# The below will not work: gets error
|
||||||
# "requested number of dRAID data disks per group 6 is too high, at most 3 disks are available for data"
|
# "requested number of dRAID data disks per group 6 is too high, at most 3 disks are available for data"
|
||||||
#( set -x
|
#( set -x
|
||||||
@ -156,6 +204,7 @@ rc=$?
|
|||||||
# going with lz4 so not limited by CPU for compression
|
# going with lz4 so not limited by CPU for compression
|
||||||
zfs-newds.sh 11 $zp shrcompr
|
zfs-newds.sh 11 $zp shrcompr
|
||||||
zfs-newds.sh 10 $zp notshrcompr
|
zfs-newds.sh 10 $zp notshrcompr
|
||||||
|
zfs-newds-zstd.sh 10 $zp notshrcompr-zstd
|
||||||
zfs-newds.sh 00 $zp notshrnotcompr
|
zfs-newds.sh 00 $zp notshrnotcompr
|
||||||
|
|
||||||
zps
|
zps
|
||||||
@ -164,9 +213,6 @@ zfs list
|
|||||||
|
|
||||||
df -hT |egrep 'ilesystem|zfs'
|
df -hT |egrep 'ilesystem|zfs'
|
||||||
|
|
||||||
# cre8 drive translation table
|
|
||||||
draid-pooldisks-assoc.sh &
|
|
||||||
|
|
||||||
echo "NOTE - best practice is to export the pool and # zpool import -a -d $DBI"
|
echo "NOTE - best practice is to export the pool and # zpool import -a -d $DBI"
|
||||||
|
|
||||||
date
|
date
|
||||||
@ -202,17 +248,16 @@ dRAID Size - the number of drives used for data
|
|||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
# make a draid with raidz2, x4 VDEVs, 3 data disks, 6 children, 1 spare
|
# make a OneBigMother 24-disk draid with raidz1, 1 VDEV, 8 data disks, 24 children, 0 vspare + 5 pspares
|
||||||
|
|
||||||
zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=lz4 \
|
+ zpool create -o autoexpand=on -O atime=off -O compression=lz4 zdraidtest \
|
||||||
zdraidtest \
|
draid1:8d:24c:0s \
|
||||||
draid2:3d:6c:1s /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg \
|
/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg /dev/sdh /dev/sdi \
|
||||||
draid2:3d:6c:1s /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm \
|
/dev/sdj /dev/sdk /dev/sdl /dev/sdm /dev/sdn /dev/sdo /dev/sdp /dev/sdq \
|
||||||
draid2:3d:6c:1s /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds \
|
/dev/sdr /dev/sds /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy
|
||||||
draid2:3d:6c:1s /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy
|
real 0m3.210s
|
||||||
real 0m3.515s
|
|
||||||
user 0m0.039s
|
Dumping shortdisk == longdisk assoc array to /tmp/draid-pooldisks-assoc.log
|
||||||
sys 0m0.136s
|
|
||||||
|
|
||||||
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
|
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
|
||||||
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
|
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
|
||||||
@ -220,41 +265,44 @@ filesystem successfully created, but not shared
|
|||||||
changed ownership of '/zdraidtest/shrcompr' from root to user
|
changed ownership of '/zdraidtest/shrcompr' from root to user
|
||||||
|
|
||||||
Filesystem Type Size Used Avail Use% Mounted on
|
Filesystem Type Size Used Avail Use% Mounted on
|
||||||
zdraidtest/shrcompr zfs 21T 1.0M 21T 1% /zdraidtest/shrcompr
|
zdraidtest/shrcompr zfs 76G 1.0M 76G 1% /zdraidtest/shrcompr
|
||||||
|
|
||||||
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
|
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
|
||||||
changed ownership of '/zdraidtest/notshrcompr' from root to user
|
changed ownership of '/zdraidtest/notshrcompr' from root to user
|
||||||
|
|
||||||
Filesystem Type Size Used Avail Use% Mounted on
|
Filesystem Type Size Used Avail Use% Mounted on
|
||||||
zdraidtest/notshrcompr zfs 21T 1.0M 21T 1% /zdraidtest/notshrcompr
|
zdraidtest/notshrcompr zfs 76G 1.0M 76G 1% /zdraidtest/notshrcompr
|
||||||
|
|
||||||
|
+ zfs create -o atime=off -o compression=off -o sharesmb=off -o recordsize=1024k zdraidtest/notshrnotcompr
|
||||||
|
changed ownership of '/zdraidtest/notshrnotcompr' from root to user
|
||||||
|
|
||||||
|
Filesystem Type Size Used Avail Use% Mounted on
|
||||||
|
zdraidtest/notshrnotcompr zfs 76G 1.0M 76G 1% /zdraidtest/notshrnotcompr
|
||||||
|
|
||||||
pool: zdraidtest
|
pool: zdraidtest
|
||||||
state: ONLINE
|
state: ONLINE
|
||||||
config:
|
config:
|
||||||
NAME STATE READ WRITE CKSUM
|
NAME STATE READ WRITE CKSUM
|
||||||
zdraidtest ONLINE 0 0 0
|
zdraidtest ONLINE 0 0 0
|
||||||
draid2:3d:6c:1s-0 ONLINE 0 0 0
|
draid1:8d:24c:0s-0 ONLINE 0 0 0
|
||||||
sdb ONLINE 0 0 0
|
sdb ONLINE 0 0 0
|
||||||
sdc ONLINE 0 0 0
|
sdc ONLINE 0 0 0
|
||||||
sdd ONLINE 0 0 0
|
sdd ONLINE 0 0 0
|
||||||
sde ONLINE 0 0 0
|
sde ONLINE 0 0 0
|
||||||
sdf ONLINE 0 0 0
|
sdf ONLINE 0 0 0
|
||||||
sdg ONLINE 0 0 0
|
sdg ONLINE 0 0 0
|
||||||
draid2:3d:6c:1s-1 ONLINE 0 0 0
|
|
||||||
sdh ONLINE 0 0 0
|
sdh ONLINE 0 0 0
|
||||||
sdi ONLINE 0 0 0
|
sdi ONLINE 0 0 0
|
||||||
sdj ONLINE 0 0 0
|
sdj ONLINE 0 0 0
|
||||||
sdk ONLINE 0 0 0
|
sdk ONLINE 0 0 0
|
||||||
sdl ONLINE 0 0 0
|
sdl ONLINE 0 0 0
|
||||||
sdm ONLINE 0 0 0
|
sdm ONLINE 0 0 0
|
||||||
draid2:3d:6c:1s-2 ONLINE 0 0 0
|
|
||||||
sdn ONLINE 0 0 0
|
sdn ONLINE 0 0 0
|
||||||
sdo ONLINE 0 0 0
|
sdo ONLINE 0 0 0
|
||||||
sdp ONLINE 0 0 0
|
sdp ONLINE 0 0 0
|
||||||
sdq ONLINE 0 0 0
|
sdq ONLINE 0 0 0
|
||||||
sdr ONLINE 0 0 0
|
sdr ONLINE 0 0 0
|
||||||
sds ONLINE 0 0 0
|
sds ONLINE 0 0 0
|
||||||
draid2:3d:6c:1s-3 ONLINE 0 0 0
|
|
||||||
sdt ONLINE 0 0 0
|
sdt ONLINE 0 0 0
|
||||||
sdu ONLINE 0 0 0
|
sdu ONLINE 0 0 0
|
||||||
sdv ONLINE 0 0 0
|
sdv ONLINE 0 0 0
|
||||||
@ -262,115 +310,36 @@ config:
|
|||||||
sdx ONLINE 0 0 0
|
sdx ONLINE 0 0 0
|
||||||
sdy ONLINE 0 0 0
|
sdy ONLINE 0 0 0
|
||||||
spares
|
spares
|
||||||
draid2-0-0 AVAIL
|
sdz AVAIL
|
||||||
draid2-1-0 AVAIL
|
sday AVAIL
|
||||||
draid2-2-0 AVAIL
|
sdaz AVAIL
|
||||||
draid2-3-0 AVAIL
|
sdby AVAIL
|
||||||
|
sdbz AVAIL
|
||||||
errors: No known data errors
|
errors: No known data errors
|
||||||
|
|
||||||
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
|
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
|
||||||
zdraidtest 36.4T 5.61M 36.4T - - 0% 0% 1.00x ONLINE -
|
zdraidtest 88.0G 1.52M 88.0G - - 0% 0% 1.00x ONLINE -
|
||||||
|
|
||||||
NAME USED AVAIL REFER MOUNTPOINT
|
NAME USED AVAIL REFER MOUNTPOINT
|
||||||
zdraidtest 2.24M 21.0T 278K /zdraidtest
|
zdraidtest 948K 75.8G 96.0K /zdraidtest
|
||||||
zdraidtest/notshrcompr 278K 21.0T 278K /zdraidtest/notshrcompr
|
zdraidtest/notshrcompr 96.0K 75.8G 96.0K /zdraidtest/notshrcompr
|
||||||
zdraidtest/shrcompr 278K 21.0T 278K /zdraidtest/shrcompr
|
zdraidtest/notshrnotcompr 96.0K 75.8G 96.0K /zdraidtest/notshrnotcompr
|
||||||
|
zdraidtest/shrcompr 96.0K 75.8G 96.0K /zdraidtest/shrcompr
|
||||||
Filesystem Type Size Used Avail Use% Mounted on
|
Filesystem Type Size Used Avail Use% Mounted on
|
||||||
zdraidtest zfs 21T 384K 21T 1% /zdraidtest
|
zdraidtest zfs 76G 128K 76G 1% /zdraidtest
|
||||||
zdraidtest/shrcompr zfs 21T 1.0M 21T 1% /zdraidtest/shrcompr
|
zdraidtest/shrcompr zfs 76G 1.0M 76G 1% /zdraidtest/shrcompr
|
||||||
zdraidtest/notshrcompr zfs 21T 1.0M 21T 1% /zdraidtest/notshrcompr
|
zdraidtest/notshrcompr zfs 76G 1.0M 76G 1% /zdraidtest/notshrcompr
|
||||||
|
zdraidtest/notshrnotcompr zfs 76G 1.0M 76G 1% /zdraidtest/notshrnotcompr
|
||||||
|
|
||||||
NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id
|
NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id
|
||||||
|
|
||||||
-----
|
Spares for this configuration can be used for the entire pool.
|
||||||
|
|
||||||
A different iteration - raidz1 with 4 data disks, 6 children, 1 spare = more space available
|
|
||||||
since we are using small (2TB) disks this should not be an issue
|
|
||||||
|
|
||||||
zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=lz4 \
|
|
||||||
zdraidtest \
|
|
||||||
draid1:4d:6c:1s /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg \
|
|
||||||
draid1:4d:6c:1s /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm \
|
|
||||||
draid1:4d:6c:1s /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds \
|
|
||||||
draid1:4d:6c:1s /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy
|
|
||||||
real 0m3.288s
|
|
||||||
user 0m0.034s
|
|
||||||
sys 0m0.162s
|
|
||||||
|
|
||||||
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
|
|
||||||
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
|
|
||||||
filesystem successfully created, but not shared
|
|
||||||
changed ownership of '/zdraidtest/shrcompr' from root to user
|
|
||||||
|
|
||||||
Filesystem Type Size Used Avail Use% Mounted on
|
|
||||||
zdraidtest/shrcompr zfs 29T 1.0M 29T 1% /zdraidtest/shrcompr
|
|
||||||
|
|
||||||
+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
|
|
||||||
changed ownership of '/zdraidtest/notshrcompr' from root to user
|
|
||||||
|
|
||||||
Filesystem Type Size Used Avail Use% Mounted on
|
|
||||||
zdraidtest/notshrcompr zfs 29T 1.0M 29T 1% /zdraidtest/notshrcompr
|
|
||||||
|
|
||||||
pool: zdraidtest
|
|
||||||
state: ONLINE
|
|
||||||
config:
|
|
||||||
NAME STATE READ WRITE CKSUM
|
|
||||||
zdraidtest ONLINE 0 0 0
|
|
||||||
draid1:4d:6c:1s-0 ONLINE 0 0 0
|
|
||||||
sdb ONLINE 0 0 0
|
|
||||||
sdc ONLINE 0 0 0
|
|
||||||
sdd ONLINE 0 0 0
|
|
||||||
sde ONLINE 0 0 0
|
|
||||||
sdf ONLINE 0 0 0
|
|
||||||
sdg ONLINE 0 0 0
|
|
||||||
draid1:4d:6c:1s-1 ONLINE 0 0 0
|
|
||||||
sdh ONLINE 0 0 0
|
|
||||||
sdi ONLINE 0 0 0
|
|
||||||
sdj ONLINE 0 0 0
|
|
||||||
sdk ONLINE 0 0 0
|
|
||||||
sdl ONLINE 0 0 0
|
|
||||||
sdm ONLINE 0 0 0
|
|
||||||
draid1:4d:6c:1s-2 ONLINE 0 0 0
|
|
||||||
sdn ONLINE 0 0 0
|
|
||||||
sdo ONLINE 0 0 0
|
|
||||||
sdp ONLINE 0 0 0
|
|
||||||
sdq ONLINE 0 0 0
|
|
||||||
sdr ONLINE 0 0 0
|
|
||||||
sds ONLINE 0 0 0
|
|
||||||
draid1:4d:6c:1s-3 ONLINE 0 0 0
|
|
||||||
sdt ONLINE 0 0 0
|
|
||||||
sdu ONLINE 0 0 0
|
|
||||||
sdv ONLINE 0 0 0
|
|
||||||
sdw ONLINE 0 0 0
|
|
||||||
sdx ONLINE 0 0 0
|
|
||||||
sdy ONLINE 0 0 0
|
|
||||||
spares
|
|
||||||
draid1-0-0 AVAIL
|
|
||||||
draid1-1-0 AVAIL
|
|
||||||
draid1-2-0 AVAIL
|
|
||||||
draid1-3-0 AVAIL
|
|
||||||
errors: No known data errors
|
|
||||||
|
|
||||||
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
|
|
||||||
zdraidtest 36.4T 5.43M 36.4T - - 0% 0% 1.00x ONLINE -
|
|
||||||
|
|
||||||
NAME USED AVAIL REFER MOUNTPOINT
|
|
||||||
zdraidtest 3.00M 28.9T 383K /zdraidtest
|
|
||||||
zdraidtest/notshrcompr 383K 28.9T 383K /zdraidtest/notshrcompr
|
|
||||||
zdraidtest/shrcompr 383K 28.9T 383K /zdraidtest/shrcompr
|
|
||||||
|
|
||||||
Filesystem Type Size Used Avail Use% Mounted on
|
|
||||||
zdraidtest zfs 29T 384K 29T 1% /zdraidtest
|
|
||||||
zdraidtest/shrcompr zfs 29T 1.0M 29T 1% /zdraidtest/shrcompr
|
|
||||||
zdraidtest/notshrcompr zfs 29T 1.0M 29T 1% /zdraidtest/notshrcompr
|
|
||||||
|
|
||||||
NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id
|
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
Here is a simulated severely degraded pool with multiple drive failures and a spare in use,
|
Below is a severely degraded pool with all physical spares in use; despite the raidz1 level it has
|
||||||
with 2 failed disks in the same column - still chugging along:
|
sustained (6) simultaneous drive failures; one more with no replacements will kill the pool
|
||||||
|
and there are no vspares allocated, but it was configured for maximum available space:
|
||||||
|
|
||||||
pool: zdraidtest
|
pool: zdraidtest
|
||||||
state: DEGRADED
|
state: DEGRADED
|
||||||
@ -379,51 +348,53 @@ status: One or more devices could not be used because the label is missing or
|
|||||||
functioning in a degraded state.
|
functioning in a degraded state.
|
||||||
action: Replace the device using 'zpool replace'.
|
action: Replace the device using 'zpool replace'.
|
||||||
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
|
see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
|
||||||
scan: resilvered 716M in 00:00:13 with 0 errors on Sat Jul 3 17:18:21 2021
|
scan: scrub repaired 0B in 00:00:05 with 0 errors on Mon Jul 5 12:31:42 2021
|
||||||
config:
|
config:
|
||||||
NAME STATE READ WRITE CKSUM
|
NAME STATE READ WRITE CKSUM
|
||||||
zdraidtest DEGRADED 0 0 0
|
zdraidtest DEGRADED 0 0 0
|
||||||
draid1:4d:6c:1s-0 DEGRADED 0 0 0
|
draid1:8d:24c:0s-0 DEGRADED 0 0 0
|
||||||
sdb ONLINE 0 0 0
|
spare-0 DEGRADED 0 0 0
|
||||||
|
sdb UNAVAIL 0 0 0
|
||||||
|
sdz ONLINE 0 0 0
|
||||||
sdc ONLINE 0 0 0
|
sdc ONLINE 0 0 0
|
||||||
|
spare-2 DEGRADED 0 0 0
|
||||||
sdd UNAVAIL 0 0 0
|
sdd UNAVAIL 0 0 0
|
||||||
|
sday ONLINE 0 0 0
|
||||||
sde ONLINE 0 0 0
|
sde ONLINE 0 0 0
|
||||||
sdf ONLINE 0 0 0
|
spare-4 DEGRADED 0 0 0
|
||||||
|
sdf UNAVAIL 0 0 0
|
||||||
|
sdaz ONLINE 0 0 0
|
||||||
sdg ONLINE 0 0 0
|
sdg ONLINE 0 0 0
|
||||||
draid1:4d:6c:1s-1 DEGRADED 0 0 0
|
spare-6 DEGRADED 0 0 0
|
||||||
sdh ONLINE 0 0 0
|
sdh UNAVAIL 0 0 0
|
||||||
|
sdby ONLINE 0 0 0
|
||||||
sdi ONLINE 0 0 0
|
sdi ONLINE 0 0 0
|
||||||
|
spare-8 DEGRADED 0 0 0
|
||||||
sdj UNAVAIL 0 0 0
|
sdj UNAVAIL 0 0 0
|
||||||
|
sdbz ONLINE 0 0 0
|
||||||
sdk ONLINE 0 0 0
|
sdk ONLINE 0 0 0
|
||||||
sdl ONLINE 0 0 0
|
sdl ONLINE 0 0 0
|
||||||
sdm ONLINE 0 0 0
|
sdm ONLINE 0 0 0
|
||||||
draid1:4d:6c:1s-2 DEGRADED 0 0 0
|
|
||||||
sdn ONLINE 0 0 0
|
sdn ONLINE 0 0 0
|
||||||
sdo ONLINE 0 0 0
|
sdo ONLINE 0 0 0
|
||||||
sdp ONLINE 0 0 0
|
sdp ONLINE 0 0 0
|
||||||
sdq ONLINE 0 0 0
|
sdq ONLINE 0 0 0
|
||||||
sdr UNAVAIL 0 0 0
|
sdr ONLINE 0 0 0
|
||||||
sds ONLINE 0 0 0
|
sds ONLINE 0 0 0
|
||||||
draid1:4d:6c:1s-3 DEGRADED 0 0 0
|
sdt ONLINE 0 0 0
|
||||||
sdt UNAVAIL 0 0 0
|
|
||||||
sdu ONLINE 0 0 0
|
sdu ONLINE 0 0 0
|
||||||
sdv ONLINE 0 0 0
|
sdv ONLINE 0 0 0
|
||||||
sdw ONLINE 0 0 0
|
sdw ONLINE 0 0 0
|
||||||
sdx ONLINE 0 0 0
|
sdx ONLINE 0 0 0
|
||||||
spare-5 DEGRADED 0 0 0
|
|
||||||
sdy UNAVAIL 0 0 0
|
sdy UNAVAIL 0 0 0
|
||||||
draid1-3-0 ONLINE 0 0 0
|
|
||||||
spares
|
spares
|
||||||
draid1-0-0 AVAIL
|
sdz INUSE currently in use
|
||||||
draid1-1-0 AVAIL
|
sday INUSE currently in use
|
||||||
draid1-2-0 AVAIL
|
sdaz INUSE currently in use
|
||||||
draid1-3-0 INUSE currently in use
|
sdby INUSE currently in use
|
||||||
|
sdbz INUSE currently in use
|
||||||
errors: No known data errors
|
errors: No known data errors
|
||||||
|
|
||||||
NOTE that unless an extra disk is added to the system, the virtual spares for draid1:4d:6c:1s-3 are all burned up;
|
|
||||||
if ANY of sdu-sdx also fails at this point, we will have a dead pool.
|
|
||||||
Spares for draid1-0-0, 1-1-0 and 1-2-0 CANNOT be used for column 3.
|
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
NOTE if you simulate/take a drive offline, you cant just "echo online" to it later, that wont bring it back up!
|
NOTE if you simulate/take a drive offline, you cant just "echo online" to it later, that wont bring it back up!
|
||||||
|
522
ZFS/zfs-mk-draid-4-vdevs--24d.sh
Normal file
522
ZFS/zfs-mk-draid-4-vdevs--24d.sh
Normal file
@ -0,0 +1,522 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
echo "$0 - 2021 Dave Bechtel - make a ZFS DRAID pool"
|
||||||
|
echo "- pass arg1='reset' to destroy test pool"
|
||||||
|
echo "- pass arg1='fail' and arg2=dev2fail to simulate failure"
|
||||||
|
echo "Reboot to clear simulated device failures before issuing 'reset'"
|
||||||
|
|
||||||
|
# Requires at least zfs 2.1.0
|
||||||
|
DD=/dev/disk
|
||||||
|
DBI=/dev/disk/by-id
|
||||||
|
|
||||||
|
# total disks for pool / children
|
||||||
|
td=24
|
||||||
|
|
||||||
|
# raidz level (usually 2)
|
||||||
|
rzl=1
|
||||||
|
|
||||||
|
# spares
|
||||||
|
spr=1
|
||||||
|
|
||||||
|
# TODO EDITME
|
||||||
|
zp=zdraidtest
|
||||||
|
|
||||||
|
function zps () {
|
||||||
|
zpool status -v |awk 'NF>0'
|
||||||
|
}
|
||||||
|
|
||||||
|
#pooldisks=$(echo /dev/sd{b..y})
|
||||||
|
|
||||||
|
#pooldisks1=$(echo /dev/sd{b..m})
|
||||||
|
#pooldisks2=$(echo /dev/sd{n..y})
|
||||||
|
#pooldisks=$pooldisks1' '$pooldisks2 # need entire set for reset
|
||||||
|
|
||||||
|
pooldisks1=$(echo /dev/sd{b..g})
|
||||||
|
pooldisks2=$(echo /dev/sd{h..m})
|
||||||
|
pooldisks3=$(echo /dev/sd{n..s})
|
||||||
|
pooldisks4=$(echo /dev/sd{t..y})
|
||||||
|
pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3' '$pooldisks4 # need entire set for reset
|
||||||
|
#pooldisks=$pooldisks1' '$pooldisks2' '$pooldisks3' '$pooldisks4' '$pooldisks5' '$pooldisks6 # need entire set for reset
|
||||||
|
# sdb sdc sdd sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sds sdt sdu sdv sdw sdx sdy
|
||||||
|
# 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6 1 2 3 4 5 6
|
||||||
|
# D D D Z2 Z2 S
|
||||||
|
# D D D D Z1 S
|
||||||
|
# D D D D D Z1 (no vspare)
|
||||||
|
|
||||||
|
# extending to 32 disks
|
||||||
|
#pooldisks2=$(echo /dev/sda{a..h})
|
||||||
|
#sdaa sdab sdac sdad sdae sdaf sdag sdah
|
||||||
|
|
||||||
|
# failexit.mrg
|
||||||
|
function failexit () {
|
||||||
|
echo '! Something failed! Code: '"$1 $2" # code # (and optional description)
|
||||||
|
exit $1
|
||||||
|
}
|
||||||
|
|
||||||
|
# cre8 drive translation table - NOTE 32 disk config gets overridden vv
|
||||||
|
source ~/bin/boojum/draid-pooldisks-assoc.sh $td
|
||||||
|
|
||||||
|
# Flame the pool and start over from 0
|
||||||
|
if [ "$1" = "reset" ]; then
|
||||||
|
logger "$(date) - $0 - RESET issued - destroying $zp"
|
||||||
|
|
||||||
|
# no need to worry if its not imported / already destroyed
|
||||||
|
if [ $(zpool list |grep -c $zp) -gt 0 ]; then
|
||||||
|
zpool destroy $zp || failexit 999 "Failed to destroy $zp"
|
||||||
|
fi
|
||||||
|
|
||||||
|
for d in $pooldisks; do
|
||||||
|
echo -e -n "o Clearing label for disk $d \r"
|
||||||
|
zpool labelclear -f "$d"1
|
||||||
|
done
|
||||||
|
echo ''
|
||||||
|
# also reset hotspares
|
||||||
|
# echo ${hotspares[@]}
|
||||||
|
# zpool status -v |egrep 'sdz|sday|sdaz|sdby|sdbz|sdcy|sdcz'
|
||||||
|
for d in ${hotspares[@]}; do
|
||||||
|
#echo $d # DEBUG
|
||||||
|
echo -e -n "o Clearing label for Hotspare disk $d \r"
|
||||||
|
zpool labelclear -f "/dev/$d"1
|
||||||
|
done
|
||||||
|
echo ''
|
||||||
|
|
||||||
|
zpool status -v
|
||||||
|
|
||||||
|
exit; # early
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Simulate a drive failure; if zed daemon is running, a spare should auto kick in
|
||||||
|
if [ "$1" = "fail" ]; then
|
||||||
|
# NOTE we do NO error checking here, so if you fail your ROOT DISK, THAT'S ON YOU!
|
||||||
|
|
||||||
|
# also cp syslog
|
||||||
|
echo "$(date) - $0 - Simulating disk failure for $2 $(ls -lR $DD |grep $2)" |tee |logger
|
||||||
|
echo offline > /sys/block/$2/device/state
|
||||||
|
cat /sys/block/$2/device/state |tee |logger
|
||||||
|
|
||||||
|
time dd if=/dev/urandom of=/$zp/^^tmpfileDELME bs=1M count=$td; sync
|
||||||
|
# force a write; if not work, try scrub
|
||||||
|
|
||||||
|
zps
|
||||||
|
|
||||||
|
exit; # early
|
||||||
|
fi
|
||||||
|
|
||||||
|
# zpool create <pool> draid[<parity>][:<data>d][:<children>c][:<spares>s] <vdevs...>
|
||||||
|
# ex: draid2:4d:1s:11c
|
||||||
|
|
||||||
|
# In general a smaller value of D will increase IOPS, improve the compression
|
||||||
|
# ratio, and speed up resilvering at the expense of total usable capacity.
|
||||||
|

# SLOW writing to zstd-3
# draid$rzl:8d:12'c':$spr's' $pooldisks1 \
# draid$rzl:8d:12'c':$spr's' $pooldisks2 \

# TODO EDITME
#iteration=OBM
iteration=2
if [ "$iteration" = "1" ]; then
# compression=zstd-3
( set -x
time zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=lz4 \
$zp \
draid$rzl:4d:6'c':$spr's' $pooldisks1 \
draid$rzl:4d:6'c':$spr's' $pooldisks2 \
draid$rzl:4d:6'c':$spr's' $pooldisks3 \
draid$rzl:4d:6'c':$spr's' $pooldisks4 \
|| failexit 101 "Failed to create DRAID"
)
elif [ "$iteration" = "2" ]; then
# 4xVDEVs with 4 vspares
# raidz level (usually 2)
rzl=2
# spares
spr=1
( set -x
time zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 \
$zp \
draid$rzl:3d:6'c':$spr's' $pooldisks1 \
draid$rzl:3d:6'c':$spr's' $pooldisks2 \
draid$rzl:3d:6'c':$spr's' $pooldisks3 \
draid$rzl:3d:6'c':$spr's' $pooldisks4 \
|| failexit 101 "Failed to create DRAID"
)
else
# One Big Mother
# -o ashift=12
# raidz level (usually 2)
rzl=2
# spares - this is a 96-drive pool, you DON'T want to skimp!
spr=6
( set -x
time zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 \
$zp \
draid$rzl:8d:$td'c':$spr's' $pooldisks \
|| failexit 101 "Failed to create DRAID"
)
fi

rc=$?
[ $rc -gt 0 ] && exit $rc
# ^ Need this check because of subshell, will not exit early otherwise

# [ $(zpool list |grep -c "no pools") -eq 0 ] && \
# zpool add $zp spare ${hotspares[0]} ${hotspares[1]} ${hotspares[2]} ${hotspares[3]}
# NOTE we're still keeping a few pspares in reserve

# The below will not work: gets error
# "requested number of dRAID data disks per group 6 is too high, at most 3 disks are available for data"
#( set -x
#time zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=lz4 \
# $zp \
# draid$rzl:6d:6'c':$spr's' $pooldisks1 \
# draid$rzl:6d:6'c':$spr's' $pooldisks2 \
# draid$rzl:6d:6'c':$spr's' $pooldisks3 \
# draid$rzl:6d:6'c':$spr's' $pooldisks4 \
#|| failexit 101 "Failed to create DRAID"
#)
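
# (Added explanation, based on the zpool-create(8) dRAID rules rather than anything in this
# script): each dRAID group has to fit data + parity + spares within its children, so a
# 6-child vdev with double parity and 1 distributed spare leaves at most 6 - 2 - 1 = 3 disks
# for data - which is exactly what the error above complains about when 6d is requested.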

# requires external script in the same PATH
# going with lz4 so not limited by CPU for compression
zfs-newds.sh 11 $zp shrcompr
zfs-newds.sh 10 $zp notshrcompr
zfs-newds-zstd.sh 10 $zp notshrcompr-zstd
zfs-newds.sh 00 $zp notshrnotcompr
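
# (Illustrative sketch - zfs-newds.sh is an external helper that is not part of this file,
# so its exact behaviour is an assumption; judging from the '+ zfs create ...' trace shown
# further below, the "11 $zp shrcompr" call boils down to roughly:)
#   zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k $zp/shrcompr
#   chown youruser /$zp/shrcompr   # the trace shows ownership being handed from root to a regular user
# (the two flag digits appear to toggle compression and SMB sharing for the new dataset)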

zps
zpool list
zfs list

df -hT |egrep 'ilesystem|zfs'

echo "NOTE - best practice is to export the pool and # zpool import -a -d $DBI"
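
# In concrete commands (not executed by this script; assumes nothing is using the pool yet):
#   zpool export $zp
#   zpool import -a -d $DBI   # re-import via /dev/disk/by-id so device renumbering across reboots can't confuse the pool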

date
exit;


# REFS:
https://openzfs.github.io/openzfs-docs/Basic%20Concepts/dRAID%20Howto.html

https://klarasystems.com/articles/openzfs-draid-finally/

https://www.reddit.com/r/zfs/comments/lnoh7v/im_trying_to_understand_how_draid_works_but_im/

https://insider-voice.com/a-deep-dive-into-the-new-openzfs-2-1-distributed-raid-topology/

https://docs.google.com/presentation/d/1uo0nBfY84HIhEqGWEx-Tbm8fPbJKtIP3ICo4toOPcJo/edit#slide=id.g9d6b9fd59f_0_27

Group size must divide evenly into draid size
E.g., 30 drives can only support
  3 drive group
  5 drive group
  10 drive group
  15 drive group

Only need to specify group size at creation

Group Size - the number of pieces the data is partitioned into plus the amount of parity
 o The amount of parity determines the redundancy
 o The number of data pieces determines the overhead

dRAID Size - the number of drives used for data
 (Does not include spare drives)
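
Worked example (an illustration, not from the reference slides): with a 30-drive dRAID and
a group size of 5 using double parity, each group carries 3 data + 2 parity pieces, so the
parity overhead is 2/5 = 40% and the data capacity is roughly 30 * 3/5 = 18 drives' worth,
before any distributed spares are set aside.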

-----

# make a draid with raidz2, x4 VDEVs, 3 data disks, 6 children, 1 spare

zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=lz4 \
zdraidtest \
draid2:3d:6c:1s /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg \
draid2:3d:6c:1s /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm \
draid2:3d:6c:1s /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds \
draid2:3d:6c:1s /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy

real    0m3.515s
user    0m0.039s
sys     0m0.136s

+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
filesystem successfully created, but not shared
changed ownership of '/zdraidtest/shrcompr' from root to user

Filesystem           Type  Size  Used Avail Use% Mounted on
zdraidtest/shrcompr  zfs    21T  1.0M   21T   1% /zdraidtest/shrcompr

+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
changed ownership of '/zdraidtest/notshrcompr' from root to user

Filesystem              Type  Size  Used Avail Use% Mounted on
zdraidtest/notshrcompr  zfs    21T  1.0M   21T   1% /zdraidtest/notshrcompr

  pool: zdraidtest
 state: ONLINE
config:
NAME                 STATE     READ WRITE CKSUM
zdraidtest           ONLINE       0     0     0
  draid2:3d:6c:1s-0  ONLINE       0     0     0
    sdb              ONLINE       0     0     0
    sdc              ONLINE       0     0     0
    sdd              ONLINE       0     0     0
    sde              ONLINE       0     0     0
    sdf              ONLINE       0     0     0
    sdg              ONLINE       0     0     0
  draid2:3d:6c:1s-1  ONLINE       0     0     0
    sdh              ONLINE       0     0     0
    sdi              ONLINE       0     0     0
    sdj              ONLINE       0     0     0
    sdk              ONLINE       0     0     0
    sdl              ONLINE       0     0     0
    sdm              ONLINE       0     0     0
  draid2:3d:6c:1s-2  ONLINE       0     0     0
    sdn              ONLINE       0     0     0
    sdo              ONLINE       0     0     0
    sdp              ONLINE       0     0     0
    sdq              ONLINE       0     0     0
    sdr              ONLINE       0     0     0
    sds              ONLINE       0     0     0
  draid2:3d:6c:1s-3  ONLINE       0     0     0
    sdt              ONLINE       0     0     0
    sdu              ONLINE       0     0     0
    sdv              ONLINE       0     0     0
    sdw              ONLINE       0     0     0
    sdx              ONLINE       0     0     0
    sdy              ONLINE       0     0     0
spares
  draid2-0-0         AVAIL
  draid2-1-0         AVAIL
  draid2-2-0         AVAIL
  draid2-3-0         AVAIL
errors: No known data errors

NAME         SIZE  ALLOC   FREE  CKPOINT  EXPANDSZ   FRAG    CAP  DEDUP  HEALTH  ALTROOT
zdraidtest  36.4T  5.61M  36.4T        -         -     0%     0%  1.00x  ONLINE  -

NAME                     USED  AVAIL  REFER  MOUNTPOINT
zdraidtest              2.24M  21.0T   278K  /zdraidtest
zdraidtest/notshrcompr   278K  21.0T   278K  /zdraidtest/notshrcompr
zdraidtest/shrcompr      278K  21.0T   278K  /zdraidtest/shrcompr

Filesystem              Type  Size  Used Avail Use% Mounted on
zdraidtest              zfs    21T  384K   21T   1% /zdraidtest
zdraidtest/shrcompr     zfs    21T  1.0M   21T   1% /zdraidtest/shrcompr
zdraidtest/notshrcompr  zfs    21T  1.0M   21T   1% /zdraidtest/notshrcompr

NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id

-----

A different iteration - raidz1 with 4 data disks, 6 children, 1 spare = more space available
since we are using small (2TB) disks this should not be an issue

zpool create -o ashift=12 -o autoexpand=on -O atime=off -O compression=lz4 \
zdraidtest \
draid1:4d:6c:1s /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg \
draid1:4d:6c:1s /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl /dev/sdm \
draid1:4d:6c:1s /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds \
draid1:4d:6c:1s /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx /dev/sdy

real    0m3.288s
user    0m0.034s
sys     0m0.162s

+ zfs create -o atime=off -o compression=lz4 -o sharesmb=on -o xattr=sa -o recordsize=1024k zdraidtest/shrcompr
cannot share 'zdraidtest/shrcompr: system error': SMB share creation failed
filesystem successfully created, but not shared
changed ownership of '/zdraidtest/shrcompr' from root to user

Filesystem           Type  Size  Used Avail Use% Mounted on
zdraidtest/shrcompr  zfs    29T  1.0M   29T   1% /zdraidtest/shrcompr

+ zfs create -o atime=off -o compression=lz4 -o sharesmb=off -o recordsize=1024k zdraidtest/notshrcompr
changed ownership of '/zdraidtest/notshrcompr' from root to user

Filesystem              Type  Size  Used Avail Use% Mounted on
zdraidtest/notshrcompr  zfs    29T  1.0M   29T   1% /zdraidtest/notshrcompr

  pool: zdraidtest
 state: ONLINE
config:
NAME                 STATE     READ WRITE CKSUM
zdraidtest           ONLINE       0     0     0
  draid1:4d:6c:1s-0  ONLINE       0     0     0
    sdb              ONLINE       0     0     0
    sdc              ONLINE       0     0     0
    sdd              ONLINE       0     0     0
    sde              ONLINE       0     0     0
    sdf              ONLINE       0     0     0
    sdg              ONLINE       0     0     0
  draid1:4d:6c:1s-1  ONLINE       0     0     0
    sdh              ONLINE       0     0     0
    sdi              ONLINE       0     0     0
    sdj              ONLINE       0     0     0
    sdk              ONLINE       0     0     0
    sdl              ONLINE       0     0     0
    sdm              ONLINE       0     0     0
  draid1:4d:6c:1s-2  ONLINE       0     0     0
    sdn              ONLINE       0     0     0
    sdo              ONLINE       0     0     0
    sdp              ONLINE       0     0     0
    sdq              ONLINE       0     0     0
    sdr              ONLINE       0     0     0
    sds              ONLINE       0     0     0
  draid1:4d:6c:1s-3  ONLINE       0     0     0
    sdt              ONLINE       0     0     0
    sdu              ONLINE       0     0     0
    sdv              ONLINE       0     0     0
    sdw              ONLINE       0     0     0
    sdx              ONLINE       0     0     0
    sdy              ONLINE       0     0     0
spares
  draid1-0-0         AVAIL
  draid1-1-0         AVAIL
  draid1-2-0         AVAIL
  draid1-3-0         AVAIL
errors: No known data errors

NAME         SIZE  ALLOC   FREE  CKPOINT  EXPANDSZ   FRAG    CAP  DEDUP  HEALTH  ALTROOT
zdraidtest  36.4T  5.43M  36.4T        -         -     0%     0%  1.00x  ONLINE  -

NAME                     USED  AVAIL  REFER  MOUNTPOINT
zdraidtest              3.00M  28.9T   383K  /zdraidtest
zdraidtest/notshrcompr   383K  28.9T   383K  /zdraidtest/notshrcompr
zdraidtest/shrcompr      383K  28.9T   383K  /zdraidtest/shrcompr

Filesystem              Type  Size  Used Avail Use% Mounted on
zdraidtest              zfs    29T  384K   29T   1% /zdraidtest
zdraidtest/shrcompr     zfs    29T  1.0M   29T   1% /zdraidtest/shrcompr
zdraidtest/notshrcompr  zfs    29T  1.0M   29T   1% /zdraidtest/notshrcompr

NOTE - best practice is to export the pool and # zpool import -a -d /dev/disk/by-id

-----

Here is a simulated severely degraded pool with multiple drive failures and a spare in use,
with 2 failed disks in the same column - still chugging along:

  pool: zdraidtest
 state: DEGRADED
status: One or more devices could not be used because the label is missing or
        invalid. Sufficient replicas exist for the pool to continue
        functioning in a degraded state.
action: Replace the device using 'zpool replace'.
   see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
  scan: resilvered 716M in 00:00:13 with 0 errors on Sat Jul 3 17:18:21 2021
config:
NAME                 STATE     READ WRITE CKSUM
zdraidtest           DEGRADED     0     0     0
  draid1:4d:6c:1s-0  DEGRADED     0     0     0
    sdb              ONLINE       0     0     0
    sdc              ONLINE       0     0     0
    sdd              UNAVAIL      0     0     0
    sde              ONLINE       0     0     0
    sdf              ONLINE       0     0     0
    sdg              ONLINE       0     0     0
  draid1:4d:6c:1s-1  DEGRADED     0     0     0
    sdh              ONLINE       0     0     0
    sdi              ONLINE       0     0     0
    sdj              UNAVAIL      0     0     0
    sdk              ONLINE       0     0     0
    sdl              ONLINE       0     0     0
    sdm              ONLINE       0     0     0
  draid1:4d:6c:1s-2  DEGRADED     0     0     0
    sdn              ONLINE       0     0     0
    sdo              ONLINE       0     0     0
    sdp              ONLINE       0     0     0
    sdq              ONLINE       0     0     0
    sdr              UNAVAIL      0     0     0
    sds              ONLINE       0     0     0
  draid1:4d:6c:1s-3  DEGRADED     0     0     0
    sdt              UNAVAIL      0     0     0
    sdu              ONLINE       0     0     0
    sdv              ONLINE       0     0     0
    sdw              ONLINE       0     0     0
    sdx              ONLINE       0     0     0
    spare-5          DEGRADED     0     0     0
      sdy            UNAVAIL      0     0     0
      draid1-3-0     ONLINE       0     0     0
spares
  draid1-0-0         AVAIL
  draid1-1-0         AVAIL
  draid1-2-0         AVAIL
  draid1-3-0         INUSE     currently in use
errors: No known data errors

NOTE that unless an extra disk is added to the system, the virtual spares for draid1:4d:6c:1s-3 are all burned up;
if ANY of sdu-sdx also fails at this point, we will have a dead pool.
Spares for draid1-0-0, 1-1-0 and 1-2-0 CANNOT be used for column 3.
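
One way out of that corner (a sketch - /dev/sdz here is a hypothetical fresh disk added to
the system, not one of the pool members above):

zpool replace zdraidtest sdt /dev/sdz   # rebuild the still-unprotected failed slot onto real hardware

Once that resilver completes the vdev is back to full draid1 redundancy, and sdy can be
replaced the same way to hand draid1-3-0 back to AVAIL.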

-----

NOTE if you simulate/take a drive offline, you can't just "echo online" to it later, that won't bring it back up!
try rescan-scsi-bus.sh or reboot
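
For example (an illustration - rescan-scsi-bus.sh ships with the sg3_utils package, and the
sysfs loop below assumes the disks hang off a SCSI/SATA host adapter):

rescan-scsi-bus.sh
# or poke the kernel directly:
for h in /sys/class/scsi_host/host*; do echo "- - -" > $h/scan; done

then check lsblk / zpool status -v to see whether the device came back.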

FIX: if a drive is offline, replace it temporarily with a builtin spare:
# zpool replace zdraidtest sdd draid2-0-0

# zps
  pool: zdraidtest
 state: DEGRADED
status: One or more devices could not be used because the label is missing or
        invalid. Sufficient replicas exist for the pool to continue
        functioning in a degraded state.
action: Replace the device using 'zpool replace'.
   see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-4J
  scan: resilvered 0B in 00:00:00 with 0 errors on Sat Jul 3 14:43:51 2021
config:
NAME                  STATE     READ WRITE CKSUM
zdraidtest            DEGRADED     0     0     0
  draid2:5d:24c:2s-0  DEGRADED     0     0     0
    sdb               ONLINE       0     0     0
    sdc               ONLINE       0     0     0
    spare-2           DEGRADED     0     0     0
      sdd             UNAVAIL      0     0     0
      draid2-0-0      ONLINE       0     0     0
    sde               ONLINE       0     0     0
    sdf               ONLINE       0     0     0
    sdg               ONLINE       0     0     0
    sdh               ONLINE       0     0     0
    sdi               ONLINE       0     0     0
    sdj               ONLINE       0     0     0
    sdk               ONLINE       0     0     0
    sdl               ONLINE       0     0     0
    sdm               ONLINE       0     0     0
    sdn               ONLINE       0     0     0
    sdo               ONLINE       0     0     0
    sdp               ONLINE       0     0     0
    sdq               ONLINE       0     0     0
    sdr               ONLINE       0     0     0
    sds               ONLINE       0     0     0
    sdt               ONLINE       0     0     0
    sdu               ONLINE       0     0     0
    sdv               ONLINE       0     0     0
    sdw               ONLINE       0     0     0
    sdx               ONLINE       0     0     0
    sdy               ONLINE       0     0     0
spares
  draid2-0-0          INUSE     currently in use
  draid2-0-1          AVAIL
errors: No known data errors

HOWTO fix the above situation with the same disk (you rebooted / it came back online) and decouple the in-use spare:

zpool export -a

fdisk -l /dev/sdd # scsi-SATA_VBOX_HARDDISK_VBbcc6c97e-f68b8368
zpool labelclear /dev/sdd
zpool labelclear -f /dev/sdd1

zpool import -a
zpool status -v # This will show a degraded pool with a missing disk

# This won't work but gives useful info:
zpool replace zdraidtest spare-2 scsi-SATA_VBOX_HARDDISK_VBbcc6c97e-f68b8368 # got error, use detach

zpool detach zdraidtest 2582498653363374334 # this was listed as UNAVAIL with the spare in-use underneath it
zpool status -v # should now show only the spare where sdd was

# we labelcleared it, so it should be ready for re-use;
# if you want to be really thorough you can DD zeros to the entire drive, but that is not really necessary
zpool replace zdraidtest draid2-0-0 scsi-SATA_VBOX_HARDDISK_VBbcc6c97e-f68b8368 # same disk (sdd) but labelcleared
zpool status -v
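
A quick follow-up check (a sketch): once the resilver onto the real disk finishes, the
distributed spare should be released again -

zpool status -v |grep -A4 spares   # draid2-0-0 should read AVAIL once the resilver completes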

@@ -78,7 +78,11 @@ source ~/bin/boojum/draid-pooldisks-assoc.sh $td
 # Flame the pool and start over from 0
 if [ "$1" = "reset" ]; then
 logger "$(date) - $0 - RESET issued - destroying $zp"
+
+# no need to worry if its not imported / already destroyed
+if [ $(zpool list |grep -c $zp) -gt 0 ]; then
 zpool destroy $zp || failexit 999 "Failed to destroy $zp"
+fi
 
 for d in $pooldisks; do
 echo -e -n "o Clearing label for disk $d \r"
@@ -89,6 +93,7 @@ if [ "$1" = "reset" ]; then
 # echo ${hotspares[@]}
 # zpool status -v |egrep 'sdz|sday|sdaz|sdby|sdbz|sdcy|sdcz'
 for d in ${hotspares[@]}; do
+#echo $d # DEBUG
 echo -e -n "o Clearing label for Hotspare disk $d \r"
 zpool labelclear -f "/dev/$d"1
 done
@@ -128,21 +133,21 @@ fi
 
 # TODO EDITME
 #iteration=OBM
-iteration=2
+iteration=1
 if [ "$iteration" = "1" ]; then
 # compression=zstd-3
 # -o ashift=12
 # raidz level (usually 2)
-rzl=2
+rzl=1
 # Vspares - this is a 96-drive pool, you DON'T want to skimp!
 spr=4
 ( set -x
 time zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 \
 $zp \
-draid$rzl:8d:48'c':$spr's' $pooldisks01 $pooldisks02 $pooldisks03 $pooldisks04 $pooldisks05 $pooldisks06\
-$pooldisks07 $pooldisks08 \
-draid$rzl:8d:48'c':$spr's' $pooldisks09 $pooldisks10 $pooldisks11 $pooldisks12 $pooldisks13 $pooldisks14\
-$pooldisks15 $pooldisks16 \
+draid$rzl:8d:24'c':$spr's' $pooldisks01 $pooldisks02 $pooldisks03 $pooldisks04 \
+draid$rzl:8d:24'c':$spr's' $pooldisks05 $pooldisks06 $pooldisks07 $pooldisks08 \
+draid$rzl:8d:24'c':$spr's' $pooldisks09 $pooldisks10 $pooldisks11 $pooldisks12 \
+draid$rzl:8d:24'c':$spr's' $pooldisks13 $pooldisks14 $pooldisks15 $pooldisks16 \
 || failexit 101 "Failed to create DRAID"
 )
 elif [ "$iteration" = "2" ]; then
@@ -166,6 +171,22 @@ rc=$?
 [ $(zpool list |grep -c "no pools") -eq 0 ] && \
 zpool add $zp spare ${hotspares[0]} ${hotspares[1]} ${hotspares[2]} ${hotspares[3]}
 # NOTE we're still keeping a few pspares in reserve
+elif [ "$iteration" = "3" ]; then
+# compression=zstd-3
+# -o ashift=12
+# raidz level (usually 2)
+rzl=2
+# Vspares - this is a 96-drive pool, you DON'T want to skimp!
+spr=4
+( set -x
+time zpool create -o autoreplace=on -o autoexpand=on -O atime=off -O compression=lz4 \
+$zp \
+draid$rzl:8d:48'c':$spr's' $pooldisks01 $pooldisks02 $pooldisks03 $pooldisks04 $pooldisks05 $pooldisks06\
+$pooldisks07 $pooldisks08 \
+draid$rzl:8d:48'c':$spr's' $pooldisks09 $pooldisks10 $pooldisks11 $pooldisks12 $pooldisks13 $pooldisks14\
+$pooldisks15 $pooldisks16 \
+|| failexit 101 "Failed to create DRAID"
+)
 else
 # One Big Mother
 # -o ashift=12