diff --git a/utils/backupServer/README.md b/utils/backupServer/README.md
index 27b20231..8d723b3c 100644
--- a/utils/backupServer/README.md
+++ b/utils/backupServer/README.md
@@ -6,15 +6,16 @@ shell script that can be launched with backup.js to sync to a storage server aft
 
 ## Notes
 
 Historical view and backups are best to be stored on a different server.
-Backups do generate lots of files. Every day one full backup is made with png files for every single file - and incremential backups all 15min into png files with just the changed pixels since the last full backup set.
+Backups do generate lots of files. Every day one full backup is made with png files for every single tile/chunk - and incremental backups every 15min into png files with just the changed pixels since the last full backup set.
 If the filesystem has a limit on inodes, this is very likely to be hit within a year or so by the amount of small files.
-The following scripts are to mitigate this and to decrease disk usage.
 
-## hardlink.sh
-Compares the full-backup tile files from one day to another and creates hardlinks on equal tiles, which significantly reduces the numbers of used inodes and disk space used.
+We can mitigate this issue by turning duplicates into hardlinks (one file with multiple filenames/paths) and by compressing every month into a squashfs image. squashfs compresses inodes as well. However, mksquashfs needs to parse all inodes, which takes lots of RAM (at least 256 bytes per inode - and we have millions of files). We use the arguments `-b 8192 -no-xattrs -no-exports -progress`, where `-no-exports` is necessary in order to not hit the memory limit when mounting multiple images later.
+We do all of this in:
+
+## compressBackup.sh
+
+Shell script that reduces the size of the backups with hardlink and squashfs. It is supposed to run as a daily cron script.
+Look into its source for a comment on what it does and set BACKUPROOT within it.
+
 This uses the hardlink_0.3 util from https://jak-linux.org/projects/hardlink/ which ships with current debian and ubuntu, but in a different version on other distributions.
-## mksquashfs
-Backups from a whole month can be archived into a mountable read-only image with sqashfs.
-Squashfs compresses the data, including inodes and reolves all duplicates. However, when compressing it needs to parse all inodes, which takes lots of RAM (at least 256 bytes per inode - we got millions of files).
-We use the arguments `-b 8192 -no-xattrs -no-exports -progress`, where no-export is neccessary in order to not hit memory limit when mounting multiple images.
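For illustration, here is a minimal sketch of the two steps the README describes (hardlink deduplication and monthly squashfs archiving). The paths are placeholders, not the real backup layout; the actual logic, including the fstab handling, lives in compressBackup.sh below.

```bash
# Hypothetical layout: /backup/canvas/YYYY/MM/DD/<canvas>/tiles/...
MONTH_DIR="/backup/canvas/2021/05"        # placeholder month folder
IMAGE="/backup/canvas/2021/05.sqsh.gz"    # resulting read-only image

# deduplicate two daily full backups by hardlinking identical tiles
# (same hardlink_0.3 flags as used in compressBackup.sh)
/usr/bin/hardlink --respect-name --ignore-time --ignore-owner --maximize \
  "${MONTH_DIR}/01" "${MONTH_DIR}/02"

# archive the whole month; -no-exports keeps memory usage down
# when many of these images are mounted at the same time
mksquashfs "${MONTH_DIR}" "${IMAGE}" -b 8192 -no-xattrs -no-exports -progress

# swap the directory for a read-only mount of the image
mv "${MONTH_DIR}" "${MONTH_DIR}.orig"
mkdir "${MONTH_DIR}"
mount -t squashfs -o loop,ro "${IMAGE}" "${MONTH_DIR}"
```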
diff --git a/utils/backupServer/compressBackup.sh b/utils/backupServer/compressBackup.sh
new file mode 100755
index 00000000..07b31de5
--- /dev/null
+++ b/utils/backupServer/compressBackup.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Should be run as daily cron job
+#
+# Makes backup dirs smaller, first by hardlinking duplicate files of daily backups every day
+# (duplicates within the time folders (incremental backups) of the previous day,
+# and the tiles folder of current day (full backups) to tiles folder of previous day within the same month)
+# second by archiving full months into read-only squashfs on the 1st of the next month
+# Has to be run as root so that the squashfs images can be mounted
+#
+# arguments:
+# NONE to take system date to decide what to do (run as daily cron job)
+# or:
+# $1 = Date in format YYYYMMDD
+
+# CHANGE THIS TO YOUR PATH
+BACKUPROOT="/home/backup/pixelplanet/canvas"
+# mount options of created monthly squashfs images
+MOUNT_OPTIONS="ro,defaults"
+
+# NO CHANGE PAST THIS NEEDED
+
+if [ "$1" != "" ]
+  then
+    set -e
+    TODAY=`printf "%(%Y/%m/%d)T" \`date --utc --date "$1" +%s\``
+    YESTERDAY=`printf "%(%Y/%m/%d)T" $(( $(date --utc --date "$1" +%s) - 24*3600 ))`
+    set +e
+  else
+    TODAY=`printf "%(%Y/%m/%d)T" -1`
+    YESTERDAY=`printf "%(%Y/%m/%d)T" $(( $(printf "%(%s)T" -1) - 24*3600 ))`
+fi
+
+echo $TODAY $YESTERDAY
+
+echo "---Resolve duplicates in incremental backups for ${YESTERDAY}---"
+DIR="${BACKUPROOT}/${YESTERDAY}"
+if [ -w "${DIR}" ]; then
+  for CAN_DIR in `ls ${DIR}`; do
+    echo "-Canvas ${CAN_DIR}-"
+    shopt -s nullglob;
+    TIMEDIRS=("${DIR}"/"${CAN_DIR}"/*/);
+    shopt -u nullglob;
+    CNT=$[${#TIMEDIRS[@]}-2]
+    IT=0
+    while [ $IT -lt $CNT ]; do
+      DIRF="${TIMEDIRS[${IT}]}"
+      IT=$[${IT}+1]
+      DIRS="${TIMEDIRS[${IT}]}"
+      for COL in `ls ${DIRS}`; do
+        if [ -d "${DIRF}${COL}" ] && [ -d "${DIRS}${COL}" ]; then
+          echo /usr/bin/hardlink --respect-name --ignore-time --ignore-owner --maximize "${DIRF}${COL}" "${DIRS}${COL}"
+          /usr/bin/hardlink --respect-name --ignore-time --ignore-owner --maximize "${DIRF}${COL}" "${DIRS}${COL}"
+        fi
+      done
+    done
+  done
+fi
+
+# if beginning of month
+if [ `echo "${TODAY}" | sed 's/.*\///'` == "01" ]
+  then
+    echo "---mksquashfs previous month---"
+    PREV_YEAR=`echo "${YESTERDAY}" | sed 's/\/.*//'`
+    PREV_MONTH=`echo "${YESTERDAY}" | sed 's/[^\/]*\///' | sed 's/\/.*//'`
+    SQUASH_FILE="${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH}.sqsh.gz"
+    if [ ! -f "${SQUASH_FILE}" ]; then
+      echo "doing ${SQUASH_FILE}"
+      set -e
+      echo "mksquashfs ${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH} ${SQUASH_FILE} -b 8192 -no-xattrs -progress -no-exports"
+      mksquashfs "${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH}" ${SQUASH_FILE} -b 8192 -no-xattrs -progress -no-exports
+      if [ -f "${SQUASH_FILE}" ]; then
+        echo "Mount ${SQUASH_FILE}"
+        mv "${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH}" "${BACKUPROOT}/${PREV_YEAR}/rem"
+        mkdir "${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH}"
+        echo "${SQUASH_FILE} ${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH} squashfs ${MOUNT_OPTIONS} 0 0" >> /etc/fstab
+        mount "${BACKUPROOT}/${PREV_YEAR}/${PREV_MONTH}"
+        echo "cleaning up old files... this might take a while"
+        rm -rf "${BACKUPROOT}/${PREV_YEAR}/rem"
+      fi
+      set +e
+    else
+      echo "${SQUASH_FILE} already exists. Don't do mksquashfs."
+    fi
+  else
+    DIR="${BACKUPROOT}/${YESTERDAY}"
+    PREV_DIR="${BACKUPROOT}/${TODAY}"
+    echo "---Resolve duplicates to full backup to previous day---"
+    if [ -w "${DIR}" ]; then
+      for CAN_DIR in `ls ${DIR}`; do
+        if [ -d "${DIR}/${CAN_DIR}/tiles" ] && [ -d "${PREV_DIR}/${CAN_DIR}/tiles" ]; then
+          for COL in `ls ${DIR}/${CAN_DIR}/tiles`; do
+            WDIR="${CAN_DIR}/tiles/${COL}"
+            echo "----${CAN_DIR} / ${COL}----"
+            if [ -d "${DIR}/${WDIR}" ] && [ -d "${PREV_DIR}/${WDIR}" ]; then
+              echo /usr/bin/hardlink --respect-name --ignore-time --ignore-owner --maximize "${DIR}/${WDIR}" "${PREV_DIR}/${WDIR}"
+              /usr/bin/hardlink --respect-name --ignore-time --ignore-owner --maximize "${DIR}/${WDIR}" "${PREV_DIR}/${WDIR}"
+            fi
+          done
+        fi
+      done
+    fi
+fi
+
+exit 0
diff --git a/utils/backupServer/hardlink.sh b/utils/backupServer/hardlink.sh
deleted file mode 100644
index f2caacf7..00000000
--- a/utils/backupServer/hardlink.sh
+++ /dev/null
@@ -1,17 +0,0 @@
- #!/bin/bash
-DIR=$1
-PREV_DIR=$2
-
-echo "---Resolve duplicates to hardlinks---"
-for CAN_DIR in `ls ${DIR}`; do
-  if [ -d "${DIR}/${CAN_DIR}/tiles" ] && [ -d "${PREV_DIR}/${CAN_DIR}/tiles" ]; then
-    for COL in `ls ${DIR}/${CAN_DIR}/tiles`; do
-      WDIR="${CAN_DIR}/tiles/${COL}"
-      echo "----${CAN_DIR} / ${COL}----"
-      if [ -d "${DIR}/${WDIR}" ] && [ -d "${PREV_DIR}/${WDIR}" ]; then
-        echo /usr/bin/hardlink --respect-name --ignore-time --ignore-owner "${DIR}/${WDIR}" "${PREV_DIR}/${WDIR}"
-        /usr/bin/hardlink --respect-name --ignore-time --ignore-owner --maximize "${DIR}/${WDIR}" "${PREV_DIR}/${WDIR}"
-      fi
-    done
-  fi
-done
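For reference, a hedged usage sketch for the new script (the install path below is a placeholder): per its header comment it can either be run manually for a specific backup date or installed as a daily root cron job.

```bash
# catch up a single day manually, passing the date as YYYYMMDD
sudo /path/to/utils/backupServer/compressBackup.sh 20210501

# or run it daily as root via cron (squashfs mounting requires root),
# e.g. as an entry in root's crontab:
# 30 3 * * * /path/to/utils/backupServer/compressBackup.sh
```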