bak is a simple bash script that seeks to use rsync for efficient incremental backups, both in terms of time and disk space consumed. It is able to do this by leveraging rsync to copy only incremental changes, creating hard links for those files that haven't been modified between backups.
At present, the documentation is pretty sparse - pretty much just a README. I need to work on that...
bak is released under the GNU GPL v2.
source
#!/bin/bash
#
# bak - a simple rsync backup script
# Copyright (C) 2011 Clifton L. Snyder <cliff.snyder@gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# Help text: printed by -h/--help and appended to every message emitted by
# error().  The first line names the script the way it was invoked ($0); the
# text deliberately ends with a newline because it is printed with "echo -n".
usage="usage: $0 [options] action
options:
-f FILE path to config file (default: /etc/bak/bak.conf)
-s, --source DIR source directory
-t, --target DIR target directory
-q, --quiet suppress non-error messages
-v, --verbose turn on verbose logging
-d, --debug turn on debugging output
-h, --help display a brief help message and exit
actions:
ls list the current backups (default)
do perform a backup
rm remove a backup (UNIMPLEMENTED)
"
# TODO make running as an alternate user work
# -u, --user user run as user
# TODO implement retention (as of now: "infinite retention")
# -r, --retention number of copies to keep
# TODO make 'do' the default action?
### built-in defaults
# These are the values in effect before the command line is parsed and the
# config file is sourced; either of those may replace them later on.

# verbosity -- 0: quiet (the default, no debugging); 1: verbose; 2: debug
LOG_LEVEL=0

# action carried out when none is named on the command line
ACTION="ls"

# config file that gets sourced when it exists
config="/etc/bak/bak.conf"

# TODO implement retention (as of now: "infinite retention")
# retention=30

# TODO make running as an alternate user work
# user="$USER"
# su="/bin/su"
# su_args="-c"

# the rsync binary used to perform the copy
rsync="/usr/bin/rsync"

# base rsync options, kept as one space-separated string
# some options from http://www.sanitarium.net/golug/rsync_backups_2010.html
# check there for more detail
# TODO research --hard-links
# TODO research --inplace
# TODO research --numeric-ids
# TODO include support for includes/excludes?
# TODO include the option to not cross filesystem boundaries?
# TODO include interactive options --progress and --itemize-changes?
rsync_opts="--archive --hard-links"
rsync_opts="$rsync_opts --inplace --numeric-ids --delete"
# Report a fatal problem and terminate the script.
# Arguments: the words of the message; they are joined with single spaces
# Globals:   usage (read)
# Outputs:   "error: <message>" followed by the usage text, both on stderr
# Exits:     always, with status 1
error() {
  # pin IFS locally so "$*" joins the words with a space, exactly as echo would
  local IFS=' '
  {
    printf 'error: %s\n' "$*"
    printf '%s' "$usage"
  } >&2
  exit 1
}
# preserve args (in case of -u)
# ARGV="$@"
### parse command line args
# Walk the positional parameters from left to right.  Options that take a
# value (-f, -s, -t) consume the following word as well; bare words name the
# action.  Anything unrecognized is fatal (error() prints the usage and exits).
#
# The loop is driven by the argument count rather than by "$1" being
# non-empty: an empty-string argument (e.g. an unset variable that was quoted
# by the caller) must not silently end the parse and hide everything after
# it.  Such an argument now reaches the "unknown option" arm instead.
while [ $# -gt 0 ] ; do
  case "$1" in
    -f)
      [[ -z "$2" ]] && error "$1 takes a FILE argument"
      shift
      config="$1"
      [[ ! -f "$config" ]] && error "config file '$config' not found"
      ;;
    # TODO make running as an alternate user work
    # -u|--user)
    # shift
    # [[ -z "$1" ]] && error "-u takes a user as an argument"
    # id "$1" >& /dev/null
    # [[ ! "$?" -eq 0 ]] && error "user '$1' does not exist"
    # user="$1"
    # ;;
    # TODO implement retention (as of now: "infinite retention")
    # -r|--retention)
    # [[ -z "$2" ]] && error "$1 takes a NUM argument"
    # shift
    # retention="$1"
    # ;;
    -s|--source)
      [[ -z "$2" ]] && error "$1 takes a DIR argument"
      shift
      source_dir="$1"
      ;;
    -t|--target)
      [[ -z "$2" ]] && error "$1 takes a DIR argument"
      shift
      target_dir="$1"
      ;;
    # log options; we'll use the highest specified log level
    # (-q and -v never lower a level that was already raised)
    -q|--quiet) [[ "$LOG_LEVEL" -lt 1 ]] && LOG_LEVEL=0 ;;
    -v|--verbose) [[ "$LOG_LEVEL" -lt 1 ]] && LOG_LEVEL=1 ;;
    -d|--debug) LOG_LEVEL=2 ;;
    -h|--help) echo -n "$usage"; exit 0 ;;
    # a bare action word; the last one given wins
    do|ls|rm) ACTION="$1" ;;
    *) error "unknown option $1" ;;
  esac
  shift
done
# TODO make running as an alternate user work
# if the -u option differs from $USER, use su to run it as $user
# if [[ ! "$user" = "$USER" ]] ; then
# [[ "$LOG_LEVEL" -ge 1 ]] && echo "running as $user"
# "$su $su_args" "'$0 $ARGV'"
# exit $?
# fi
### load the config file, if there is one
# The file is plain bash and is sourced into this shell, so it can set any of
# the script's variables.
# NOTE(review): it is sourced after the command line has been parsed, so an
# assignment in the config file replaces a value given with -s/-t -- confirm
# that this precedence is intended.
if [[ -f "$config" ]] ; then
  [[ "$LOG_LEVEL" -ge 1 ]] && echo "using config file $config"
  source "$config"
fi

### let's make sure we have enough information to get started
# make sure we've got a legit source dir
[[ -z "$source_dir" ]] && error "source directory unspecified"
[[ -d "$source_dir" ]] || error "source directory '$source_dir' doesn't exist"
# generate a canonical path for our source directory; the argument is quoted
# so that paths containing whitespace or glob characters survive intact
source_dir=$(readlink -f -- "$source_dir")
[[ "$LOG_LEVEL" -ge 1 ]] && echo "using source dir $source_dir"

# make sure we've got a legit target dir
[[ -z "$target_dir" ]] && error "target directory unspecified"
[[ -d "$target_dir" ]] || error "target directory '$target_dir' doesn't exist"
# generate a canonical path for our target directory (quoted, see above)
target_dir=$(readlink -f -- "$target_dir")
[[ "$LOG_LEVEL" -ge 1 ]] && echo "using target dir $target_dir"

# TODO check retention var for inappropriate value

# append some logging options to rsync, matching our own verbosity
[[ "$LOG_LEVEL" -ge 2 ]] && rsync_opts="$rsync_opts -vv"
[[ "$LOG_LEVEL" -eq 1 ]] && rsync_opts="$rsync_opts -v"
[[ "$LOG_LEVEL" -eq 0 ]] && rsync_opts="$rsync_opts -q"
### let's figure out some things about where we're backing up
# Each backup lives in an immediate sub-directory of $target_dir that is named
# after the epoch second at which it was taken (the 'do' action creates
# "$target_dir/<date +%s>").  This section leaves behind:
#   dirs                  newline-separated backup paths, oldest first
#   count                 number of backups found
#   oldest / oldest_dir   timestamp and path of the oldest backup
#   newest / newest_dir   timestamp and path of the newest backup
# Sub-directories whose names are not purely numeric cannot be backups made
# by this script; they are ignored instead of being counted and compared.

# first, we need to find the latest available copy in $target_dir
# (only the last path component is kept so that the sort is truly numeric)
names=$(find "$target_dir" -maxdepth 1 -mindepth 1 -type d \
  | awk -F/ '{ print $NF }' \
  | sort -n)
nl=$'\n'
dirs=""

# let's track a few variables that might be useful
newest=0 # oldest possible date
newest_dir=""
[[ "$LOG_LEVEL" -ge 2 ]] && echo "starting with '$newest' as the newest possible date"
oldest=$(date +%s) # "now"
oldest_dir=""
[[ "$LOG_LEVEL" -ge 2 ]] && echo "starting with '$oldest' as the oldest possible date"
count=0

if [ -z "$names" ] ; then
  # if we're working with an empty target directory we needn't worry about it
  [[ "$LOG_LEVEL" -ge 1 ]] && echo "empty target directory"
else
  # otherwise, we have to determine which is the latest available copy
  [[ "$LOG_LEVEL" -ge 1 ]] && echo "non-empty target directory"
  while IFS= read -r d ; do
    # garbage dirs in the target are skipped, not treated as backups
    if [[ ! "$d" =~ ^[0-9]+$ ]] ; then
      [[ "$LOG_LEVEL" -ge 1 ]] && echo "ignoring non-backup directory '$d'"
      continue
    fi
    dir="${target_dir%/}/$d"
    dirs="${dirs:+$dirs$nl}$dir"
    count=$((count + 1))
    [[ "$LOG_LEVEL" -ge 2 ]] && echo "incremented target dir count to $count"
    # is this directory older?  The first backup seen always qualifies, so
    # oldest_dir is set even if every timestamp lies in the future.
    # ([ ] is used on purpose: it compares in base 10 even with leading zeros)
    if [[ -z "$oldest_dir" ]] || [ "$d" -lt "$oldest" ] ; then
      oldest=$d
      oldest_dir=$dir
      [[ "$LOG_LEVEL" -ge 2 ]] && echo "new oldest target dir: $oldest"
    fi
    # is this directory newer?  Same first-seen rule as above.
    if [[ -z "$newest_dir" ]] || [ "$d" -gt "$newest" ] ; then
      newest=$d
      newest_dir=$dir
      [[ "$LOG_LEVEL" -ge 2 ]] && echo "new newest target dir: $newest"
    fi
  done <<< "$names"
  [[ "$LOG_LEVEL" -ge 1 ]] && echo "oldest target: $oldest"
  [[ "$LOG_LEVEL" -ge 1 ]] && echo "newest target: $newest"
  [[ "$LOG_LEVEL" -ge 1 ]] && echo "target count: $count"
fi
### all our vars should be set now, it's time to perform our action
# Reads: ACTION, dirs (newline-separated backup paths), count, newest_dir,
#        source_dir, target_dir, rsync, rsync_opts, LOG_LEVEL
case "$ACTION" in
  ls) # list the current backups: "<epoch> (<human readable date>)" per line
    # read line by line so that a target path containing spaces stays whole
    while IFS= read -r dir ; do
      d=${dir##*/}
      # only numeric names are timestamps that date can render
      [[ "$d" =~ ^[0-9]+$ ]] || continue
      date=$(date --date "@$d")
      echo "$d ($date)"
    done <<< "$dirs"
    ;;
  do) # perform a backup into a new directory named after the current time
    now=$(date +%s)
    td="$target_dir/$now"
    # make sure the new target doesn't already exist
    [[ -d "$td" ]] && error "target directory $td already exists"
    [[ "$LOG_LEVEL" -ge 1 ]] && echo "creating $td"
    mkdir -p "$td" || error "unable to create target directory $td"
    # rsync_opts is a flat, space-separated option string; split it exactly
    # once, here, so that the paths added below are never split themselves
    # shellcheck disable=SC2206 -- word splitting is intended
    rsync_args=( $rsync_opts )
    # add link-dest if there are previous targets; requiring a non-empty
    # newest_dir keeps us from ever passing "--link-dest=/"
    if [[ "$count" -gt 0 && -n "$newest_dir" ]] ; then
      [[ "$LOG_LEVEL" -ge 2 ]] && echo "link-dest: $newest_dir/"
      rsync_args+=( "--link-dest=$newest_dir/" )
    fi
    [[ "$LOG_LEVEL" -ge 2 ]] && echo "$rsync ${rsync_args[*]} $source_dir/ $td/"
    # perform the actual rsync; its exit status becomes the script's.
    # $rsync itself is left unquoted, as before, in case it carries arguments.
    $rsync "${rsync_args[@]}" "$source_dir/" "$td/"
    ;;
  rm) # remove a backup
    echo "rm action UNIMPLEMENTED!"
    # TODO implement rm action
    # TODO implement a restore action?
    ;;
  *) error "unknown action '$ACTION'" ;;
esac
# TODO implement a "cleanup" step (important for implementing retention)
Note: syntax-highlighted HTML code generated using highlight 2.16