cliftonsnyder.net - projects - bak

bak [download]

bak is a simple bash script that seeks to use rsync for efficient incremental backups, both in terms of time and disk space consumed. It is able to do this by leveraging rsync to copy only incremental changes, creating hard links for those files that haven't been modified between backups.

At present, the documentation is pretty sparse - pretty much just a README. I need to work on that...

bak is released under the GNU GPL v2.

source

#!/bin/bash
#
# bak - a simple rsync backup script
# Copyright (C) 2011 Clifton L. Snyder <cliff.snyder@gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# Help text printed by -h/--help and by error().
# $0 expands to the name the script was invoked as.  The text deliberately
# ends with a newline because it is printed with `echo -n`.
usage="usage: $0 [options] action
options:
 -f FILE          path to config file (default: /etc/bak/bak.conf)
 -s, --source DIR source directory
 -t, --target DIR target directory
 -q, --quiet      suppress non-error messages
 -v, --verbose    turn on verbose logging
 -d, --debug      turn on debugging output
 -h, --help       display a brief help message and exit
actions:
 ls  list the current backups (default)
 do  perform a backup
 rm  remove a backup (UNIMPLEMENTED)
"
# TODO make running as an alternate user work
# -u, --user user run as user
# TODO implement retention (as of now: "infinite retention")
# -r, --retention  number of copies to keep
# TODO make 'do' the default action?

# Logging verbosity; starts out (relatively) quiet with no debugging.
# 0: quiet; 1: verbose; 2: debug
LOG_LEVEL=0

### default config options

# config file that is sourced later on if it exists
config=/etc/bak/bak.conf
# TODO implement retention (as of now: "infinite retention")
# retention=30
# TODO make running as an alternate user work
# user="$USER"

# rsync binary and the base options passed to it for every backup
# some options from http://www.sanitarium.net/golug/rsync_backups_2010.html
#   check there for more detail
# TODO research --hard-links
# TODO research --inplace
# TODO research --numeric-ids
# TODO include support for includes/excludes?
# TODO include the option to not cross filesystem boundaries?
# TODO include interactive options --progress and --itemize-changes?
rsync=/usr/bin/rsync
rsync_opts="--archive --hard-links"
rsync_opts+=" --inplace --numeric-ids --delete"

# action taken when none is named on the command line
ACTION=ls

# su="/bin/su"
# su_args="-c"

# Report a fatal problem and terminate the script.
# Globals:   usage (read)
# Arguments: the words of the message
# Outputs:   "error: <message>" followed by the usage text, on stderr
# Exits:     always, with status 1
error() {
    {
        echo error: "$@"
        echo -n "$usage"
    } 1>&2
    exit 1
}

# preserve args (in case of -u)
# ARGV="$@"

### parse command line args

# Parse the command line into the script's global settings.
# Globals written: config, source_dir, target_dir, LOG_LEVEL, ACTION
# Globals read:    usage, LOG_LEVEL
# Arguments:       the command-line words ("$@")
# Exits:           0 after printing the usage text for -h/--help;
#                  1 (through error) on an unknown word, a missing option
#                  argument or a config file that does not exist
parse_args() {
    # Loop on the argument count, not on "$1" being non-empty: an empty
    # argument used to end the loop early and silently discard every
    # argument after it; now it is reported as an unknown option.
    while [[ $# -gt 0 ]] ; do
        case "$1" in
            -f)
                [[ -z "$2" ]] && error "$1 takes a FILE argument"
                shift
                config="$1"
                [[ ! -f "$config" ]] && error "config file '$config' not found"
                ;;
# TODO make running as an alternate user work
#	-u|--user)
#	    shift
#	    [[ -z "$1" ]] && error "-u takes a user as an argument"
#	    id "$1" >& /dev/null
#	    [[ ! "$?" -eq 0 ]] && error "user '$1' does not exist"
#	    user="$1"
#	    ;;
# TODO implement retention (as of now: "infinite retention")
#	-r|--retention)
#	    [[ -z "$2" ]] && error "$1 takes a NUM argument"
#	    shift
#	    retention="$1"
#	    ;;
            -s|--source)
                [[ -z "$2" ]] && error "$1 takes a DIR argument"
                shift
                source_dir="$1"
                ;;
            -t|--target)
                [[ -z "$2" ]] && error "$1 takes a DIR argument"
                shift
                target_dir="$1"
                ;;
            # log options; we'll use the highest specified log level
            -q|--quiet) [[ "$LOG_LEVEL" -lt 1 ]] && LOG_LEVEL=0;;
            -v|--verbose) [[ "$LOG_LEVEL" -lt 1 ]] && LOG_LEVEL=1;;
            -d|--debug) LOG_LEVEL=2;;
            -h|--help) echo -n "$usage"; exit 0;;
            do|ls|rm) ACTION="$1";;
            *) error "unknown option $1";;
        esac
        shift
    done
}

parse_args "$@"

# TODO make running as an alternate user work
# if the -u option differs from $USER, use su to run it as $user
# if [[ ! "$user" = "$USER" ]] ; then
#     [[ "$LOG_LEVEL" -ge 1 ]] && echo "running as $user"    
#     "$su $su_args" "'$0 $ARGV'"
#     exit $?
# fi

# Print the canonical absolute form of the path given as $1.
# Quoting and "--" keep paths containing spaces or a leading dash intact.
canonical_dir() {
    readlink -f -- "$1"
}

# Remember any directories that were set before this point (normally by
# -s/-t) so that the config file, sourced next, supplies defaults instead
# of silently overriding an explicit choice.
cli_source_dir="$source_dir"
cli_target_dir="$target_dir"

if [[ -f "$config" ]] ; then
    [[ "$LOG_LEVEL" -ge 1 ]] && echo "using config file $config"
    # the config file is executed as bash code in this shell
    source "$config"
fi
[[ -n "$cli_source_dir" ]] && source_dir="$cli_source_dir"
[[ -n "$cli_target_dir" ]] && target_dir="$cli_target_dir"

### let's make sure we have enough information to get started

# make sure we've got a legit source dir
[[ -z "$source_dir" ]] && error "source directory unspecified"
[[ -d "$source_dir" ]] || error "source directory '$source_dir' doesn't exist"
# generate a canonical path for our source directory
source_dir=$(canonical_dir "$source_dir")
[[ "$LOG_LEVEL" -ge 1 ]] && echo "using source dir $source_dir"
# make sure we've got a legit target dir
[[ -z "$target_dir" ]] && error "target directory unspecified"
[[ -d "$target_dir" ]] || error "target directory '$target_dir' doesn't exist"
# generate a canonical path for our target directory
target_dir=$(canonical_dir "$target_dir")
[[ "$LOG_LEVEL" -ge 1 ]] && echo "using target dir $target_dir"
# TODO check retention var for inappropriate value

# append the rsync verbosity flag that matches our log level
if [[ "$LOG_LEVEL" -ge 2 ]] ; then
    rsync_opts="$rsync_opts -vv"
elif [[ "$LOG_LEVEL" -eq 1 ]] ; then
    rsync_opts="$rsync_opts -v"
elif [[ "$LOG_LEVEL" -eq 0 ]] ; then
    rsync_opts="$rsync_opts -q"
fi

### let's figure out some things about where we're backing up

# Inventory the backups that already exist in $target_dir.
# A backup is a direct subdirectory whose name consists only of digits (the
# 'do' action names them after `date +%s`).  Any other subdirectory is
# ignored, so it can neither break the integer comparisons below nor be
# counted as a previous backup.
# Globals read:    target_dir, LOG_LEVEL
# Globals written: dirs (newline-separated backup paths, ascending by
#                  timestamp), count, oldest, oldest_dir, newest, newest_dir
scan_backups() {
    local names d dir
    local nl=$'\n'

    # strip each path down to its last component, keep the all-digit names
    # and sort those numerically
    names=$(find "$target_dir" -maxdepth 1 -mindepth 1 -type d \
                | sed 's|.*/||' | grep -E '^[0-9]+$' | sort -n)

    dirs=""
    # let's track a few variables that might be useful
    newest=0 # oldest possible date (the epoch)
    newest_dir=""
    [[ "$LOG_LEVEL" -ge 2 ]] && echo "starting with '$newest' as the newest possible date"
    oldest=$(date +%s) # "now"
    oldest_dir=""
    [[ "$LOG_LEVEL" -ge 2 ]] && echo "starting with '$oldest' as the oldest possible date"
    count=0

    if [[ -z "$names" ]] ; then
        # no previous backups: nothing more to work out
        [[ "$LOG_LEVEL" -ge 1 ]] && echo "empty target directory"
        return 0
    fi

    # otherwise, we have to determine which is the latest available copy
    [[ "$LOG_LEVEL" -ge 1 ]] && echo "non-empty target directory"
    while IFS= read -r d ; do
        # ${target_dir%/} avoids a double slash when the target is "/"
        dir="${target_dir%/}/$d"
        dirs="${dirs:+$dirs$nl}$dir"
        count=$((count + 1))
        [[ "$LOG_LEVEL" -ge 2 ]] && echo "incremented target dir count to $count"
        # `[` compares as decimal integers, so a leading zero in a name is
        # not mistaken for octal
        # is this directory older?
        if [ "$d" -lt "$oldest" ] ; then
            oldest=$d
            oldest_dir=$dir
            [[ "$LOG_LEVEL" -ge 2 ]] && echo "new oldest target dir: $oldest"
        fi
        # is this directory newer?
        if [ "$d" -gt "$newest" ] ; then
            newest=$d
            newest_dir=$dir
            [[ "$LOG_LEVEL" -ge 2 ]] && echo "new newest target dir: $newest"
        fi
    done <<< "$names"
    [[ "$LOG_LEVEL" -ge 1 ]] && echo "oldest target: $oldest"
    [[ "$LOG_LEVEL" -ge 1 ]] && echo "newest target: $newest"
    [[ "$LOG_LEVEL" -ge 1 ]] && echo "target count: $count"
    return 0
}

scan_backups

### all our vars should be set now, it's time to perform our action

# Carry out the action selected on the command line.
# Globals read: ACTION, dirs, count, newest_dir, source_dir, target_dir,
#               rsync, rsync_opts, LOG_LEVEL
# Outputs:      ls - one "<timestamp> (<date>)" line per entry in $dirs
#               do - log lines depending on LOG_LEVEL, plus rsync's output
# Returns:      for 'do', the exit status of rsync
# Exits:        1 (through error) if the new backup directory already
#               exists or cannot be created, or if ACTION is unknown
run_action() {
    local dir d when now td
    local -a rsync_args

    case "$ACTION" in
        ls) # list the current backups
            # read line by line so paths containing spaces stay intact
            while IFS= read -r dir ; do
                [[ -n "$dir" ]] || continue
                d=${dir##*/}
                # "@N" is GNU date's notation for N seconds since the epoch
                when=$(date --date "@$d")
                echo "$d ($when)"
            done <<< "$dirs"
            ;;
        do) # perform a backup
            now=$(date +%s)
            td="$target_dir/$now"
            # make sure the new target doesn't already exist
            [[ -d "$td" ]] && error "target directory $td already exists"
            [[ "$LOG_LEVEL" -ge 1 ]] && echo "creating $td"
            mkdir -p "$td" || error "could not create $td"
            # rsync_opts is a space-separated option string, so it is split
            # into words on purpose; everything path-like is added to the
            # array as a single element so embedded spaces survive
            # shellcheck disable=SC2206
            rsync_args=($rsync_opts)
            # add link-dest if there are previous targets; never pass an
            # empty newest_dir, which would turn into --link-dest=/
            if [[ "$count" -gt 0 && -n "$newest_dir" ]] ; then
                [[ "$LOG_LEVEL" -ge 2 ]] && echo "link-dest: $newest_dir/"
                rsync_args+=("--link-dest=$newest_dir/")
            fi
            [[ "$LOG_LEVEL" -ge 2 ]] && echo "$rsync ${rsync_args[*]} $source_dir/ $td/"
            # perform the actual rsync ($rsync is left unquoted, as before,
            # so a multi-word value keeps working)
            $rsync "${rsync_args[@]}" "$source_dir/" "$td/"
            ;;
        rm) # remove a backup
            echo "rm action UNIMPLEMENTED!"
# TODO implement rm action
# TODO implement a restore action?
            ;;
        *) error "unknown action '$ACTION'";;
    esac
}

run_action

# TODO implement a "cleanup" step (important for implementing retention)

Note: syntax-highlighted HTML code generated using highlight 2.16