#!/usr/bin/perl -w
#   FauBackup - Backup System, using a Filesystem for Storage
#   Copyright (c) 2000-01 Dr. Volkmar Sieh, (c) 2000-06 Martin Waitz
#   $Id$
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use strict;
use Getopt::Long;


sub printusage($)
{
	my $exitcode = shift;
	print "Usage: faubackup-find [--noregexp] [ignore list]...\n";
	exit $exitcode if $exitcode>=0;
}

sub printhelp()
{
	printusage(-1);
	print <<END;

faubackup-find searches for files in the current directory, and prints their
name to standard-output. That output is intended to be piped through
faubackup-gather, in order to build a backup.
END

	exit 0;
}

sub printversion()
{
	print "FauBackup 0.5.9\n";
	exit 0;
}


my $verbose;
my $useregexp = 1;
my $one_file_system = 0;

my $rootdev;

# little bit more shell like matching
# '*' is standard wildcard
# '**' even crosses directory boundaries
# '?' for exactly one char
# all other characters get escaped (e.g. '.')
# you can use full regexps when preceding the line with 'REGEXP:'
sub match2regexp($)
{
	my $regexp = shift;

	print STDERR "match2regexp: '$regexp' -> " if $verbose;
	if( $regexp =~ /^REGEXP:\s*(.*)$/ ) {
		$regexp = $1;
		if( !$useregexp ) {
			# replace with known invalid one, check_ignore will warn
			$regexp .= "/ :-(";
		}
	} else {
		$regexp =~ s/([^\?\*a-zA-Z0-9\/])/\\$1/g; # escape everything
		$regexp =~ s/\?/./g;
		$regexp =~ s/\*\*/{}/g;		# unescaped '{}' for '**'
		$regexp =~ s/\*/[^\/]*/g;	# to not let '**' match here
		$regexp =~ s/^\{\}\//(?:.*\/)?/;# **/ at start matches .
		$regexp =~ s/\{\}/.*/g;
	}
	print STDERR "'$regexp'\n" if $verbose;

	return $regexp;
}

# escape all characters that could be special in a regexp
sub escape($)
{
	my $str = shift;
	$str =~ s/([^a-zA-Z0-9\/])/\\$1/g;
	return $str;
}

sub check_ignore($$)
{
	my( $ignore, $conf ) = @_;

	if( $ignore =~ /^\// ) {
		print STDERR "Warning: using absolute path in $conf\n";
		return 0;
	}
	eval { "foobar" =~ /^$ignore$/; };
	if( $@ ) {
		print STDERR "Warning: invalid ignore pattern /$ignore/" .
			" in $conf: $@\n";
		return 0;
	}

	1;
}

# read a list of patterns to ignore from the directory specified
# one entry per line, which gets prefixed by the directory name
# (to allow to specify to ignore things in subdirs
sub get_ignorelist($)
{
	my $dir = shift;
	my $ignore = "$dir/.faubackuprc";
	my @ignorelist;
	return unless -f $ignore;

	open IGNORE, $ignore or print STDERR "could not open $ignore: $!\n";
	while(<IGNORE>) {
		chomp;
		if( /^#/ ) { next; }
		if( /^\s*NoBackup:\s*(.+)\s*$/ ) {
			check_ignore($1, $ignore) or next;
			push @ignorelist, escape("$dir/") . $1;
			next;
		}
		if( /^\s*NoBackup\s*$/ ) {
			push @ignorelist, ".*";
			next;
		}
		if( /^\s*Ignore\s+(.+)\s*$/ ) {
			my $regexp = match2regexp($1);
			check_ignore($regexp, $ignore) or next;
			push @ignorelist, escape("$dir/") . $regexp;
			next;
		}
		# more things to come...
	}
	close IGNORE or print STDERR "could not close $ignore: $!";

	return @ignorelist;
}

# search one directory and print the path of every item contained therein
# recurses on subdirectories
sub process_dir($@)
{
	my( $dir, @parentlist ) = @_;
	my( $file, $ignore );
	my @entries;

	# merge list from parent with this one:
	my @ignorelist = get_ignorelist($dir);
	#push @ignorelist, @parentlist;
	foreach (@parentlist) { push @ignorelist, $_ };

	# traverse all directory entries
	unless( opendir DIR, $dir ) {
		print STDERR "could not open $dir/: $!";
		return;
	}
	@entries = readdir DIR;
	closedir DIR or die "could not close $dir/: $!";
	FILE:
	foreach $file (@entries) {
		next if $file eq '.' || $file eq '..';
		if ($file eq '..inodes') {
			print STDERR "warning: $dir contains ..inodes!\n";
			next;
		}
		$file = "$dir/$file";
		# check ignore lists
		foreach (@ignorelist) {
			next FILE if $file =~ /^$_$/;
		}
		# not matched any filter
		# recurse if this is a directory we can cd into
		if( !-l $file && -d _ && -x _ ) {
			my $dev;
			($dev) = stat(_);
			next if $one_file_system && ($dev != $rootdev);
			&process_dir( $file, @ignorelist );
		}
		# output file name
		print "$file\000";
	}

}


# command line option parsing
my( $usage, $help, $version );
GetOptions(
	"help|?" => \$help,
	"usage" => \$usage,
	"version" => \$version,
	"verbose" => \$verbose,
	"regexp!" => \$useregexp,
	"one-file-system|L" => \$one_file_system,
) or printusage(1);
printusage(0) if $usage;
printhelp() if $help;
printversion() if $version;

# command line arguments are used as wildcards of files to ignore
my @ignorelist;
foreach (@ARGV) {
	# interpret argument as an ignore-match
	my $regexp = match2regexp $_;
	check_ignore($regexp, "command line ignore list") or next;
	push @ignorelist, "\\./$regexp";
}
# start search
($rootdev) = stat(".");
process_dir( ".", @ignorelist );
print ".\000";
