#!/usr/bin/perl
#
# $Id: tag.pl,v 1.3 2005/07/12 09:37:23 sch Exp $
#
# Copyright 2005 Stephen Hahn.  
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# tag - personal file tagging, with hierarchies

use Fcntl;
use SDBM_File;

use Cwd;
use File::Basename;
use File::Path;
use Getopt::Long qw(:config auto_help bundling);

use Data::Dumper;

my $link_root = "$ENV{HOME}/tag";
my $cwd = getcwd();
my $cmd = "tag";

$VERSION = "0.5";

sub permute_n ($$) {
	my ($n, $st) = @_;

	$st =~ s/^\s*//;
	$st =~ s/\s*$//;

	my @a = split(/\s+/, $st);
	my @r = ();

	return () if ($n == 0);
	return (@a) if ($n == 1);

	my $j;

	for ($j = 0; $j <= $#a; $j++) {
		my $sr = $st;
		$sr =~ s/\b$a[$j]\b//;

		my @rs = permute_n($n - 1, $sr);

		foreach (@rs) {
			$_ .= " $a[$j]";
		}
		
		push @r, @rs;
	}

	return (@r);
}

sub permute_generate ($) {
	my ($st) = @_;

	my @a = split(/\s+/, $st);
	my @r = ();

	my $n = $#a + 1;

	for (; $n >=0; $n--) {
		push @r, permute_n($n, $st);
	}

	foreach (@r) {
		s/\s+/\//g;
	}

	return (@r);
}

sub is_dir_empty ($) {
	my ($dir) = @_;

	opendir DIR, $dir;
	my @dirs = readdir DIR;
	closedir DIR;

	return ($#dirs <= 1);
}

sub link_add ($$$) {
	my ($fullpath, $tags, $xtag) = @_;

	my $file = basename($fullpath);

	my @dirs = permute_generate($tags);

	foreach (@dirs) {
		mkpath("$link_root/$_");
		symlink ($fullpath, "$link_root/$_/$file");
	}
}

sub link_delete ($$$) {
	my ($fullpath, $tags, $xtag) = @_;

	my $file = basename($fullpath);

	my @dirs = grep /\b$xtag\b/, permute_generate($tags);

	foreach (@dirs) {
		unlink "$link_root/$_/$file";
		rmdir("$link_root/$_") if is_dir_empty("$link_root/$_");
	}
}

sub tg_read ($$) {
	my ($path, $options) = @_;

	my $file = basename($path);
	my $dir = dirname($path);
	my $r;

	my %db;

	tie(%db, 'SDBM_File', "$dir/.com.blueslugs.tg.db",
	    O_RDWR|O_CREAT, 0666) 
	or die "$cmd: couldn't tie SDBM file in \"$dir\"";
	
	if (defined($db{"$file"})) {
		$r = "$file: " if ($options =~ "prefix");
		$r .= $db{"$file"};
	}

	untie %db;

	return ($r);
}

sub tg_add ($$) {
	my ($path, $tag) = @_;

	my $file = basename($path);
	my $dir = dirname($path);

	my %db;

	tie(%db, 'SDBM_File', "$dir/.com.blueslugs.tg.db",
	    O_RDWR|O_CREAT, 0666) 
	or die "$cmd: couldn't tie SDBM file in \"$dir\"";

	if (defined($db{"$file"})) {
		if ($db{"$file"} !~ /\b$tag\b/) {
			$db{"$file"} .= " $tag";
		}
		link_add($path, $db{$file}, $tag);
	} else {
		$db{"$file"} = "$tag";
		link_add($path, $db{$file}, $tag);
	}

	untie %db;
}

sub tg_add_set ($$) {
	my ($path, $tagref) = @_;
	my @tags = @$tagref;

	foreach (@tags) {
		tg_add($path, $_);
	}
}
	
sub tg_delete ($$) {
	my ($path, $tag) = @_;

	my $file = basename($path);
	my $dir = dirname($path);

	my %db;

	tie(%db, 'SDBM_File', "$dir/.com.blueslugs.tg.db",
	    O_RDWR|O_CREAT, 0666) 
	or die "$cmd: couldn't tie SDBM file in \"$dir\"";

	if (defined($db{"$file"})) {
		if ($db{"$file"} =~ /\b$tag\b/) {
			link_delete($path, $db{$file}, $tag);
			$db{"$file"} =~ s/ $tag / /;
		} else {
			die "$cmd: \"$file\" not tagged with \"$tag\"\n";
		}
	} else {
		die "$cmd: \"$file\" has no tags\n";
	}

	untie %db;
}

sub tg_delete_set ($$) {
	my ($path, $tagref) = @_;
	my @tags = @$tagref;

	foreach (@tags) {
		tg_delete($path, $_);
	}
}

sub tg_query ($) {
	my ($tagref) = @_;

	my @tags = @$tagref;

	my $tag = join('/', @tags);

	if (! -d "$link_root/$tag") {
		die "no files tagged with group \"$tag\"\n";
	}

	opendir DIR, "$link_root/$tag";

	my @entries = readdir DIR;

	closedir DIR;

	foreach (@entries) {
		next if (! -l "$link_root/$tag/$_");

		print readlink("$link_root/$tag/$_"), "\n";
	}
}
	
sub file_move($$$) {
	my ($srcpath, $dstpath, $options) = @_;

	my $tags = tg_read($srcpath, "");

	$dstpath .= basename($srcpath) if (-d $dstpath);

	`/usr/bin/mv $options $srcpath $dstpath`;
	if ($? != 0) {
		die "move from \"$srcpath\" to \"$dstpath\" failed (exit status $?)\n";
	}

	if ($srcpath ne $dstpath and -f $srcpath) {
		die "$cmd: move from \"$srcpath\" to \"$dstpath\" apparently failed\n";
	}

	if (defined($tags)) {
		foreach (split(/\s+/, $tags)) {
			tg_delete($srcpath, $_);
		}
	}

	if (defined($tags)) {
		foreach (split(/\s+/, $tags)) {
			tg_add($dstpath, $_);
		}
	}
}

sub file_copy ($$$) {
	my ($srcpath, $dstpath, $options) = @_;

	my $tags = tg_read($srcpath, "");

	`/usr/bin/cp $options $srcpath $dstpath`;
	if ($? != 0) {
		die "$cmd: copy from \"$srcpath\" to \"$dstpath\" failed (exit status $?)\n";
	}

	$dstpath .= basename($srcpath) if (-d $dstpath);

	if (! -f $dstpath) {
		die "$cmd: copy from \"$srcpath\" to \"$dstpath\" apparently failed\n";
	}

	if (defined($tags)) {
		foreach (split(/\s+/, $tags)) {
			tg_add($dstpath, $_);
		}
	}
}

sub file_remove ($$) {
	my ($path, $options) = @_;

	my $tags = tg_read($path, "");

	`/usr/bin/rm $options $path`;
	if ($? != 0) {
		die "$cmd: removal of \"$path\" failed (exit status $?)\n";
	}

	if (-f $path) {
		die "$cmd: removal of \"$path\" apparently failed\n";
	}

	if (defined($tags)) {
		foreach (split(/\s+/, $tags)) {
			tg_delete($path, $_);
		}
	}
}
	
my %options = ();
my $r = GetOptions(\%options,
    'root|R=s',

    'add|a=s@',
    'delete|d=s@',
    'query|q=s@',

    'copy|cp|c',
    'move|mv|m',
    'remove|rm|r',

    'interactive|i',
);

pod2usage(2) unless (defined($r));

my $args = "";
$args = "-i" if (defined($options{interactive}));

my $display_options = "";
$display_options = "prefix" if ($#ARGV > 0);

#
# Set alternate query set root path, if defined.  (Used for testing, or
# for shared tag sets.)
#
if (defined($options{root})) {
	if ($options{root} =~ /^\//) {
		$link_root = $options{root};
	} else {
		$link_root = "$cwd/$options{root}";
	}
}

if (defined($options{query})) {
	tg_query($options{query});

	exit (0);
}

if (defined($options{copy}) or defined($options{move})) {
	#
	# This invocation is a copy/move operation, in which the final
	# operand is potentially a destination directory.
	#
	die "$cmd: final argument must be a directory\n"
	    if ($#ARGV > 1 && ! -d $ARGV[$#ARGV]);
	
	my $dst = $ARGV[$#ARGV];
	$dst = "$cwd/$dst" unless ($dst =~ /^\//);

	if (defined($options{move})) {
		for ($r = 0; $r < $#ARGV; $r++) {
			my $src = $ARGV[$r];
			$src = "$cwd/$src" unless ($src =~ /^\//);
			file_move($src, $dst, $args);
		}
	} else {
		for ($r = 0; $r < $#ARGV; $r++) {
			my $src = $ARGV[$r];
			$src = "$cwd/$src" unless ($src =~ /^\//);
			file_copy($src, $dst, $args);
		}
	}

	exit (0);
}

#
# This invocation is a per-file operation.
#
foreach (@ARGV) {
	$_ = "$cwd/$_" unless ($_ =~ /^\//);

	if (defined($options{add})) {
		tg_add_set($_, $options{add});
	} elsif (defined($options{delete})) {
		tg_delete_set($_, $options{delete});
	} elsif (defined($options{remove})) {
		file_remove($_, $args);
	} else {
		print tg_read($_, $display_options), "\n";
	}
}

exit(0);

__END__

=head1 NAME

tag - tag-based file classifier 

=head1 SYNOPSIS

  tag --add tag [...] [--root dir] file ...
  tag --delete tag [...] [--root dir] file ...
  tag --query tag [...] [--root dir]

  tag --copy [--interactive] [--root dir] src [src2 ...] dst 
  tag --move [--interactive] [--root dir] src [src2 ...] dst
  tag --remove [--interactive] [--root dir] file ... 

  tag --help

=head1 DESCRIPTION

B<tag> provides a simple database- and symbolic link-based mechanism for
providing a tagging system for files.  The command builds a hierarchy of
directories and symbolic links representing the sets of files marked
with a specific tag.

The various options named after file operations preserve those tags in
the course of carrying out the given operation.

=head1 OPTIONS

=over 8

=item B<--add>

Add the given tags from the tag set to the specified files.
(Abbreviated form: -a)

=item B<--delete>

Delete the given tags from the tag set for the specified files.
(Abbreviated form: -d)

=item B<--query>

Return the list of files matching the given tag(s).
(Abbreviated form: -q)

=item B<--root>

Use the given path as the root for the link hierarchy.  The default root
is C<$HOME/tag>. (Abbreviated form: -R)

=item B<--copy>

=item B<--move>

=item B<--remove>

Perform the named operation.  (Abbreviated forms:  -c, -m, -r)

=item B<--interactive>

In combination with any of the copy, move, or remove operations, make
the operation interactive, as governed by the underlying command.

=item B<--help>

Print a brief help message and exits.  (Abbreviated form: -?)

=back

=head1 SEE ALSO

cp(1), mv(1), rm(1)

=head1 NOTES

The total length of the tags, plus delimiters, is limited by the
underlying database representation.  The current representation has a
limit of 1008 characters.  This limit is expected to present a practical
limitation only on systems whose maximum path length is substantially
greater than this amount.

The number of links associated with a set of tags can grow
combinatorically.  On file systems unable to allocate metadata storage
for files and directories dynamically, large tag sets may exhaust file
system metadata capacity.

See 

  http://blueslugs.com/wordpress/

for further discussion.

=cut


