#!/usr/local/bin/perl
# ========================================================================
# urlwatch - watch URL and report when it is updated
# Andrew Ho (andrew@zeuscat.com)
#
# This program contains embedded documentation in Perl POD (Plain Old
# Documentation) format. Search for the string "=head1" in this document
# to find documentation snippets, or use "perldoc" to read it; utilities
# like "pod2man" and "pod2html" can reformat as well.
#
# $Id: urlwatch,v 1.2 2009/12/17 23:42:49 andrew Exp $
# ========================================================================

=head1 NAME

urlwatch - watch URL and report when it is updated

=head1 SYNOPSIS

  % urlwatch [-h] [-x] [-o] \
      [-d delay] [-f logfile] [-l loglevel] [-t title] \
      [--diff-options=options] \
      url [email]

=head1 DESCRIPTION

This program watches an HTTP URL for changes. By default, it backgrounds
itself and makes HTTP fetches to the given URL at specified intervals,
saving the file to a temporary location. Once one fetch has been
successfully made, an alert will be made whenever the contents of the
URL change.

When the content behind that URL changes, that fact is logged to a
logfile, if specified on the command line. If an e-mail address was
specified on the command line, e-mail notifications are sent.

A URL is required as a command line argument. A second command line
argument is an optional e-mail address to send notifications to. The
following is a list of additional, optional command line parameters
that this program understands.

=over 4

=item -h

Display a usage message and exit.

=item -x

Debug mode; do not daemonize, and set loglevel to C<debug>, and always
log to stderr, not to a logfile. Useful for testing.

=item -o

One-shot operation; exit after the first change. The default is to loop
forever and continuously watch for changes.

=item -d I<delay>

Set the poll delay; pause for I<delay> seconds between URL fetches.
The default is 60 seconds.

=item -f I<logfile>

Write log output to I<logfile>. This may be a path to a filename, or a
specification that Perl's C<open> command understands, for example, a
pipe to a process, as in the following example:

    urlwatch -f '| /path/to/my/loghandler'

The default behavior is to append to the logfile if it exists. If no
logfile is specified, log output goes to a dotfile named after the
program in your home directory (for example F<~/.urlwatch.log>). In
debug mode (C<-x>) log output is sent to stderr instead, which means
that output on stderr would be lost if the program daemonized.

=item -l I<loglevel>

Set log verbosity to one of the following levels:

=over 4

=item * C<debug>

=item * C<info>

=item * C<warning>

=item * C<error>

=back

=for comment
NOTE(review): the source list had five entries whose names were lost; the
four above are inferred from the Zeuscat::Log methods this script calls
(debug_log, info_log, warning_log, error_log). Confirm the names and the
missing fifth level against Zeuscat::Log.

The default verbosity is C<info>.

=item -t I<title>

If sending notification e-mails, include this text in the subject line
of the e-mail, instead of the URL being watched. This is useful for
making a more human-friendly subject line. The body of the message will
contain the full URL in any case.

=item --diff-options=I<options>

Use these options to your system's C<diff> command. Default is a unified
diff, ignoring changes to contiguous whitespace (C<-bu>).

=back

=head1 AUTHOR

Andrew Ho E<lt>F<andrew@zeuscat.com>E<gt>

=cut


# ------------------------------------------------------------------------
# Libraries, globals, and constants

use warnings;
use strict;

use File::Basename qw(basename);
use File::Temp qw(tempfile);
use File::Copy qw(copy);
use File::Compare qw(compare);
use Getopt::Long qw(GetOptions);
use POSIX qw(setsid);
use LWP::UserAgent ();
use Time::HiRes qw(gettimeofday tv_interval);
use Zeuscat::Log qw(:loglevels);

# Program name (used in messages and in the default logfile name), the
# From: header for notification mail, and the mailer command line.
our $ME       = basename $0;
our $FROM     = 'URL Watcher <andrew@zeuscat.com>';
our $SENDMAIL = '/usr/local/sbin/sendmail -oi -t';

# Command line state; the option defaults below are overridden by the
# option parsing code further down.
our @Original_ARGV;
our $Watch_URL;
our $Notify_Email;
our $Daemonize  = 1;
our $One_Shot   = 0;
our $Timeout    = 20;   # seconds
our $Poll_Delay = 60;   # seconds between fetches

# Default logfile lives in the invoking user's home directory; fall back
# to the historical hard-coded location when HOME is not available.
our $Logfile = join '/',
    (defined $ENV{HOME} && length $ENV{HOME} ? $ENV{HOME} : '/home/andrew'),
    ".$ME.log";
our $LogLevel = LOGLEVEL_INFO;
our $Title;
our $DO = '-bu';        # options passed to diff

# Paths of the two temporary files holding the last-seen and the newly
# fetched copy of the URL, and whether the first copy has been saved yet.
our($Reference_File, $New_File);
our $Have_Reference_File = 0;

our $USAGE = join ' ',
    "usage: $ME [-h] [-x] [-o]",
    "[-d delay] [-f logfile] [-l loglevel] [-t title]",
    "[--diff-options=diff]",
    "url [email]\n";
our $FULL_USAGE = $USAGE . <<"EndUsage";
    -h              display this help text and exit
    -x              debug mode (do not daemonize, log verbose output to stderr)
    -o              one-shot operation (exit after first change)
    -d delay        delay this many seconds between polls (default $Poll_Delay)
    -f logfile      log to this file (default $Logfile)
    -l loglevel     log with this level of verbosity (default info)
    -t title        display this text in e-mail subject line (default is URL)
    --diff-options  pass these options to diff (default is $DO)
    url             watch this URL (required, no default)
    email           send state change notification to this e-mail (default none)
EndUsage


# ------------------------------------------------------------------------
# Parse command line options

@Original_ARGV = @ARGV;

my($help, $debug);
{
    # Getopt::Long reports bad options through warn(); turn those into a
    # fatal error that carries the short usage line.
    local $SIG{__WARN__} = sub {
        my $errmsg = lcfirst join '', @_;
        chomp $errmsg;
        die "$ME: argument parsing error: $errmsg\n$USAGE";
    };
    GetOptions(
        'h|help'         => \$help,
        'x|debug'        => \$debug,
        'o|oneshot'      => \$One_Shot,
        'd|delay=i'      => \$Poll_Delay,
        'f|logfile=s'    => \$Logfile,
        'l|loglevel=s'   => \$LogLevel,
        't|title=s'      => \$Title,
        'diff-options=s' => \$DO,
    );
}

if($help) {
    print $FULL_USAGE;
    exit 0;
}

# Debug mode stays in the foreground and passes no logfile to the logger.
if($debug) {
    $Daemonize = 0;
    $Logfile   = undef;
    $LogLevel  = LOGLEVEL_DEBUG;
}

if(@ARGV) {
    $Watch_URL    = shift @ARGV;
    $Notify_Email = shift @ARGV if @ARGV;
    warn "$ME: ignoring extraneous arguments\n" if @ARGV;
}

if(!$Watch_URL) {
    die "$ME: required URL argument missing\n$USAGE";
} elsif($Watch_URL !~ m,^https?://.+,) {
    die "$ME: invalid URL: $Watch_URL\n$USAGE";
}


# ------------------------------------------------------------------------
# Set up our temporary files

# True only in the process responsible for removing the temporary files.
# It is still unset if we exit before the files exist, and daemonize()
# clears it in the parent so the parent's exit does not delete the files
# out from under the child.
our $Cleanup_Temp_Files;

my $fh;
($fh, $Reference_File) = tempfile();
close $fh;
($fh, $New_File) = tempfile();
close $fh;

# Give both files an epoch-zero timestamp before the first fetch.
utime 0, 0, $Reference_File, $New_File;
$Cleanup_Temp_Files = 1;

END {
    if($Cleanup_Temp_Files) {
        unlink grep { defined } $Reference_File, $New_File;
    }
}


# ------------------------------------------------------------------------
# Main loop

our $Exit   = 0;
our $Logger = Zeuscat::Log->new($Logfile);
$Logger->open_logfile();
$Logger->loglevel($LogLevel);

my $version = '$Revision: 1.2 $';
$version =~ s/^\$Revision: //;
$version =~ s/ \$$//;
$Logger->info_log($ME, ' version ', $version, ' starting up');
$Logger->debug_log('invoked as: ', join ' ', $0, @Original_ARGV);
$Logger->debug_log(
    'reference file = "', $Reference_File, '"; ',
    'new file = "', $New_File, '"'
);

$SIG{INT}     = make_exit_signal_handler('INT');
$SIG{TERM}    = make_exit_signal_handler('TERM');
$SIG{__DIE__} = sub {
    # __DIE__ handlers also run for exceptions raised inside eval (and
    # while code is still being compiled, when $^S is undef). Those are
    # for the enclosing eval to deal with; only log truly fatal errors.
    return if !defined $^S || $^S;
    $Logger->error_die('caught fatal error: ', @_);
};
$SIG{__WARN__} = sub { $Logger->warning_log('uncaught warning: ', @_) };

daemonize() if $Daemonize;

# Apply the configured fetch timeout instead of the library default.
my $ua = LWP::UserAgent->new(timeout => $Timeout);

$Logger->info_log($0, ' watching URL: ', $Watch_URL);
while(!$Exit) {
    # The first successful fetch populates the reference file; every
    # later fetch goes to the new file and is compared against it.
    my $dest_file = $Have_Reference_File ? $New_File : $Reference_File;
    $Logger->debug_log('fetching URL to ', $dest_file, ': ', $Watch_URL);

    # mirror() can die (for example on a truncated transfer); catch that
    # so a transient failure does not take the whole watcher down.
    my $before      = [ gettimeofday() ];
    my $response    = eval { $ua->mirror($Watch_URL, $dest_file) };
    my $fetch_error = $@;
    my $elapsed     = tv_interval($before);

    if(!$response) {
        $fetch_error = 'unknown error'
            unless defined $fetch_error && length $fetch_error;
        chomp $fetch_error;
        $Logger->error_log(
            'fetch failed for URL ', $Watch_URL, ': ', $fetch_error
        );
        sleep $Poll_Delay unless $Exit;
        next;
    }

    $Logger->debug_log(
        sprintf 'fetch returned code %03d after %d ms',
                $response->code, 1000 * $elapsed
    );

    if($response->is_success) {
        if($Have_Reference_File) {
            if(compare($Reference_File, $New_File) == 0) {
                $Logger->debug_log('no change: new file matches reference');
            } else {
                $Logger->info_log('changes detected');

                my $diff = qx(/usr/bin/diff $DO $Reference_File $New_File);
                # diff exits 0 or 1 when it ran properly; anything else
                # (or failure to launch) means its output is unreliable.
                my $diff_status = $?;
                if($diff_status == -1
                   || ($diff_status & 127)
                   || ($diff_status >> 8) > 1) {
                    $Logger->error_log(
                        'diff command failed with wait status ', $diff_status
                    );
                }
                log_debug_detail($Logger, 'output from diff', $diff);

                if($Notify_Email) {
                    my $subject = join ' ', $Title || $Watch_URL, 'changed';
                    my $body = "URL: $Watch_URL\nChanges detected" .
                               ($diff ? ":\n\n$diff" : ".\n");
                    send_mail($FROM, $Notify_Email, $subject, $body);
                }

                last if $One_Shot;

                # The new content becomes the reference for later polls.
                $Logger->debug_log(
                    'copying new file "', $New_File,
                    '" to "', $Reference_File, '"'
                );
                if(copy($New_File, $Reference_File)) {
                    $Logger->debug_log('copy successful');
                } else {
                    $Logger->error_die(
                        'could not copy "', $New_File,
                        '" to "', $Reference_File, '": ',
                        $! || 'unknown error'
                    );
                }
            }
        } else {
            $Logger->info_log('saved reference file');
            $Have_Reference_File = 1;

            # Seed the new file with the reference content.
            $Logger->debug_log(
                'copying reference file "', $Reference_File,
                '" to "', $New_File, '"'
            );
            if(copy($Reference_File, $New_File)) {
                $Logger->debug_log('copy successful');
            } else {
                $Logger->error_log(
                    'could not copy "', $Reference_File,
                    '" to "', $New_File, '": ',
                    $! || 'unknown error'
                );
            }
        }
    } elsif($response->code == 304) {
        $Logger->debug_log('no change: got not modified response from server');
    } else {
        $Logger->error_log(
            'got HTTP status code ', $response->code,
            ' from URL: ', $Watch_URL
        );
    }

    sleep $Poll_Delay unless $Exit;
}

$Logger->info_log('process exiting normally');
exit 0;


# ------------------------------------------------------------------------
# Helper functions

# Build a handler for the named signal. The first delivery only sets the
# global $Exit flag so the main loop can finish its current pass; a
# further delivery while the flag is set exits immediately with status 1.
sub make_exit_signal_handler {
    my $label = shift;
    my $signal_handler = sub {
        if($Exit) {
            $Logger->info_log('caught multiple SIG', $label, 's, exiting now');
            exit 1;
        } else {
            $Logger->info_log('caught SIG', $label, ', setting exit flag');
            $Exit = 1;
        }
    };
    return $signal_handler;
}

# Detach from the invoking terminal: fork, let the parent exit, and in
# the child reopen the standard handles on /dev/null and start a new
# session. Dies through the logger if fork or setsid fails.
sub daemonize {
    $Logger->debug_log('daemonizing');
    my $pid = fork();
    if(!defined $pid) {
        $Logger->error_die($ME, ': cannot fork: ', $! || 'unknown error');
    } elsif($pid) {
        $Logger->debug_log('parent process exiting');
        # The child keeps using the temporary files; leave them alone
        # when this process runs its END block.
        $Cleanup_Temp_Files = 0;
        exit 0;   # parent process should exit
    } else {
        $Logger->debug_log('child process detaching');
        foreach my $handle (*STDIN, *STDOUT, *STDERR) {
            close $handle;
            unless(open $handle, '+<', '/dev/null') {
                $Logger->warning_log(
                    $ME, ': could not reopen ', $handle, ': ',
                    $! || 'unknown error'
                );
            }
        }
        if(setsid() < 0) {
            $Logger->error_die(
                $ME, ': cannot setsid(): ', $! || 'unknown error'
            );
        }
    }
    $Logger->debug_log('finished daemonizing');
}

# Send a plain text notification by piping a message to $SENDMAIL.
#   $from, $to, $subject - header values
#   $body                - message text
# Failures are logged through $Logger and are not fatal.
sub send_mail {
    my($from, $to, $subject, $body) = @_;
    $Logger->debug_log(
        'sending e-mail to "', $to, '" with subject "', $subject, '"'
    );

    my $cvs_id = '$Id: urlwatch,v 1.2 2009/12/17 23:42:49 andrew Exp $';
    # Headers, a blank separator line, the body, then a signature block.
    my $message = join "\n",
        "From: $from",
        "To: $to",
        "Subject: $subject",
        '',
        $body,
        '-- ',
        $cvs_id,
        '';

    # A mailer that exits early must not kill us with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';

    my $mailer;
    unless(open $mailer, '|-', $SENDMAIL) {
        $Logger->error_log(
            'e-mail notification to ', $to, ' failed: ',
            $! || 'unknown error'
        );
        return;
    }

    # Write errors and a non-zero mailer exit status only become visible
    # through print and close, so check both before claiming success.
    my $failure;
    unless(print {$mailer} $message) {
        $failure = 'write failed: ' . ($! || 'unknown error');
    }
    unless(close $mailer) {
        $failure ||= $! ? "close failed: $!"
                        : 'mailer exited with status ' . ($? >> 8);
    }

    if($failure) {
        $Logger->error_log(
            'e-mail notification to ', $to, ' failed: ', $failure
        );
    } else {
        $Logger->info_log('sent e-mail notification to ', $to);
    }
    return;
}

# Log a block of multi-line text at debug level under the given label,
# trimming surrounding whitespace and prefixing every line with a marker.
# An undefined $text is treated as empty.
sub log_debug_detail {
    my($logger, $label, $text) = @_;
    $text = '' unless defined $text;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/^/ || /gm;
    $logger->debug_log($label, ":\n", $text);
}


# ========================================================================
__END__