#! /usr/bin/perl -w

# #############################################################################
#
# Copyright (C) 1999, 2000 Michael Gerdts (gerdts@cae.wisc.edu)
#
# This software was written to ease my work at the University of Wisconsin.
# The University of Wisconsin may use this software under any terms that
# they see fit.  The use of an Open Source license, as recognized by the
# Open Source Initiative (http://www.opensource.org/), is encouraged by the
# author.
#
# As for everyone else:
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 675 Mass Ave, Cambridge, MA 02139, USA.
#
# #############################################################################

my $version = "0.90";

=head1 NAME

killer - Background job killer

=head1 SYNOPSIS

killer [B<-h>] [B<-V>]

=head1 DESCRIPTION

I<killer> is a perl script that gets rid of background jobs.  Background
jobs are defined as processes that belong to users who are not currently
logged into the machine.  Jobs can be run in the background (and are
expempt from I<killer>'s acctions) if their scheduling priority has been
reduced by increasing their nice(1) value or if they are being run through
I<condor>.  For more details, see the I<PACKAGE main> section of this
document.

The following sections describe the perl(1) packages that make up the
killer program.  I don't expect that the version that works for me will
work for everyone.  I think that the ProcessTable and Terminals packages
offer enough flexibility that most modifications can be done in the main
package.

Command line options

=over 4

=item -h

Tell me how to get help

=item -V

Display version number

=back

=cut

use strict;
use Sys::Syslog;

package ProcessTable;

use Sys::Syslog;

=head1 PACKAGE ProcessTable

Each ProcessTable object contains hashes (or associative arrays) that map
various aspects of a job to the process ID (PID).  The following hashes are
provided:

=over 12

=item pid2user

Login name associated with the effective UID that the process is running
as.

=item pid2ruser

Login name associate with the real UID that the process is running as.

=item pid2uid

Effective UID that the process is running as.

=item pid2ruid

Real UID that the process is running as.

=item pid2tty

Terminal associated with the process.

=item pid2ppid

Parent process of the process

=item pid2nice

nice(1) value of the process.

=item pid2comm

Command name of the process.

=back

Additionally, the %remainingprocs hash provides the list of processes that
will be killed.

The intended use of this package calls for I<readProcessTable> to be called
to fill in all of the hashes defined above.  Then, processes that meet
specific requirements are removed from the %remainingprocs hash.  Those
that are not removed are considered to be background processes and may be
killed.

=cut

# On HP-UX be sure that env var UNIX95 is defined for ps -o to work!
my $pscmd = '/usr/bin/ps -e -o "user ruser uid ruid tty pid ppid nice comm"';

my $errmsg;

my %pid2user = ();	# login name of effective uid
my %pid2ruser = ();	# login name of real user id
my %pid2uid = ();	# effective UID of user
my %pid2ruid = ();	# real UID of user
my %pid2tty = ();	# tty associated with process
my %pid2ppid = ();	# parent process ID
my %pid2nice = ();	# nice value
my %pid2comm = ();	# Command name being executed
my %remainingprocs = (); # The processes that have not been eliminated

=head2 new

This function creates a new I<ProcessTable> object.  

Example:

    my $ptable = new ProcessTable;

=cut

sub new { 
    my $this = shift;
    my $class = ref($this) || $this;
    my $self = {} ;
    bless $self, $class;
    $self->initialize();
    $errmsg = "";
    return $self;
}

=head2 initialize

This function (re)initializes arrays and any environment variables for external
commands.  It generally will not need to be called, as it is invoked by
new().

Example:

    # Empty out the process table for reuse
    $ptable->initialize();

=cut

sub initialize() {
    # This is required for ps(1) to work right on HP's
    defined($ENV{UNIX95}) || ($ENV{UNIX95} = 1);

    # Make sure that these are all empty...
    %pid2user = ();	# login name of effective uid
    %pid2ruser = ();	# login name of real user id
    %pid2uid = ();	# effective UID of user
    %pid2ruid = ();	# real UID of user
    %pid2tty = ();	# tty associated with process
    %pid2ppid = ();	# parent process ID
    %pid2nice = ();	# nice value
    %pid2comm = ();	# Command name being executed
    %remainingprocs = (); # The processes that have not been eliminated
    return;
}

=head2 readProcessTable

This function executes the ps(1) command to figure out which processes are
running.  Note that it requires a SYSV style ps(1).  

Example:

    # Get a list of processes from the OS
    $ptable->readProcessTable();

=cut

sub readProcessTable {
    my $self = shift;
    my ($user, $ruser, $uid, $ruid, $tty, $pid, $ppid, $nice, $comm);

    open ( PS, "$pscmd|" ) || do {
	$errmsg = "\"$pscmd\" failed: $!";
	return undef;
    };

    # skip the first line of input
    <PS>;
    while (<PS>) {
	#print "\t$_";
	chop;

	# strip leading white space
	$_ =~ s/^[ ]*//;

	($user, $ruser, $uid, $ruid, $tty, $pid, $ppid, $nice, $comm) 
		= split( /[ \t]+/, $_, 9 );
	
	$pid2user{$pid} = $user;
	$pid2ruser{$pid} = $ruser;
	$pid2uid{$pid} = int $uid;
	$pid2ruid{$pid} = int $ruid;
	$pid2tty{$pid} = $tty;
	$pid2ppid{$pid} = int $ppid;
	$pid2nice{$pid} = $nice;
	if ( defined $comm ) {
	    $pid2comm{$pid} = $comm;
	} else {
	    $pid2comm{$pid} = '<defunct>';
	}
	$remainingprocs{$pid} = 1;
    }

    close(PS);
}

=head2 cleanForkBombs

This function looks for a large number of processes owned by one user, and
assumes that it is someone that is using fork() for the first time.  An
effective way to clean up such a mess is to "kill -STOP" each process then
"kill -KILL" each process.

Note this function ignores such mistakes by root.  If root is running a
fork(2) bomb, this script wouldn't run, right?  Also, you should be sure
that the number of processes mentioned below (490) is less (equal to would
be better, right?) than the maximum number of processes per user.  Also,
the OS should have a process limit at least a couple hundred higher than
any individual.  Otherwise, you will have to use the power switch to get
rid of fork bombs.

Each time a process is sent a signal, it is logged via syslog(3C).

Example:

    # Get rid of fork bombs.  Keep track of who did it in @idiots.
    my @idiots = $ptable->cleanForkBombs();

=cut

sub cleanForkBombs {
    my $self = shift;
    my ( @procs, $pid, $user, @bombers);
    @bombers = ();

    foreach $user ( $self->getUsers() ) {
	next if ( $user eq "root" );
	@procs = $self->getUserProcessIds($user);
	if ( $#procs > 490 ) {
	    # first send a SIGSTOP
	    foreach $pid ( @procs ) {
		if ( kill(23, $pid) > 0 ) {
	    	    syslog('info', "kill(23, $pid) user=%s command=%s nice=%d",
	                     $pid2ruser{$pid}, $pid2comm{$pid},
                             $pid2nice{$pid});
		}
	    }
	    # next send a SIGKILL
	    foreach $pid ( @procs ) {
		if ( kill(9, $pid) > 0 ) {
	    	    syslog('info', "kill(9, $pid) user=%s command=%s nice=%d",
	                     $pid2ruser{$pid}, $pid2comm{$pid}, 
                             $pid2nice{$pid});
		}
	    }
	    push @bombers, $user;
	}
    }
    return(@bombers);
}

=head2 getUserProcessIds user

This returns the list of process ID's where the login associated with the real
UID of the process matches the argument to the function.

Example:

    # Find all processes owned by httpd
    my @webservers = $ptable->getUserProcessIds('httpd');

=cut

sub getUserProcessIds($) {
    my $self = shift;
    my ($login) = @_;
    my @pids = ();

    my ( $key, $value );

    while ( ($key, $value) = each(%pid2ruser) ) {
	next unless ( $value eq $login );
	push @pids, $key;
    }
    return(@pids);
}

=head2 getUniqueTtys

This function returns a list of terminals in use.  Note that the format
will be the same as given by ps(1), which will generally lack the leading
"/dev/".

Example:

    # Get a list of all terminals that processes are attached to
    my @ttylist = $ptable->getUniqueTtys();

=cut

sub getUniqueTtys {
    my $self = shift;

    my %ttys;
    my $tty;

    foreach $tty ( keys %pid2tty ) {
	$ttys{$tty} = 1;
    }

    return(keys %ttys);
}

=head2 removeProcessId pid

This function removes pid from the list of processes to be killed.  That
is, it gets rid of a process that should be allowed to run.  Most likely
this will only be called by other functions in this package.

Example:

    # For some reason I know that PID 1234 should be allowed to run
    $ptable->removeProcessId(1234);

=cut

sub removeProcessId($) {
    my $self = shift;
    my ($pid) = @_;

    if (defined $remainingprocs{$pid} ) {
        delete $remainingprocs{$pid}
    }
}

=head2 removeProcesses psfield, psvalue

This function removes processes that possess certain traits.  For example,
if you want to get rid of all processes owned by the user "lp" or all
processes that have /dev/console as their controlling terminal, this is the
function for you.  

psfield can be any of the following

=over 8

=item pid

Removes process id given in second argument.

=item user

Removes processes with effective UID associated with login name given in
second argument.

=item ruser

Removes processes with real UID associated with login name given in second
argument.

=item uid

Removes processes with effective UID given in second argument.

=item ruid

Removes processes with real UID given in second argument.

=item tty

Removes processes with controlling terminal given in second argument.  Note
that it should NOT start with "/dev/".

=item ppid

Removes children of process with PID given in second argument.

=item nice

Removes children with a nice value equal to the second argument.

=item comm

Removes children with a command name that is the same as the second
argument. 

=back

Examples:

    # Allow all imapd processes to run
    $ptable->removeProcesses('comm', 'imapd');

    # Be sure not to kill print jobs
    $ptable->removeProcesses('ruser', 'lp');

=cut

sub removeProcesses($$) {
    my $self = shift;
    my ( $field, $value ) = @_;
    my ( $pid );

    SWITCH: {
	($field eq "pid") && do {
	    $self->removeProcessId($value);
	    last SWITCH;
	};
	($field eq "user") && do {
	    foreach $pid ( keys %pid2user ) {
		if ( $pid2user{$pid} eq $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "ruser") && do {
	    foreach $pid ( keys %pid2ruser ) {
		if ( $pid2ruser{$pid} eq $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "uid") && do {
	    foreach $pid ( keys %pid2uid ) {
		if ( $pid2uid{$pid} == $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "ruid") && do {
	    foreach $pid ( keys %pid2ruid ) {
		if ( $pid2ruid{$pid} == $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "tty") && do {
	    foreach $pid ( keys %pid2tty ) {
		if ( $pid2tty{$pid} eq $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "ppid") && do {
	    foreach $pid ( keys %pid2ppid ) {
		if ( $pid2ppid{$pid} == $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "nice") && do {
	    foreach $pid ( keys %pid2nice ) {
		if ( $pid2nice{$pid} eq $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
	($field eq "comm") && do {
	    foreach $pid ( keys %pid2comm ) {
		if ( $pid2comm{$pid} eq $value ) {
		    $self->removeProcessId($pid);
		}
	    }
	    last SWITCH;
	};
    };
}

=head2 removeChildren pid

This function removes all decendents of the given pid.  That is, if the pid
argument is 1, it will ensure that nothing is killed.

Example:

    # Be sure not to kill off any mail deliveries (assumes you have
    # written getSendmailPid()).  (Sendmail changes uid when it does
    # local delivery.)
    $ptable->removeChildren(getSendmailPid);

=cut

sub removeChildren($) {
    my $self = shift;
    my ($ppid) = @_;
    my ( @children);
    my ( $child, $parent );

    while ( ($child, $parent) = each(%pid2ppid) ) {
	if ( $parent == $ppid ) {
	    push(@children, $child);
	}
    }
    foreach $child ( @children ) {
	$self->removeChildren($child);
	$self->removeProcessId($child);
    }
}

=head2 removeCondorChildren

Condor is a batch job system that allows migration of jobs between
machines (see http://www.cs.wisc.edu/condor/).  This ensures that condor
jobs are left alone.

Example:

    # Be nice to the people that are running their jobs through condor.
    $ptable->removeCondorChildren();

=cut

sub removeCondorChildren {
    my $self = shift;
    my $pid;

    foreach $pid ( keys %pid2comm ) {
	# find the command with the right name
	next unless ( $pid2comm{$pid} eq 'condor_master' );

	# be sure that it is owned by root
	next unless ( $pid2uid{$pid} == 0 );

	# Remove all of its child processes
	$self->removeChildren($pid);
    };
}

=head2 findChildProcs pid

This function finds and returns a list of all of the processess that are
descendents of a the PID given in the first argument.

Example:

    # Find the processes that are decendents of PID 1234
    my @procs = $ptable->findChildProcs(1234);

=cut

sub findChildProcs($) {
    my $self = shift;
    my ($ppid) = (@_);

    my ( $child, $parent, @children, @returnchildren );
    
    while ( ($child, $parent) = each(%pid2ppid) ) {
	if ( $parent == $ppid ) {
	    push(@children, $child);
	}
    }
    push ( @returnchildren, @children);
    foreach $child ( @children ) {
	push @returnchildren, $self->findChildProcs($child);
    }
    return(@returnchildren);
}

=head2 getTtys user

This function returns a list of tty's that are in use by processes owned by
a particular user. 

Example:
    
    # find all tty's in use by gerdts.
    my @ttylist = getTtys('gerdts');

=cut

sub getTtys ($) {
    my $self = shift;
    my ($user) = ( @_ );

    my ( $pid, $login, %ttys );
    while ( ($pid, $login ) = each(%pid2user) ) {
	next unless ( $login eq $user );

	$ttys{${pid2tty{$pid}}} = 1;
    }

    return (keys %ttys);
}

=head2 getUsers 

This function lists all the users that have active processes.

Example:
    
    # Get all users that are logged in
    my @lusers = $ptable->getUsers()

=cut

sub getUsers() {
    my $self = shift;
    
    my ( $pid, $login, %logins );
    while ( ($pid, $login) = each(%pid2user) ) {
	$logins{$login} = 1;
    }
    return(keys %logins);
}

=head2 removeNiceJobs

This function removes all jobs that have a nice value greater than 20.
That is, they have a lower sceduling priority than the default.  

Example:

    # Allow people to run background jobs so long as they yield to
    # those with "foreground" jobs
    $ptable->removeNiceJobs();

=cut

sub removeNiceJobs() {
    my $self = shift;
    my ( $key, $val );

    while ( ($key, $val) = each(%pid2nice) ) {
	# Get rid of things not in the "default" scheduling class
	next unless ($val =~ /^[0-9]+$/);

	if ( int($val) > 20 ) {
	    $self->removeProcessId($key);
	}
    }
}

=head2 printProcess filehandle, pid

This function displays information about the process, kinda like "ps | grep"
would.

Example:

    # Print info about init to STDERR
    $ptable->printProcess(\*STDERR, 1);

=cut

sub printProcess($$) {
    my $self = shift;
    my ( $fh, $pid ) = @_;

    printf $fh "%8s %8s %5d %5d %s\n", $pid2user{$pid}, $pid2ruser{$pid},
	    $pid, $pid2ppid{$pid},  $pid2comm{$pid};
}

=head2 printProcessTable 

=head2 printProcessTable filehandle

This function prints info about all the processes discoverd by
I<readProcessTable>.  If an argument is given, it should be a file handle
to which the output should be printed.

Examples:

    # Print the process table to stdout
    $ptable->printProcessTable();

    # Mail the process table to someone
    open MAIL '|/usr/bin/mail someone';
    $ptable->printProcessTable(\*MAIL);
    close(MAIL);

=cut

sub printProcessTable {
    my $self = shift;
    my ( $fh ) = shift || \*STDOUT;
    my $pid;

    print $fh "  user     ruser   pid   ppid command\n";
    print $fh "======== ======== ===== ===== =================================================\n";
    foreach  $pid ( sort keys %pid2comm ) {
	$self->printProcess($fh, $pid);
    }
    print $fh "======== ======== ===== ===== =================================================\n";
}

=head2 printRemainingProcesses

=head2 printRemainingProcesses filehandle

This function prints info about all the processes discoverd by
I<readProcessTable>, but not removed from %remainingprocs.  
If an argument is given, it should be a file handle
to which the output should be printed.

Examples:

    # Print the jobs to be killed to stdout
    $ptable->printRemainingProcesses();

    # Mail the jobs to be killed to someone
    open MAIL '|/usr/bin/mail someone';
    $ptable->printRemainingProcesses(\*MAIL);
    close(MAIL);

=cut

sub printRemainingProcesses {
    my $self = shift;
    my $fh = shift || \*STDOUT;

    print $fh "  user     ruser   pid   ppid command\n";
    print $fh "======== ======== ===== ===== =================================================\n";
    foreach my $pid ( keys %remainingprocs ) {
	$self->printProcess($fh, $pid);
    }
    print $fh "======== ======== ===== ===== =================================================\n";
}

=head2 getRemainingProcesses

Returns a list of processes that are likely background jobs.

Example:

    # Get a list of the processes that I plan to kill
    my @procsToKill = $ptable->getRemainingProcesses();

=cut

sub getRemainingProcesses {
    my $self = shift;
    
    return keys %remainingprocs;
}

=head2 killAll signalNumber

Sends the specified signal to all the processes listed.  A syslog entry is
made for each signal sent.

Example:

    # Send all of the remaining processes a TERM signal, then a 
    # KILL signal
    $ptable->killAll(15);
    sleep(10);          # Give them a bit of a chance to clean up
    $ptable->killAll(9);

=cut

sub killAll($) {
    my $self = shift;
    my ( $signum ) = @_;
    
    my $killcount = 0;

    foreach my $pid ( keys %remainingprocs ) {
	if ( kill($signum, $pid) > 0 ) {
	    $killcount ++;
	    syslog('info', "kill($signum, $pid) user=%s command=%s nice=%d",
	                     $pid2ruser{$pid}, $pid2comm{$pid},
                             $pid2nice{$pid});
	}
    }
    return $killcount;
}

package Terminals;

=head1 PACKAGE Terminals

The Terminals package provides a means for figuring out how long various
users have been idle.

=cut

my %tty2user;
my %user2ttys;
my %tty2idletime;
my $consoleuser;

my $whocmd = '/usr/bin/who';

=head2 new

This function is used to instantiate a new Terminals object.

Example:

    # Get a new Terminals object.
    my $term = new Terminals;

=cut

sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my $self = {} ;
    bless $self, $class;
    $self->initialize();
    $errmsg = "";
    return $self;
}

=head2 initialize

This function figures out who is on the system and how long they have been
idle for.  It will generally only be called by new().

Example:

    # Refresh the state of the terminals.
    $term->initialize();

=cut

sub initialize {
    my $self = shift;

    my @parts;

    $consoleuser = "";

    open ( W, "$whocmd|") || return;

    while ( <W> ) {
	chop;
	@parts = split(/[ \t]+/);
	if ( $parts[1] eq 'console' ) {
	    $consoleuser = $parts[0];
	}
	$self->initializeTty($parts[1], stat("/dev/" . $parts[1]));
    }
    close(W);
}

=head2 showConsoleUser

This function returns the login of the person that is physically sitting at
the machine.

Example:

    # Print out the login of the person on the console
    printf "%s is on the console\n", $term->showConsoleUser();

=cut

sub showConsoleUser() {
    my $self = shift;

    return $consoleuser;
}

=head2 initializeTty terminal statparts

This initializes internal structures for the given terminal.

=cut

sub initializeTty($@) {
    my $self = shift;
    my ( $key, @statparts ) = @_;

    # Argument check
    return unless defined ( $statparts[4] );

    # Figure out how long the tty has been idle
    $tty2idletime{$key} = time - $statparts[8];

    # Figure out who is on the terminal
    my ( @pwparts ) = getpwuid($statparts[4]);
    return unless defined ( $pwparts[0] );

    $tty2user{$key} = $pwparts[0];
    push (@{$user2ttys{$pwparts[0]}}, $key);

}

=head2 getIdleTime user

Figure out how long a user has been idle.  This is accomplished by
examining all terminals that the user owns and returns the amount of time
since the most recently accessed one was used.  Additionally, if the user
is at the console it is possible that he/she is not typing, yet is quite
active with the mouse or typing into an application that does not use a
terminal.  

Example:

    # Figure out how long the user on the console has been idle
    my $consoleIdle = $term-getIdleTime($term->showConsoleUser());

=cut

sub getIdleTime($) {
    my $self = shift;
    my ( $user ) = @_;

    my $idletime = 99999999;
    my $tty;
    return $idletime if ( ! defined $user2ttys{$user} );
    my ( @ttys ) = ( @{$user2ttys{$user}} );

    foreach $tty ( @ttys ) {
	next unless defined($tty2idletime{$tty});

	if ( int($tty2idletime{$tty}) < int($idletime) ) {
	    $idletime = int($tty2idletime{$tty});
	}
    }

    if ( $consoleuser eq $user ) {
	my (@statparts, $device);
	foreach $device ( '/dev/ps2mouse', '/dev/ps2kbd', '/dev/mouse', 
	                  '/dev/kbd' ) {
	    @statparts = stat($device);
	    next unless defined($statparts[8]);

	    if ( ( time - $statparts[8] ) < $idletime ) {
		$idletime = (time - $statparts[8]);
	    }
	}
    }
    return $idletime;
}

=head2 printEverything

Prints to stdout who is on what terminal and how long they have been idle.
Only useful for debugging.

Example:

    # Take a look at the contents of structures in my 
    # Terminals object
    $term->printEverything();

=cut

sub printEverything {
    my $self = shift;

    my ( $k, $v);
    print "\ntty -> user\n";
    while ( ($k, $v) = each(%tty2user) ) {
	print "$k -> $v\n";
    }
    print "\ntty -> idle\n";
    while ( ($k, $v) = each(%tty2idletime) ) {
	print "$k -> $v\n";
    }
    print "\nuser -> ttys\n";
    while ( ($k, $v) = each(%user2ttys) ) {
	print "$k -> ", join( ',', @{$v}), "\n";
    }
}

package main;

if ( $#ARGV != -1 ) {
    foreach my $opt ( @ARGV ) {
        $opt eq '-h' && do {
            print STDERR "Type \"perldoc $0\" for lots of help.\n";
            next;
        };
        $opt eq '-V' && do {
            print STDERR "killer version $version\n";
            next;
        };
        print STDERR "killer: option \"$opt\" not recognized\n";
        print STDERR "Type \"perldoc $0\" for lots of help.\n";
    }
    exit(1);
}

=head1 PACKAGE main

The main package is the version used on the Unix workstations at the
University of Wisonsin's Computer-Aided Engineering Center (CAE).  I 
suspect that folks at places other than CAE will want to do things slightly
differently.  Feel free to take this as an example of how you can make
effective use of the processTable and Terminals packages.

=head2 Configuration options

=over 12

=cut

# ########
# Configuration options:
# ########

my $domainname = `domainname`;
chop $domainname;

=item $forkadmin

Email address to notify of fork bombs

=cut

my $forkadmin = "killer\@$domainname";

=item $killadmin

Email address to notify of run-of-the-mill kills

=cut

my $killadmin = "killer\@$domainname";

=item $fromaddr

Who do email messages claim to be from?

=cut

my $fromaddr = "\"Background Job Killer v. $version\" <root\@$domainname>";

=item $stubbornadmin

Email address to notify when jobs will not die

=cut

my $stubbornadmin = "killer\@$domainname";

=item @validusers

These are the folks that you should never kill off

=cut

my @validusers = ( 'condor', 'root', 'daemon' );

=item $maxidletime

The maximum number of seconds that a user can be idle without being
classified as having "background" jobs.

=cut

my $maxidletime = ( 6 * 60 * 60 );


# ########
# End of (intended) configuration options.
# ########

=back

If I am a user really trying to avoid a background job killer, I would
likely include a signal handler that would wait for signal 15.  When I saw
it, I would fork causing the parent to die and the child would continue on
to do my work.  

Assuming that everyone thinks like me, I figure that I will
need to make at least two complete passes to clear up the bad users.  The
first pass is relatively nice (sends a signal 15, followed a bit later by a
signal 9).  A well-written program will take the signal 15 as a sign that
it should clean up and then shut down.  When a process gets a signal 9, it
has no choice but to die.

The second pass is not so nice.  It finds all background processes, sends
them a signal 23 (SIGSTOP), then a signal 9 (SIGKILL).  This pretty much
(but not absolutely) guarantees that processes are unable to find a way
around the background job killer.  

=cut

my @ttys;
my @users;
my $user;

=head2 gatherInfo

This function gathers information from the Terminals and ProcessTable
packages, then based on that information decides which jobs should be
allowed to run.  Specifically it does the following:

=over 2

=cut

sub gatherInfo {

=item *

Instantiates new ProcessTable and Terminals objects.  Note that
Terminals::new fills in all the necessary structures to catch users that
have logged in between calls to I<gatherinfo>.

=cut

    my $ptable = new ProcessTable;
    my $term = new Terminals;

=item *

Reads the process table

=cut

    $ptable->readProcessTable();

=item *

Removes condor processes and condor jobs from the list of processes to be
killed.

=cut

    $ptable->removeCondorChildren();

=item *

Removes all jobs belonging to all users in the configuration array
@validusers from the list of processes to be killed.

=cut

    foreach $user ( @validusers ) {
	$ptable->removeProcesses('ruser', $user);
    }

=item *

Removes all nice(1) jobs from the list of jobs to be killed.

=cut

    $ptable->removeNiceJobs();

=item *

Removes all jobs belonging to users where the user has less than
$maxidletime idle time on at least one terminal.  Additionally, jobs
associated with ttys that are owned by users that have less than
$maxidletime idle time on at least one terminal are preserved.  This makes
it so that if luser uses su(1) to gain the privileges of boozer, processes
owned by boozer will not be killed.

=cut

    foreach $user ( $ptable->getUsers() ) {
	if ( $term->getIdleTime($user) < $maxidletime ) {
	    $ptable->removeProcesses('ruser', $user);
            # Be aware that some users may have su'd to others.  The two users
            # will share the same tty.
            $ptable->removeProcesses('tty', $ptable->getTtys($user));
	}
    }

=item *

Finally, the process table and terminal objects are returned.

=back

=cut 

    return($ptable, $term);
}

openlog('killer', 'pid', 'local4');
my $sendmail;
if ( -x '/usr/lib/sendmail' ) {
    $sendmail = '/usr/lib/sendmail';
} elsif ( -x '/usr/sbin/sendmail' ) {
    $sendmail = '/usr/sbin/sendmail';
} else {
    die "Cannot find executable sendmail\n";
}

my $outfile;

# #########
# Read the process table, then check for fork() bombs.
# #########
my ($ptable, $term) = gatherInfo();
my @bombers = $ptable->cleanForkBombs();
if ( $#bombers != -1 ) {
    if ( open(MAIL, "|$sendmail -t") ) {
	$outfile = \*MAIL;
        print $outfile "From: $fromaddr\n";
        print $outfile "To: $forkadmin\n";
        print $outfile 'Subject: Fork bombs found on ' . `uname -n`;
        print $outfile "\n";
	print $outfile "The following users had lots of processes running on " .
			`uname -n`;
	print $outfile "\t", join(' ', @bombers);
	print $outfile "\nYou can find more information in the syslog logs\n";
	close($outfile);
    }
}

# ##########
# In the first round, try to nicely kill off processes, giving them
# time to clean up before they get the kill -9
# ##########
my ( @remaining ) = $ptable->getRemainingProcesses();
if ( $#remaining == -1 ) {
    exit(0);
}

if ( open(MAIL, "|$sendmail -t") ) {
    $outfile = \*MAIL;
} else {
    $outfile = \*STDERR;
}

print $outfile "From: $fromaddr\n";
print $outfile "To: $killadmin\n";
print $outfile 'Subject: Jobs killed on ' . `uname -n`;
print $outfile "\n";

print $outfile "Attempt 1: Nicely killing the following processes\n";
$ptable->printRemainingProcesses($outfile);
$ptable->killAll(15);
sleep(30);
$ptable->killAll(9);

# #########
# The second time around, assume that when a process got killed
# before, it either spawned a new process (at the kill 15) or
# freed up a process table entry so that another process could
# be spawned.  This should catch fork() bombs as well
# ##########

sleep(5);
($ptable, $term) = gatherInfo();
( @remaining ) = $ptable->getRemainingProcesses();
if ( $#remaining == -1 ) {
    close(MAIL);
    exit(0);
}

# first send all the processes a STOP.  This makes it so that
# none of the processes can do anything once they notice that
# there are free process slots, etc.
print $outfile "\nAttempt 2: Taking care of stubborn jobs\n";
$ptable->printRemainingProcesses($outfile);

$ptable->killAll(23);	
$ptable->killAll(9);

# ########
# The third time around, I just can't figure out how to kill
# the processes.  Let's just whine through email.
# ########

sleep(5);
($ptable, $term) = gatherInfo();
( @remaining ) = $ptable->getRemainingProcesses();
if ( $#remaining == -1 ) {
    close(MAIL);
    exit(0);
}

print $outfile "\nHELP ME: Unable to kill the following jobs\n";
$ptable->printRemainingProcesses($outfile);
close(MAIL);

if ( open(MAIL, "|$sendmail -t") ) {
    $outfile = \*MAIL;
    print $outfile "From: $fromaddr\n";
    print $outfile "To: $stubbornadmin\n";
    print $outfile 'Subject: Cannot kill some jobs on ' . `uname -n`;
    print $outfile "\n";
    print $outfile "The background job killer could not kill some jobs on " .
    		   `uname -n` . "\n";
    $ptable->printRemainingProcesses($outfile);
    print $outfile "\nMore info can be found in syslog and the killer mailbox\n";
    close(MAIL);
}

exit(0);

=head1 BUGS

There is a small window of opportunity for a user that reaches $maxidletime
in the middle of this script to get unfair treatment.  This could probably
be reconciled by shaving some time off of maxidletime for the second call
to main::gatherInfo.

It is still possible to get around the background job killer by having a
lot of proceses that watch each other to be sure that they are still
responding (have not yet gotten a signal 23).  As soon as a stopped process
is found, the still running process could fork(), thus leaving a background
process that is not going to be killed.

Different operating systems have different notions of nice values.  Some go
from -20 to +19.  Some go from 0 to 39.  Solaris and HP-UX (using System V
ps command) report nice values between 0 and 39.  

It is bad to assume that all systems that run this have the same number of
processes per user.  The script should ask the OS how many processes normal
(non-root) users can run.

=head1 TODO

The configuration is quite minimalistic.  It should be made possible to have
per-host configuration directives so that you can, for instance, allow
certain people to run background jobs on certain hosts.

People that really care about finding habitual offenders will probably want
to have a way to add entries to a database and flag those that pop up too
often.  

Thoroughly test on more operating systems.  A very close relative of this
code has performed well on about 60 Solaris 2.5.1 machines.  It has been
lightly tested on HP-UX 10.20 as well.

Make mailing to someone optional.  If you have a lot of workstations
killing off boring stuff all the time, too much meaningless mail traffic is
generated.

If you plan to run this on a machine that runs special processes like a POP
or IMAP server, it would be handy to be able to check multiple conditions
easily.  Perhaps 

    $ptable->removeProcesses( { comm => 'imapd', 
                                parentComm => 'inetd',
                                parentUser => 'root' } );

This would make it so that people don't rename the crack binary imapd to
escape the wrath of killer.

=head1 LICENSE

This program is released under the terms of the General Public License
(GPL) version 2.  The the file COPYING with the distribution.  If you have
lost your copy, you can get a new one at
http://www.gnu.org/copyleft/gpl.html.  In particular remember that this
code is distributed for free without warranty.

If you make use of this code, please send me some email.  While I am open
to suggestions to improvement, I by no means guarantee that I will
implement them.

=head1 SEE ALSO

nice(1) perl(1) ps(1) su(1) who(1) fork(2) signal(5) 

http://www.cs.wisc.edu/condor/

http://www.cae.wisc.edu/~gerdts/killer/

=head1 AUTHOR

killer was written by Mike Gerdts, gerdts@cae.wisc.edu.



