#!/usr/bin/env perl ## ## dams daemon ## ## ## Signature file versions supported. ## @versions = (2 .. 3); ## ## Commands for external decoders ## %external = ( zip => { magic => "^PK\x03\x04", decoder => "/opt/bin/unzip -p '-P%p' %f", inspector => { command => "/opt/bin/unzip -l %f", parser => inspect_parse_zip, }, }, rar => { magic => "^Rar!", decoder => "/opt/bin/unrar p '-p%p' %f", inspector => { command => "/opt/bin/unrar l %f", parser => inspect_parse_rar, }, }, lha => { magic => "^..-lh", decoder => "/opt/bin/lha -p %f", inspector => "/opt/bin/lha l %f", }, arj => { magic => "^\xea\x60", decoder => "/bin/false", inspector => "/bin/false", }, ); ## Default max. depth for MIME recursion; also controlled by -m option. $max_mime_depth = 5; ## Default max. time a process is allowed to live if not a daemon. (seconds) $max_time_allowed = 10; $DEBUG = 0; $A0 = $0; $D0 = $0; $A0 =~ s!.*/!!; $D0 =~ s!/[^/]+$!!; use Getopt::Std; use Digest::SHA1; use Data::Dumper; $rcsid = '$Id: dams2,v 1.20 2004/03/15 09:38:42 dgc Exp $'; $rcsid =~ /,v\s+(\d+\.\d+)\s/; $ver = $1; $sha = new Digest::SHA1; $tmpdir = $ENV{TMPDIR} || "/tmp"; $output = undef; # directory to save decoded parts $eol = $/; ## Convert versions list into hash for easy lookup map { $versions{$_} = 1; } @versions; ## ## print to fh, if it's defined ## sub declare { my ($fh, $depth, @args) = @_; return if (! $fh); print $fh " " x $depth; $args[0] .= "\n"; printf $fh @args; } ## ## Since input can be (is expected to be) a pipe, we cannot rewind. But ## there might be occasions where we need to unread lines, so these two ## functions and hash implement a buffering scheme. We'll use it instead ## of the <> operator for all input. ## %iobuf = (); # contains fifos for each fhref used sub getline { my ($fhref) = @_; my $line; if ($iobuf{$fhref} && $#{$iobuf{$fhref}} >= 0) { $line = shift @{$iobuf{$fhref}}; } else { $line = <$fhref>; } return $line; } sub ungetline { my ($fhref, $line) = @_; $iobuf{$fhref} = [] unless (exists($iobuf{$fhref})); push(@{$iobuf{$fhref}}, $line); } ## ## Print lines if debugging ## sub debug { return unless ($DEBUG); print STDERR @_; } ## ## Copy a line of text to a list of array refs. ## sub store { my ($line, @list) = @_; my $aref; for $aref (@list) { next if (!defined($aref)); push(@{$aref}, $line); } } ## ## return a filehandle in to an output file ## sub mkout { my ($detail) = @_; my $fh = undef; return undef unless ($output); $detail = ",$detail" if (! $detail =~ /^,/); open($fh, ">$output$detail"); return $fh; } ## ## Uniq ## sub cleanList { my @list = @_; my %h; for (@list) { $h{$_} = 1; } return keys %h; } ## ## recursive trace ## sub trace { my ($m, $depth, $order) = @_; my ($hdr, $rhdr, $k, $part, @k); for $hdr qw(from date subject message-id) { $rhdr = $m->{hdrmap}->{lc($hdr)}; if ($m->{hdrdict}->{$rhdr}) { printf "%s%2d: ", " " x $depth, $order; print "$rhdr: ", $m->{hdrdict}->{$rhdr}, "\n"; } } @k = keys %{$m->{mime}}; if ($#k >= 0) { for $k qw(type charset boundary disp encoding name filename) { if ($m->{mime}->{$k}) { printf "%s%2d: ", " " x $depth, $order; print "$k: ", $m->{mime}->{$k}, "\n"; } } } else { printf "%s%2d: ", " " x $depth, $order; printf "(anonymous part)\n"; } printf "%s%2d: ", " " x $depth, $order; printf "textlines/bytes: %d / %d\n", $m->{textlines}, $m->{textbytes}; for $part (0 .. $#{$m->{parts}}) { trace($m->{parts}->[$part], $depth+1, $part); } } ## ## Decoders. ## ## A decoder has three possible returns: ## - undef, meaning the data cannot be decoded with this decoder ## - "", meaning the decoder requires more information to decode this data ## - something else, the decoded data ## ## ## preamble, joins text[] to a single scalar stream ## sub decode_init { my ($m, $v, $seq, $s, $fh, $tmpd) = @_; if ($m->{decoded}->{init}) { declare($fh, 5, "decoded [init, cached]"); return $m->{decoded}->{init}; } declare($fh, 5, "decoded [init]"); $m->{decoded}->{init} = join("", @{$m->{text}}); return $m->{decoded}->{init}; } ## ## base-64 decoder ## sub decode_b64 { my ($m, $v, $seq, $s, $fh, $tmpd) = @_; my $off = 0; my $l, $u; my $slen, $trail; my $r = ""; if ($m->{decoded}->{b64}) { declare($fh, 5, "decoded [b64, cached]"); return $m->{decoded}->{b64}; } $s =~ tr!\r\n\t !!d; # remove whitespace $s =~ s!=+$!!; # remove pads $s =~ tr!A-Za-z0-9+/! -_!; # map to uuencode charset $slen = length($s); $trail = $slen % 60; if ($slen >= 60) { while ($off < $slen) { $r .= "M" . substr($s, $off, 60); $off += 60; } } $r .= chr(32 + $trail*3/4) . substr($s, $off); $m->{decoded}->{b64} = unpack("u*", $r); return undef unless ($m->{decoded}->{b64}); $m->{textencoding} .= ",b64"; declare($fh, 5, "decoded [b64]"); return $m->{decoded}->{b64}; } ## ## uuencode decoder ## sub decode_uu { my ($m, $v, $seq, $s, $fh, $tmpd) = @_; my $off = 0; my $l, $u; my $slen, $trail; my $r = ""; if ($m->{decoded}->{uu}) { declare($fh, 5, "decoded [uu, cached]"); return $m->{decoded}->{uu}; } $s =~ s/^begin [0-9]{3}[^\r\n]*[\r\n]+//s; #$s =~ tr!\r\n\t !!d; # remove whitespace $s =~ s/end$//s; $m->{decoded}->{uu} = unpack("u*", $s); return undef unless ($m->{decoded}->{uu}); $m->{textencoding} .= ",uu"; declare($fh, 5, "decoded [uu]"); return $m->{decoded}->{uu}; } ## ## Unzip data and return ## sub decode_external { my ($m, $v, $seq, $s, $fh, $tmpd, $key, $code) = @_; my $tmpf, $i, $passwd, $cookie, $backref; my $msg, $cmd; ## look for a password key $passwd = undef; if ($v->{$key} && $v->{$key}->{password}) { ## password given as a string $passwd = $v->{$key}->{password}; } elsif (ref($v->{$key} && $v->{$key}->{password_cookies})) { ## password is in a cookie; password_cookies contains ## possible cookies $passwd = undef; for $cookie (@{$v->{$key}->{password_cookies}}) { ($cookie, $backref) = split(":", $cookie); $passwd = cookie($m, $v, $seq, $cookie, $backref); last if ($passwd); } } elsif ($v->{$key} && $v->{$key}->{password_cookie}) { ## password is in this precise cookie ($cookie, $backref) = split(":", $v->{$key}->{password_cookie}); $passwd = cookie($m, $v, $seq, $cookie, $backref); debug("decode/ext [$key]: passcookie ", $v->{$key}->{password_cookie}, " = \"", $passwd, "\"\n"); } if (defined($passwd)) { $msg = "$key/enc"; } else { $msg = "$key"; } $tmpf = "$tmpd/decode.$key"; open(DECODE, ">$tmpf"); print DECODE $s; close(DECODE); &{$code} if ($code); $cmd = $external{$key}->{decoder}; $cmd =~ s!%f!$tmpf!g; $cmd =~ s!%p!$passwd!g; debug("decode/ext [$key]: $cmd\n"); open(DECODE, "$cmd 2>/dev/null |"); $/ = undef; $s = ; close(DECODE); $/ = $eol; unlink($tmpf); if (length($s) == 0) { declare($fh, 5, "!decoded [$msg]"); return undef; } $m->{decoded}->{$key} = $s; $m->{textencoding} .= ",$key"; declare($fh, 5, "decoded [$msg]"); return $m->{decoded}->{$key}; } ## ## Generic decoder ## sub decode_generic { my ($m, $v, $seq, $s, $fh, $tmpd, $type) = @_; if ($m->{decoded}->{$type}) { declare($fh, 5, "decoded [$type, cached]"); return $m->{decoded}->{$type}; } if (! ($s =~ /$external{$type}->{magic}/)) { declare($fh, 5, "-decoded [$type]"); return undef; } return decode_external($m, $v, $seq, $s, $fh, $tmpd, $type, sub { $passwd = "-" if (!defined($passwd)); }); } ## ## Word count ## sub wc { my ($s) = @_; my $lines = 0; my $tmp; $tmp = $s; $tmp =~ s!\r\n!\n!g; $tmp =~ tr!\r!\n!; while ($tmp =~ /^[^\n]*\n(.*)$/s) { ++$lines; $tmp = $1; } return ($lines, length($s)); } ## ## save decoded text with current textencoding ## sub savedecode { my ($data, $detail) = @_; my $fh; if ($output) { $fh = mkout($detail); print $fh $data; close($fh); } } ## ## get a header from a message part ## sub gethdr { my ($m, $hdr) = @_; my $realhdr = $m->{hdrmap}->{lc($hdr)}; return ($realhdr, $m->{hdrdict}->{$realhdr}) if ($realhdr); return (undef, undef); } ## ## Return a msg part's raw (decoded) data ## sub decode { my ($m, $v, $seq, $fh, $partno) = @_; my $in, $out, $tmpd, $type; my @chain; ## this should always succeed; it's just returning the joined text if ($out = decode_init($m, $v, $seq, $in, $fh, undef)) { $in = $out; $m->{currentdecoding} = "init"; if ($output) { savedecode($in, $m->{textencoding}); } } ## Decoders may need tmp dirs. for $i (0 .. 0xffff) { $tmpd = sprintf "$tmpdir/$A0.%04x.%08x.%04x", $$, time, $i; last if mkdir($tmpd, 0700); } return undef if ($i == 0xffff); ## Find the decoder chain to be used. if ($seq->[$partno]->{chain}) { @chain = @{$seq->[$partno]->{chain}}; } else { ## default decoding chain @chain = qw(b64 zip); } for $type (@chain) { if ($type eq "b64") { $out = decode_b64($m, $v, $seq, $in, $fh, undef); } if ($type eq "uu") { $out = decode_uu($m, $v, $seq, $in, $fh, undef); } if ($external{$type}) { $out = decode_generic($m, $v, $seq, $in, $fh, $tmpd, $type); } if ($out) { $in = $out; $m->{currentdecoding} = $type; if ($output) { savedecode($in, $m->{textencoding}); } } } ## Run inspectors. inspect($m, $v, $seq, $fh, $partno, $tmpd, $in); #$m->{textencoding} =~ s/^[,:]//; ($m->{decodelines}, $m->{decodebytes}) = wc($in); rmdir($tmpd); return $in; } ## ## Inspect decoded text ## sub inspect { my ($part, $v, $seq, $fh, $partno, $tmpd, $text) = @_; my $type, @inspectors; my $tmpf; my $cmd, $s, @s, %cookies; return unless ($seq->[$partno]->{inspect}); @inspectors = @{$seq->[$partno]->{inspect}}; return unless ($#inspectors >= 0); $tmpf = "$tmpd/decode.dat"; open(DECODE, ">$tmpf"); print DECODE $text; close(DECODE); for $type (@inspectors) { next unless ($external{$type}->{inspector}); if (! ($text =~ /$external{$type}->{magic}/)) { declare($fh, 5, "-inspect [$type]"); next; } declare($fh, 5, "inspect [$type]"); $cmd = $external{$type}->{inspector}->{command}; $cmd =~ s!%f!$tmpf!g; $cmd =~ s!%p!$passwd!g; debug("decode/ext [$type]: $cmd\n"); open(DECODE, "$cmd 2>/dev/null |"); $/ = undef; $s = ; close(DECODE); $/ = $eol; @s = split(/[\r\n]+/, $s); $s = &{$external{$type}->{inspector}->{parser}}(@s); for $k (keys %{$s}) { setcookie($part, $v, $seq, "meta/$type/$k", $s->{$k}); } #&dumpcookies($part, $v, $seq); } unlink($tmpf); } ## ## zip inspector parser ## sub inspect_parse_zip { my @s = @_; my $i, @w, $n, $c = {}; $n = 0; for $i (3 .. $#s - 2) { ++$n; @w = split(" ", $s[$i], 4); $c->{"$n/size"} = $w[0]; $c->{"$n/date"} = $w[1]; $c->{"$n/time"} = $w[2]; $c->{"$n/name"} = $w[3]; } @w = split(" ", $s[$#s], 3); $c->{"size"} = $w[0]; $c->{"nfiles"} = $w[1]; return $c; } ## ## rar inspector parser ## sub inspect_parse_rar { my @s = @_; my $i, @w, $n, $c = {}; $n = 0; for $i (7 .. $#s - 3) { ++$n; @w = split(" ", $s[$i], 10); $c->{"$n/name"} = $w[0]; $c->{"$n/size"} = $w[1]; $c->{"$n/ratio"} = $w[3]; $c->{"$n/crc"} = $w[7]; $c->{"$n/method"} = $w[8]; } @w = split(" ", $s[$#s-1], 4); $c->{"nfiles"} = $w[0]; $c->{"size"} = $w[1]; return $c; } ## ## Store a message or MIME part into a msg object. ## sub storeMessage { my ($fh, $parent, $marker, @storelist) = @_; my ($h, $v); my $boundary; my $part; my $nparts = 0; my $done = 0; my $ancestor; #@storelist = cleanList(@storelist); if ($parent && $parent->{ancestor}) { $ancestor = $parent->{ancestor}; } elsif ($parent) { $ancestor = $parent; } else { $ancestor = undef; } my $msg = { ## My parent part parent => $parent, ## My topmost parent part ancestor => $ancestor, ## MIME depth depth => $parent->{depth} + 1, ## UNIX V7 "From " leader mboxfrom => undef, ## Message ID, for convenience id => undef, ## Header lookup tables hdr => [], hdrdict => {}, ## MIME attributes mime => {}, ## Body text text => [], textlines => 0, textbytes => 0, ## Body text decoding vectors and properties decoded => {}, decodelines => 0, decodebytes => 0, textencoding => undef, currentdecoding => undef, ## Inspector data (gen. for failed decoders) inspection => {}, ## SHA-1 digests sha => {}, ## Leader text before first MIME part preamble => [], ## Trailing text after last MIME part epilogue => [], ## MIME part list parts => [], ## Cookies cookies => {}, }; ## First, skip any leading whitespace lines. ## XXX Why skippy those part 0's with no MIME hdrs? $_ = undef; while ($_ = getline($fh)) { last unless (/^$/); debug "sm/ws: skipws\n"; } ungetline($fh, $_) if ($_); ## Assume we are pointing at a header block now. HDR: while ($_ = getline($fh)) { if (/^From /) { debug "sm/fromline: $_"; $msg->{mboxfrom} = $_; } elsif (/^$/) { last; } elsif (/^\S+:\s/) { # ok, it's a header line store($_, @storelist, $msg->{hdr}); chomp; ($h, $v) = split(/:\s+/, $_, 2); # scan ahead for continued header lines CHDR: while ($_ = getline($fh)) { if (/^\s+\S/) { store($_, @storelist, $msg->{hdr}); chomp; s/^\s+//; $v .= " $_"; } else { debug("sm/ungetcont: $_"); ungetline($fh, $_); last; } } $v =~ s/^\s+//; $v =~ s/\s+$//; $msg->{hdrmap}->{lc($h)} = $h; $msg->{hdrdict}->{$h} = $v; debug "sm/hdr: $h:$v\n"; ## special header handling here: if ($h =~ /^Content-type/i) { if ($v =~ /([^;]*);/) { $msg->{mime}->{type} = $1; } else { $msg->{mime}->{type} = $v; } if ($v =~ /charset="([^";]*)"?/) { $msg->{mime}->{charset} = $1; } elsif ($v =~ /charset=([^;\s]*)/) { $msg->{mime}->{charset} = $1; } if ($v =~ /name="([^";]*)"?/) { $msg->{mime}->{name} = $1; } elsif ($v =~ /name=([^;\s]*)/) { $msg->{mime}->{name} = $1; } if ($v =~ /boundary="([^";]*)"?/) { $msg->{mime}->{boundary} = $1; $boundary = $1; } elsif ($v =~ /boundary=([^;\s]*)/) { $msg->{mime}->{boundary} = $1; $boundary = $1; } } elsif ($h =~ /^Content-disposition/i) { if ($v =~ /(.*);/) { $msg->{mime}->{disp} = $1; } else { $msg->{mime}->{disp} = $v; } if ($v =~ /filename="([^";]*)"?/) { $msg->{mime}->{filename} = $1; } elsif ($v =~ /filename=([^;\s]*)/) { $msg->{mime}->{filename} = $1; } } elsif ($h =~ /^Content-Transfer-Encoding/i) { $msg->{mime}->{encoding} = $v; } elsif ($h =~ /^Content-description/i) { $msg->{mime}->{description} = $v; } elsif ($h =~ /^Content-ID/i) { $msg->{mime}->{cid} = $v; $msg->{mime}->{cid} =~ s/^\s*<(.*)>\s*$/$1/; } elsif ($h =~ /^Message-ID/i) { $msg->{id} = $v; } } else { # probably not a header debug "sm/ungethdr: $_"; ungetline($fh, $_); last; } #why?#store($_, @storelist, $msg->{hdr}); #last if (/^$/); } ## If type was message/rfc822, recurse again. if ($msg->{mime}->{type} =~ /^message\/rfc822/) { debug "sm/recurse [message/rfc822]\n"; $part = storeMessage($fh, $msg, $boundary, @storelist, $msg->{text}); push(@{$msg->{parts}}, $part); ++$nparts; return $msg; } ## Found end of header. Slurp body. while ($_ = getline($fh)) { if (defined($max_mime_depth) && ($parent->{depth} == $max_mime_depth)) { 1; ## do not next/last; fall through to common case } elsif ($boundary && /^--$boundary--\s*$/) { store($_, @storelist, $msg->{text}); $done = 1; next; } elsif ($boundary && /^--$boundary\s*$/) { store($_, @storelist, $msg->{text}); $part = storeMessage($fh, $msg, $boundary, @storelist, $msg->{text}); push(@{$msg->{parts}}, $part); ++$nparts; next; } elsif ($marker && /^--$marker(--)?\s*$/) { debug "sm/endpart [$marker] pushback\n"; ungetline($fh, $_); last; } ## fallthrough store($_, @storelist, $msg->{text}); if ($nparts == 0 && $msg->{mime}->{boundary}) { push(@{$msg->{preamble}}, $_); } elsif ($done) { push(@{$msg->{epilogue}}, $_); } } for (@{$msg->{text}}) { $msg->{textlines} ++; $msg->{textbytes} += length($_); } return $msg; } ## ## read a virus db entry ## sub readdbent { my ($file) = @_; my $ent; $/ = undef; open(ENTRY, $file) || do { return (undef, "cannot open $file: $!"); }; $_ = ; close(ENTRY); $/ = $eol; $ent = eval '{ ' . $_ . ' };' || do { return (undef, $ent); }; ## mark as a v2 signature unless sig file says otherwise. $ent->{version} = 2 unless ($ent->{version}); return ($ent, undef); } ## ## Return 1 if any of @list matches $item ## sub match_any { my ($item, @list) = @_; map { return 1 if ($item =~ /$_/); } @list; return 0; } ## ## read a virus db ## if vlist is passed, load only these viruses ## sub readdb { my ($dir, $check, @vlist) = @_; my $db = {}; my @keys = (); my @nkeys = (); my @names = (); my $k, $v; my $err, $nerrs; opendir(DB, $dir); @keys = grep(!/^\.\.?$/, readdir(DB)); close(DB); $nerrs = 0; for $k (sort @keys) { next if (! -f "$dir/$k"); ## Inverts the compare to vlist so you can supply ## regexes in the vlist next if ($#vlist > -1 && !match_any($k, @vlist)); ($v, $err) = readdbent("$dir/$k"); if ($v) { ## is sigdata marked inactive? next if ($v->{inactive} && !grep /$k/, @vlist); ## is this dams version out of spec range? if ($v->{versions}) { next if ($ver < $v->{versions}->{min}); next if ($ver > $v->{versions}->{max}); } ## is this sig version unsupported by this dams? if ($v->{version}) { next unless ($versions{$v->{version}}); } ## else push sig onto siglist $db->{$k} = $v; push(@names, $db->{$k}->{name}); push(@nkeys, $k); } if ($check) { if ($err) { print "Failed to load $k: $err\n"; ++$nerrs; } elsif ($v) { print "Loaded $k.\n"; } } } ## Store virus names & keys (filenames) sorted by virus name $db->{".keys"} = [ sort { $db->{$a}->{name} cmp $db->{$b}->{name} } @nkeys ]; $db->{".names"} = [sort @names]; return ($db, $nerrs); } ## ## check whether any headers are required to match, and do ## sub match_headers { my ($m, $v, $fh) = @_; my $each, $h, $val; ## if no hdr requirement, pass return $v unless ($v->{hdrs}); ## if no hdrs, fail return undef unless ($m->{hdrdict}); ## report declare($fh, 2, "checking headers..."); for $each (keys %{$v->{hdrs}}) { ($h, $val) = gethdr($m, $each); if ($val =~ /$v->{hdrs}->{$each}/) { declare($fh, 3, "%s:%s == /%s/", $each, $val, $v->{hdrs}->{$each}); } else { declare($fh, 3, "%s:%s != /%s/", $each, $val, $v->{hdrs}->{$each}); return undef; } } return $v; } ## ## check whether MIME boundary is required to match, and does ## sub match_boundary { my ($m, $v, $fh) = @_; ## if no boundary requirement, pass return $v unless ($v->{boundary}); ## if no boundary, fail return undef unless ($m->{mime}->{boundary}); ## report declare($fh, 2, "checking boundary..."); if ($m->{mime}->{boundary} =~ /$v->{boundary}/) { declare($fh, 3, "%s == /%s/", $m->{mime}->{boundary}, $v->{boundary}); } else { declare($fh, 3, "%s != /%s/", $m->{mime}->{boundary}, $v->{boundary}); return undef; } return $v; } ## ## match a mime part using regex ## sub match_part_rx { my ($m, $v, $seq, $type, $part, $fh) = @_; my $pat, $cookie, $backref; if ($seq->[$part]->{$type} =~ /^\*/) { ($cookie, $backref) = split(":", $'); $pat = cookie($m, $v, $seq, $cookie, $backref); } else { $pat = $seq->[$part]->{$type}; } if ($m->{parts}->[$part]->{mime}->{$type} =~ /$pat/) { declare($fh, 3, "%d:$type %s == /%s/", $part, $m->{parts}->[$part]->{mime}->{$type}, $pat); return $v; } else { declare($fh, 3, "%d:$type %s != /%s/", $part, $m->{parts}->[$part]->{mime}->{$type}, $pat); return undef; } } ## ## match a mime part using string equality ## sub match_part_str { my ($m, $v, $seq, $type, $part, $fh) = @_; my $pat, $cookie, $backref; if ($seq->[$part]->{$type} =~ /^\*/) { ($cookie, $backref) = split(":", $'); $pat = cookie($m, $v, $seq, $cookie, $backref); } else { $pat = $seq->[$part]->{$type}; } if ($m->{parts}->[$part]->{mime}->{$type} eq $pat) { declare($fh, 3, "%d:$type %s == %s", $part, $m->{parts}->[$part]->{mime}->{$type}, $pat); return $v; } else { declare($fh, 3, "%d:$type %s != %s", $part, $m->{parts}->[$part]->{mime}->{$type}, $pat); return undef; } } ## ## match a mime part using numeric equality ## sub match_part_num { my ($m, $v, $seq, $type, $part, $fh) = @_; my $pat, $cookie, $backref; if ($seq->[$part]->{$type} =~ /^\*/) { ($cookie, $backref) = split(":", $'); $pat = cookie($m, $v, $seq, $cookie, $backref); } else { $pat = $seq->[$part]->{$type}; } if ($m->{parts}->[$part]->{mime}->{$type} == $pat) { declare($fh, 3, "%d:$type %d == %d", $part, $m->{parts}->[$part]->{mime}->{$type}, $pat); return $v; } else { declare($fh, 3, "%d:$type %d != %d", $part, $m->{parts}->[$part]->{mime}->{$type}, $pat); return undef; } } ## ## check whether mime parts take the right structure ## (no content checks here; see &match_scannable) ## sub match_sequence { my ($m, $v, $seq, $fh) = @_; my $attr, $part; ## if no sequence by this name, pass return $v unless ($v->{$seq}); ## if no mime parts, fail return undef unless ($m->{parts}); for $part (0 .. $#{$v->{$seq}}) { if ($v->{$seq}->[$part]->{bytes}) { return undef unless (match_part_num($m, $v, $v->{$seq}, "bytes", $part, $fh)); } if ($v->{$seq}->[$part]->{lines}) { return undef unless (match_part_num($m, $v, $v->{$seq}, "lines", $part, $fh)); } for $attr qw(type boundary charset cid description disp encoding filename name) { if ($v->{$seq}->[$part]->{$attr}) { return undef unless (match_part_rx($m, $v, $v->{$seq}, $attr, $part, $fh)); } } } return $v; } ## ## Scan a mime part using a given MIME sequence from a virus ## definition. this is where all decoding methods are engaged. If a ## check will require a decoder, it goes in here. ## sub scan_seq_part { my ($part, $v, $seq, $fh, $partno) = @_; my $seg, $segno; my $rkey, $rtext; my $segs; my $n = $#{$segs}; my $cmpnum, $unpacker; ## If there is no seglist for this sequence/part, assume ## the old DAMS model. if (!exists($seq->[$partno]->{seglist})) { $seq->[$partno]->{seglist} = ["segments"]; } ## If the seglist exists and is a scalar, split it as an array ref. elsif (!ref($seq->[$partno]->{seglist})) { $seq->[$partno]->{seglist} = [split(":", $seq->[$partno]->{seglist})]; } ## If there's still no seglist, fail this check if ($#{$seq->[$partno]->{seglist}} < 0) { declare($fh, 4, "[no segment list]"); return undef; } for $seglist (@{$seq->[$partno]->{seglist}}) { declare($fh, 4, "using seglist \"$seglist\"..."); return 1 if (scan_part_seglist($part, $v, $seq, $fh, $partno, $v->{$seglist})); } return undef; } ## ## Scan a MIME part using a given segment list. ## sub scan_part_seglist { my ($part, $v, $seq, $fh, $partno, $segs) = @_; my $seg, $segno; my $text, $rkey, $rtext; my $n; my $cmpnum, $unpacker; my $cookie, $backref; return undef unless ($segs); ## Decode text. $text = decode($part, $v, $seq, $fh, $partno); ## Say something declare($fh, 5, "scanning segments"); $n = $#{$segs}; for $segno (0 .. $n) { $seg = $segs->[$segno]; ## ## ranged examinations ## ## complete start/range if ($seg->{start} && $seg->{end} && ! $seg->{range}) { $seg->{range} = $seg->{end} - $seg->{start} + 1; } elsif ($seg->{end} && $sed->{range} && ! $seg->{start}) { $seg->{start} = $seg->{end} - $seg->{range} + 1; } elsif (exists($seg->{start}) && !exists($seg->{range})) { if (exists($seg->{text})) { $seg->{range} = length($seg->{text}); } elsif (exists($seg->{match})) { $seg->{range} = length($seg->{match}); } } ## select text range. ## if start/range unavailable, set to all. if (exists($seg->{start}) && exists($seg->{range})) { $rtext = substr($text, $seg->{start}, $seg->{range}); } else { $seg->{start} = 0; $seg->{range} = "all"; $rtext = $text; } ## check text, match, or sha if ($seg->{text}) { if ($rtext eq $seg->{text}) { declare($fh, 6, "seg %d/%d: text [0x%x:%s] matches", $segno+1, $n+1, $seg->{start}, $seg->{range}); } else { declare($fh, 6, "seg %d/%d: text [0x%x:%s] fails", $segno+1, $n+1, $seg->{start}, $seg->{range}); return undef; } } elsif ($seg->{match}) { if ($rtext =~ /$seg->{match}/) { declare($fh, 6, "seg %d/%d: match [0x%x:%s] matches", $segno+1, $n+1, $seg->{start}, $seg->{range}); } else { declare($fh, 6, "seg %d/%d: match [0x%x:%s] fails", $segno+1, $n+1, $seg->{start}, $seg->{range}); return undef; } } elsif ($seg->{sha}) { $rkey = $seg->{start} . ":" . $seg->{range}; if (! $part->{sha}->{$rkey}) { $sha->reset; $sha->add($rtext); $part->{sha}->{$rkey} = $sha->hexdigest; } if ($part->{sha}->{$rkey} =~ $seg->{sha}) { declare($fh, 6, "seg %d/%d: sha [0x%x:%s] %s : %s matches", $segno+1, $n+1, $seg->{start}, $seg->{range}, $part->{sha}->{$rkey}, $seg->{sha}); } else { declare($fh, 6, "seg %d/%d: sha [0x%x:%s] %s : %s fails", $segno+1, $n+1, $seg->{start}, $seg->{range}, $part->{sha}->{$rkey}, $seg->{sha}); return undef; } } ## ## Integer compares ## $cmpnum = undef; $unpacker = undef; ## the way we interpret a piece of text as an integer ## depends on the keyword used: if ($seg->{lelong}) { $unpacker = "V"; $start = $seg->{lelong}; } elsif ($seg->{leshort}) { $unpacker = "v"; $start = $seg->{leshort}; } elsif ($seg->{belong}) { $unpacker = "N"; $start = $seg->{belong}; } elsif ($seg->{beshort}) { $unpacker = "n"; $start = $seg->{beshort}; } elsif ($seg->{byte}) { $unpacker = "C"; $start = $seg->{byte}; } if ($unpacker) { $cmpnum = unpack($unpacker, substr($text, $start, 8)); } if (!$cmpnum && $seg->{cookie}) { ($cookie, $backref) = split(":", $seg->{cookie}); $backref = 1 unless($backref); $cmpnum = cookie($part, $v, $seq, $cookie, $backref); $unpacker = "*$cookie:$backref"; } if (!$cmpnum && $seg->{bytes}) { $cmpnum = $part->{decodebytes}; $unpacker = "bytes"; } if (!$cmpnum && $seg->{lines}) { $cmpnum = $part->{decodelines}; $unpacker = "lines"; } if ($seg->{greater}) { if ($cmpnum > $seg->{greater}) { declare($fh, 6, "seg %d/%d: [%s] %d > %d matches", $segno+1, $n+1, $unpacker, $cmpnum, $seg->{greater}); } else { declare($fh, 6, "seg %d/%d: [%s] %d > %d fails", $segno+1, $n+1, $unpacker, $cmpnum, $seg->{greater}); return undef; } } if ($seg->{less}) { if ($cmpnum < $seg->{less}) { declare($fh, 6, "seg %d/%d: [%s] %d < %d matches", $segno+1, $n+1, $unpacker, $cmpnum, $seg->{less}); } else { declare($fh, 6, "seg %d/%d: [%s] %d < %d fails", $segno+1, $n+1, $unpacker, $cmpnum, $seg->{less}); return undef; } } if ($seg->{equal}) { if ($cmpnum == $seg->{equal}) { declare($fh, 6, "seg %d/%d: [%s] %d == %d matches", $segno+1, $n+1, $unpacker, $cmpnum, $seg->{equal}); } else { declare($fh, 6, "seg %d/%d: [%s] %d == %d fails", $segno+1, $n+1, $unpacker, $cmpnum, $seg->{equal}); return undef; } } #else { # declare($fh, 6, "seg %d/%d: no pattern", # $segno+1, $n+1); # return undef; #} } return 1; } ## ## scan scannable parts ## sub match_scannable { my ($m, $v, $seq, $fh) = @_; my $i, $part; my $n = $#{$v->{$seq}}; ## if no sequence by this name, pass return $v unless ($v->{$seq}); ## if parts required but no segments listed, fail #obsolete#return undef unless ($v->{segments}); ## if no parts in message, fail return undef unless ($m->{parts}); for $i (0 .. $n) { $part = $v->{$seq}->[$i]; ## [v2] If we have a scan property and no chain property, ## we have a v2 defn. Coerce the scan prop into a chain ## prop so we can treat them with a common approach. if (!$part->{chain} && $part->{scan} =~ /../) { $part->{chain} = [split(":", $part->{scan})]; } ## [v3] If we have a chain property, and it's not a list, ## split and coerce into a list. elsif ($part->{chain} && !ref($part->{chain})) { $part->{chain} = [split(":", $part->{chain})]; } ## [v3] if we have an inspect property, and it's not a ## list, split and coerce into a list. if ($part->{inspect} && !ref($part->{inspect})) { $part->{inspect} = [split(":", $part->{inspect})]; } ## Skip if we have no chain list. next unless ($part->{chain}); declare($fh, 3, "part %d/%d is scannable...", $i+1, $n+1); return undef unless (scan_seq_part($m->{parts}->[$i], $v, $v->{$seq}, $fh, $i)); } return $v; } ## ## check a message against a single virus ## sub check_vdef { my ($m, $v, $fh) = @_; my $seq, @seqs; ## match_* functions return undef if the message does NOT match ## the virus signature. They return the virus sig if it does match. ## Fail cheaply if any prereq checks do not match. declare($fh, 1, "checking prerequisites..."); return undef unless match_headers($m, $v, $fh); return undef unless match_boundary($m, $v, $fh); ## For backward compatibility, if there's no checklist, ## assume one containing "parts". if (!exists($v->{checklist})) { $v->{checklist} = ["parts"]; } ## Normalize sequence list, if it's not an array ref. if (!ref($v->{checklist})) { $v->{checklist} = [split(":", $v->{checklist})]; } ## A sequence list now exists. Try all sequences, keeping ## those that match. @seqs = (); declare($fh, 1, "checking MIME part conformance..."); for $seq (@{$v->{checklist}}) { declare($fh, 2, "using sequence \"$seq\"..."); push (@seqs, $seq) if match_sequence($m, $v, $seq, $fh); } ## Fail if no sequences were valid return undef if ($#seqs < 0); ## Iterate through MIME parts, looking for scannable ones. declare($fh, 1, "scanning eligible MIME parts..."); for $seq (@seqs) { declare($fh, 2, "using sequence \"$seq\"..."); return $v if (match_scannable($m, $v, $seq, $fh)); } ## No sequences were valid; fail. return undef; } ## ## check a message against a db ## if fh is passed, print diagnostics to it ## sub check { my ($db, $m, $fh) = @_; my $k, $v; my @sigs; ## Sort definitions by priority, high to low. If a sig has no ## priority assigned, give it zero. Priorities are unbounded ## integers. A "generic" catchall variant of a definition should ## have lower priority than a specific variant. @sigs = sort { ($db->{$b}->{priority} || 0) <=> ($db->{$a}->{priority} || 0) } grep !/^\./, keys %{$db}; for $k (@sigs) { $v = $db->{$k}; declare($fh, 0, "Checking $k [%s]...", $v->{name}); return $v if (check_vdef($m, $v, $fh)); } return undef; } ## ## Syslog arbitrary text ## sub log_write { my ($logdata, $msg) = @_; return undef unless ($logdata); return system("/usr/bin/logger -p $logdata -t '$A0[$$]' '$msg'"); } ## ## Syslog findings ## sub log_result { my ($m, $v, $logdata) = @_; return undef unless ($logdata); my $msgid = $m->{id} || ""; my $match = $v->{name} || ""; return log_write($logdata, "Detected <$match> in $msgid"); } ## ## set a cookie ## sub setcookie { my ($m, $v, $seq, $name, $val) = @_; $m = $m->{ancestor} || $m; $m->{cookies}->{$v->{name}}->{$name}->[1] = $val; } ## ## dump cookies ## sub dumpcookies { my ($m, $v, $seq) = @_; my $c, $i, $n; $m = $m->{ancestor} || $m; for $n (sort keys %{$m->{cookies}->{$v->{name}}}) { $c = $m->{cookies}->{$v->{name}}->{$n}; for $i (1 .. $#{$c}) { print "Cookie $n\[$i]: ", $c->[$i], "\n"; } } } ## ## Try to get a cookie from a mime part. ## ## $name is of the format "#/name", where "#" is the mime part to extract from ## sub cookie { my ($m, $v, $seq, $name, $backref) = @_; my $part; $m = $m->{ancestor} || $m; $backref = $backref || 1; if (exists($m->{cookies}->{$v->{name}}->{$name})) { return $m->{cookies}->{$v->{name}}->{$name}->[$backref]; } if (! exists($m->{cookies}->{$v->{name}})) { $m->{cookies}->{$v->{name}} = {}; } $m->{cookies}->{$v->{name}}->{$name} = undef; for $part (0 .. $#{$seq}) { if (exists($seq->[$part]->{cookies}) && exists($seq->[$part]->{cookies}->{$name})) { $_ = join('', @{$m->{parts}->[$part]->{text}}); if ($_ =~ /$seq->[$part]->{cookies}->{$name}/) { $m->{cookies}->{$v->{name}}->{$name} = [undef,$1,$2,$3,$4,$5,$6,$7,$8,$9]; } } } return $m->{cookies}->{$v->{name}}->{$name}->[$backref]; } ############################################################################### ## command handlers ## ## stream copy ## sub cmd_copy { my ($m, $db, $vlist, @args) = @_; print $m->{mboxfrom}; print join("", @{$m->{hdr}}); print join("", @{$m->{text}}); } ## ## recursive copy ## sub cmd_rcopy { my ($m, $db, $vlist, @args) = @_; my $part; my $nparts; print $m->{mboxfrom}; print join("", @{$m->{hdr}}); print "\n"; $nparts = $#{$m->{parts}}; if ($nparts >= 0) { print join("", @{$m->{preamble}}); for $part (@{$m->{parts}}) { print "--", $m->{mime}->{boundary}, "\n" if ($m->{mime}->{boundary}); cmd_rcopy($part, $db, $vlist, @args); } print "--", $m->{mime}->{boundary}, "--\n\n" if ($m->{mime}->{boundary}); } else { print join("", @{$m->{text}}); } } ## ## extract header ## sub cmd_hdr { my ($m, $db, $vlist, @args) = @_; my $realhdr, $val, $arg; for $arg (@args) { ($realhdr, $val) = gethdr($m, $arg); if ($realhdr) { print "$realhdr: $val\n"; } } } ## ## dump a message ## sub cmd_dump { my ($m, $db, $vlist, @args) = @_; my $dd = Data::Dumper->new([$m], ['msg']); $dd->Purity(1); $dd->Terse(1); $dd->Deepcopy(1); print $dd->Dump; } ## ## dump a database ## sub cmd_dumpdb { my ($m, $db, $vlist, @args) = @_; my $dd = Data::Dumper->new([$db], ['db']); $dd->Purity(1); $dd->Terse(1); $dd->Deepcopy(1); print $dd->Dump; } ## ## trace message ## sub cmd_trace { my ($m, $db, $vlist, @args) = @_; trace($m, 0, 0); } ## ## cookie ## sub cmd_cookie { my ($m, $db, $vlist, @args) = @_; for $v (@{$db->{".keys"}}) { if ($c = cookie($m, $db->{$v}, "xxx", $cmdargs[0], $cmdargs[1])) { print "cookie [", $db->{$v}->{name}, "]: $c\n"; } } } ## ## ident (virus-scan and show your work) ## sub cmd_ident { my ($m, $db, $vlist, @args) = @_; my $r; if ($r = check($db, $m, \*STDOUT)) { print "=> ", $r->{name}, "\n"; log_result($m, $r, $syslog); return 1; } return 0; } ## ## scan for a virus, showing no work ## sub cmd_check { my ($m, $db, $vlist, @args) = @_; my $r; if ($r = check($db, $m, undef)) { print $r->{name}, "\n"; log_result($m, $r, $syslog); return 1; } return 0; } ## ## list known definitions ## sub cmd_list { my ($m, $db, $vlist, @args) = @_; my $k, $rev; print "D.A.M.S $ver [$rcsid]\n"; print "Known signatures: ", $#{$db->{".keys"}}+1, "\n"; for $k (@{$db->{".keys"}}) { $rev = $db->{$k}->{revision}; $rev =~ s!^.*Revision: (\S+).*!$1!; printf " %-.25s [%-.10s, %-.10s]", $db->{$k}->{name}, $rev, $db->{$k}->{added}; if ($db->{$k}->{aka}) { printf " aka %s", join(", ", @{$db->{$k}->{aka}}), "\n"; } print "\n"; } } ## ## validate database ## sub cmd_checkdb { my ($m, $db, $vlist, @args) = @_; printf "$A0: loaded %d signatures correctly.\n", $#{$db->{".keys"}}+1; } ## ## show runtime size data ## sub cmd_size { my ($m, $db, $vlist, @args) = @_; my $vsz, $mlen, $dlen; $m = storeMessage(\*INPUT, undef, undef); $vsz = undef; open(PS, "/usr/bin/ps -efopid,vsz |"); while () { next unless (/^\s*$$\s+(\d+)\s*$/o); $vsz = $1; last; } close PS; my $dd = Data::Dumper->new([$m], ['m']); $dd->Purity(1); $dd->Terse(1); $dd->Deepcopy(1); $mlen = sprintf("%.3f", length($dd->Dump) / 1024); my $dd = Data::Dumper->new([$db], ['db']); $dd->Purity(1); $dd->Terse(1); $dd->Deepcopy(1); $dlen = sprintf("%.3f", length($dd->Dump) / 1024); print "vsize: $vsz kb\n"; print "ddata: $dlen kb\n"; print "mdata: $mlen kb\n"; } ## ## usage ## sub usage { my ($cmdtbl, $cmd) = @_; print STDERR "D.A.M.S $ver [$rcsid]\n"; print STDERR "General usage:\n"; print STDERR " $A0 [options] subcommand [args] [-- signature list]\n"; print STDERR "\n"; print STDERR " options: [-d] [-t time-limit]\n"; print STDERR " db-opts: [-D db-dir]\n"; print STDERR " msg-opts: [-m mime-depth] [-i input-file]\n"; print STDERR " scan-opts: [-l sys.log] [-o decode-to]\n"; print STDERR "\n"; if ($cmd) { print STDERR "Subcommand \"$cmd\": ", $cmdtbl->{$cmd}->{help}, "\n"; print STDERR " $A0 [options] ", $cmdtbl->{$cmd}->{usage}, "\n"; return; } print STDERR "Message-parsing tests/demonstrations:\n"; for $cmd qw(copy rcopy trace dump size hdr) { print STDERR " $A0 [options] ", $cmdtbl->{$cmd}->{usage}, "\n"; } print STDERR "\n"; print STDERR "Database checks and inquiries:\n"; for $cmd qw(dumpdb checkdb list size) { print STDERR " $A0 [options] ", $cmdtbl->{$cmd}->{usage}, "\n"; } print STDERR "\n"; print STDERR "Virus checks and diagnostics:\n"; for $cmd qw(cookie check ident) { print STDERR " $A0 [options] ", $cmdtbl->{$cmd}->{usage}, "\n"; } print STDERR "\n"; return; } sub check_opts { my ($o, $argv) = @_; my @saved; $o = {} unless ($o); if ($argv) { @saved = @ARGV; @ARGV = @{$argv}; } getopts("hdD:i:o:l:m:t:", $o); if ($argv) { @{$argv} = @ARGV; @ARGV = @saved; } $DEBUG = 1 if ($o->{d}); $dbpath = $o->{D} || "$D0/damsdb"; $syslog = $o->{l}; if ($o->{o}) { $output = $o->{o}; if (-d $output) { $output = "$output/$A0.out.$$"; } } if ($o->{m}) { if ($o->{m} eq "-" || $o->{m} eq "0") { $max_mime_depth = undef; } else { $max_mime_depth = $o->{m}; } } if ($o->{t}) { if ($o->{t} eq "-" || $o->{t} eq "0") { $max_time_allowed = undef; } else { $max_time_allowed = $o->{t}; } } if ($o->{i}) { open(INPUT, $o->{i}) || do { print STDERR "$A0: cannot open input \"", $o->{i}, "\": $!\n"; exit 10; }; } else { *INPUT = *STDIN; } return $o; } ############################################################################### ## command table $cmdtable = { copy => { db => 0, msg => 1, help => "copies a messsage from stdin to stdout", fn => \&cmd_copy, usage => "[msg-opts] copy", }, rcopy => { db => 0, msg => 1, help => "reconstructs a messsage by MIME recursion", fn => \&cmd_rcopy, usage => "[msg-opts] rcopy", }, hdr => { db => 0, msg => 1, help => "extracts one or more headers from a message", fn => \&cmd_hdr, usage => "[msg-opts] hdr header1 [header2 ...]", }, dump => { db => 0, msg => 1, help => "dumps a message object in Data::Dumper format", fn => \&cmd_dump, usage => "[msg-opts] dump", }, dumpdb => { db => 1, msg => 0, help => "dumps a virus database object in Data::Dumper format", fn => \&cmd_dumpdb, usage => "[db-opts] dumpdb [-- sig-list]", }, trace => { db => 0, msg => 1, help => "trace and display the MIME structure of a message", fn => \&cmd_trace, usage => "[msg-opts] trace", }, cookie => { db => 1, msg => 1, help => "extract a cookie from a message using rules from virus definition", fn => \&cmd_cookie, usage => "[msg-opts] [db-opts] cookie cookie-name [-- sig-list]", }, ident => { db => 1, msg => 1, help => "scan a message for a virus, displaying checks performed", fn => \&cmd_ident, usage => "[msg-opts] [db-opts] [scan-opts] ident [-- sig-list]", }, check => { db => 1, msg => 1, help => "scan a message for a virus, relatively quietly", fn => \&cmd_check, usage => "[msg-opts] [db-opts] [scan-opts] check [-- sig-list]", }, scan => { db => 1, msg => 1, help => "scan a message for a virus, relatively quietly", fn => \&cmd_check, usage => "[msg-opts] [db-opts] [scan-opts] check [-- sig-list]", }, list => { db => 1, msg => 0, help => "show all recognized viruses", fn => \&cmd_list, usage => "[db-opts] list [-- sig-list]", }, checkdb => { db => 1, msg => 0, help => "validate the virus database", fn => \&cmd_checkdb, usage => "[db-opts] checkdb [-- sig-list]", }, size => { db => 1, msg => 1, help => "display runtime size figures", fn => \&cmd_size, usage => "[msg-opts] [db-opts] size [-- sig-list]", }, }; ############################################################################### ## main routine ## Parse leading options $o = check_opts(undef, undef); ## Timer for resource limiting. $SIG{ALRM} = sub { if ($syslog) { log_write($syslog, "timeout after $max_time_allowed seconds"); } elsif (-t STDIN) { print STDERR "$A0: timeout after $max_time_allowed seconds\n"; } exit 0; }; ## Break remainder of arguments into subcommand args and database args @cmdargs = (); @dbargs = (); $dashes = 0; for (@ARGV) { if ($dashes) { push(@dbargs, $_); } elsif ($_ eq "--") { $dashes = 1; } else { push(@cmdargs, $_); } } while ($#cmdargs >= 0) { ## Get the subcommand $cmd = shift @cmdargs; debug("main/cmd: cmd = $cmd\n"); ## Parse options that appear around subcommand args $o = check_opts($o, \@cmdargs); ## Set timer. When daemon mode becomes implemented, dodge this. alarm($max_time_allowed); if ($TEST_GETOPT) { map { print "opt $_: ", $o->{$_}, "\n"; } sort keys %{$o}; map { print "cmdarg[$cmd]: $_\n"; } @cmdargs; map { print "dbarg: $_\n"; } @dbargs; exit; } if ($cmd eq undef) { usage($cmdtable, undef); exit 2; } if (! $cmdtable->{$cmd}) { usage($cmdtable, undef); exit 4; } if ($o->{h}) { usage($cmdtable, $cmd); exit 2; } if (!$db && $cmdtable->{$cmd}->{db}) { ($db, $errs) = readdb($dbpath, $cmd eq "checkdb", @dbargs); if ($errs) { print STDERR "$A0: fatal errors opening database:\n"; print STDERR "$errs\n"; exit 20; } } if (!$m && $cmdtable->{$cmd}->{msg}) { $m = storeMessage(\*INPUT, undef, undef); } $rc = &{$cmdtable->{$cmd}->{fn}}($m, $db, \@dbargs, @cmdargs); } exit $rc;