Current File : //usr/share/perl5/vendor_perl/Mail/SpamAssassin/Plugin/FreeMail.pm |
package Mail::SpamAssassin::Plugin::FreeMail;
use strict;
my $VERSION = 2.002;
### About:
#
# If From-address is freemail, and Reply-To or address found in mail body is
# a different freemail address, return success. Good sign of Nigerian scams
# etc. Test idea from Marc Perkel.
#
# Also separate functions to check various portions of message for freemails.
#
### Install:
#
# Please add loadplugin to init.pre (so it's loaded before cf files!):
#
# loadplugin Mail::SpamAssassin::Plugin::FreeMail FreeMail.pm
#
### Supported .cf clauses:
#
# freemail_domains domain ...
#
# List of domains to be used in checks.
#
# Regexp is not supported, but following wildcards work:
#
# ? for single character (does not match a dot)
# * for multiple characters (does not match a dot)
#
# For example:
# freemail_domains hotmail.com hotmail.co.?? yahoo.* yahoo.*.*
#
# freemail_whitelist email/domain ...
#
# Emails or domains listed here are ignored (pretend they arent
# freemail). No wildcards!
#
# header FREEMAIL_REPLYTO eval:check_freemail_replyto(['option'])
#
# Checks/compares freemail addresses found from headers and body.
#
# Possible options:
#
# replyto From: or body address is different than Reply-To
# (this is the default)
# reply as above, but if no Reply-To header is found,
# compares From: and body
#
# header FREEMAIL_FROM eval:check_freemail_from(['regex'])
#
# Checks all possible "from" headers to see if sender is freemail.
# Uses SA all_from_addrs() function (includes 'Resent-From', 'From',
# 'EnvelopeFrom' etc).
#
# Add optional regex to match the found email address(es). For example,
# to see if user ends in digit: check_freemail_from('\d@')
#
# If you use multiple check_freemail_from rules with regexes, remember
# that they might hit different emails from different heades. To match
# a certain header only, use check_freemail_header.
#
# header FREEMAIL_HDRX eval:check_freemail_header('header' [, 'regex'])
#
# Searches defined header for freemail address. Optional regex to match
# the found address (like in check_freemail_from).
#
# header FREEMAIL_BODY eval:check_freemail_body(['regex'])
#
# Searches body for freemail address. With optional regex to match.
#
### Changelog:
#
# 1.995 - public beta version, revamped whole code, moved default
# domains to separate file: http://sa.hege.li/freemail_domains.cf
# 1.996 - fix freemail_skip_bulk_envfrom
# 1.997 - set freemail_skip_when_over_max to 1 by default
# 1.998 - don't warn about missing freemail_domains when linting
# 1.999 - default whitelist undisclosed-recipient@yahoo.com etc
# 2.000 - some cleaning up
# 2.001 - fix freemail_whitelist
# 2.002 - _add_desc -> _got_hit, fix description email append bug
#
### Blah:
#
# Author: Henrik Krohns <sa@hege.li>
# Copyright 2009 Henrik Krohns
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::PerMsgStatus;
use Mail::SpamAssassin::Util::RegistrarBoundaries;
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
# List of TLDs from RegistrarBoundaries.pm
my $tlds = $Mail::SpamAssassin::Util::RegistrarBoundaries::VALID_TLDS_RE;
### Some regexp tips courtesy of http://www.regular-expressions.info/email.html
### v 0.02
# full email regex
my $email_regex = qr/
(?=.{0,64}\@) # limit userpart to 64 chars (and speed up searching?)
(?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-]) # start boundary
( # capture email
[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+ # no dot in beginning
(?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
\@
(?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
${tlds} # ends with valid tld
)
(?!(?:[a-z0-9-]|\.[a-z0-9])) # make sure domain ends here
/xi;
# default email whitelist
my $email_whitelist = qr/
^(?:
abuse|support|sales|info|helpdesk|contact|kontakt
| (?:post|host|domain)master
| undisclosed.* # yahoo.com etc(?)
| request-[a-f0-9]{16} # live.com
| bounced?- # yahoo.com etc
| [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
| .+=.+=.+ # gmail forward
)\@
/xi;
# skip replyto check when envelope sender is
# allow <> for now
my $skip_replyto_envfrom = qr/
(?:
^(?:post|host|domain)master
| ^double-bounce
| ^(?:sentto|owner|return|(?:gr)?bounced?)-.+
| -(?:request|bounces?|admin|owner)
| \b(?:do[._-t]?)?no[._-t]?repl(?:y|ies)
| .+=.+
)\@
/xi;
sub dbg { Mail::SpamAssassin::Plugin::dbg ("FreeMail: @_"); }
sub new {
my ($class, $mailsa) = @_;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsa);
bless ($self, $class);
$self->{freemail_available} = 1;
$self->set_config($mailsa->{conf});
$self->register_eval_rule("check_freemail_replyto");
$self->register_eval_rule("check_freemail_from");
$self->register_eval_rule("check_freemail_header");
$self->register_eval_rule("check_freemail_body");
return $self;
}
sub set_config {
my ($self, $conf) = @_;
my @cmds;
push(@cmds, {
setting => 'freemail_max_body_emails',
default => 5,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
}
);
push(@cmds, {
setting => 'freemail_max_body_freemails',
default => 3,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
}
);
push(@cmds, {
setting => 'freemail_skip_when_over_max',
default => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
}
);
push(@cmds, {
setting => 'freemail_skip_bulk_envfrom',
default => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
}
);
push(@cmds, {
setting => 'freemail_add_describe_email',
default => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
}
);
$conf->{parser}->register_commands(\@cmds);
}
sub parse_config {
my ($self, $opts) = @_;
if ($opts->{key} eq "freemail_domains") {
foreach my $temp (split(/\s+/, $opts->{value})) {
if ($temp =~ /^[a-z0-9.*?-]+$/i) {
my $value = lc($temp);
if ($value =~ /[*?]/) { # separate wildcard list
$self->{freemail_temp_wc}{$value} = 1;
}
else {
$self->{freemail_domains}{$value} = 1;
}
}
else {
warn("invalid freemail_domains: $temp");
}
}
$self->inhibit_further_callbacks();
return 1;
}
if ($opts->{key} eq "freemail_whitelist") {
foreach my $temp (split(/\s+/, $opts->{value})) {
my $value = lc($temp);
if ($value =~ /\w[.@]\w/) {
$self->{freemail_whitelist}{$value} = 1;
}
else {
warn("invalid freemail_whitelist: $temp");
}
}
$self->inhibit_further_callbacks();
return 1;
}
return 0;
}
sub finish_parsing_end {
my ($self, $opts) = @_;
my $wcount = 0;
if (defined $self->{freemail_temp_wc}) {
my @domains;
foreach my $value (keys %{$self->{freemail_temp_wc}}) {
$value =~ s/\./\\./g;
$value =~ s/\?/./g;
$value =~ s/\*/[^.]*/g;
push(@domains, $value);
}
my $doms = join('|', @domains);
$self->{freemail_domains_re} = qr/\@(?:${doms})$/;
$wcount = scalar @domains;
undef %{$self->{freemail_temp_wc}};
}
my $count = scalar keys %{$self->{freemail_domains}};
if ($count + $wcount) {
dbg("loaded freemail_domains entries: $count normal, $wcount wildcard");
}
else {
if ($self->{main}->{lint_rules} ||1) {
dbg("no freemail_domains entries defined, disabling plugin");
}
else {
warn("no freemail_domains entries defined, disabling plugin");
}
$self->{freemail_available} = 0;
}
return 0;
}
sub _is_freemail {
my ($self, $email) = @_;
return 0 if $email eq '';
if (defined $self->{freemail_whitelist}{$email}) {
dbg("whitelisted email: $email");
return 0;
}
my $domain = $email;
$domain =~ s/.*\@//;
if (defined $self->{freemail_whitelist}{$domain}) {
dbg("whitelisted domain: $domain");
return 0;
}
if ($email =~ $email_whitelist) {
dbg("whitelisted email, default: $email");
return 0;
}
if (defined $self->{freemail_domains}{$domain}
or ( defined $self->{freemail_domains_re}
and $email =~ $self->{freemail_domains_re} )) {
return 1;
}
return 0;
}
sub _parse_body {
my ($self, $pms) = @_;
# Parse body
if (not defined $pms->{freemail_cache}{body}) {
%{$pms->{freemail_cache}{body}} = ();
my %seen;
my @body_emails;
# get all <a href="mailto:", since they don't show up on stripped_body
my $parsed = $pms->get_uri_detail_list();
while (my($uri, $info) = each %{$parsed}) {
if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
if ($uri =~ /^(?:(?i)mailto):${email_regex}/) {
my $email = lc($1);
push(@body_emails, $email) unless defined $seen{$email};
$seen{$email} = 1;
last if scalar @body_emails >= 20; # sanity
}
}
}
# scan stripped normalized body
# have to do this way since get_uri_detail_list doesn't know what mails are inside <>
my $body = $pms->get_decoded_stripped_body_text_array();
BODY: foreach (@$body) {
# strip urls with possible emails inside
s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
# strip emails contained in <>, not mailto:
# also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
s#<?(?<!mailto:)${email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
while (/$email_regex/g) {
my $email = lc($1);
push(@body_emails, $email) unless defined $seen{$email};
$seen{$email} = 1;
last BODY if scalar @body_emails >= 40; # sanity
}
}
my $count_all = 0;
my $count_fm = 0;
foreach my $email (@body_emails) {
if (++$count_all == $pms->{main}->{conf}->{freemail_max_body_emails}) {
if ($pms->{main}->{conf}->{freemail_skip_when_over_max}) {
$pms->{freemail_skip_body} = 1;
dbg("too many unique emails found from body");
return 0;
}
}
next unless $self->_is_freemail($email);
if (++$count_fm == $pms->{main}->{conf}->{freemail_max_body_freemails}) {
if ($pms->{main}->{conf}->{freemail_skip_when_over_max}) {
$pms->{freemail_skip_body} = 1;
dbg("too many unique freemails found from body");
return 0;
}
}
$pms->{freemail_cache}{body}{$email} = 1;
}
dbg("all body freemails: ".join(', ', keys %{$pms->{freemail_cache}{body}}))
if scalar keys %{$pms->{freemail_cache}{body}};
}
if (defined $pms->{freemail_skip_body}) {
dbg("[cached] body email limit exceeded, skipping");
return 0;
}
return 1;
}
sub _got_hit {
my ($self, $pms, $email, $desc) = @_;
my $rulename = $pms->get_current_eval_rule_name();
if (defined $pms->{conf}->{descriptions}->{$rulename}) {
$desc = $pms->{conf}->{descriptions}->{$rulename};
}
if ($pms->{main}->{conf}->{freemail_add_describe_email}) {
$email =~ s/\@/[at]/g;
$pms->got_hit($rulename, "", description => $desc." ($email)", ruletype => 'eval');
}
else {
$pms->got_hit($rulename, "", description => $desc, ruletype => 'eval');
}
}
sub check_freemail_header {
my ($self, $pms, $header, $regex) = @_;
return 0 unless $self->{freemail_available};
my $rulename = $pms->get_current_eval_rule_name();
dbg("RULE ($rulename) check_freemail_header".(defined $regex ? " regex:$regex" : ""));
unless (defined $header) {
warn("check_freemail_header needs argument");
return 0;
}
my $re;
if (defined $regex) {
$re = eval { qr/$regex/; };
if ($@) {
warn("invalid regex: $@");
return 0;
}
}
my $email = lc($pms->get(index($header,':') >= 0 ? $header : $header.":addr"));
if ($email eq '') {
dbg("header $header not found from mail");
return 0;
}
dbg("address from header $header: $email");
if ($self->_is_freemail($email)) {
if (defined $re) {
return 0 unless $email =~ $re;
dbg("HIT! $email is freemail and matches regex");
}
else {
dbg("HIT! $email is freemail");
}
$self->_got_hit($pms, $email, "Header $header is freemail");
return 0;
}
return 0;
}
sub check_freemail_body {
my ($self, $pms, $regex) = @_;
return 0 unless $self->{freemail_available};
my $rulename = $pms->get_current_eval_rule_name();
dbg("RULE ($rulename) check_freemail_body".(defined $regex ? " regex:$regex" : ""));
return 0 unless $self->_parse_body($pms);
my $re;
if (defined $regex) {
$re = eval { qr/$regex/; };
if ($@) {
warn("invalid regex: $@");
return 0;
}
}
if (defined $re) {
foreach my $email (keys %{$pms->{freemail_cache}{body}}) {
if ($email =~ $re) {
dbg("HIT! email from body is freemail and matches regex: $email");
$self->_got_hit($pms, $email, "Email from body is freemail");
return 0;
}
}
}
elsif (scalar keys %{$pms->{freemail_cache}{body}}) {
my $emails = join(', ', keys %{$pms->{freemail_cache}{body}});
dbg("HIT! body has freemails: $emails");
$self->_got_hit($pms, $emails, "Body contains freemails");
return 0;
}
return 0;
}
sub check_freemail_from {
my ($self, $pms, $regex) = @_;
return 0 unless $self->{freemail_available};
my $rulename = $pms->get_current_eval_rule_name();
dbg("RULE ($rulename) check_freemail_from".(defined $regex ? " regex:$regex" : ""));
my $re;
if (defined $regex) {
$re = eval { qr/$regex/; };
if ($@ or not defined $re) {
warn("invalid regex: $@");
return 0;
}
}
my %from_addrs = map { lc($_) => 1 } ($pms->all_from_addrs());
delete $from_addrs{''}; # no empty ones thx
unless (scalar keys %from_addrs) {
dbg("no from-addresses found to check");
return 0;
}
dbg("all from-addresses: ".join(', ', keys %from_addrs));
foreach my $email (keys %from_addrs) {
next unless $self->_is_freemail($email);
if (defined $re) {
next unless $email =~ $re;
dbg("HIT! $email is freemail and matches regex");
}
else {
dbg("HIT! $email is freemail");
}
$self->_got_hit($pms, $email, "Sender address is freemail");
return 0;
}
return 0;
}
sub check_freemail_replyto {
my ($self, $pms, $what) = @_;
return 0 unless $self->{freemail_available};
my $rulename = $pms->get_current_eval_rule_name();
dbg("RULE ($rulename) check_freemail_replyto");
if (defined $what) {
if ($what ne 'replyto' and $what ne 'reply') {
warn("invalid check_freemail_replyto option: $what");
return 0;
}
}
else {
$what = 'replyto';
}
# Skip mailing-list etc looking requests, mostly FPs from them
if ($pms->{main}->{conf}->{freemail_skip_bulk_envfrom}) {
my $envfrom = lc($pms->get("EnvelopeFrom"));
if ($envfrom =~ $skip_replyto_envfrom) {
dbg("envelope sender looks bulk, skipping check: $envfrom");
return 0;
}
}
my $from = lc($pms->get("From:addr"));
my $replyto = lc($pms->get("Reply-To:addr"));
my $from_is_fm = $self->_is_freemail($from);
my $replyto_is_fm = $self->_is_freemail($replyto);
dbg("From address: $from") if $from ne '';
dbg("Reply-To address: $replyto") if $replyto ne '';
if ($from_is_fm and $replyto_is_fm and ($from ne $replyto)) {
dbg("HIT! From and Reply-To are different freemails");
$self->_got_hit($pms, "$from, $replyto", "From and Reply-To are different freemails");
return 0;
}
if ($what eq 'replyto') {
if (!$replyto_is_fm) {
dbg("Reply-To is not freemail, skipping check");
return 0;
}
}
elsif ($what eq 'reply') {
if ($replyto ne '' and !$replyto_is_fm) {
dbg("Reply-To defined and is not freemail, skipping check");
return 0;
}
elsif (!$from_is_fm) {
dbg("No Reply-To and From is not freemail, skipping check");
return 0;
}
}
my $reply = $replyto_is_fm ? $replyto : $from;
return 0 unless $self->_parse_body($pms);
# Compare body to headers
if (scalar keys %{$pms->{freemail_cache}{body}}) {
my $check = $what eq 'replyto' ? $replyto : $reply;
dbg("comparing $check to body freemails");
foreach my $email (keys %{$pms->{freemail_cache}{body}}) {
if ($email ne $check) {
dbg("HIT! $check and $email are different freemails");
$self->_got_hit($pms, "$check, $email", "Different freemails in reply header and body");
return 0;
}
}
}
return 0;
}
1;