Date: Tue, 4 Mar 2014 04:10:01 GMT From: Takefu <takefu@airport.fm> To: perl@FreeBSD.org Subject: Re: ports/186756: [UPDATE] mail/p5-Mail-SpamAssassin: update to 3.4.0 Message-ID: <201403040410.s244A1IO042442@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
The following reply was made to PR ports/186756; it has been noted by GNATS. From: Takefu <takefu@airport.fm> To: bug-followup@FreeBSD.org Cc: Adam Weinberger <adamw@adamw.org> Subject: Re: ports/186756: [UPDATE] mail/p5-Mail-SpamAssassin: update to 3.4.0 Date: Tue, 04 Mar 2014 13:01:13 +0900 This is a multi-part message in MIME format. --------------060504000802010404090809 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit The patch worked well, with the exception of pkg-message, so I have changed that file. I also adjusted PORTREVISION for p5-Mail-SpamAssassin-Alt and ja-p5-Mail-SpamAssassin. The patch for ja-p5-Mail-SpamAssassin is fixed as well. --------------060504000802010404090809 Content-Type: text/plain; charset=Shift_JIS; name="p5-Mail-SpamAssassin.diff" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="p5-Mail-SpamAssassin.diff" diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/Makefile ./Makefile --- /usr/ports/mail/p5-Mail-SpamAssassin/Makefile 2014-03-04 03:19:16.000000000 +0900 +++ ./Makefile 2014-03-04 12:10:58.000000000 +0900 @@ -2,8 +2,8 @@ # $FreeBSD: head/mail/p5-Mail-SpamAssassin/Makefile 346947 2014-03-03 18:19:16Z adamw $ PORTNAME= Mail-SpamAssassin -PORTVERSION= 3.3.2 -PORTREVISION?= 9 # committer: please bump PORTREVISION on Slaves +PORTVERSION= 3.4.0 +PORTREVISION?= 0 # committer: please bump PORTREVISION on Slaves CATEGORIES?= mail perl5 MASTER_SITES= ${MASTER_SITE_APACHE:S/$/:apache/} ${MASTER_SITE_PERL_CPAN:S/$/:cpan/} MASTER_SITE_SUBDIR= spamassassin/source/:apache Mail/:cpan @@ -15,132 +15,92 @@ LICENSE= APACHE20 -BUILD_DEPENDS= p5-NetAddr-IP>=4.00.7:${PORTSDIR}/net-mgmt/p5-NetAddr-IP \ +BUILD_DEPENDS+= p5-NetAddr-IP>=4.00.7:${PORTSDIR}/net-mgmt/p5-NetAddr-IP \ p5-Net-DNS>=0.63:${PORTSDIR}/dns/p5-Net-DNS \ p5-HTML-Parser>=3.46:${PORTSDIR}/www/p5-HTML-Parser \ p5-libwww>=0:${PORTSDIR}/www/p5-libwww \ p5-Encode-Detect>=0:${PORTSDIR}/converters/p5-Encode-Detect \ p5-Mail-Tools>=0:${PORTSDIR}/mail/p5-Mail-Tools 
-RUN_DEPENDS:= ${BUILD_DEPENDS} +RUN_DEPENDS= ${BUILD_DEPENDS} CONFLICTS?= ja-p5-Mail-SpamAssassin-[0-9]* p5-Mail-SpamAssassin-devel-[0-9]* p5-Mail-SpamAssassin-Alt-[0-9]* USES= perl5 USE_PERL5= configure USE_LDCONFIG= yes -DBDIR?= /var/db + +DATADIR= ${PREFIX}/share/spamassassin +DBDIR?= ${BASEDIR}/var/db +DOCSDIR= ${PREFIX}/share/doc/${PKGNAMEPREFIX}${PORTNAME} ETCDIR?= ${PREFIX}/etc/mail/spamassassin + CONTACT_ADDRESS?= The administrator of that system USERS?= spamd GROUPS?= spamd CONFIGURE_ARGS= SYSCONFDIR="${PREFIX}/etc" \ CONTACT_ADDRESS="${CONTACT_ADDRESS}" \ - LOCALSTATEDIR="${DBDIR}/spamassassin" + LOCALSTATEDIR="${DBDIR}/spamassassin" \ + BUILD_SPAMC=yes -OPTIONS_DEFINE= AS_ROOT SPAMC SACOMPILE DKIM SSL GNUPG MYSQL PGSQL RAZOR \ +OPTIONS_DEFINE= AS_ROOT SACOMPILE DKIM SSL GNUPG MYSQL PGSQL RAZOR \ SPF_QUERY RELAY_COUNTRY DCC IPV6 -OPTIONS_DEFAULT= AS_ROOT SPAMC DKIM SSL GNUPG RAZOR +OPTIONS_DEFAULT= AS_ROOT DKIM SSL GNUPG RAZOR AS_ROOT_DESC= Run spamd as root (recommended) -SPAMC_DESC= Build spamd/spamc (not for amavisd) -SACOMPILE_DESC= sa-compile +DCC_DESC= Add DCC support (see LICENSE) DKIM_DESC= DKIM/DomainKeys Identified Mail -SSL_DESC= Build with SSL support for spamd/spamc -GNUPG_DESC= Install GnuPG (for sa-update) +GNUPG_DESC= Install GnuPG (for sa-update, optional) +IPV6_DESC= IPv6 sockets support RAZOR_DESC= Add Vipul's Razor support -SPF_QUERY_DESC= Add SPF query support RELAY_COUNTRY_DESC= Relay country support -DCC_DESC= Add DCC support (see LICENSE) -IPV6_DESC= IPv6 sockets support +SACOMPILE_DESC= Compile rulesets (improves speed) +SPF_QUERY_DESC= Add SPF query support +SSL_DESC= Build spamd/spamc with SSL support -SUB_FILES= pkg-install +SUB_FILES= pkg-install pkg-message SUB_LIST= USER=${USERS} GROUP=${GROUPS} INSTALL="${INSTALL}" +PLIST_SUB+= USER=${USERS} GROUP=${GROUPS} +OPTIONS_SUB=yes -.include <bsd.port.options.mk> +DCC_RUN_DEPENDS= dcc-dccd>=1.3.111:${PORTSDIR}/mail/dcc-dccd +DKIM_RUN_DEPENDS= 
p5-IO-Socket-SSL>=0:${PORTSDIR}/security/p5-IO-Socket-SSL \ + p5-Mail-DKIM>=0.37:${PORTSDIR}/mail/p5-Mail-DKIM \ + p5-Crypt-OpenSSL-RSA>=0.26_1:${PORTSDIR}/security/p5-Crypt-OpenSSL-RSA +GNUPG_RUN_DEPENDS= gnupg1>=1.4.7:${PORTSDIR}/security/gnupg1 +IPV6_RUN_DEPENDS= p5-IO-Socket-INET6>=0:${PORTSDIR}/net/p5-IO-Socket-INET6 +MYSQL_RUN_DEPENDS= p5-DBD-mysql>=0:${PORTSDIR}/databases/p5-DBD-mysql +PGSQL_RUN_DEPENDS= p5-DBD-Pg>=0:${PORTSDIR}/databases/p5-DBD-Pg +RAZOR_RUN_DEPENDS= razor-agents>=2.84:${PORTSDIR}/mail/razor-agents +RELAY_COUNTRY_RUN_DEPENDS= p5-IP-Country>=0:${PORTSDIR}/net/p5-IP-Country +SACOMPILE_RUN_DEPENDS= re2c>=.12.0:${PORTSDIR}/devel/re2c +SPF_QUERY_RUN_DEPENDS= p5-Mail-SPF>=0:${PORTSDIR}/mail/p5-Mail-SPF + +SSL_USE= OPENSSL=yes +SSL_RUN_DEPENDS= p5-IO-Socket-SSL>=0:${PORTSDIR}/security/p5-IO-Socket-SSL +SSL_CONFIGURE_ON= ENABLE_SSL=yes +SSL_CONFIGURE_OFF= ENABLE_SSL=no -.if ${PORT_OPTIONS:MSSL} -USE_OPENSSL= yes -.endif +DOCS= CREDITS Changes INSTALL NOTICE PACKAGING README TRADEMARK UPGRADE USAGE procmailrc.example +DOCSSQL= README README.awl README.bayes awl_mysql.sql awl_pg.sql bayes_mysql.sql bayes_pg.sql userpref_mysql.sql userpref_pg.sql +DOCSLDAP= README README.testing sa_test.ldif +PORTDOCS= ${DOCS} sql ldap -.if ${PORT_OPTIONS:MSPAMC} -CONFIGURE_ARGS+= BUILD_SPAMC=yes -.else -CONFIGURE_ARGS+= BUILD_SPAMC=no -WITH_AS_ROOT= -WITHOUT_SSL=1 -.endif +USE_RC_SUBR= sa-spamd -.if ${PORT_OPTIONS:MSPF_QUERY} -RUN_DEPENDS+= p5-Mail-SPF>=0:${PORTSDIR}/mail/p5-Mail-SPF -.endif -.if ${PORT_OPTIONS:MIPV6} -RUN_DEPENDS+= p5-IO-Socket-INET6>=0:${PORTSDIR}/net/p5-IO-Socket-INET6 -.endif +.include <bsd.port.pre.mk> .if ${PORT_OPTIONS:MSSL} -.include "${PORTSDIR}/Mk/bsd.openssl.mk" -RUN_DEPENDS+= p5-IO-Socket-SSL>=0:${PORTSDIR}/security/p5-IO-Socket-SSL CFLAGS+= -I${OPENSSLINC} LDFLAGS+= -L${OPENSSLLIB} -CONFIGURE_ARGS+= ENABLE_SSL=yes -PLIST_SUB+= SSL="" -.else -CONFIGURE_ARGS+= ENABLE_SSL=no -PLIST_SUB+= SSL="@comment " .endif -.if ${PORT_OPTIONS:MGNUPG} 
-RUN_DEPENDS+= gnupg1>=1.4.7:${PORTSDIR}/security/gnupg1 -.endif - -.if ${PORT_OPTIONS:MMYSQL} -RUN_DEPENDS+= p5-DBD-mysql>=0:${PORTSDIR}/databases/p5-DBD-mysql -.endif - -.if ${PORT_OPTIONS:MPGSQL} -RUN_DEPENDS+= p5-DBD-Pg>=0:${PORTSDIR}/databases/p5-DBD-Pg -.endif - -.include <bsd.port.pre.mk> - -.if ${PORT_OPTIONS:MRAZOR} -RUN_DEPENDS+= razor-agents>=2.84:${PORTSDIR}/mail/razor-agents -.else -.endif - -.if ${PORT_OPTIONS:MDKIM} -RUN_DEPENDS+= p5-IO-Socket-SSL>=0:${PORTSDIR}/security/p5-IO-Socket-SSL -RUN_DEPENDS+= p5-Mail-DKIM>=0.37:${PORTSDIR}/mail/p5-Mail-DKIM -RUN_DEPENDS+= p5-Crypt-OpenSSL-RSA>=0.26_1:${PORTSDIR}/security/p5-Crypt-OpenSSL-RSA -.endif - -.if ${PORT_OPTIONS:MSACOMPILE} -RUN_DEPENDS+= re2c>=.12.0:${PORTSDIR}/devel/re2c -.endif - -.if ${PORT_OPTIONS:MRELAY_COUNTRY} -RUN_DEPENDS+= p5-IP-Country>=0:${PORTSDIR}/net/p5-IP-Country -.endif - -.if ${PORT_OPTIONS:MDCC} -RUN_DEPENDS+= dcc-dccd>=1.3.111:${PORTSDIR}/mail/dcc-dccd -.endif - -DOCSDIR= ${PREFIX}/share/doc/${PKGNAMEPREFIX}${PORTNAME} -DATADIR= ${PREFIX}/share/spamassassin -DOCS= CREDITS Changes INSTALL NOTICE PACKAGING README TRADEMARK UPGRADE USAGE procmailrc.example -DOCSSQL= README README.awl README.bayes awl_mysql.sql awl_pg.sql bayes_mysql.sql bayes_pg.sql userpref_mysql.sql userpref_pg.sql -DOCSLDAP= README README.testing sa_test.ldif -PORTDOCS= ${DOCS} sql ldap - -USE_RC_SUBR= sa-spamd - .if ${PORT_OPTIONS:MMYSQL} || ${PORT_OPTIONS:MPGSQL} SUB_LIST+= SQL_FLAG="-Q" .else SUB_LIST+= SQL_FLAG="" .endif -.if ! 
${PORT_OPTIONS:MAS_ROOT} -SUB_LIST+= RUN_AS_USER="-u ${USERS} -H /var/spool/spamd" + +.if empty(PORT_OPTIONS:MAS_ROOT) +SUB_LIST+= RUN_AS_USER="-u ${USERS} -H ${BASEDIR}/var/spool/spamd" .else SUB_LIST+= RUN_AS_USER="" .endif @@ -153,6 +113,7 @@ -e 's#B_CONFDIR)/v312.pre#B_CONFDIR)/v312.pre.sample#g' \ -e 's#B_CONFDIR)/v320.pre#B_CONFDIR)/v320.pre.sample#g' \ -e 's#B_CONFDIR)/v330.pre#B_CONFDIR)/v330.pre.sample#g' \ + -e 's#B_CONFDIR)/v340.pre#B_CONFDIR)/v340.pre.sample#g' \ -e 's/require DBI/0/' \ ${WRKSRC}/Makefile.PL @${REINPLACE_CMD} -e '/^CC =/d; \ @@ -164,10 +125,10 @@ .if ${PORT_OPTIONS:MRELAY_COUNTRY} ${REINPLACE_CMD} -e '/RelayCountry/s/^# ?loadplugin/loadplugin/' ${WRKSRC}/rules/init.pre .endif -.if ! ${PORT_OPTIONS:MDKIM} +.if empty(PORT_OPTIONS:MDKIM) ${REINPLACE_CMD} -e '/DKIM/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/v312.pre .endif -.if ! ${PORT_OPTIONS:MSPF_QUERY} +.if empty(PORT_OPTIONS:MSPF_QUERY) ${REINPLACE_CMD} -e '/SPF/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/init.pre .endif .if ${PORT_OPTIONS:MDCC} @@ -178,31 +139,24 @@ .endif post-build: - @(cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libspamc.so) + (cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libspamc.so) .if ${PORT_OPTIONS:MSSL} - @(cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libsslspamc.so) + (cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libsslspamc.so) .endif pre-su-install: @${MKDIR} ${STAGEDIR}${DATADIR} - @${INSTALL_PROGRAM} ${WRKSRC}/spamc/libspamc.so ${STAGEDIR}${PREFIX}/lib/libspamc.so.0 + ${INSTALL_LIB} ${WRKSRC}/spamc/libspamc.so ${STAGEDIR}${PREFIX}/lib/libspamc.so.0 @${LN} -sf libspamc.so.0 ${STAGEDIR}${PREFIX}/lib/libspamc.so .if ${PORT_OPTIONS:MSSL} - @${INSTALL_PROGRAM} ${WRKSRC}/spamc/libsslspamc.so ${STAGEDIR}${PREFIX}/lib/libsslspamc.so.0 - @${LN} -sf libsslspamc.so.0 
${STAGEDIR}${PREFIX}/lib/libsslspamc.so + ${INSTALL_LIB} ${WRKSRC}/spamc/libsslspamc.so ${STAGEDIR}${PREFIX}/lib/libsslspamc.so.0 + ${LN} -sf libsslspamc.so.0 ${STAGEDIR}${PREFIX}/lib/libsslspamc.so .endif - @${INSTALL_DATA} ${WRKSRC}/spamc/libspamc.h ${STAGEDIR}${PREFIX}/include + ${INSTALL_DATA} ${WRKSRC}/spamc/libspamc.h ${STAGEDIR}${PREFIX}/include post-install:: -.if ${PORT_OPTIONS:MSPAMC} - @${STRIP_CMD} ${STAGEDIR}${PREFIX}/bin/spamc -.endif - -.if ${PORT_OPTIONS:MDOCS} + ${STRIP_CMD} ${STAGEDIR}${PREFIX}/bin/spamc + @${MKDIR} ${STAGEDIR}/var/lib/spamassassin ${STAGEDIR}${DBDIR}/spamassassin @${MKDIR} ${STAGEDIR}${DOCSDIR} ${STAGEDIR}${DOCSDIR}/sql ${STAGEDIR}${DOCSDIR}/ldap - @${INSTALL_DATA} ${DOCS:S|^|${WRKSRC}/|} ${STAGEDIR}${DOCSDIR} - @${INSTALL_DATA} ${DOCSSQL:S|^|${WRKSRC}/sql/|} ${STAGEDIR}${DOCSDIR}/sql - @${INSTALL_DATA} ${DOCSLDAP:S|^|${WRKSRC}/ldap/|} ${STAGEDIR}${DOCSDIR}/ldap -.endif .include <bsd.port.post.mk> diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/distinfo ./distinfo --- /usr/ports/mail/p5-Mail-SpamAssassin/distinfo 2014-01-23 00:30:13.000000000 +0900 +++ ./distinfo 2014-02-14 14:56:40.000000000 +0900 @@ -1,2 +1,2 @@ -SHA256 (Mail-SpamAssassin-3.3.2.tar.gz) = 5323038939a0ef9fc97d5264defce3ae1d95e98b3a94c4c3b583341c927f32df -SIZE (Mail-SpamAssassin-3.3.2.tar.gz) = 1208182 +SHA256 (Mail-SpamAssassin-3.4.0.tar.gz) = 244914c30976844878a7f129fd503eb40986c68a3800f416c3a68b14507c0a64 +SIZE (Mail-SpamAssassin-3.4.0.tar.gz) = 1269753 diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6624 ./files/patch-bug6624 --- /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6624 2014-01-23 02:40:44.000000000 +0900 +++ ./files/patch-bug6624 1970-01-01 09:00:00.000000000 +0900 @@ -1,88 +0,0 @@ ---- lib/Mail/SpamAssassin/BayesStore/MySQL.pm (revision 1138970) -+++ lib/Mail/SpamAssassin/BayesStore/MySQL.pm (working copy) -@@ -840,14 +840,28 @@ - return 0; - } - -+ # With ON DUPLICATE KEY UPDATE, the affected-rows value per row is 1 if 
-+ # the row is inserted as a new row and 2 if an existing row is updated. -+ # -+ # Due to a MySQL server bug a value of 3 can be seen. -+ # See: http://bugs.mysql.com/bug.php?id=46675 -+ # When executing the INSERT ... ON DUPLICATE KEY UPDATE statement -+ # and checking the rows return count: -+ # mysql_client_found_rows = 0: The second INSERT returns a row count -+ # of 2 in all MySQL versions. -+ # mysql_client_found_rows = 1: The second INSERT returns this row count: -+ # Before MySQL 5.1.20: 2 -+ # MySQL 5.1.20: undef on Mac OS X, 139775481 on Linux (garbage?) -+ # MySQL 5.1.21 and up: 3 -+ # - my $num_rows = $rc; - - $sth->finish(); - -- if ($num_rows == 1 || $num_rows == 2) { -+ if ($num_rows == 1 || $num_rows == 2 || $num_rows == 3) { - my $token_count_update = ''; - -- $token_count_update = "token_count = token_count + 1," if ($num_rows == 1); -+ $token_count_update = "token_count = token_count + 1," if $num_rows == 1; - $sql = "UPDATE bayes_vars SET - $token_count_update - newest_token_age = GREATEST(newest_token_age, ?), -@@ -872,7 +886,11 @@ - } - else { - # $num_rows was not what we expected -- dbg("bayes: _put_token: Updated an unexpected number of rows."); -+ my $token_displ = $token; -+ $token_displ =~ s/(.)/sprintf('%02x',ord($1))/egs; -+ dbg("bayes: _put_token: Updated an unexpected number of rows: %s, ". -+ "id: %s, token (hex): %s", -+ $num_rows, $self->{_userid}, $token_displ); - $self->{_dbh}->rollback(); - return 0; - } -@@ -987,8 +1005,24 @@ - else { - my $num_rows = $rc; - -- $need_atime_update_p = 1 if ($num_rows == 1 || $num_rows == 2); -- $new_tokens++ if ($num_rows == 1); -+ # With ON DUPLICATE KEY UPDATE, the affected-rows value per row is 1 if -+ # the row is inserted as a new row and 2 if an existing row is updated. 
-+ # But see MySQL bug (as above): http://bugs.mysql.com/bug.php?id=46675 -+ -+ if ($num_rows == 1) { -+ $new_tokens++; -+ $need_atime_update_p = 1; -+ } elsif ($num_rows == 2 || $num_rows == 3) { -+ $need_atime_update_p = 1; -+ } else { -+ # $num_rows was not what we expected -+ my $token_displ = $token; -+ $token_displ =~ s/(.)/sprintf('%02x',ord($1))/egs; -+ dbg("bayes: _put_tokens: Updated an unexpected number of rows: %s, ". -+ "id: %s, token (hex): %s", -+ $num_rows, $self->{_userid}, $token_displ); -+ $error_p = 1; -+ } - } - } - -@@ -1026,10 +1060,10 @@ - } - } - else { -- # $num_rows was not what we expected -- dbg("bayes: _put_tokens: Updated an unexpected number of rows."); -- $self->{_dbh}->rollback(); -- return 0; -+ info("bayes: _put_tokens: no atime updates needed? Num of tokens: %d", -+ scalar keys %{$tokens}); -+# $self->{_dbh}->rollback(); -+# return 0; - } - } - diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6655 ./files/patch-bug6655 --- /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6655 2014-01-23 02:40:44.000000000 +0900 +++ ./files/patch-bug6655 1970-01-01 09:00:00.000000000 +0900 @@ -1,50 +0,0 @@ -$FreeBSD: head/mail/p5-Mail-SpamAssassin/files/patch-bug6655 340725 2014-01-22 17:40:44Z mat $ - -https://issues.apache.org/SpamAssassin/show_bug.cgi?id=6655 - ---- lib/Mail/SpamAssassin/Util.pm 2011-06-06 19:59:17.000000000 -0400 -+++ lib/Mail/SpamAssassin/Util.pm 2011-08-26 17:12:19.000000000 -0400 -@@ -1025,6 +1024,8 @@ - return; - } - -+ opendir(my $dh, $tmpdir) || die "Could not open $tmpdir: $!"; -+ closedir $dh; - my ($reportfile, $tmpfile); - my $umask = umask 077; - -@@ -1052,7 +1053,10 @@ - - # ensure the file handle is not semi-open in some way - if ($tmpfile) { -- close $tmpfile or info("error closing $reportfile: $!"); -+ if (! 
close $tmpfile) { -+ info("error closing $reportfile: $!"); -+ $tmpfile=undef; -+ } - } - } - ---- sa-update.raw 2011-06-24 13:38:50.000000000 -0400 -+++ sa-update.raw 2011-08-29 09:38:50.000000000 -0400 -@@ -677,9 +677,9 @@ - - # Write the content out to a temp file for GPG/Archive::Tar interaction - dbg("channel: populating temp content file"); -- open(TMP, ">$content_file") || die "fatal: can't write to content temp file $content_file: $!\n"; -+ open(TMP, ">$content_file") || die "fatal: couldn't create content temp file $content_file: $!\n"; - binmode TMP; -- print TMP $content; -+ print TMP $content || die "fatal: can't write to content temp file $content_file: $!\n"; - close(TMP); - - # to sign : gpg -bas file -@@ -695,7 +695,7 @@ - die "fatal: couldn't create temp file for GPG signature: $!\n"; - } - binmode $tfh; -- print $tfh $GPG; -+ print $tfh $GPG || die "fatal: can't write temp file for GPG signature: $!\n"; - close($tfh); - - dbg("gpg: calling gpg"); diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6698 ./files/patch-bug6698 --- /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6698 2014-01-23 02:40:44.000000000 +0900 +++ ./files/patch-bug6698 1970-01-01 09:00:00.000000000 +0900 @@ -1,1471 +0,0 @@ ---- lib/Mail/SpamAssassin/Plugin/DCC.pm 2011-06-06 19:59:17.000000000 -0400 -+++ lib/Mail/SpamAssassin/Plugin/DCC.pm 2011-11-26 07:22:36.000000000 -0500 -@@ -15,6 +15,20 @@ - # limitations under the License. - # </@LICENSE> - -+# Changes since SpamAssassin 3.3.2: -+# support for DCC learning. See dcc_learn_score. 
-+# deal with orphan dccifd sockets -+# use `cdcc -q` to not stall waiting to find a DCC server when deciding -+# whether DCC checks are enabled -+# use dccproc -Q or dccifd query if a pre-existing X-DCC header shows -+# the message has already been reported -+# dccproc now uses -w /var/dcc/whiteclnt so it acts more like dccifd -+# warn about the use of ancient versions of dccproc and dccifd -+# turn off dccifd greylisting -+# query instead of reporting mail messages that contain X-DCC headers and -+# and so has probably already been reported -+# try harder to find dccproc and cdcc when not explicitly configured -+ - =head1 NAME - - Mail::SpamAssassin::Plugin::DCC - perform DCC check of messages -@@ -30,30 +44,31 @@ - - The DCC or Distributed Checksum Clearinghouse is a system of servers - collecting and counting checksums of millions of mail messages. --TheSpamAssassin.pm counts can be used by SpamAssassin to detect and --reject or filter spam. -- --Because simplistic checksums of spam can be easily defeated, the main --DCC checksums are fuzzy and ignore aspects of messages. The fuzzy --checksums are changed as spam evolves. -+The counts can be used by SpamAssassin to detect and filter spam. - --Note that DCC is disabled by default in C<init.pre> because it is not --open source. See the DCC license for more details. -+See http://www.dcc-servers.net/dcc/ for more information about DCC. - --See http://www.rhyolite.com/anti-spam/dcc/ for more information about --DCC. -+Note that DCC is disabled by default in C<v310.pre> because its use requires -+software that is not distributed with SpamAssassin and that has license -+restrictions for certain commercial uses. -+See the DCC license at http://www.dcc-servers.net/dcc/LICENSE for details. -+ -+Enable it by uncommenting the "loadplugin Mail::SpamAssassin::Plugin::DCC" -+confdir/v310.pre or by adding this line to your local.pre. 
It might also -+be necessary to install a DCC package, port, rpm, or equivalent from your -+operating system distributor or a tarball from the primary DCC source -+at http://www.dcc-servers.net/dcc/#download -+See also http://www.dcc-servers.net/dcc/INSTALL.html - - =head1 TAGS - - The following tags are added to the set, available for use in reports, - header fields, other plugins, etc.: - -- _DCCB_ DCC server ID in a response -- _DCCR_ response from DCC - header field body in X-DCC-*-Metrics -- _DCCREP_ response from DCC - DCC reputation in percents (0..100) -- --Tag _DCCREP_ provides a nonempty value only with commercial DCC systems. --This is the percentage of spam vs. ham sent from the first untrusted relay. -+ _DCCB_ DCC server ID in X-DCC-*-Metrics header field name -+ _DCCR_ X-DCC-*-Metrics header field body -+ _DCCREP_ DCC Reputation or percent bulk mail (0..100) from -+ commercial DCC software - - =cut - -@@ -75,8 +90,6 @@ - use vars qw(@ISA); - @ISA = qw(Mail::SpamAssassin::Plugin); - --use vars qw($have_inet6); -- - sub new { - my $class = shift; - my $mailsaobject = shift; -@@ -87,7 +100,7 @@ - - # are network tests enabled? - if ($mailsaobject->{local_tests_only}) { -- $self->{dcc_disabled} = 1; -+ $self->{use_dcc} = 0; - dbg("dcc: local tests only, disabling DCC"); - } - else { -@@ -128,20 +141,23 @@ - - =item dcc_fuz2_max NUMBER - --This option sets how often a message's body/fuz1/fuz2 checksum must have been --reported to the DCC server before SpamAssassin will consider the DCC check as --matched. -- --As nearly all DCC clients are auto-reporting these checksums, you should set --this to a relatively high value, e.g. C<999999> (this is DCC's MANY count). -+Sets how often a message's body/fuz1/fuz2 checksum must have been reported -+to the DCC server before SpamAssassin will consider the DCC check hit. -+C<999999> is DCC's MANY count. - - The default is C<999999> for all these options. 
- - =item dcc_rep_percent NUMBER - --Only commercial DCC systems provide DCC reputation information. This is the --percentage of spam vs. ham sent from the first untrusted relay. It will hit --on new spam from spam sources. Default is C<90>. -+Only the commercial DCC software provides DCC Reputations. A DCC Reputation -+is the percentage of bulk mail received from the last untrusted relay in the -+path taken by a mail message as measured by all commercial DCC installations. -+See http://www.rhyolite.com/dcc/reputations.html -+You C<must> whitelist your trusted relays or MX servers with MX or -+MXDCC lines in /var/dcc/whiteclnt as described in the main DCC man page -+to avoid seeing your own MX servers as sources of bulk mail. -+See http://www.dcc-servers.net/dcc/dcc-tree/dcc.html#White-and-Blacklists -+The default is C<90>. - - =cut - -@@ -189,13 +205,9 @@ - =item dcc_home STRING - - This option tells SpamAssassin where to find the dcc homedir. --If not given, it will try to get dcc to specify one, and if that fails it --will try dcc's own default homedir of '/var/dcc'. --If C<dcc_path> is not specified, it will default to looking in --C<dcc_home/bin> for dcc client instead of relying on SpamAssassin to find it --in the current PATH. If it isn't found there, it will look in the current --PATH. If a C<dccifd> socket is found in C<dcc_home> or specified explicitly, --it will use that interface instead of C<dccproc>. -+If not specified, try to use the locally configured directory -+from the C<cdcc homedir> command. -+Try /var/dcc if that command fails. 
- - =cut - -@@ -205,7 +217,7 @@ - type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, - code => sub { - my ($self, $key, $value, $line) = @_; -- if (!defined $value || !length $value) { -+ if (!defined $value || $value eq '') { - return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; - } - $value = untaint_file_path($value); -@@ -223,14 +235,16 @@ - - =item dcc_dccifd_path STRING - --This option tells SpamAssassin where to find the dccifd socket. If --C<dcc_dccifd_path> is not specified, it will default to looking for a socket --named C<dccifd> in a directory C<dcc_home>. The C<dcc_dccifd_path> can be --a Unix socket name (absolute path), or an INET socket specification in a form --C<[host]:port> or C<host:port>, where a host can be an IPv4 or IPv6 address --or a host name, and port is a TCP port number. In case of an IPv6 address the --brackets are required syntax. If a C<dccifd> socket is found, the plugin will --use it instead of C<dccproc>. -+This option tells SpamAssassin where to find the dccifd socket instead -+of a local Unix socket named C<dccifd> in the C<dcc_home> directory. -+If a socket is specified or found, use it instead of C<dccproc>. -+ -+If specifed, C<dcc_dccifd_path> is the absolute path of local Unix socket -+or an INET socket specified as C<[Host]:Port> or C<Host:Port>. -+Host can be an IPv4 or IPv6 address or a host name -+Port is a TCP port number. The brackets are required for an IPv6 address. -+ -+The default is C<undef>. 
- - =cut - -@@ -240,45 +254,60 @@ - type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, - code => sub { - my ($self, $key, $value, $line) = @_; -- $value = '' if !defined $value; -- $self->{dcc_dccifd_path_raw} = $value; # for logging purposes -- undef $self->{dcc_dccifd_host}; -- undef $self->{dcc_dccifd_port}; -- undef $self->{dcc_dccifd_socket}; -- local($1,$2,$3); -- if ($value eq '') { -+ -+ if (!defined $value || $value eq '') { - return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; -- } elsif ($value =~ m{^ (?: \[ ([^\]]*) \] | ([^:]*) ) : ([^:]*) \z}sx) { -- # "[host]:port" or "host:port", where a host can be an IPv4 or IPv6 -- # address or a host name, and port is a TCP port number or service name -- my $host = defined $1 ? $1 : $2; -- my $port = $3; -- $self->{dcc_dccifd_host} = untaint_var($host); -- $self->{dcc_dccifd_port} = untaint_var($port); -- dbg("config: dcc_dccifd_path set to [%s]:%s", $host,$port); -- } else { # assume a unix socket -+ } -+ -+ local($1,$2,$3); -+ if ($value =~ m{^ (?: \[ ([^\]]*) \] | ([^:]*) ) : ([^:]*) \z}sx) { -+ my $host = untaint_var(defined $1 ? 
$1 : $2); -+ my $port = untaint_var($3); -+ if (!$host) { -+ info("config: missing or bad host name in dcc_dccifd_path '$value'"); -+ return $Mail::SpamAssassin::Conf::INVALID_VALUE; -+ } -+ if (!$port || $port !~ /^\d+\z/ || $port < 1 || $port > 65535) { -+ info("config: bad TCP port number in dcc_dccifd_path '$value'"); -+ return $Mail::SpamAssassin::Conf::INVALID_VALUE; -+ } -+ -+ $self->{dcc_dccifd_host} = $host; -+ $self->{dcc_dccifd_port} = $port; -+ if ($host !~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/) { -+ # remember to try IPv6 if we can with a host name or non-IPv4 address -+ $self->{dcc_dccifd_IPv6} = eval { require IO::Socket::INET6 }; -+ } -+ dbg("config: dcc_dccifd_path set to [%s]:%s", $host, $port); -+ -+ } else { -+ # assume a unix socket - if ($value !~ m{^/}) { -- info("config: dcc_dccifd_path should be an absolute socket path"); -+ info("config: dcc_dccifd_path '$value' is not an absolute path"); - # return $Mail::SpamAssassin::Conf::INVALID_VALUE; # abort or accept? - } - $value = untaint_file_path($value); -- # test disabled, dccifd may not yet be running at spamd startup time -- # if (!-S $value) { -- # info("config: dcc_dccifd_path '$value' isn't a local socket"); -- # return $Mail::SpamAssassin::Conf::INVALID_VALUE; -- # } -+ - $self->{dcc_dccifd_socket} = $value; - dbg("config: dcc_dccifd_path set to local socket %s", $value); -+ dbg("dcc: dcc_dccifd_path set to local socket %s", $value); - } -+ -+ $self->{dcc_dccifd_path_raw} = $value; - } - }); - - =item dcc_path STRING - --This option tells SpamAssassin specifically where to find the C<dccproc> --client instead of relying on SpamAssassin to find it in the current PATH. --Note that if I<taint mode> is enabled in the Perl interpreter, you should --use this, as the current PATH will have been cleared. -+Where to find the C<dccproc> client program instead of relying on SpamAssassin -+to find it in the current PATH or C<dcc_home/bin>. 
This must often be set, -+because the current PATH is cleared by I<taint mode> in the Perl interpreter, -+ -+If a C<dccifd> socket is found in C<dcc_home> or specified explicitly -+with C<dcc_dccifd_path>, use the C<dccifd(8)> interface instead of C<dccproc>. -+ -+The default is C<undef>. -+ - - =cut - -@@ -289,12 +318,12 @@ - type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, - code => sub { - my ($self, $key, $value, $line) = @_; -- if (!defined $value || !length $value) { -+ if (!defined $value || $value eq '') { - return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; - } - $value = untaint_file_path($value); - if (!-x $value) { -- info("config: dcc_path '$value' isn't an executable"); -+ info("config: dcc_path '$value' is not executable"); - return $Mail::SpamAssassin::Conf::INVALID_VALUE; - } - -@@ -304,7 +333,7 @@ - - =item dcc_options options - --Specify additional options to the dccproc(8) command. Please note that only -+Specify additional options to the dccproc(8) command. Only - characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons. - - The default is C<undef>. -@@ -319,6 +348,7 @@ - code => sub { - my ($self, $key, $value, $line) = @_; - if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) { -+ info("config: dcc_options '$value' contains impermissible characters"); - return $Mail::SpamAssassin::Conf::INVALID_VALUE; - } - $self->{dcc_options} = $1; -@@ -327,8 +357,9 @@ - - =item dccifd_options options - --Specify additional options to send to the dccifd(8) daemon. Please note that only --characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons. -+Specify additional options to send to the dccifd daemon with -+the ASCII protocol described on the dccifd(8) man page. -+Only characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons. - - The default is C<undef>. 
- -@@ -342,265 +373,306 @@ - code => sub { - my ($self, $key, $value, $line) = @_; - if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) { -+ info("config: dccifd_options '$value' contains impermissible characters"); - return $Mail::SpamAssassin::Conf::INVALID_VALUE; - } - $self->{dccifd_options} = $1; - } - }); - -+=item dcc_learn_score n (default: undef) -+ -+Report messages with total scores this much larger than the -+SpamAssassin spam threshold to DCC as spam. -+ -+=cut -+ -+ push (@cmds, { -+ setting => 'dcc_learn_score', -+ is_admin => 1, -+ default => undef, -+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC, -+ }); -+ - $conf->{parser}->register_commands(\@cmds); - } - -+ -+ -+ -+sub ck_dir { -+ my ($self, $dir, $tgt, $src) = @_; -+ -+ $dir = untaint_file_path($dir); -+ if (!stat($dir)) { -+ my $dir_errno = 0+$!; -+ if ($dir_errno == ENOENT) { -+ dbg("dcc: $tgt $dir from $src does not exist"); -+ } else { -+ dbg("dcc: $tgt $dir from $src is not accessible: $!"); -+ } -+ return; -+ } -+ if (!-d _) { -+ dbg("dcc: $tgt $dir from $src is not a directory"); -+ return; -+ } -+ -+ $self->{main}->{conf}->{$tgt} = $dir; -+ dbg("dcc: use '$tgt $dir' from $src"); -+} -+ - sub find_dcc_home { - my ($self) = @_; -+ my $dcc_libexec; -+ -+ # just once -+ return if defined $self->{dcc_version}; -+ $self->{dcc_version} = '?'; - - my $conf = $self->{main}->{conf}; -- return if !$conf->{use_dcc}; - -- my $dcchome = $conf->{dcc_home} || ''; - -- # If we're not given the DCC homedir, try getting DCC to tell us it. -- # If that fails, try the DCC default homedir of '/var/dcc'. -- if ($dcchome eq '') { -+ # Get the DCC software version for talking to dccifd and formating the -+ # dccifd options and the built-in DCC homedir. Use -q to prevent delays. 
-+ my $cdcc_home; -+ my $cdcc = $self->dcc_pgm_path('cdcc'); -+ my $cmd = '-qV homedir libexecdir'; -+ if ($cdcc && open(CDCC, "$cdcc $cmd 2>&1 |")) { -+ my $cdcc_output = do { local $/ = undef; <CDCC> }; -+ close CDCC; - -- my $cdcc = Mail::SpamAssassin::Util::find_executable_in_env_path('cdcc'); -+ $cdcc_output =~ s/\n/ /g; # everything in 1 line for debugging -+ dbg("dcc: `%s %s` reports '%s'", $cdcc, $cmd, $cdcc_output); -+ $self->{dcc_version} = ($cdcc_output =~ /^(\d+\.\d+\.\d+)/) ? $1 : ''; -+ $cdcc_home = ($cdcc_output =~ /\s+homedir=(\S+)/) ? $1 : ''; -+ if ($cdcc_output =~ /\s+libexecdir=(\S+)/) { -+ $self->ck_dir($1, 'dcc_libexec', 'cdcc'); -+ } -+ } - -- my $cdcc_home = ''; -- if ($cdcc && -x $cdcc && open(CDCC, "$cdcc homedir 2>&1|")) { -- dbg("dcc: dcc_home not set, querying cdcc utility"); -- $cdcc_home = <CDCC> || ''; -- close CDCC; -+ # without a home, try the homedir from cdcc -+ if (!$conf->{dcc_home} && $cdcc_home) { -+ $self->ck_dir($cdcc_home, 'dcc_home', 'cdcc'); -+ } -+ # finally fall back to /var/dcc -+ if (!$conf->{dcc_home}) { -+ $self->ck_dir($conf->{dcc_home} = '/var/dcc', 'dcc_home', 'default') -+ } - -- chomp $cdcc_home; -- $cdcc_home =~ s/\s+homedir=//; -- dbg("dcc: cdcc reports homedir as '%s'", $cdcc_home); -- } -- -- # try first with whatever the cdcc utility reported -- my $cdcc_home_errno = 0; -- if ($cdcc_home eq '') { -- $cdcc_home_errno = ENOENT; -- } elsif (!stat($cdcc_home)) { -- $cdcc_home_errno = 0+$!; -- } -- if ($cdcc_home_errno == ENOENT) { -- # no such file -- } elsif ($cdcc_home_errno != 0) { -- dbg("dcc: cdcc reported homedir $cdcc_home is not accessible: $!"); -- } elsif (!-d _) { -- dbg("dcc: cdcc reported homedir $cdcc_home is not a directory"); -- } else { # ok -- dbg("dcc: cdcc reported homedir $cdcc_home exists, using it"); -- $dcchome = untaint_var($cdcc_home); -- } -- -- # try falling back to /var/dcc -- if ($dcchome eq '') { -- my $var_dcc_errno = stat('/var/dcc') ? 
0 : 0+$!; -- if ($var_dcc_errno == ENOENT) { -- # no such file -- } elsif ($var_dcc_errno != 0) { -- dbg("dcc: dcc_home not set and dcc default homedir /var/dcc ". -- "is not accessible: $!"); -- } elsif (!-d _) { -- dbg("dcc: dcc_home not set and dcc default homedir /var/dcc ". -- "is not a directory"); -- } else { # ok -- dbg("dcc: dcc_home not set but dcc default homedir /var/dcc exists, ". -- "using it"); -- $dcchome = '/var/dcc'; -+ # fall back to $conf->{dcc_home}/libexec or /var/dcc/libexec for dccsight -+ if (!$conf->{dcc_libexec}) { -+ $self->ck_dir($conf->{dcc_home} . '/libexec', 'dcc_libexec', 'dcc_home'); - } -+ if (!$conf->{dcc_libexec}) { -+ $self->ck_dir('/var/dcc/libexec', 'dcc_libexec', 'dcc_home'); - } - -- if ($dcchome eq '') { -- dbg("dcc: unable to get homedir from cdcc ". -- "and the dcc default homedir was not found"); -- } -- -- # Remember found homedir path -- dbg("dcc: using '%s' as DCC homedir", $dcchome); -- $conf->{dcc_home} = $dcchome; -+ # format options for dccifd -+ my $opts = ($conf->{dccifd_options} || '') . "\n"; -+ if ($self->{dcc_version} =~ /\d+\.(\d+)\.(\d+)$/ && -+ ($1 < 3 || ($1 == 3 && $2 < 123))) { -+ if ($1 < 3 || ($1 == 3 && $2 < 50)) { -+ info("dcc: DCC version $self->{dcc_version} is years old, ". -+ "obsolete, and likely to cause problems. ". -+ "See http://www.dcc-servers.net/dcc/old-versions.html"); -+ } -+ $self->{dccifd_lookup_options} = "header " . $opts; -+ $self->{dccifd_report_options} = "header spam " . $opts; -+ } else { -+ # dccifd after version 1.2.123 understands "cksums" and "no-grey" -+ $self->{dccifd_lookup_options} = "cksums grey-off " . $opts; -+ $self->{dccifd_report_options} = "header spam grey-off " . 
$opts; - } - } - --sub is_dccifd_available { -- my ($self) = @_; -- -+sub dcc_pgm_path { -+ my ($self, $pgm) = @_; -+ my $pgmpath; - my $conf = $self->{main}->{conf}; -- $self->{dccifd_available} = 0; - -- if (!$conf->{use_dcc}) { -- dbg("dcc: dccifd is not available: use_dcc is false"); -- } elsif (defined $conf->{dcc_dccifd_host}) { -- dbg("dcc: dccifd inet socket chosen: [%s]:%s", -- $conf->{dcc_dccifd_host}, $conf->{dcc_dccifd_port}); -- $self->{dccifd_available} = 1; -- } else { -- my $sockpath = $conf->{dcc_dccifd_socket}; -- my $dcchome = $conf->{dcc_home}; -- if (defined $sockpath) { -- dbg("dcc: dccifd local socket chosen: %s", $sockpath); -- } elsif (defined $conf->{dcc_dccifd_path_raw}) { -- # avoid falling back to defaults if explicitly provided but wrong -- } elsif (defined $dcchome && $dcchome ne '' && -S "$dcchome/dccifd") { -- $sockpath = "$dcchome/dccifd"; -- $conf->{dcc_dccifd_socket} = $sockpath; -- dbg("dcc: dccifd default local socket chosen: %s", $sockpath); -+ $pgmpath = $conf->{dcc_path}; -+ if (defined $pgmpath && $pgmpath ne '') { -+ # accept explicit setting for dccproc -+ return $pgmpath if $pgm eq 'dccproc'; -+ # try adapting it for cdcc and everything else -+ if ($pgmpath =~ s{[^/]+\z}{$pgm}s) { -+ $pgmpath = untaint_file_path($pgmpath); -+ if (-x $pgmpath) { -+ dbg("dcc: dcc_pgm_path, found %s in dcc_path: %s", $pgm,$pgmpath); -+ return $pgmpath; - } -- if (defined $sockpath && -S $sockpath && -w _ && -r _) { -- $self->{dccifd_available} = 1; -- } elsif (!defined $conf->{dcc_dccifd_path_raw}) { -- dbg("dcc: dccifd is not available: no r/w dccifd socket found"); -- } else { -- dbg("dcc: dccifd is not available: no r/w dccifd socket found: %s", -- $conf->{dcc_dccifd_path_raw}); - } - } - -- return $self->{dccifd_available}; -+ $pgmpath = Mail::SpamAssassin::Util::find_executable_in_env_path($pgm); -+ if (defined $pgmpath) { -+ dbg("dcc: dcc_pgm_path, found %s in env.path: %s", $pgm,$pgmpath); -+ return $pgmpath; -+ } -+ -+ # try 
dcc_home/bin, dcc_libexec, and some desperate last attempts -+ foreach my $dir ($conf->{dcc_home}.'/bin', $conf->{dcc_libexec}, -+ '/usr/local/bin', '/usr/local/dcc', '/var/dcc') { -+ $pgmpath = $dir . '/' . $pgm; -+ if (-x $pgmpath) { -+ dbg("dcc: dcc_pgm_path, found %s in %s: %s", $pgm,$dir,$pgmpath); -+ return $pgmpath; -+ } -+ } -+ -+ return; - } - --sub is_dccproc_available { -+sub is_dccifd_available { - my ($self) = @_; - my $conf = $self->{main}->{conf}; - -- $self->{dccproc_available} = 0; -+ # dccifd remains available until it breaks -+ return $self->{dccifd_available} if $self->{dccifd_available}; - -- if (!$conf->{use_dcc}) { -- dbg("dcc: dccproc is not available: use_dcc is false"); -- return 0; -+ # deal with configured INET socket -+ if (defined $conf->{dcc_dccifd_host}) { -+ dbg("dcc: dccifd is available via INET socket [%s]:%s", -+ $conf->{dcc_dccifd_host}, $conf->{dcc_dccifd_port}); -+ return ($self->{dccifd_available} = 1); - } -- my $dcchome = $conf->{dcc_home} || ''; -- my $dccproc = $conf->{dcc_path} || ''; - -- if ($dccproc eq '' && ($dcchome ne '' && -x "$dcchome/bin/dccproc")) { -- $dccproc = "$dcchome/bin/dccproc"; -+ # the first time here, compute a default local socket based on DCC home -+ # from self->find_dcc_home() called elsewhere -+ my $sockpath = $conf->{dcc_dccifd_socket}; -+ if (!$sockpath) { -+ if ($conf->{dcc_dccifd_path_raw}) { -+ $sockpath = $conf->{dcc_dccifd_path_raw}; -+ } else { -+ $sockpath = "$conf->{dcc_home}/dccifd"; - } -- if ($dccproc eq '') { -- $dccproc = Mail::SpamAssassin::Util::find_executable_in_env_path('dccproc'); -+ $conf->{dcc_dccifd_socket} = $sockpath; - } - -- unless (defined $dccproc && $dccproc ne '' && -x $dccproc) { -- dbg("dcc: dccproc is not available: no dccproc executable found"); -- return 0; -- } -+ # check the socket every time because it can appear and disappear -+ return ($self->{dccifd_available} = 1) if (-S $sockpath && -w _ && -r _); - -- # remember any found dccproc -+ dbg("dcc: dccifd 
is not available; no r/w socket at %s", $sockpath); -+ return ($self->{dccifd_available} = 0); -+} -+ -+sub is_dccproc_available { -+ my ($self) = @_; -+ my $conf = $self->{main}->{conf}; -+ -+ # dccproc remains (un)available so check only once -+ return $self->{dccproc_available} if defined $self->{dccproc_available}; -+ -+ my $dccproc = $conf->{dcc_path}; -+ if (!defined $dccproc || $dccproc eq '') { -+ $dccproc = $self->dcc_pgm_path('dccproc'); - $conf->{dcc_path} = $dccproc; -+ if (!$dccproc || ! -x $dccproc) { -+ dbg("dcc: dccproc is not available: no dccproc executable found"); -+ return ($self->{dccproc_available} = 0); -+ } -+ } - -- dbg("dcc: dccproc is available: %s", $conf->{dcc_path}); -- $self->{dccproc_available} = 1; -- return 1; -+ dbg("dcc: %s is available", $conf->{dcc_path}); -+ return ($self->{dccproc_available} = 1); - } - - sub dccifd_connect { -- my($self) = @_; -+ my($self, $tag) = @_; - my $conf = $self->{main}->{conf}; - my $sockpath = $conf->{dcc_dccifd_socket}; -- my $host = $conf->{dcc_dccifd_host}; -- my $port = $conf->{dcc_dccifd_port}; - my $sock; -+ - if (defined $sockpath) { -- dbg("dcc: connecting to a local socket %s", $sockpath); -- $sock = IO::Socket::UNIX->new( -- Type => SOCK_STREAM, Peer => $sockpath); -- $sock or die "dcc: failed to connect to a socket $sockpath: $!\n"; -- } elsif (defined $host) { -- my $specified_path = $conf->{dcc_dccifd_path_raw}; -- if ($host eq '') { -- die "dcc: empty host specification: $specified_path\n"; -- } -- if (!defined $port || $port !~ /^\d+\z/ || $port < 1 || $port > 65535) { -- die "dcc: bad TCP port number: $specified_path\n"; -- } -- my $is_inet4 = $host =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/; -- if ($is_inet4) { # inet4 socket (IPv4 address) -- dbg("dcc: connecting to inet4 socket [%s]:%s", $host,$port); -- $sock = IO::Socket::INET->new( -- Proto => 'tcp', PeerAddr => $host, PeerPort => $port); -- } else { -- if (!defined $have_inet6) { -- $have_inet6 = eval { require 
IO::Socket::INET6 }; -- $have_inet6 = 0 if !defined $have_inet6; -+ $sock = IO::Socket::UNIX->new(Type => SOCK_STREAM, Peer => $sockpath); -+ if ($sock) { -+ dbg("$tag connected to local socket %s", $sockpath); -+ return $sock; - } -- if (!$have_inet6) { # fallback to an inet4 socket (IPv4) -- dbg("dcc: connecting(2) to inet4 socket [%s]:%s", $host,$port); -- $sock = IO::Socket::INET->new( -- Proto => 'tcp', PeerAddr => $host, PeerPort => $port); -- } else { # inet6 socket (IPv6) or a host name -- dbg("dcc: connecting to inet6 socket [%s]:%s", $host,$port); -+ $self->{dccifd_available} = 0; -+ info("$tag failed to connect to local socket $sockpath"); -+ return $sock -+ } -+ -+ # must be TCP/IP -+ my $host = $conf->{dcc_dccifd_host}; -+ my $port = $conf->{dcc_dccifd_port}; -+ -+ if ($conf->{dcc_dccifd_IPv6}) { -+ # try IPv6 if we can with a host name or non-IPv4 address -+ dbg("$tag connecting to inet6 socket [%s]:%s", $host,$port); - $sock = IO::Socket::INET6->new( - Proto => 'tcp', PeerAddr => $host, PeerPort => $port); -+ # fall back to IPv4 if that failed - } -+ if (!$sock) { -+ dbg("$tag connecting to inet4 socket [%s]:%s", $host, $port); -+ $sock = IO::Socket::INET->new( -+ Proto => 'tcp', PeerAddr => $host, PeerPort => $port); - } -- $sock or die "dcc: failed to connect to [$host]:$port : $!\n"; -- } else { -- die "dcc: dccifd socket not provided: $conf->{dcc_dccifd_path_raw}\n"; -- } -+ -+ info("failed to connect to [$host]:$port : $!") if !$sock; - return $sock; - } - -+# check for dccifd every time in case enough uses of dccproc starts dccifd - sub get_dcc_interface { - my ($self) = @_; -+ my $conf = $self->{main}->{conf}; - -- if ($self->is_dccifd_available()) { -- $self->{dcc_interface} = "dccifd"; -- $self->{dcc_disabled} = 0; -- } -- elsif ($self->is_dccproc_available()) { -- $self->{dcc_interface} = "dccproc"; -- $self->{dcc_disabled} = 0; -+ if (!$conf->{use_dcc}) { -+ $self->{dcc_disabled} = 1; -+ return; - } -- else { -- dbg("dcc: dccifd and 
dccproc are not available, disabling DCC"); -- $self->{dcc_interface} = "none"; -+ -+ $self->find_dcc_home(); -+ if (!$self->is_dccifd_available() && !$self->is_dccproc_available()) { -+ dbg("dcc: dccifd and dccproc are not available"); - $self->{dcc_disabled} = 1; - } -+ -+ $self->{dcc_disabled} = 0; - } - - sub dcc_query { -- my ($self, $permsgstatus, $full) = @_; -+ my ($self, $permsgstatus, $fulltext) = @_; - - $permsgstatus->{dcc_checked} = 1; - -+ if (!$self->{main}->{conf}->{use_dcc}) { -+ dbg("dcc: DCC is not available: use_dcc is 0"); -+ return; -+ } -+ - # initialize valid tags - $permsgstatus->{tag_data}->{DCCB} = ""; - $permsgstatus->{tag_data}->{DCCR} = ""; - $permsgstatus->{tag_data}->{DCCREP} = ""; - -- # short-circuit if there's already a X-DCC header with value of -- # "bulk" from an upstream DCC check -- if ($permsgstatus->get('ALL') =~ -- /^(X-DCC-([^:]{1,80})?-?Metrics:.*bulk.*)$/m) { -- $permsgstatus->{dcc_response} = $1; -+ if ($$fulltext eq '') { -+ dbg("dcc: empty message; skipping dcc check"); - return; - } - -- my $timer = $self->{main}->time_method("check_dcc"); -+ if ($permsgstatus->get('ALL') =~ /^(X-DCC-.*-Metrics:.*)$/m) { -+ $permsgstatus->{dcc_raw_x_dcc} = $1; -+ # short-circuit if there is already a X-DCC header with value of -+ # "bulk" from an upstream DCC check -+ # require "bulk" because then at least one body checksum will be "many" -+ # and so we know the X-DCC header is not forged by spammers -+ return if $permsgstatus->{dcc_raw_x_dcc} =~ / bulk /; -+ } - -- $self->find_dcc_home(); -+ my $timer = $self->{main}->time_method("check_dcc"); - - $self->get_dcc_interface(); -- my $result; -- if ($self->{dcc_disabled}) { -- $result = 0; -- } elsif ($$full eq '') { -- dbg("dcc: empty message, skipping dcc check"); -- $result = 0; -- } elsif ($self->{dccifd_available}) { -- my $client = $permsgstatus->{relays_external}->[0]->{ip}; -- my $clientname = $permsgstatus->{relays_external}->[0]->{rdns}; -- my $helo = 
$permsgstatus->{relays_external}->[0]->{helo} || ""; -- if ($client) { -- $client = $client . "\r" . $clientname if $clientname; -- } else { -- $client = "0.0.0.0"; -- } -- $self->dccifd_lookup($permsgstatus, $full, $client, $clientname, $helo); -- } else { -- my $client = $permsgstatus->{relays_external}->[0]->{ip}; -- $self->dccproc_lookup($permsgstatus, $full, $client); -- } -+ return if $self->{dcc_disabled}; -+ -+ my $envelope = $permsgstatus->{relays_external}->[0]; -+ ($permsgstatus->{dcc_raw_x_dcc}, -+ $permsgstatus->{dcc_cksums}) = $self->ask_dcc("dcc:", $permsgstatus, -+ $fulltext, $envelope); - } - - sub check_dcc { -@@ -609,28 +681,27 @@ - - $self->dcc_query($permsgstatus, $full) if !$permsgstatus->{dcc_checked}; - -- my $response = $permsgstatus->{dcc_response}; -- return 0 if !defined $response || $response eq ''; -+ my $x_dcc = $permsgstatus->{dcc_raw_x_dcc}; -+ return 0 if !defined $x_dcc || $x_dcc eq ''; - -- local($1,$2); -- if ($response =~ /^X-DCC-(.*)-Metrics: (.*)$/) { -- $permsgstatus->{tag_data}->{DCCB} = $1; -- $permsgstatus->{tag_data}->{DCCR} = $2; -+ if ($x_dcc =~ /^X-DCC-(.*)-Metrics: (.*)$/) { -+ $permsgstatus->set_tag('DCCB', $1); -+ $permsgstatus->set_tag('DCCR', $2); - } -- $response =~ s/many/999999/ig; -- $response =~ s/ok\d?/0/ig; -+ $x_dcc =~ s/many/999999/ig; -+ $x_dcc =~ s/ok\d?/0/ig; - - my %count = (body => 0, fuz1 => 0, fuz2 => 0, rep => 0); -- if ($response =~ /\bBody=(\d+)/) { -+ if ($x_dcc =~ /\bBody=(\d+)/) { - $count{body} = $1+0; - } -- if ($response =~ /\bFuz1=(\d+)/) { -+ if ($x_dcc =~ /\bFuz1=(\d+)/) { - $count{fuz1} = $1+0; - } -- if ($response =~ /\bFuz2=(\d+)/) { -+ if ($x_dcc =~ /\bFuz2=(\d+)/) { - $count{fuz2} = $1+0; - } -- if ($response =~ /\brep=(\d+)/) { -+ if ($x_dcc =~ /\brep=(\d+)/) { - $count{rep} = $1+0; - } - if ($count{body} >= $conf->{dcc_body_max} || -@@ -651,185 +722,185 @@ - } - - sub check_dcc_reputation_range { -- my ($self, $permsgstatus, $full, $min, $max) = @_; -- 
$self->dcc_query($permsgstatus, $full) if !$permsgstatus->{dcc_checked}; -+ my ($self, $permsgstatus, $fulltext, $min, $max) = @_; -+ -+ # this is called several times per message, so parse the X-DCC header once -+ my $dcc_rep = $permsgstatus->{dcc_rep}; -+ if (!defined $dcc_rep) { -+ $self->dcc_query($permsgstatus, $fulltext) if !$permsgstatus->{dcc_checked}; -+ my $x_dcc = $permsgstatus->{dcc_raw_x_dcc}; -+ if (defined $x_dcc && $x_dcc =~ /\brep=(\d+)/) { -+ $dcc_rep = $1+0; -+ $permsgstatus->set_tag('DCCREP', $dcc_rep); -+ } else { -+ $dcc_rep = -1; -+ } -+ $permsgstatus->{dcc_rep} = $dcc_rep; -+ } - -- my $response = $permsgstatus->{dcc_response}; -- return 0 if !defined $response || $response eq ''; -+ # no X-DCC header or no reputation in the X-DCC header, perhaps for lack -+ # of data in the DCC Reputation server -+ return 0 if $dcc_rep < 0; - -+ # cover the entire range of reputations if not told otherwise - $min = 0 if !defined $min; -- $max = 999 if !defined $max; -+ $max = 100 if !defined $max; - -- local $1; -- my $dcc_rep; -- $dcc_rep = $1+0 if defined $response && $response =~ /\brep=(\d+)/; -- if (defined $dcc_rep) { -- $dcc_rep = int($dcc_rep); # just in case, rule ranges are integer percents - my $result = $dcc_rep >= $min && $dcc_rep <= $max ? 1 : 0; - dbg("dcc: dcc_rep %s, min %s, max %s => result=%s", - $dcc_rep, $min, $max, $result?'YES':'no'); -- $permsgstatus->{tag_data}->{DCCREP} = $dcc_rep; -- return $dcc_rep >= $min && $dcc_rep <= $max ? 1 : 0; -+ return $result; -+} -+ -+# get the X-DCC header line and save the checksums from dccifd or dccproc -+sub parse_dcc_response { -+ my ($self, $resp) = @_; -+ my ($raw_x_dcc, $cksums); -+ -+ # The first line is the header we want. It uses SMTP folded whitespace -+ # if it is long. The folded whitespace is always a single \t. 
-+ chomp($raw_x_dcc = shift @$resp); -+ my $v; -+ while (($v = shift @$resp) && $v =~ s/^\t(.+)\s*\n/ $1/) { -+ $raw_x_dcc .= $v; -+ } -+ -+ # skip the "reported:" line between the X-DCC header and any checksums -+ # remove ':' to avoid a bug in versions 1.3.115 - 1.3.122 in dccsight -+ # with the length of "Message-ID:" -+ $cksums = ''; -+ while (($v = shift @$resp) && $v =~ s/^([^:]*):/$1/) { -+ $cksums .= $v; - } -- return 0; -+ -+ return ($raw_x_dcc, $cksums); - } - --sub dccifd_lookup { -- my ($self, $permsgstatus, $fulltext, $client, $clientname, $helo) = @_; -+sub ask_dcc { -+ my ($self, $tag, $permsgstatus, $fulltext, $envelope) = @_; - my $conf = $self->{main}->{conf}; -- my $response; -- my $left; -- my $right; -- my $timeout = $conf->{dcc_timeout}; -- my $opts = $conf->{dccifd_options}; -- my @opts = !defined $opts ? () : split(' ',$opts); -+ my ($pgm, $err, $sock, $pid, @resp); -+ my ($client, $clientname, $helo, $opts); - - $permsgstatus->enter_helper_run_mode(); - -+ my $timeout = $conf->{dcc_timeout}; - my $timer = Mail::SpamAssassin::Timeout->new( - { secs => $timeout, deadline => $permsgstatus->{master_deadline} }); -- my $err = $timer->run_and_catch(sub { - -+ $err = $timer->run_and_catch(sub { - local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" }; - -- my $sock = $self->dccifd_connect(); -- $sock or die "dcc: failed to connect to a dccifd socket"; -- -- # send the options and other parameters to the daemon -- $sock->print("header " . join(" ",@opts) . "\n") -- or die "dcc: failed write"; # options -- $sock->print($client . "\n") or die "dcc: failed write"; # client -- $sock->print($helo . 
"\n") or die "dcc: failed write"; # HELO value -- $sock->print("\n") or die "dcc: failed write"; # sender -- $sock->print("unknown\r\n") or die "dcc: failed write"; # recipients -- $sock->print("\n") or die "dcc: failed write"; # recipients -- -- $sock->print($$fulltext) or die "dcc: failed write"; -- -- $sock->shutdown(1) or die "dcc: failed socket shutdown: $!"; -- -- $sock->getline() or die "dcc: failed read status"; -- $sock->getline() or die "dcc: failed read multistatus"; -+ # prefer dccifd to dccproc -+ if ($self->{dccifd_available}) { -+ $pgm = 'dccifd'; - -- my @null = $sock->getlines(); -- if (!@null) { -- # no facility prefix on this -- die "dcc: failed to read header\n"; -- } -+ $sock = $self->dccifd_connect($tag); -+ if (!$sock) { -+ $self->{dccifd_available} = 0; -+ die("dccproc not available") if (!$self->is_dccproc_available()); - -- # the first line will be the header we want to look at -- chomp($response = shift @null); -- # but newer versions of DCC fold the header if it's too long... 
-- while (my $v = shift @null) { -- last unless ($v =~ s/^\s+/ /); # if this line wasn't folded, stop -- chomp $v; -- $response .= $v; -+ # fall back on dccproc if the socket is an orphan from -+ # a killed dccifd daemon or some other obvious (no timeout) problem -+ dbg("$tag fall back on dccproc"); - } -- -- dbg("dcc: dccifd got response: %s", $response); -- -- }); -- -- $permsgstatus->leave_helper_run_mode(); -- -- if ($timer->timed_out()) { -- dbg("dcc: dccifd check timed out after $timeout secs."); -- return; - } - -- if ($err) { -- chomp $err; -- warn("dcc: dccifd -> check skipped: $err\n"); -- return; -- } -+ if ($self->{dccifd_available}) { - -- if (!defined $response || $response !~ /^X-DCC/) { -- dbg("dcc: dccifd check failed - no X-DCC returned: %s", $response); -- return; -+ # send the options and other parameters to the daemon -+ $client = $envelope->{ip}; -+ $clientname = $envelope->{rdns}; -+ if (!defined $client) { -+ $client = ''; -+ } else { -+ $client .= ("\r" . $clientname) if defined $clientname; - } -+ $helo = $envelope->{helo} || ''; -+ if ($tag ne "dcc:") { -+ $opts = $self->{dccifd_report_options} -+ } else { -+ $opts = $self->{dccifd_lookup_options}; -+ # only query if there is an X-DCC header -+ $opts =~ s/grey-off/& query/ if defined $permsgstatus->{dcc_raw_x_dcc}; -+ } -+ $sock->print($opts) or die "failed write options\n"; -+ $sock->print($client . "\n") or die "failed write SMTP client\n"; -+ $sock->print($helo . 
"\n") or die "failed write HELO value\n"; -+ $sock->print("\n") or die "failed write sender\n"; -+ $sock->print("unknown\n\n") or die "failed write 1 recipient\n"; -+ $sock->print($$fulltext) or die "failed write mail message\n"; -+ $sock->shutdown(1) or die "failed socket shutdown: $!"; - -- $response =~ s/[ \t]\z//; # strip trailing whitespace -- $permsgstatus->{dcc_response} = $response; --} -+ $sock->getline() or die "failed read status\n"; -+ $sock->getline() or die "failed read multistatus\n"; - --sub dccproc_lookup { -- my ($self, $permsgstatus, $fulltext, $client) = @_; -- my $conf = $self->{main}->{conf}; -- my $response; -- my %count = (body => 0, fuz1 => 0, fuz2 => 0, rep => 0); -- my $timeout = $conf->{dcc_timeout}; -+ @resp = $sock->getlines(); -+ die "failed to read dccifd response\n" if !@resp; - -- $permsgstatus->enter_helper_run_mode(); -- -- # use a temp file here -- open2() is unreliable, buffering-wise, under spamd -+ } else { -+ $pgm = 'dccproc'; -+ # use a temp file -- open2() is unreliable, buffering-wise, under spamd -+ # first ensure that we do not hit a stray file from some other filter. -+ $permsgstatus->delete_fulltext_tmpfile(); - my $tmpf = $permsgstatus->create_fulltext_tmpfile($fulltext); -- my $pid; -- -- my $timer = Mail::SpamAssassin::Timeout->new( -- { secs => $timeout, deadline => $permsgstatus->{master_deadline} }); -- my $err = $timer->run_and_catch(sub { -- -- local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" }; - -- # note: not really tainted, this came from system configuration file -- my $path = untaint_file_path($conf->{dcc_path}); -- -- my $opts = $conf->{dcc_options}; -+ my $path = $conf->{dcc_path}; -+ $opts = $conf->{dcc_options}; - my @opts = !defined $opts ? 
() : split(' ',$opts); - untaint_var(\@opts); -+ unshift(@opts, '-w', 'whiteclnt'); -+ $client = $envelope->{ip}; -+ if ($client) { -+ unshift(@opts, '-a', untaint_var($client)); -+ } else { -+ # get external relay IP address from Received: header if not available -+ unshift(@opts, '-R'); -+ } -+ if ($tag eq "dcc:") { -+ # query instead of report if there is an X-DCC header from upstream -+ unshift(@opts, '-Q') if defined $permsgstatus->{dcc_raw_x_dcc}; -+ } else { -+ # learn or report spam -+ unshift(@opts, '-t', 'many'); -+ } - -- unshift(@opts, "-a", -- untaint_var($client)) if defined $client && $client ne ''; -- -- dbg("dcc: opening pipe: %s", -- join(' ', $path, "-H", "-x", "0", @opts, "< $tmpf")); -+ dbg("$tag opening pipe to %s", -+ join(' ', $path, "-C", "-x", "0", @opts, "<$tmpf")); - - $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC, -- $tmpf, 1, $path, "-H", "-x", "0", @opts); -+ $tmpf, 1, $path, "-C", "-x", "0", @opts); - $pid or die "$!\n"; - - # read+split avoids a Perl I/O bug (Bug 5985) - my($inbuf,$nread,$resp); $resp = ''; - while ( $nread=read(DCC,$inbuf,8192) ) { $resp .= $inbuf } - defined $nread or die "error reading from pipe: $!"; -- my @null = split(/^/m, $resp, -1); undef $resp; -+ @resp = split(/^/m, $resp, -1); undef $resp; - - my $errno = 0; close DCC or $errno = $!; - proc_status_ok($?,$errno) -- or info("dcc: [%s] finished: %s", $pid, exit_status_str($?,$errno)); -- -- if (!@null) { -- # no facility prefix on this -- die "failed to read header\n"; -- } -+ or info("$tag [%s] finished: %s", $pid, exit_status_str($?,$errno)); - -- # the first line will be the header we want to look at -- chomp($response = shift @null); -- # but newer versions of DCC fold the header if it's too long... 
-- while (my $v = shift @null) { -- last unless ($v =~ s/^\s+/ /); # if this line wasn't folded, stop -- chomp $v; -- $response .= $v; -+ die "failed to read X-DCC header from dccproc\n" if !@resp; - } -- -- unless (defined($response)) { -- # no facility prefix on this -- die "no response\n"; # yes, this is possible -- } -- -- dbg("dcc: got response: %s", $response); -- - }); - -+ if ($pgm eq 'dccproc') { - if (defined(fileno(*DCC))) { # still open - if ($pid) { -- if (kill('TERM',$pid)) { dbg("dcc: killed stale helper [$pid]") } -- else { dbg("dcc: killing helper application [$pid] failed: $!") } -+ if (kill('TERM',$pid)) { -+ dbg("$tag killed stale dccproc process [$pid]") -+ } else { -+ dbg("$tag killing dccproc process [$pid] failed: $!") -+ } - } - my $errno = 0; close(DCC) or $errno = $!; -- proc_status_ok($?,$errno) -- or info("dcc: [%s] terminated: %s", $pid, exit_status_str($?,$errno)); -+ proc_status_ok($?,$errno) or info("$tag [%s] dccproc terminated: %s", -+ $pid, exit_status_str($?,$errno)); -+ } - } -+ - $permsgstatus->leave_helper_run_mode(); - - if ($timer->timed_out()) { -@@ -833,204 +904,182 @@ - $permsgstatus->leave_helper_run_mode(); - - if ($timer->timed_out()) { -- dbg("dcc: check timed out after $timeout seconds"); -- return; -+ dbg("$tag $pgm timed out after $timeout seconds"); -+ return (undef, undef); - } - - if ($err) { - chomp $err; -- if ($err eq "__brokenpipe__ignore__") { -- dbg("dcc: check failed: broken pipe"); -- } elsif ($err eq "no response") { -- dbg("dcc: check failed: no response"); -- } else { -- warn("dcc: check failed: $err\n"); -- } -- return; -+ info("$tag $pgm failed: $err\n"); -+ return (undef, undef); - } - -- if (!defined($response) || $response !~ /^X-DCC/) { -- $response ||= ''; -- dbg("dcc: check failed: no X-DCC returned (did you create a map file?): %s", $response); -- return; -+ my ($raw_x_dcc, $cksums) = $self->parse_dcc_response(\@resp); -+ if (!defined $raw_x_dcc || $raw_x_dcc !~ /^X-DCC/) { -+ info("$tag 
instead of X-DCC header, $pgm returned '%s'", $raw_x_dcc); -+ return (undef, undef); - } -- -- $permsgstatus->{dcc_response} = $response; -+ dbg("$tag %s responded with '%s'", $pgm, $raw_x_dcc); -+ return ($raw_x_dcc, $cksums); - } - --# only supports dccproc right now --sub plugin_report { -+# tell DCC server that the message is spam according to SpamAssassin -+sub check_post_learn { - my ($self, $options) = @_; - -- return if $options->{report}->{options}->{dont_report_to_dcc}; -- $self->get_dcc_interface(); -- return if $self->{dcc_disabled}; -- -- # get the metadata from the message so we can pass the external relay information -- $options->{msg}->extract_message_metadata($options->{report}->{main}); -- my $client = $options->{msg}->{metadata}->{relays_external}->[0]->{ip}; -- if ($self->{dccifd_available}) { -- my $clientname = $options->{msg}->{metadata}->{relays_external}->[0]->{rdns}; -- my $helo = $options->{msg}->{metadata}->{relays_external}->[0]->{helo} || ""; -- if ($client) { -- if ($clientname) { -- $client = $client . "\r" . 
$clientname; -- } -- } else { -- $client = "0.0.0.0"; -- } -- if ($self->dccifd_report($options, $options->{text}, $client, $helo)) { -- $options->{report}->{report_available} = 1; -- info("reporter: spam reported to DCC"); -- $options->{report}->{report_return} = 1; -+ # learn only if allowed -+ return if $self->{learn_disabled}; -+ my $conf = $self->{main}->{conf}; -+ if (!$conf->{use_dcc}) { -+ $self->{learn_disabled} = 1; -+ return; - } -- else { -- info("reporter: could not report spam to DCC via dccifd"); -+ my $learn_score = $conf->{dcc_learn_score}; -+ if (!defined $learn_score || $learn_score eq '') { -+ dbg("dcc: DCC learning not enabled by dcc_learn_score"); -+ $self->{learn_disabled} = 1; -+ return; - } -- } else { -- # use temporary file: open2() is unreliable due to buffering under spamd -- my $tmpf = $options->{report}->create_fulltext_tmpfile($options->{text}); - -- if ($self->dcc_report($options, $tmpf, $client)) { -- $options->{report}->{report_available} = 1; -- info("reporter: spam reported to DCC"); -- $options->{report}->{report_return} = 1; -+ # and if SpamAssassin concluded that the message is spam -+ # worse than our threshold -+ my $permsgstatus = $options->{permsgstatus}; -+ if ($permsgstatus->is_spam()) { -+ my $score = $permsgstatus->get_score(); -+ my $required_score = $permsgstatus->get_required_score(); -+ if ($score < $required_score + $learn_score) { -+ dbg("dcc: score=%d required_score=%d dcc_learn_score=%d", -+ $score, $required_score, $learn_score); -+ return; - } -- else { -- info("reporter: could not report spam to DCC via dccproc"); - } -- $options->{report}->delete_fulltext_tmpfile(); -+ -+ # and if we checked the message -+ return if (!defined $permsgstatus->{dcc_raw_x_dcc}); -+ -+ # and if the DCC server thinks it was not spam -+ if ($permsgstatus->{dcc_raw_x_dcc} !~ /\b(Body|Fuz1|Fuz2)=\d/) { -+ dbg("dcc: already known as spam; no need to learn"); -+ return; - } -+ -+ # dccsight is faster than dccifd or dccproc if we have 
checksums, -+ # which we do not have with dccifd before 1.3.123 -+ my $old_cksums = $permsgstatus->{dcc_cksums}; -+ return if ($old_cksums && $self->dccsight_learn($permsgstatus, $old_cksums)); -+ -+ # Fall back on dccifd or dccproc without saved checksums or dccsight. -+ # get_dcc_interface() was called when the message was checked -+ -+ # is getting the full text this way kosher? Is get_pristine() public? -+ my $fulltext = $permsgstatus->{msg}->get_pristine(); -+ my $envelope = $permsgstatus->{relays_external}->[0]; -+ my ($raw_x_dcc, $cksums) = $self->ask_dcc("dcc: learn:", $permsgstatus, -+ \$fulltext, $envelope); -+ dbg("dcc: learned as spam") if defined $raw_x_dcc; - } - --sub dccifd_report { -- my ($self, $options, $fulltext, $client, $helo) = @_; -- my $conf = $self->{main}->{conf}; -- my $timeout = $conf->{dcc_timeout}; -- # instead of header use whatever the report option is -- my $opts = $conf->{dccifd_options}; -- my @opts = !defined $opts ? () : split(' ',$opts); -+sub dccsight_learn { -+ my ($self, $permsgstatus, $old_cksums) = @_; -+ my ($raw_x_dcc, $new_cksums); -+ -+ return 0 if !$old_cksums; -+ -+ my $dccsight = $self->dcc_pgm_path('dccsight'); -+ if (!$dccsight) { -+ info("dcc: cannot find dccsight") if $dccsight eq ''; -+ return 0; -+ } - -- $options->{report}->enter_helper_run_mode(); -- my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout }); -+ $permsgstatus->enter_helper_run_mode(); - -- my $err = $timer->run_and_catch(sub { -+ # use a temp file here -- open2() is unreliable, buffering-wise, under spamd -+ # ensure that we do not hit a stray file from some other filter. 
-+ $permsgstatus->delete_fulltext_tmpfile(); -+ my $tmpf = $permsgstatus->create_fulltext_tmpfile(\$old_cksums); -+ my $pid; - -+ my $timeout = $self->{main}->{conf}->{dcc_timeout}; -+ my $timer = Mail::SpamAssassin::Timeout->new( -+ { secs => $timeout, deadline => $permsgstatus->{master_deadline} }); -+ my $err = $timer->run_and_catch(sub { - local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" }; - -- my $sock = $self->dccifd_connect(); -- $sock or die "report: failed to connect to a dccifd socket"; -+ dbg("dcc: opening pipe to %s", -+ join(' ', $dccsight, "-t", "many", "<$tmpf")); - -- # send the options and other parameters to the daemon -- $sock->print("spam " . join(" ",@opts) . "\n") -- or die "report: dccifd failed write"; # options -- $sock->print($client . "\n") -- or die "report: dccifd failed write"; # client -- $sock->print($helo . "\n") -- or die "report: dccifd failed write"; # HELO value -- $sock->print("\n") -- or die "report: dccifd failed write"; # sender -- $sock->print("unknown\r\n") -- or die "report: dccifd failed write"; # recipients -- $sock->print("\n") -- or die "report: dccifd failed write"; # recipients -+ $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC, -+ $tmpf, 1, $dccsight, "-t", "many"); -+ $pid or die "$!\n"; - -- $sock->print($$fulltext) or die "report: dccifd failed write"; -+ # read+split avoids a Perl I/O bug (Bug 5985) -+ my($inbuf,$nread,$resp); $resp = ''; -+ while ( $nread=read(DCC,$inbuf,8192) ) { $resp .= $inbuf } -+ defined $nread or die "error reading from pipe: $!"; -+ my @resp = split(/^/m, $resp, -1); undef $resp; - -- $sock->shutdown(1) or die "report: dccifd failed socket shutdown: $!"; -+ my $errno = 0; close DCC or $errno = $!; -+ proc_status_ok($?,$errno) -+ or info("dcc: [%s] finished: %s", $pid, exit_status_str($?,$errno)); - -- $sock->getline() or die "report: dccifd failed read status"; -- $sock->getline() or die "report: dccifd failed read multistatus"; -+ die "dcc: failed to read learning 
response\n" if !@resp; - -- my @ignored = $sock->getlines(); -+ ($raw_x_dcc, $new_cksums) = $self->parse_dcc_response(\@resp); - }); - -- $options->{report}->leave_helper_run_mode(); -+ if (defined(fileno(*DCC))) { # still open -+ if ($pid) { -+ if (kill('TERM',$pid)) { -+ dbg("dcc: killed stale dccsight process [$pid]") -+ } else { -+ dbg("dcc: killing stale dccsight process [$pid] failed: $!") } -+ } -+ my $errno = 0; close(DCC) or $errno = $!; -+ proc_status_ok($?,$errno) or info("dcc: dccsight [%s] terminated: %s", -+ $pid, exit_status_str($?,$errno)); -+ } -+ $permsgstatus->delete_fulltext_tmpfile(); -+ $permsgstatus->leave_helper_run_mode(); - - if ($timer->timed_out()) { -- dbg("reporter: DCC report via dccifd timed out after $timeout secs."); -+ dbg("dcc: dccsight timed out after $timeout seconds"); - return 0; - } - - if ($err) { - chomp $err; -- if ($err eq "__brokenpipe__ignore__") { -- dbg("reporter: DCC report via dccifd failed: broken pipe"); -- } else { -- warn("reporter: DCC report via dccifd failed: $err\n"); -- } -+ info("dcc: dccsight failed: $err\n"); - return 0; - } - -+ if ($raw_x_dcc) { -+ dbg("dcc: learned response: %s", $raw_x_dcc); - return 1; --} -- --sub dcc_report { -- my ($self, $options, $tmpf, $client) = @_; -- my $conf = $self->{main}->{conf}; -- my $timeout = $options->{report}->{conf}->{dcc_timeout}; -- -- # note: not really tainted, this came from system configuration file -- my $path = untaint_file_path($options->{report}->{conf}->{dcc_path}); -- my $opts = $conf->{dcc_options}; -- my @opts = !defined $opts ? 
() : split(' ',$opts); -- untaint_var(\@opts); -- -- # get the metadata from the message so we can pass the external relay info -- -- unshift(@opts, "-a", -- untaint_var($client)) if defined $client && $client ne ''; -- -- my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout }); -- -- $options->{report}->enter_helper_run_mode(); -- my $err = $timer->run_and_catch(sub { -- -- local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" }; -- -- dbg("report: opening pipe: %s", -- join(' ', $path, "-H", "-t", "many", "-x", "0", @opts, "< $tmpf")); -- -- my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC, -- $tmpf, 1, $path, "-H", "-t", "many", "-x", "0", @opts); -- $pid or die "$!\n"; -+ } - -- my($inbuf,$nread,$nread_all); $nread_all = 0; -- # response is ignored, just check its existence -- while ( $nread=read(DCC,$inbuf,8192) ) { $nread_all += $nread } -- defined $nread or die "error reading from pipe: $!"; -+ return 0; -+} - -- dbg("dcc: empty response") if $nread_all < 1; -+sub plugin_report { -+ my ($self, $options) = @_; - -- my $errno = 0; close DCC or $errno = $!; -- # closing a pipe also waits for the process executing on the pipe to -- # complete, no need to explicitly call waitpid -- # my $child_stat = waitpid($pid,0) > 0 ? $? 
: undef; -- proc_status_ok($?,$errno) -- or die "dcc: reporter error: ".exit_status_str($?,$errno)."\n"; -- }); -- $options->{report}->leave_helper_run_mode(); -+ return if $options->{report}->{options}->{dont_report_to_dcc}; -+ $self->get_dcc_interface(); -+ return if $self->{dcc_disabled}; - -- if ($timer->timed_out()) { -- dbg("reporter: DCC report via dccproc timed out after $timeout seconds"); -- return 0; -- } -+ # get the metadata from the message so we can report the external relay -+ $options->{msg}->extract_message_metadata($options->{report}->{main}); -+ my $envelope = $options->{msg}->{metadata}->{relays_external}->[0]; -+ my ($raw_x_dcc, $cksums) = $self->ask_dcc("reporter:", $options->{report}, -+ $options->{text}, $envelope); - -- if ($err) { -- chomp $err; -- if ($err eq "__brokenpipe__ignore__") { -- dbg("reporter: DCC report via dccproc failed: broken pipe"); -+ if (defined $raw_x_dcc) { -+ $options->{report}->{report_available} = 1; -+ info("reporter: spam reported to DCC"); -+ $options->{report}->{report_return} = 1; - } else { -- warn("reporter: DCC report via dccproc failed: $err\n"); -+ info("reporter: could not report spam to DCC"); - } -- return 0; -- } -- -- return 1; - } - - 1; -- --=back -- --=cut diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6745 ./files/patch-bug6745 --- /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6745 2013-09-03 04:00:24.000000000 +0900 +++ ./files/patch-bug6745 1970-01-01 09:00:00.000000000 +0900 @@ -1,106 +0,0 @@ ---- lib/Mail/SpamAssassin/Logger/Syslog.pm 2012/05/14 16:28:23 1338277 -+++ lib/Mail/SpamAssassin/Logger/Syslog.pm 2012/05/14 16:31:09 1338278 -@@ -167,17 +167,21 @@ - } - $msg = $timestamp . ' ' . $msg if $timestamp ne ''; - -- # important: do not call syslog() from the SIGCHLD handler -- # child_handler(). otherwise we can get into a loop if syslog() -- # forks a process -- as it does in syslog-ng apparently! 
(bug 3625) -- $Mail::SpamAssassin::Logger::LOG_SA{INHIBIT_LOGGING_IN_SIGCHLD_HANDLER} = 1; -+# no longer needed since a patch to bug 6745: -+# # important: do not call syslog() from the SIGCHLD handler -+# # child_handler(). otherwise we can get into a loop if syslog() -+# # forks a process -- as it does in syslog-ng apparently! (bug 3625) -+# $Mail::SpamAssassin::Logger::LOG_SA{INHIBIT_LOGGING_IN_SIGCHLD_HANDLER} = 1; -+ - my $eval_stat; - eval { - syslog($level, "%s", $msg); 1; - } or do { - $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; - }; -- $Mail::SpamAssassin::Logger::LOG_SA{INHIBIT_LOGGING_IN_SIGCHLD_HANDLER} = 0; -+ -+# no longer needed since a patch to bug 6745: -+# $Mail::SpamAssassin::Logger::LOG_SA{INHIBIT_LOGGING_IN_SIGCHLD_HANDLER} = 0; - - if (defined $eval_stat) { - if ($self->check_syslog_sigpipe($msg)) { ---- spamd/spamd.raw 2012/05/14 16:28:23 1338277 -+++ spamd/spamd.raw 2012/05/14 16:31:09 1338278 -@@ -589,6 +589,7 @@ - my $timeout_child; # processing timeout (headers->finish), 0=no timeout - my $clients_per_child; # number of clients each child should process - my %children; # current children -+my @children_exited; - - if ( defined $opt{'max-children'} ) { - $childlimit = $opt{'max-children'}; -@@ -1033,6 +1034,8 @@ - # child_handler() if !$scaling || am_running_on_windows(); - child_handler(); # it doesn't hurt to call child_handler unconditionally - -+ child_cleaner(); -+ - do_sighup_restart() if defined $got_sighup; - - for (my $i = keys %children; $i < $childlimit; $i++) { -@@ -2523,7 +2526,8 @@ - my ($sig) = @_; - - # do NOT call syslog here unless the child's pid is in our list of known -- # children. This is due to syslog-ng brokenness -- bugs 3625, 4237. -+ # children. This is due to syslog-ng brokenness -- bugs 3625, 4237; -+ # see also bug 6745. 
- - # clean up any children which have exited - for (;;) { -@@ -2534,12 +2538,23 @@ - # - my $pid = waitpid(-1, WNOHANG); - last if !$pid || $pid == -1; -- my $child_stat = $?; -+ push(@children_exited, [$pid, $?, $sig, time]); -+ } - -- if (!defined $children{$pid}) { -- # ignore this child; we didn't realise we'd forked it. bug 4237 -- next; -- } -+ $SIG{CHLD} = \&child_handler; # reset as necessary, should be at end -+} -+ -+# takes care of dead children, as noted by a child_handler() -+# called in a main program flow (not from a signal handler) -+# -+sub child_cleaner { -+ while (@children_exited) { -+ my $tuple = shift(@children_exited); -+ next if !$tuple; # just in case -+ my($pid, $child_stat, $sig, $timestamp) = @$tuple; -+ -+ # ignore this child if we didn't realise we'd forked it. bug 4237 -+ next if !defined $children{$pid}; - - # remove them from our child listing - delete $children{$pid}; -@@ -2550,15 +2565,10 @@ - my $sock = $backchannel->get_socket_for_child($pid); - if ($sock) { $sock->close(); } - } -- -- unless ($Mail::SpamAssassin::Logger::LOG_SA{INHIBIT_LOGGING_IN_SIGCHLD_HANDLER}) { -- info("spamd: handled cleanup of child pid [%s]%s: %s", -- $pid, (defined $sig ? " due to SIG$sig" : ""), -- exit_status_str($child_stat,0)); -- } -+ info("spamd: handled cleanup of child pid [%s]%s: %s", -+ $pid, (defined $sig ? 
" due to SIG$sig" : ""), -+ exit_status_str($child_stat,0)); - } -- -- $SIG{CHLD} = \&child_handler; # reset as necessary, should be at end - } - - sub restart_handler { diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6937 ./files/patch-bug6937 --- /usr/ports/mail/p5-Mail-SpamAssassin/files/patch-bug6937 2014-03-04 03:19:16.000000000 +0900 +++ ./files/patch-bug6937 1970-01-01 09:00:00.000000000 +0900 @@ -1,97 +0,0 @@ ---- lib/Mail/SpamAssassin/AsyncLoop.pm.orig 2011-06-07 01:59:17.000000000 +0200 -+++ lib/Mail/SpamAssassin/AsyncLoop.pm 2013-05-29 01:37:58.000000000 +0200 -@@ -361,5 +361,12 @@ - $now = time; # capture new timestamp, after possible sleep in 'select' - -- while (my($key,$ent) = each %$pending) { -+ # A callback routine may generate another DNS query, which may insert -+ # an entry into the %$pending hash thus invalidating the each() context. -+ # So, make sure that callbacks are not called while the each() context -+ # is open, or avoid using each(). [Bug 6937] -+ # -+ # while (my($key,$ent) = each %$pending) { -+ foreach my $key (keys %$pending) { -+ my $ent = $pending->{$key}; - my $id = $ent->{id}; - if (defined $ent->{poll_callback}) { # call a "poll_callback" if exists -@@ -449,5 +456,6 @@ - my $foundcnt = 0; - my $now = time; -- while (my($key,$ent) = each %$pending) { -+ foreach my $key (keys %$pending) { -+ my $ent = $pending->{$key}; - dbg("async: aborting after %.3f s, %s: %s", - $now - $ent->{start_time}, ---- lib/Mail/SpamAssassin/Conf/Parser.pm.orig 2011-06-07 01:59:17.000000000 +0200 -+++ lib/Mail/SpamAssassin/Conf/Parser.pm 2013-05-29 01:32:06.000000000 +0200 -@@ -1249,5 +1249,5 @@ - my $mods = ''; - local ($1,$2); -- if ($re =~ s/^m{//) { -+ if ($re =~ s/^m\{//) { - $re =~ s/}([a-z]*)$//; $mods = $1; - } ---- lib/Mail/SpamAssassin/DnsResolver.pm.orig 2011-06-07 01:59:17.000000000 +0200 -+++ lib/Mail/SpamAssassin/DnsResolver.pm 2013-05-29 01:32:06.000000000 +0200 -@@ -441,8 +441,14 @@ - if (!defined($timeout) || 
$timeout > 0) - { $timer = $self->{main}->time_method("poll_dns_idle") } -+ $! = 0; - ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); - } - if (!defined $nfound || $nfound < 0) { -- warn "dns: select failed: $!"; -+ if ($!) { warn "dns: select failed: $!\n" } -+ else { info("dns: select interrupted") } -+ return; -+ } elsif (!$nfound) { -+ if (!defined $timeout) { warn("dns: select returned empty-handed\n") } -+ elsif ($timeout > 0) { dbg("dns: select timed out %.3f s", $timeout) } - return; - } ---- lib/Mail/SpamAssassin/Message.pm.orig 2011-06-07 01:59:17.000000000 +0200 -+++ lib/Mail/SpamAssassin/Message.pm 2013-05-29 01:32:06.000000000 +0200 -@@ -567,5 +567,5 @@ - # bug 5557: windows requires tmp file be closed before it can be rm'd - if (ref $part->{'raw'} eq 'GLOB') { -- close($part->{'raw'}) or die "error closing input file: $!"; -+ close($part->{'raw'}) or warn "error closing input file: $!"; - } - ---- lib/Mail/SpamAssassin/PerMsgStatus.pm.orig 2011-06-07 01:59:17.000000000 +0200 -+++ lib/Mail/SpamAssassin/PerMsgStatus.pm 2013-05-29 01:32:06.000000000 +0200 -@@ -421,6 +421,6 @@ - } - -- # ignore tests with 0 score in this scoreset -- next if ($scores->{$test} == 0); -+ # ignore tests with 0 score (or undefined) in this scoreset -+ next if !$scores->{$test}; - - # Go ahead and add points to the proper locations -@@ -1253,11 +1253,10 @@ - my $line = ''; - foreach my $test (sort @{$self->{test_names_hit}}) { -- if (!$line) { -- $line .= $test . "=" . $self->{conf}->{scores}->{$test}; -- } else { -- $line .= $arg . $test . "=" . $self->{conf}->{scores}->{$test}; -- } -+ my $score = $self->{conf}->{scores}->{$test}; -+ $score = '0' if !defined $score; -+ $line .= $arg if $line ne ''; -+ $line .= $test . "=" . $score; - } -- $line ? $line : 'none'; -+ $line ne '' ? 
$line : 'none'; - }, - ---- lib/Mail/SpamAssassin/Util.pm.orig 2013-05-29 01:29:59.000000000 +0200 -+++ lib/Mail/SpamAssassin/Util.pm 2013-05-29 01:33:16.000000000 +0200 -@@ -1588,5 +1588,5 @@ - return undef; # invalid - } -- elsif ($re =~ s/^m{//) { # m{foo/bar} -+ elsif ($re =~ s/^m\{//) { # m{foo/bar} - $delim = '}'; - } diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/files/pkg-message.in ./files/pkg-message.in --- /usr/ports/mail/p5-Mail-SpamAssassin/files/pkg-message.in 1970-01-01 09:00:00.000000000 +0900 +++ ./files/pkg-message.in 2014-03-04 10:40:02.000000000 +0900 @@ -0,0 +1,24 @@ +========================================================================== + +You should complete the following post-installation tasks: + + 1) Read %%DOCSDIR%%/INSTALL + and %%DOCSDIR%%/UPGRADE + BEFORE enabling SpamAssassin for important changes + + 2) Edit the configuration in %%ETCDIR%%, + in particular %%ETCDIR%%/init.pre + You may get lots of annoying (but harmless) error messages + if you skip this step. + + 3) Run 'sa-update' to obtain the latest rules. + Then, run 'sa-compile' for a big speed boost (if you + enabled SA_COMPILE) + + 4) To run spamd, add the following to /etc/rc.conf: + spamd_enable="YES" + +SECURITY NOTE: +Unless you deselected the default AS_ROOT, spamd will be running +as root. 
If you wish to change this, add the following to /etc/rc.conf: +spamd_flags="-u spamd -H /var/spool/spamd" diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/pkg-deinstall ./pkg-deinstall --- /usr/ports/mail/p5-Mail-SpamAssassin/pkg-deinstall 2014-01-23 00:52:06.000000000 +0900 +++ ./pkg-deinstall 1970-01-01 09:00:00.000000000 +0900 @@ -1,17 +0,0 @@ -#!/bin/sh - -if [ "$2" != "POST-DEINSTALL" ]; then - exit 0 -fi - -if [ -d /var/db/spamassassin ]; then - echo "To delete /var/db/spamassassin, use 'rm -rf /var/db/spamassassin'" -fi - -USER=spamd - -if pw usershow "${USER}" 2>/dev/null 1>&2; then - echo "To delete ${USER} user permanently, use 'rmuser ${USER}'" -fi - -exit 0 diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/pkg-descr ./pkg-descr --- /usr/ports/mail/p5-Mail-SpamAssassin/pkg-descr 2014-01-23 00:44:51.000000000 +0900 +++ ./pkg-descr 2013-10-03 17:01:51.000000000 +0900 @@ -11,4 +11,4 @@ Additional drop-in rule sets are available at http://wiki.apache.org/spamassassin/CustomRulesets -WWW: http://spamassassin.apache.org/ +WWW: http://spamassassin.apache.org/ diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/pkg-message ./pkg-message --- /usr/ports/mail/p5-Mail-SpamAssassin/pkg-message 2013-12-02 20:43:47.000000000 +0900 +++ ./pkg-message 1970-01-01 09:00:00.000000000 +0900 @@ -1,39 +0,0 @@ -************************************************************************* -* _ _____ _____ _____ _ _ _____ ___ ___ _ _ * -* / \|_ _|_ _| ____| \ | |_ _|_ _/ _ \| \ | | * -* / _ \ | | | | | _| | \| | | | | | | | | \| | * -* / ___ \| | | | | |___| |\ | | | | | |_| | |\ | * -* /_/ \_\_| |_| |_____|_| \_| |_| |___\___/|_| \_| * -* * -* See PREFIX/share/doc/p5-Mail-SpamAssassin/INSTALL, * -* and PREFIX/share/doc/p5-Mail-SpamAssassin/UPGRADE, * -* or http://spamassassin.org/dist/INSTALL and * -* http://spamassassin.org/dist/UPGRADE BEFORE enabling * -* this version of SpamAssassin for important information * -* regarding changes in this version. 
* -* * -* SpamAssassin may require additional configuration in * -* PREFIX/etc/mail/spamassassin/init.pre depending on * -* the options you have installed. Otherwise, annoying * -* (but harmless) error messages may result. Read the * -* files listed above. * -* * -************************************************************************* -You may wish to run sa-update now to obtain the latest rules. - -NOTE: FREEBSD users: If you are updating from a version prior to 3.20. -sa-update now places state files in /var/db/spamassassin and not -/var/lib/spamassassin. This is to be consistant with FreeBSD file -directory conventions. - -If you run sa-compile, you will notice that files are in -/var/db/spamassassin/compiled/<perlversion>/<version> instead of -/var/db/spamassassin/compiled/<version>. -No attempts have been made to move old versions over. You must recompile. - -If you are running with spamd, you must add the following to rc.conf: -spamd_enable="YES" - -Security Note: If you did NOT deselect AS_ROOT, spamd will be running -as root. 
To change this, also add this to rc.conf: -spamd_flags="-u spamd -H /var/spool/spamd" diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin/pkg-plist ./pkg-plist --- /usr/ports/mail/p5-Mail-SpamAssassin/pkg-plist 2014-01-10 18:36:12.000000000 +0900 +++ ./pkg-plist 2014-03-04 10:45:30.000000000 +0900 @@ -7,23 +7,26 @@ bin/spamassassin bin/spamc bin/spamd -@unexec rm -rf %D/etc/mail/spamassassin/sa-update-keys -etc/mail/spamassassin/local.cf.sample +@unexec rm -rf %D/%%ETCDIR%%/sa-update-keys +%%ETCDIR%%/local.cf.sample @unexec if cmp -s %B/init.pre.sample %B/init.pre; then rm -f %B/init.pre; fi -etc/mail/spamassassin/init.pre.sample +%%ETCDIR%%/init.pre.sample @exec [ -f %B/init.pre ] || cp %B/%f %B/init.pre @unexec if cmp -s %B/v310.pre.sample %B/v310.pre; then rm -f %B/v310.pre; fi -etc/mail/spamassassin/v310.pre.sample +%%ETCDIR%%/v310.pre.sample @exec [ -f %B/v310.pre ] || cp %B/%f %B/v310.pre @unexec if cmp -s %B/v312.pre.sample %B/v312.pre; then rm -f %B/v312.pre; fi -etc/mail/spamassassin/v312.pre.sample +%%ETCDIR%%/v312.pre.sample @exec [ -f %B/v312.pre ] || cp %B/%f %B/v312.pre @unexec if cmp -s %B/v320.pre.sample %B/v320.pre; then rm -f %B/v320.pre; fi -etc/mail/spamassassin/v320.pre.sample +%%ETCDIR%%/v320.pre.sample @exec [ -f %B/v320.pre ] || cp %B/%f %B/v320.pre @unexec if cmp -s %B/v330.pre.sample %B/v330.pre; then rm -f %B/v330.pre;fi -etc/mail/spamassassin/v330.pre.sample +%%ETCDIR%%/v330.pre.sample @exec [ -f %B/v330.pre ] || cp %B/%f %B/v330.pre +@unexec if cmp -s %B/v340.pre.sample %B/v340.pre; then rm -f %B/v340.pre;fi +%%ETCDIR%%/v340.pre.sample +@exec [ -f %B/v340.pre ] || cp %B/%f %B/v340.pre include/libspamc.h lib/libspamc.so lib/libspamc.so.0 @@ -39,6 +42,7 @@ %%PERL5_MAN3%%/Mail::SpamAssassin::BayesStore::BDB.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::BayesStore::MySQL.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::BayesStore::PgSQL.3.gz +%%PERL5_MAN3%%/Mail::SpamAssassin::BayesStore::Redis.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::BayesStore::SQL.3.gz 
%%PERL5_MAN3%%/Mail::SpamAssassin::Client.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Conf.3.gz @@ -61,12 +65,14 @@ %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::AWL.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::AccessDB.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::AntiVirus.3.gz +%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::AskDNS.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::AutoLearnThreshold.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Bayes.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Check.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::DCC.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::DKIM.3.gz +%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::DNSEval.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Hashcash.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::MIMEHeader.3.gz %%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::OneLineBodyRuleType.3.gz @@ -116,6 +122,7 @@ %%SITE_PERL%%/Mail/SpamAssassin/BayesStore/DBM.pm %%SITE_PERL%%/Mail/SpamAssassin/BayesStore/MySQL.pm %%SITE_PERL%%/Mail/SpamAssassin/BayesStore/PgSQL.pm +%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/Redis.pm %%SITE_PERL%%/Mail/SpamAssassin/BayesStore/SDBM.pm %%SITE_PERL%%/Mail/SpamAssassin/BayesStore/SQL.pm %%SITE_PERL%%/Mail/SpamAssassin/Client.pm @@ -151,6 +158,7 @@ %%SITE_PERL%%/Mail/SpamAssassin/Plugin/AWL.pm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/AccessDB.pm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/AntiVirus.pm +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AskDNS.pm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Bayes.pm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/BodyEval.pm @@ -199,15 +207,12 @@ %%SITE_PERL%%/Mail/SpamAssassin/Util/RegistrarBoundaries.pm %%SITE_PERL%%/Mail/SpamAssassin/Util/ScopedTimer.pm %%SITE_PERL%%/Mail/SpamAssassin/Util/TieOneStringHash.pm +%%SITE_PERL%%/Mail/SpamAssassin/Util/TinyRedis.pm %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail/SpamAssassin/.packlist 
%%SITE_PERL%%/spamassassin-run.pod %%DATADIR%%/languages %%DATADIR%%/sa-update-pubkey.txt %%DATADIR%%/user_prefs.template -@unexec rm -rf /var/lib/spamassassin/2* -@unexec rmdir /var/lib/spamassassin 2>/dev/null || true -@unexec rmdir /var/lib 2>/dev/null || true -@unexec rmdir /var/db/spamassassin 2>/dev/null || true @dirrm %%DATADIR%% @dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail/SpamAssassin @dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail @@ -222,7 +227,13 @@ @dirrm %%SITE_PERL%%/Mail/SpamAssassin/Bayes @dirrm %%SITE_PERL%%/Mail/SpamAssassin @dirrmtry %%SITE_PERL%%/Mail -@dirrmtry etc/mail/spamassassin +@dirrmtry %%ETCDIR%% @dirrmtry etc/mail @unexec rm -rf /var/run/spamd -@unexec rm -rf /var/spool/spamd +@unexec rm -rf /var/lib/spamassassin/2* +@dirrmtry /var/lib/spamassassin +@dirrmtry /var/lib +@dirrmtry /var/db/spamassassin +@unexec [ -d /var/db/spamassassin ] && echo "If you are no longer using SpamAssassin, remove /var/db/spamassassin" +@unexec pw usershow "%%USER%%" 2>/dev/null 1>&2 && echo "To delete %%USER%% permanently, 'rmuser %%USER%%'" +@unexec pw usershow "%%USER%%" 2>/dev/null 1>&2 && [ -d /var/spool/spamd ] && echo " Note that this will remove /var/spool/spamd" --------------060504000802010404090809 Content-Type: text/plain; charset=Shift_JIS; name="p5-Mail-SpamAssassin-Alt.diff" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="p5-Mail-SpamAssassin-Alt.diff" diff -ruN -ruN /usr/ports/mail/p5-Mail-SpamAssassin-Alt/Makefile ./Makefile --- /usr/ports/mail/p5-Mail-SpamAssassin-Alt/Makefile 2014-03-04 05:51:14.000000000 +0900 +++ ./Makefile 2014-03-04 11:10:21.000000000 +0900 @@ -1,6 +1,6 @@ # $FreeBSD: head/mail/p5-Mail-SpamAssassin-Alt/Makefile 346965 2014-03-03 20:51:14Z antoine $ -PORTREVISION= 2 +PORTREVISION= 0 PKGNAMESUFFIX= -Alt MAINTAINER= ports@FreeBSD.org --------------060504000802010404090809 Content-Type: text/plain; charset=Shift_JIS; name="ja-p5-Mail-SpamAssassin.diff" Content-Transfer-Encoding: 7bit 
Content-Disposition: attachment; filename="ja-p5-Mail-SpamAssassin.diff" diff -ruN -ruN /usr/ports/japanese/p5-Mail-SpamAssassin/Makefile ./Makefile --- /usr/ports/japanese/p5-Mail-SpamAssassin/Makefile 2014-03-04 05:48:15.000000000 +0900 +++ ./Makefile 2014-03-04 11:53:28.000000000 +0900 @@ -1,7 +1,7 @@ # Created by: TAOKA Fumiyoshi # $FreeBSD: head/japanese/p5-Mail-SpamAssassin/Makefile 346964 2014-03-03 20:48:15Z antoine $ -PORTREVISION= 5 +PORTREVISION= 0 CATEGORIES= japanese mail perl5 PKGNAMEPREFIX= ja-p5- @@ -12,11 +12,11 @@ MASTERDIR= ${.CURDIR}/../../mail/p5-Mail-SpamAssassin -RUN_DEPENDS+= ja-p5-MeCab>=0.98:${PORTSDIR}/japanese/p5-MeCab +BUILD_DEPENDS= ja-p5-MeCab>=0.98:${PORTSDIR}/japanese/p5-MeCab CONFLICTS= p5-Mail-SpamAssassin-[0-9]* -EXTRA_PATCHES= ${.CURDIR}/files/spamassassin-3.3.2-ja-1.patch +EXTRA_PATCHES= ${.CURDIR}/files/spamassassin-3.4.0-ja.patch PKGMESSAGE= ${.CURDIR}/pkg-message PLIST= ${WRKDIR}/pkg-plist diff -ruN -ruN /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch ./files/spamassassin-3.3.2-ja-1.patch --- /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch 2014-01-24 09:14:07.000000000 +0900 +++ ./files/spamassassin-3.3.2-ja-1.patch 1970-01-01 09:00:00.000000000 +0900 @@ -1,1148 +0,0 @@ -diff -uNr lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/HTML.pm ---- lib/Mail/SpamAssassin/HTML.pm 2011-06-07 08:59:17.000000000 +0900 -+++ lib/Mail/SpamAssassin/HTML.pm 2011-07-14 22:35:46.000000000 +0900 -@@ -84,7 +84,7 @@ - $ok_attributes{span}{$_} = 1 for qw( style ); - - sub new { -- my ($class) = @_; -+ my ($class, $opts) = @_; - my $self = $class->SUPER::new( - api_version => 3, - handlers => [ -@@ -97,6 +97,7 @@ - declaration => ["html_declaration", "self,text"], - ], - marked_sections => 1); -+ $self->{normalize} = $opts->{'normalize'} || 0; - - $self; - } -@@ -672,7 +673,14 @@ - } - } - else { -- $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g; -+ if ($self->{normalize}) { -+ $text =~ s/\xc2\xa0/ 
/g; # no-break space -+ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace -+ $text =~ s/[ \t\n\r\f\x0b]+/ /g; -+ } -+ else { -+ $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g; -+ } - # trim leading whitespace if previous element was whitespace - # and current element is not invisible - if (@{ $self->{text} } && !$display{invisible} && -diff -uNr lib/Mail/SpamAssassin/Message/Node.pm lib/Mail/SpamAssassin/Message/Node.pm ---- lib/Mail/SpamAssassin/Message/Node.pm 2011-06-07 08:59:17.000000000 +0900 -+++ lib/Mail/SpamAssassin/Message/Node.pm 2011-07-14 22:35:46.000000000 +0900 -@@ -42,6 +42,7 @@ - use Mail::SpamAssassin::Constants qw(:sa); - use Mail::SpamAssassin::HTML; - use Mail::SpamAssassin::Logger; -+use Mail::SpamAssassin::Util::Charset; - - =item new() - -@@ -387,27 +388,10 @@ - - sub _normalize { - my ($self, $data, $charset) = @_; -- return $data unless $self->{normalize}; -+ return wantarray ? ($data, $charset) : $data unless $self->{normalize}; - -- my $detected = Encode::Detect::Detector::detect($data); -- -- my $converter; -- -- if ($charset && $charset !~ /^us-ascii$/i && -- ($detected || 'none') !~ /^(?:UTF|EUC|ISO-2022|Shift_JIS|Big5|GB)/i) { -- dbg("message: Using labeled charset $charset"); -- $converter = Encode::find_encoding($charset); -- } -- -- $converter = Encode::find_encoding($detected) unless $converter || !defined($detected); -- -- return $data unless $converter; -- -- dbg("message: Converting..."); -- -- my $rv = $converter->decode($data, 0); -- utf8::downgrade($rv, 1); -- return $rv -+ my ($decoded_data, $detected_charset) = normalize_charset($data, $charset); -+ return wantarray ? ($decoded_data, $detected_charset) : $decoded_data; - } - - =item rendered() -@@ -430,8 +414,12 @@ - # text/x-aol is ignored here, but looks like text/html ... 
- return(undef,undef) unless ( $self->{'type'} =~ /^text\/(?:plain|html)$/i ); - -- my $text = $self->_normalize($self->decode(), $self->{charset}); -+ my ($text, $charset) = $self->_normalize($self->decode(), $self->{charset}); - my $raw = length($text); -+ if ($self->{normalize}) { -+ $self->{charset} = $charset; -+ $self->{language} = get_language($text, $charset); -+ } - - # render text/html always, or any other text|text/plain part as text/html - # based on a heuristic which simulates a certain common mail client -@@ -441,7 +429,7 @@ - { - $self->{rendered_type} = 'text/html'; - -- my $html = Mail::SpamAssassin::HTML->new(); # object -+ my $html = Mail::SpamAssassin::HTML->new({normalize=>$self->{normalize}}); # object - $html->parse($text); # parse+render text - $self->{rendered} = $html->get_rendered_text(); - $self->{visible_rendered} = $html->get_rendered_text(invisible => 0); -diff -uNr lib/Mail/SpamAssassin/Message.pm lib/Mail/SpamAssassin/Message.pm ---- lib/Mail/SpamAssassin/Message.pm 2011-06-07 08:59:17.000000000 +0900 -+++ lib/Mail/SpamAssassin/Message.pm 2011-07-14 22:35:46.000000000 +0900 -@@ -559,6 +559,8 @@ - delete $self->{'pristine_headers'}; - delete $self->{'line_ending'}; - delete $self->{'missing_head_body_separator'}; -+ delete $self->{'charset'}; -+ delete $self->{'language'}; - - my @toclean = ( $self ); - -@@ -585,6 +587,8 @@ - delete $part->{'invisible_rendered'}; - delete $part->{'type'}; - delete $part->{'rendered_type'}; -+ delete $self->{'charset'}; -+ delete $self->{'language'}; - - # if there are children nodes, add them to the queue of nodes to clean up - if (exists $part->{'body_parts'}) { -@@ -1014,7 +1018,14 @@ - - # whitespace handling (warning: small changes have large effects!) 
- $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed -- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space -+ if ($self->{normalize}) { -+ $text =~ s/\xc2\xa0/ /g; # no-break space => space -+ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space -+ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space -+ } -+ else { -+ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space -+ } - $text =~ tr/\f/\n/; # form feeds => newline - - # warn "message: $text"; -@@ -1071,7 +1082,14 @@ - - # whitespace handling (warning: small changes have large effects!) - $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed -- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space -+ if ($self->{normalize}) { -+ $text =~ s/\xc2\xa0/ /g; # no-break space => space -+ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space -+ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space -+ } -+ else { -+ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space -+ } - $text =~ tr/\f/\n/; # form feeds => newline - - my @textary = split_into_array_of_short_lines ($text); -@@ -1122,7 +1140,14 @@ - - # whitespace handling (warning: small changes have large effects!) 
- $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed -- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space -+ if ($self->{normalize}) { -+ $text =~ s/\xc2\xa0/ /g; # no-break space => space -+ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space -+ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space -+ } -+ else { -+ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space -+ } - $text =~ tr/\f/\n/; # form feeds => newline - - my @textary = split_into_array_of_short_lines ($text); -@@ -1198,6 +1223,28 @@ - - # --------------------------------------------------------------------------- - -+sub get_language { -+ my ($self) = @_; -+ -+ if (defined $self->{language}) { return $self->{language}; } -+ my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); -+ return '' unless @parts; -+ -+ # Go through each part -+ my @langs; -+ for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { -+ my $p = $parts[$pt]; -+ my $lang = $p->{language}; -+ next unless ($lang); -+ push(@langs, $lang) unless (grep(/^$lang$/, @langs)) -+ } -+ $self->{language} = scalar(@langs) ? join(' ', @langs) : ''; -+ return $self->{language}; -+} -+ -+# --------------------------------------------------------------------------- -+ -+ - 1; - - =back -diff -uNr lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/PerMsgStatus.pm ---- lib/Mail/SpamAssassin/PerMsgStatus.pm 2011-06-07 08:59:17.000000000 +0900 -+++ lib/Mail/SpamAssassin/PerMsgStatus.pm 2011-07-14 22:35:46.000000000 +0900 -@@ -53,6 +53,7 @@ - use warnings; - use re 'taint'; - -+use Encode; - use Time::HiRes qw(time); - - use Mail::SpamAssassin::Constants qw(:sa); -@@ -733,19 +734,41 @@ - - # the report charset - my $report_charset = "; charset=iso-8859-1"; -- if ($self->{conf}->{report_charset}) { -- $report_charset = "; charset=" . $self->{conf}->{report_charset}; -- } - - # the SpamAssassin report - my $report = $self->get_report(); -+ if ($self->{conf}->{report_charset}) { -+ $report_charset = "; charset=" . 
$self->{conf}->{report_charset}; -+ } - - # If there are any wide characters, need to MIME-encode in UTF-8 - # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then - # we could try converting to that charset if possible -- unless ($] < 5.008 || utf8::downgrade($report, 1)) { -+ my $is_utf8 = 0; -+ if ($self->{conf}->{normalize_charset}) { -+ $report = Encode::decode_utf8($report); -+ $is_utf8 = 1; -+ } -+ else { -+ if ($self->{msg}->{charset}) { -+ eval { -+ my $scratch = $report; -+ $report = Encode::decode($self->{msg}->{charset},$scratch,Encode::FB_CROAK); -+ $is_utf8 = 1; -+ }; -+ } -+ } -+ if ($is_utf8) { -+ $is_utf8 = 1; -+ eval { -+ my $scratch = $report; -+ $report = Encode::encode($self->{conf}->{report_charset},$scratch,Encode::FB_CROAK); -+ $is_utf8 = 0; -+ }; -+ if ($is_utf8) { -+ $report = Encode::encode_utf8($report); - $report_charset = "; charset=utf-8"; -- utf8::encode($report); -+ } - } - - # get original headers, "pristine" if we can do it -diff -uNr lib/Mail/SpamAssassin/Plugin/Bayes.pm lib/Mail/SpamAssassin/Plugin/Bayes.pm ---- lib/Mail/SpamAssassin/Plugin/Bayes.pm 2011-06-07 08:59:17.000000000 +0900 -+++ lib/Mail/SpamAssassin/Plugin/Bayes.pm 2011-07-14 22:35:46.000000000 +0900 -@@ -223,6 +223,15 @@ - # will require a longer token than English ones.) - use constant MAX_TOKEN_LENGTH => 15; - -+# Skip if a token is too short. 
-+our $SKIP_UTF8_SHORT_TOKENS_RE = qr{(?: -+ [\x00-\x7F] # 1 byte -+ | [\xC0-\xDF][\x80-\xBF] # 2 bytes -+ | [\xE0-\xEF][\x80-\xBF]{2} # 3 bytes -+ | [\xF0-\xF7][\x80-\xBF]{3} # 4 bytes -+ | (?:\xE3[\x81-\x83][\x80-\xBF]){2} # 2 characters of Hiragana and Katakana -+)}x; -+ - ########################################################################### - - sub new { -@@ -983,9 +992,28 @@ - $msgdata->{bayes_token_body} = $msg->{msg}->get_visible_rendered_body_text_array(); - $msgdata->{bayes_token_inviz} = $msg->{msg}->get_invisible_rendered_body_text_array(); - @{$msgdata->{bayes_token_uris}} = $msg->get_uri_list(); -+ if ($self->{conf}->{normalize_charset}) { -+ my $tokenizer = $self->get_tokenizer($msg); -+ if (ref($tokenizer)) { -+ $msgdata->{bayes_token_body} = $tokenizer->tokenize($msgdata->{bayes_token_body}); -+ $msgdata->{bayes_token_inviz} = $tokenizer->tokenize($msgdata->{bayes_token_inviz}); -+ } -+ } - return $msgdata; - } - -+sub get_tokenizer { -+ my ($self, $msg) = @_; -+ -+ my $tokenizer; -+ my @languages = split(/\s+/, $msg->{msg}->get_language()); -+ foreach my $lang (@languages) { -+ $tokenizer = $self->{'conf'}->{'tokenizer'}->{$lang}; -+ last if (ref($tokenizer)); -+ } -+ return $tokenizer; -+} -+ - ########################################################################### - - # The calling functions expect a uniq'ed array of tokens ... -@@ -1039,7 +1067,7 @@ - # include quotes, .'s and -'s for URIs, and [$,]'s for Nigerian-scam strings, - # and ISO-8859-15 alphas. Do not split on @'s; better results keeping it. - # Some useful tokens: "$31,000,000" "www.clock-speed.net" "f*ck" "Hits!" -- tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs; -+ tr/-A-Za-z0-9,\@\*\!_'"\$.\200-\377 / /cs; - - # DO split on "..." or "--" or "---"; common formatting error resulting in - # hapaxes. Keep the separator itself as a token, though, as long ones can -@@ -1068,6 +1096,11 @@ - # - next if ( defined $magic_re && $token =~ /$magic_re/ ); - -+ # Skip short UTF-8 tokens. 
-+ if ($self->{conf}->{normalize_charset}) { -+ next if ($token =~ /^$SKIP_UTF8_SHORT_TOKENS_RE$/o); -+ } -+ - # *do* keep 3-byte tokens; there's some solid signs in there - my $len = length($token); - -@@ -1096,14 +1129,16 @@ - # the domain ".net" appeared in the To header. - # - if ($len > MAX_TOKEN_LENGTH && $token !~ /\*/) { -- if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) { -- # Matt sez: "Could be asian? Autrijus suggested doing character ngrams, -- # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan -- # to me! (jm) -- while ($token =~ s/^(..?)//) { -- push (@rettokens, "8:$1"); -- } -- next; -+ unless ($self->{conf}->{normalize_charset}) { -+ if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) { -+ # Matt sez: "Could be asian? Autrijus suggested doing character ngrams, -+ # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan -+ # to me! (jm) -+ while ($token =~ s/^(..?)//) { -+ push (@rettokens, "8:$1"); -+ } -+ next; -+ } - } - - if (($region == 0 && HDRS_TOKENIZE_LONG_TOKENS_AS_SKIPS) -diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm ---- lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 1970-01-01 09:00:00.000000000 +0900 -+++ lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 2011-07-14 22:29:19.000000000 +0900 -@@ -0,0 +1,84 @@ -+# <@LICENSE> -+# Copyright 2004 Apache Software Foundation -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-+# See the License for the specific language governing permissions and -+# limitations under the License. -+# </@LICENSE> -+ -+=head1 NAME -+ -+Tokenizer::MeCab - Japanese tokenizer with MeCab -+ -+=head1 SYNOPSIS -+ -+loadplugin Mail::SpamAssassin::Plugin::Tokenizer::MeCab -+ -+=head1 DESCRIPTION -+ -+This plugin tokenizes a Japanese string with MeCab that is -+the morphological analysis engine. -+ -+Text::MeCab 0.12 or over is required. -+ -+=cut -+ -+package Mail::SpamAssassin::Plugin::Tokenizer::MeCab; -+ -+use strict; -+use warnings; -+use Mail::SpamAssassin::Plugin::Tokenizer; -+ -+use vars qw(@ISA); -+@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); -+ -+# Have to do this so that RPM doesn't find these as required perl modules -+BEGIN { require MeCab; } -+our $language = 'ja'; -+our $mecab = new MeCab::Tagger(-Ochasen); -+ -+sub new { -+ my $class = shift; -+ my $mailsaobject = shift; -+ -+ $class = ref($class) || $class; -+ my $self = $class->SUPER::new($mailsaobject, $language); -+ bless ($self, $class); -+ -+ return $self; -+} -+ -+sub tokenize { -+ my $self = shift; -+ my $text_array = shift; -+ -+ my @tokenized_array; -+ foreach my $text (@$text_array) { -+ next unless ($text); -+ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; -+ push(@tokenized_array, $text); -+ } -+ return \@tokenized_array; -+} -+ -+sub _tokenize { -+ my $text = shift; -+ -+ my @buf; -+ for (my $node = $mecab->parseToNode($text); $node->{next}; $node = $node->{next}) { -+ push(@buf, $node->{surface}); -+ } -+ my $tokenized = join(' ', @buf) . 
' '; -+ return $tokenized; -+} -+ -+1; -+ -diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm ---- lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 1970-01-01 09:00:00.000000000 +0900 -+++ lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 2011-07-14 22:29:19.000000000 +0900 -@@ -0,0 +1,111 @@ -+# <@LICENSE> -+# Copyright 2004 Apache Software Foundation -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+# </@LICENSE> -+ -+=head1 NAME -+ -+Tokenizer::SimpleJA - simple Japanese tokenizer -+ -+=head1 SYNOPSIS -+ -+loadplugin Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA -+ -+=head1 DESCRIPTION -+ -+This plugin simply tokenizes a Japanese string by characters other than -+the alphabet, the Chinese character, and the katakana. 
-+ -+=cut -+ -+package Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA; -+ -+use strict; -+use warnings; -+use Mail::SpamAssassin::Plugin::Tokenizer; -+ -+use vars qw(@ISA); -+@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); -+ -+our $language = 'ja'; -+ -+our $RE = qr{( -+ # Hiragana -+ (?: -+ \xE3\x81[\x80-\xBF] -+ | \xE3\x82[\x80-\x9F] -+ )+ -+ # Katakana -+ | (?: -+ \xE3\x82[\xA0-\xBF] -+ | \xE3\x83[\x80-\xBF] -+ )+ -+ # Kanji -+ | (?: -+ \xE3[\x90-\xBF][\x80-\xBF] -+ | [\xE4-\xE9][\x80-\xBF]{2} -+ | \xEF[\xA4-\xAB][\x80-\xBF] -+ )+ -+ # Fullwidth -+ | (?: -+ \xEF\xBC[\x80-\xBF] -+ | \xEF\xBD[\x80-\x9F] -+ )+ -+ # Others -+ | [\xC0-\xDF][\x80-\xBF] -+ | [\xE0-\xE2][\x80-\xBF]{2} -+ | \xE3\x80[\x80-\xBF] -+ | \xE3[\x84-\x8F][\x80-\xBF] -+ | [\xEA-\xEE][\x80-\xBF]{2} -+ | \xEF[\x80-\xA3][\x80-\xBF] -+ | \xEF[\xAC-\xBB][\x80-\xBF] -+ | \xEF\xBD[\xA0-\xBF] -+ | \xEF[\xBE-\xBF][\x80-\xBF] -+ | [\xF0-\xF7][\x80-\xBF]{3} -+)}x; -+ -+sub new { -+ my $class = shift; -+ my $mailsaobject = shift; -+ -+ $class = ref($class) || $class; -+ my $self = $class->SUPER::new($mailsaobject, $language); -+ bless ($self, $class); -+ -+ return $self; -+} -+ -+sub tokenize { -+ my $self = shift; -+ my $text_array = shift; -+ -+ my @tokenized_array; -+ foreach my $text (@$text_array) { -+ next unless ($text); -+ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; -+ push(@tokenized_array, $text); -+ } -+ return \@tokenized_array; -+} -+ -+sub _tokenize { -+ my $text = shift; -+ -+ $text =~ s/$RE/$1 /og; -+ $text = ' ' . 
$text; -+ return $text; -+} -+ -+1; -+ -diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer.pm lib/Mail/SpamAssassin/Plugin/Tokenizer.pm ---- lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 1970-01-01 09:00:00.000000000 +0900 -+++ lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 2011-07-14 22:35:46.000000000 +0900 -@@ -0,0 +1,115 @@ -+# <@LICENSE> -+# Copyright 2004 Apache Software Foundation -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+# </@LICENSE> -+ -+=head1 NAME -+ -+Mail::SpamAssassin::Plugin::Tokenizer - Tokenizer plugin base class -+ -+=head1 SYNOPSIS -+ -+=head2 SpamAssassin configuration: -+ -+ loadplugin MyTokenizerPlugin /path/to/MyTokenizerPlugin.pm -+ -+=head2 Perl code: -+ -+ use Mail::SpamAssassin::Plugin::Tokenizer; -+ use vars qw(@ISA); -+ @ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); -+ # language to use this plugin -+ our $language = 'ja'; -+ -+ # constructor: register language -+ sub new { -+ my $class = shift; -+ my $mailsaobject = shift; -+ -+ # some boilerplate... -+ $class = ref($class) || $class; -+ my $self = $class->SUPER::new($mailsaobject, $language); -+ bless ($self, $class); -+ -+ return $self; -+ } -+ -+ # tokenize function -+ sub tokenize { -+ my $self = shift; -+ my $text_array_ref = shift; -+ -+ ...... -+ -+ return $tokenized_array_ref; -+ } -+ -+ -+=head1 DESCRIPTION -+ -+This plugin is the base class of tokenizer plugin. 
-+You must define tokenize() and $language -+ -+=head1 INTERFACE -+ -+ sub tokenize { -+ my $self = shift; -+ my $text_array_ref = shift; -+ -+ ...... -+ -+ return $tokenized_array_ref; -+ } -+ -+=cut -+ -+package Mail::SpamAssassin::Plugin::Tokenizer; -+ -+use Mail::SpamAssassin::Plugin; -+use Mail::SpamAssassin::Logger; -+use strict; -+use warnings; -+use bytes; -+ -+use vars qw(@ISA); -+@ISA = qw(Mail::SpamAssassin::Plugin); -+ -+sub new { -+ my $class = shift; -+ my $mailsaobject = shift; -+ my $language = shift; -+ -+ # some boilerplate... -+ $class = ref($class) || $class; -+ my $self = $class->SUPER::new($mailsaobject); -+ bless ($self, $class); -+ -+ if ($language) { -+ $self->{main}->{conf}->{tokenizer}->{$language} = $self; -+ } -+ else { -+ dbg("plugin: $self: \$language is not defined"); -+ } -+ -+ return $self; -+} -+ -+sub tokenize { -+ my ($self, $ref) = @_; -+ -+ return $ref; -+} -+ -+1; -+ -diff -uNr lib/Mail/SpamAssassin/Util/Charset.pm lib/Mail/SpamAssassin/Util/Charset.pm ---- lib/Mail/SpamAssassin/Util/Charset.pm 1970-01-01 09:00:00.000000000 +0900 -+++ lib/Mail/SpamAssassin/Util/Charset.pm 2011-07-14 22:29:19.000000000 +0900 -@@ -0,0 +1,471 @@ -+# <@LICENSE> -+# Copyright 2006 Apache Software Foundation -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. 
-+# </@LICENSE> -+ -+ -+=head1 NAME -+ -+ Mail::SpamAssassin::Util::Charset.pm - Utility for charset and language -+ -+=head1 SYNOPSIS -+ -+ my ($decoded, $detected) = Mail::SpamAssassin::Util::Charset::normalize_charset($str, $charset); -+ my $language = Mail::SpamAssassin::Util::Charset::get_language($str, $charset); -+ -+=head1 DESCRIPTION -+ -+This module implements utility methods for charset and language. -+ -+=cut -+ -+package Mail::SpamAssassin::Util::Charset; -+ -+use strict; -+use warnings; -+use Encode; -+use Encode::Guess; -+use Encode::Alias; -+ -+use vars qw ( -+ @ISA @EXPORT -+); -+ -+require Exporter; -+ -+@ISA = qw(Exporter); -+@EXPORT = qw(normalize_charset get_language); -+ -+########################################################################### -+ -+use constant HAS_ENCODE_DETECT => eval { require Encode::Detect::Detector; }; -+use constant HAS_ENCODE_HANEXTRA => eval { require Encode::HanExtra; }; -+use constant HAS_ENCODE_EUCJPMS => eval { require Encode::EUCJPMS; }; -+ -+########################################################################### -+ -+our $KANA_HAN_RE = qr{ -+ # Hiragana and Katakana -+ \xE3[\x81-\x83][\x80-\xBF] -+ # Han -+ | \xE3[\x90-\xBF][\x80-\xBF] -+ | [\xE4-\xE9][\x80-\xBF]{2} -+ | \xEF[\xA4-\xAB][\x80-\xBF] -+}x; -+ -+our %enc2lang; -+our %lang2enc; -+our %scr2lang; -+our %cjkscr2lang; -+our @scrorder; -+ -+BEGIN { -+ -+ # See the following URL about this map: -+ # http://czyborra.com/charsets/iso8859.html -+ # http://czyborra.com/charsets/codepages.html -+ # http://czyborra.com/charsets/cyrillic.html -+ # http://en.wikipedia.org/wiki/ISO_8859 -+ # http://www.w3.org/International/O-charset-lang.html -+ %enc2lang = ( -+ # buint-in Encodings and Encode::Byte -+ # N. America -+ 'ascii' => 'en', -+ 'cp437' => 'en', -+ 'cp863' => 'weurope', -+ -+ # W. 
Europe (Latin1, Latin9) -+ # fr es ca eu pt it sq rm nl de da sv no fi fo is ga gd en af -+ 'iso-8859-1' => 'weurope', -+ 'iso-8859-15' => 'weurope', -+ 'cp850' => 'weurope', -+ 'cp860' => 'weurope', -+ 'cp1252' => 'weurope', -+ 'MacRoman' => 'weurope', -+ -+ # Cntrl. Europe / Latin2 / Latin10 -+ # hr cs hu pl sr sk sl -+ 'iso-8859-2' => 'ceurope', -+ 'cp852' => 'ceurope', -+ 'cp1250' => 'ceurope', -+ 'MacCentralEurRoman' => 'ceurope', -+ 'MacCroatian' => 'ceurope', -+ 'iso-8859-16' => 'ceurope', -+ 'MacRomanian' => 'ceurope', -+ -+ # Latin3 (Esperanto, Maltese, and Turkish. Turkish is now on 8859-9.) -+ # eo mt -+ 'iso-8859-3' => 'seurope', -+ -+ # Baltics (Latin4, Latin7) -+ # lv lt -+ 'iso-8859-4' => 'neurope', -+ 'iso-8859-13' => 'baltic', -+ 'cp1257' => 'baltic', -+ -+ # Nordics (Latin6) -+ # et kl iu se -+ 'iso-8859-10' => 'nordic', -+ -+ # Cyrillics -+ # bg be uk sr mk ru -+ 'iso-8859-5' => 'ru', -+ 'cp855' => 'ru', -+ 'cp1251' => 'ru', -+ 'cp866' => 'ru', -+ 'MacCyrillic' => 'ru', -+ 'koi8-r' => 'ru', -+ 'MacUkrainian' => 'uk', -+ 'koi8-u' => 'uk', -+ -+ # Arabic -+ 'iso-8859-6' => 'ar', -+ 'cp864' => 'ar', -+ 'cp1256' => 'ar', -+ 'MacArabic' => 'ar', -+ 'cp1006' => 'fa', -+ 'MacFarsi' => 'fa', -+ -+ # Greek -+ 'iso-8859-7' => 'el', -+ 'cp1253' => 'el', -+ 'MacGreek' => 'el', -+ -+ # Hebrew -+ # he yi -+ 'iso-8859-8' => 'he', -+ 'cp862' => 'he', -+ 'cp1255' => 'he', -+ 'MacHebrew' => 'he', -+ -+ # Turkish -+ 'iso-8859-9' => 'tr', -+ 'cp857' => 'tr', -+ 'cp1254' => 'tr', -+ 'MacTurkish' => 'tr', -+ -+ # Thai -+ 'iso-8859-11' => 'th', -+ 'cp874' => 'th', -+ -+ # Celtics (Latin8) -+ # gd cy br -+ 'iso-8859-14' => 'celtic', -+ -+ # Vietnamese -+ 'viscii' => 'vi', -+ 'cp1258' => 'vi', -+ -+ # Encode::CN -+ 'euc-cn' => 'zh', -+ 'cp936' => 'zh', -+ 'hz' => 'zh', -+ -+ # Encode::TW -+ 'big5-eten' => 'zh', -+ 'big5-hkscs' => 'zh', -+ 'cp950' => 'zh', -+ -+ # Encode::JP -+ 'euc-jp' => 'ja', -+ 'shiftjis' => 'ja', -+ '7bit-jis' => 'ja', -+ 'iso-2022-jp' => 'ja', -+ 
'iso-2022-jp-1' => 'ja', -+ 'cp932' => 'ja', -+ -+ # Encode::KR -+ 'euc-kr' => 'ko', -+ 'cp949' => 'ko', -+ 'johab' => 'ko', -+ 'iso-2022-kr' => 'ko', -+ -+ # Encode::HanExtra -+ 'euc-tw' => 'zh', -+ 'gb18030' => 'zh', -+ -+ # Encode::JIS2K -+ 'euc-jisx0213' => 'ja', -+ 'shiftjisx0123' => 'ja', -+ 'iso-2022-jp-3' => 'ja', -+ -+ # Encode::EUCJPMS -+ 'eucJP-ms' => 'ja', -+ 'cp51932' => 'ja', -+ 'cp50220' => 'ja', -+ 'cp50221' => 'ja', -+ -+ ); -+ -+ %lang2enc = ( -+ # Latin1 -+ 'en' => ['ascii'], -+ 'weurope' => ['cp1252'], -+ -+ # Latin2 -+ 'ceurope' => ['cp1250'], -+ -+ # Latin3 -+ 'seurope' => ['iso-8859-3'], -+ -+ # Latin4 -+ 'neurope' => ['iso-8859-4'], -+ -+ # Latin5 -+ 'tr' => ['cp1254'], -+ -+ # Latin6 -+ 'nordic' => ['iso-8859-10'], -+ -+ # Latin7 -+ 'baltic' => ['cp1257'], -+ -+ # Latin8 -+ 'celtic' => ['iso-8859-14'], -+ -+ # Non Latin -+ 'ru' => ['koi8-r', 'cp1251'], -+ 'uk' => ['koi8-u'], -+ -+ 'ar' => ['cp1256'], -+ 'el' => ['cp1253'], -+ 'he' => ['cp1255'], -+ 'th' => ['cp874'], -+ 'vi' => ['viscii', 'cp1258'], -+ 'zh' => ['euc-cn', 'cp950'], -+ 'ja' => ['euc-jp', 'cp932'], -+ 'ko' => ['euc-kr', 'cp949'], -+ -+ ); -+ -+ %scr2lang = ( -+ 'InLatin1Supplement' => ['weurope'], -+ 'InLatinExtendedA' => [ -+ 'ceurope', -+ 'seurope', -+ 'tr', -+ 'vi' -+ ], -+ 'InLatinExtendedB' => [ -+ 'nordic', -+ 'baltic', -+ 'celtic' -+ ], -+ 'Thai' => ['th'], -+ 'Cyrillic' => ['ru', 'uk'], -+ 'Arabic' => ['ar'], -+ 'Greek' => ['el'], -+ 'Hebrew' => ['he'], -+ ); -+ -+ # better detection for CJK -+ @scrorder = ('Hiragana','Katakana','Hangul','Han',keys(%scr2lang)); -+ %cjkscr2lang = ( -+ 'Hiragana' => ['ja'], -+ 'Katakana' => ['ja'], -+ 'Hangul' => ['ko'], -+ 'Han' => ['zh', 'ja', 'ko'], -+ ); -+ -+ unless (HAS_ENCODE_HANEXTRA) { -+ Encode::Alias::define_alias( qr/^gb18030$/i => ' "euc-cn"' ); -+ } -+ Encode::Alias::define_alias( qr/^unicode-1-1-(.+)$/i => ' "$1"' ); -+ Encode::Alias::define_alias( qr/^TIS-620$/i => ' "iso-8859-11"' ); -+ Encode::Alias::define_alias( 
qr/^x-mac-(.+)$/i => ' "Mac$1"' ); -+ Encode::Alias::define_alias( qr/^Shift_JIS$/i => ' "cp932"' ); -+ if (HAS_ENCODE_EUCJPMS) { -+ Encode::Alias::define_alias( qr/^iso-2022-jp$/i => ' "cp50221"' ); -+ } -+} -+ -+sub get_language { -+ my $str = shift; # $str must be UTF-8 encoding -+ my $charset = shift; -+ -+ return 'en' unless $charset; -+ if ($charset !~ /^utf/i) { -+ return $enc2lang{$charset}; -+ } elsif (defined($str)) { -+ $str =~ s/[\x00-\x7F]//g; # remove ASCII characters -+ return 'en' if ($str eq ''); -+ -+ my %handled; -+ $str = Encode::decode_utf8($str) unless (Encode::is_utf8($str)); -+ foreach my $scr (@scrorder) { -+ next if ($str !~ /\p{$scr}/); -+ my $scrlangs = exists($cjkscr2lang{$scr}) ? $cjkscr2lang{$scr} : $scr2lang{$scr}; -+ foreach my $lang (@$scrlangs) { -+ next if (exists($handled{$lang})); -+ foreach my $enc (@{$lang2enc{$lang}}) { -+ my $scratch = $str; -+ Encode::encode($enc, $scratch, Encode::FB_QUIET); -+ return $lang if ($scratch eq ''); -+ } -+ $handled{$lang} = 1; -+ } -+ } -+ } -+ return 'en'; -+} -+ -+# TEST 1: try conversion to use the specified charset. -+# TEST 2: try conversion to use Encode::Detect. -+# TEST 3: try conversion to use Encode::Guess. -+sub normalize_charset { -+ my $str = shift; -+ my $charset = shift; -+ -+ return wantarray ? ($str, 'ascii') : $str unless ($str); -+ -+ my $decoded; -+ my $detected; -+ -+ if ($charset) { -+ ($decoded, $detected) = _specified_encoding($str, $charset); -+ } -+ unless ($detected) { -+ ($decoded, $detected) = _encode_detect($str); -+ } -+ unless ($detected) { -+ ($decoded, $detected) = _encode_guess($str); -+ } -+ unless ($detected) { -+ return ($str, undef); -+ } -+ $decoded =~ s/^\x{feff}//g; -+ $decoded = Encode::encode_utf8($decoded); -+ -+ # unfold hiragana, katakana and han -+ if ($detected =~ /^(?:UTF|EUC|BIG5|GB|SHIFTJIS|ISO-2022|CP969$|CP932$|CP949|CP50221$)/i) { -+ $decoded =~ s/($KANA_HAN_RE)\012($KANA_HAN_RE)/$1$2/og; -+ } -+ return wantarray ? 
($decoded, $detected) : $decoded; -+} -+ -+sub _specified_encoding { -+ my $str = shift; -+ my $encoding = shift; -+ -+ my $detected; -+ my $decoded; -+ -+ return (undef, undef) unless ($encoding); -+ -+ # note: ISO-2022-* is not deistinguish from US-ASCII -+ return (undef, undef) if ($str =~ /\e/ and $encoding !~ /^ISO-2022/i); -+ -+ # UTF-16|32 encoding without BOM cannot be trusted. -+ return (undef, undef) if ($encoding =~ /^UTF-32$/i and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/); -+ return (undef, undef) if ($encoding =~ /^UTF-16$/i and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/); -+ -+ #$encoding = _get_alias($encoding); -+ my $encoder = Encode::find_encoding($encoding); -+ if (ref($encoder)) { -+ $decoded = $encoder->decode($str,Encode::FB_QUIET); -+ $detected = $encoder->name if ($str eq ''); -+ } -+ return ($decoded, $detected); -+} -+ -+sub _encode_detect { -+ return undef unless HAS_ENCODE_DETECT; -+ my $str = shift; -+ -+ # UTF-16|32 encoding without BOM cannot be trusted. -+ return (undef, undef) if ($str =~ /\x00\x00/ and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/); -+ return (undef, undef) if ($str =~ /\x00/ and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/); -+ -+ my $decoded; -+ my $detected = Encode::Detect::Detector::detect($str); -+ if ($detected) { -+ $detected = _get_alias($detected); -+ my $encoder = Encode::find_encoding($detected); -+ if (ref($encoder)) { -+ $decoded = $encoder->decode($str); -+ $detected = $decoded ? $encoder->name : undef; -+ } -+ else { -+ $detected = undef; -+ } -+ } -+ return ($decoded, $detected); -+} -+ -+sub _encode_guess { -+ my $str = shift; -+ -+ my $detected; -+ my $decoded; -+ my $encoder; -+ -+ # Step 1: Examine ISO-2022-*. 
-+ if ($str =~ /\e/) { -+ $Encode::Guess::NoUTFAutoGuess = 1; -+ $encoder = Encode::Guess::guess_encoding($str, -+ qw/cp50221 7bit-jis iso-2022-kr/); -+ $Encode::Guess::NoUTFAutoGuess = 0; -+ } -+ -+ # Step 2: Examine US-ASCII/UTF-(8|16|32) -+ unless (ref($encoder)) { -+ $Encode::Guess::NoUTFAutoGuess = 0; -+ $encoder = Encode::Guess::guess_encoding($str); -+ } -+ -+ # Step 3: Examine other encodings -+ unless (ref($encoder)) { -+ $Encode::Guess::NoUTFAutoGuess = 1; -+ eval { -+ if ($str =~ /[\x80-\xFF]{4}/) { -+ $encoder = Encode::Guess::guess_encoding($str, -+ qw/euc-cn big5-eten euc-jp cp932 euc-kr cp949/); -+ } -+ else { -+ $encoder = Encode::Guess::guess_encoding($str, -+ qw/iso-8859-1 cp1252/); -+ } -+ }; -+ $Encode::Guess::NoUTFAutoGuess = 0; -+ } -+ if (ref($encoder)) { -+ $detected = $encoder->name; -+ if ($detected) { -+ $decoded = $encoder->decode($str); -+ } -+ } -+ return ($decoded, $detected); -+} -+ -+sub _get_alias { -+ my $encoding = shift; -+ -+ unless (HAS_ENCODE_HANEXTRA) { -+ $encoding =~ s/^gb18030$/euc-cn/i; -+ } -+ $encoding =~ s/^unicode-1-1-(.+)$/$1/i; -+ $encoding =~ s/^TIS-620$/iso-8859-11/i; -+ $encoding =~ s/x-mac-(.+)$/Mac$1/i; -+ $encoding =~ s/^Shift_JIS$/cp932/i; -+ if (HAS_ENCODE_EUCJPMS) { -+ $encoding =~ s/^iso-2022-jp$/cp50221/i; -+ $encoding =~ s/^euc-jp$/cp51932/i; -+ } -+ -+ return $encoding; -+} -+ -+ -+1; -+ diff -ruN -ruN /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist ./files/spamassassin-3.3.2-ja-1.plist --- /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist 2014-03-04 05:48:15.000000000 +0900 +++ ./files/spamassassin-3.3.2-ja-1.plist 1970-01-01 09:00:00.000000000 +0900 @@ -1,12 +0,0 @@ -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/Charset.pm 
-%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Tokenizer.3.gz -%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Tokenizer::MeCab.3.gz -%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA.3.gz -%%PERL5_MAN3%%/Mail::SpamAssassin::Util::Charset.3.gz -@unexec if cmp -s %D/%%ETCDIR%%/%%TOKENIZER_PRE%%.sample %D/%%ETCDIR%%/%%TOKENIZER_PRE%%; then rm -f %D/%%ETCDIR%%/%%TOKENIZER_PRE%%; fi -%%ETCDIR%%/%%TOKENIZER_PRE%%.sample -@exec if [ ! -f %B/%%TOKENIZER_PRE%% ]; then cp -p %B/%f %B/%%TOKENIZER_PRE%%; fi diff -ruN -ruN /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.4.0-ja.patch ./files/spamassassin-3.4.0-ja.patch --- /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.4.0-ja.patch 1970-01-01 09:00:00.000000000 +0900 +++ ./files/spamassassin-3.4.0-ja.patch 2014-03-04 12:07:38.000000000 +0900 @@ -0,0 +1,1143 @@ +--- lib/Mail/SpamAssassin/HTML.pm.orig 2014-02-07 17:36:28.000000000 +0900 ++++ lib/Mail/SpamAssassin/HTML.pm 2014-03-04 11:18:44.000000000 +0900 +@@ -86,7 +86,7 @@ + $ok_attributes{div}{$_} = 1 for qw( style ); + + sub new { +- my ($class) = @_; ++ my ($class, $opts) = @_; + my $self = $class->SUPER::new( + api_version => 3, + handlers => [ +@@ -99,6 +99,7 @@ + declaration => ["html_declaration", "self,text"], + ], + marked_sections => 1); ++ $self->{normalize} = $opts->{'normalize'} || 0; + + $self; + } +@@ -681,7 +682,14 @@ + } + } + else { +- $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g; ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace ++ $text =~ s/[ \t\n\r\f\x0b]+/ /g; ++ } ++ else { ++ $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g; ++ } + # trim leading whitespace if previous element was whitespace + # and current element is not invisible + if (@{ $self->{text} } && !$display{invisible} && +--- lib/Mail/SpamAssassin/Message/Node.pm.orig 2014-02-07 17:36:23.000000000 +0900 ++++ lib/Mail/SpamAssassin/Message/Node.pm 2014-03-04 11:22:38.000000000 +0900 +@@ -42,6 +42,7 @@ + use 
Mail::SpamAssassin::Constants qw(:sa); + use Mail::SpamAssassin::HTML; + use Mail::SpamAssassin::Logger; ++use Mail::SpamAssassin::Util::Charset; + + =item new() + +@@ -385,27 +386,10 @@ + + sub _normalize { + my ($self, $data, $charset) = @_; +- return $data unless $self->{normalize}; ++ return wantarray ? ($data, $charset) : $data unless $self->{normalize}; + +- my $detected = Encode::Detect::Detector::detect($data); +- +- my $converter; +- +- if ($charset && $charset !~ /^us-ascii$/i && +- ($detected || 'none') !~ /^(?:UTF|EUC|ISO-2022|Shift_JIS|Big5|GB)/i) { +- dbg("message: Using labeled charset $charset"); +- $converter = Encode::find_encoding($charset); +- } +- +- $converter = Encode::find_encoding($detected) unless $converter || !defined($detected); +- +- return $data unless $converter; +- +- dbg("message: Converting..."); +- +- my $rv = $converter->decode($data, 0); +- utf8::downgrade($rv, 1); +- return $rv ++ my ($decoded_data, $detected_charset) = normalize_charset($data, $charset); ++ return wantarray ? ($decoded_data, $detected_charset) : $decoded_data; + } + + =item rendered() +@@ -428,8 +412,12 @@ + # text/x-aol is ignored here, but looks like text/html ... 
+ return(undef,undef) unless ( $self->{'type'} =~ /^text\/(?:plain|html)$/i ); + +- my $text = $self->_normalize($self->decode(), $self->{charset}); ++ my ($text, $charset) = $self->_normalize($self->decode(), $self->{charset}); + my $raw = length($text); ++ if ($self->{normalize}) { ++ $self->{charset} = $charset; ++ $self->{language} = get_language($text, $charset); ++ } + + # render text/html always, or any other text|text/plain part as text/html + # based on a heuristic which simulates a certain common mail client +@@ -439,7 +427,7 @@ + { + $self->{rendered_type} = 'text/html'; + +- my $html = Mail::SpamAssassin::HTML->new(); # object ++ my $html = Mail::SpamAssassin::HTML->new({normalize=>$self->{normalize}}); # object + $html->parse($text); # parse+render text + $self->{rendered} = $html->get_rendered_text(); + $self->{visible_rendered} = $html->get_rendered_text(invisible => 0); +--- lib/Mail/SpamAssassin/Message.pm.orig 2014-02-07 17:36:28.000000000 +0900 ++++ lib/Mail/SpamAssassin/Message.pm 2014-03-04 11:27:31.000000000 +0900 +@@ -604,6 +604,8 @@ + delete $self->{'pristine_headers'}; + delete $self->{'line_ending'}; + delete $self->{'missing_head_body_separator'}; ++ delete $self->{'charset'}; ++ delete $self->{'language'}; + + my @toclean = ( $self ); + +@@ -630,6 +632,8 @@ + delete $part->{'invisible_rendered'}; + delete $part->{'type'}; + delete $part->{'rendered_type'}; ++ delete $self->{'charset'}; ++ delete $self->{'language'}; + + # if there are children nodes, add them to the queue of nodes to clean up + if (exists $part->{'body_parts'}) { +@@ -1085,7 +1089,14 @@ + + # whitespace handling (warning: small changes have large effects!) 
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed +- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space => space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space ++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space ++ } ++ else { ++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ } + $text =~ tr/\f/\n/; # form feeds => newline + + # warn "message: $text"; +@@ -1142,7 +1153,14 @@ + + # whitespace handling (warning: small changes have large effects!) + $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed +- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space => space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space ++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space ++ } ++ else { ++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ } + $text =~ tr/\f/\n/; # form feeds => newline + + my @textary = split_into_array_of_short_lines ($text); +@@ -1193,7 +1211,14 @@ + + # whitespace handling (warning: small changes have large effects!) 
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed +- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space => space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space ++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space ++ } ++ else { ++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ } + $text =~ tr/\f/\n/; # form feeds => newline + + my @textary = split_into_array_of_short_lines ($text); +@@ -1269,6 +1294,28 @@ + + # --------------------------------------------------------------------------- + ++sub get_language { ++ my ($self) = @_; ++ ++ if (defined $self->{language}) { return $self->{language}; } ++ my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); ++ return '' unless @parts; ++ ++ # Go through each part ++ my @langs; ++ for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { ++ my $p = $parts[$pt]; ++ my $lang = $p->{language}; ++ next unless ($lang); ++ push(@langs, $lang) unless (grep(/^$lang$/, @langs)) ++ } ++ $self->{language} = scalar(@langs) ? join(' ', @langs) : ''; ++ return $self->{language}; ++} ++ ++# --------------------------------------------------------------------------- ++ ++ + 1; + + =back +--- lib/Mail/SpamAssassin/PerMsgStatus.pm.orig 2014-02-07 17:36:28.000000000 +0900 ++++ lib/Mail/SpamAssassin/PerMsgStatus.pm 2014-03-04 11:30:25.000000000 +0900 +@@ -53,6 +53,7 @@ + use warnings; + use re 'taint'; + ++use Encode; + use Errno qw(ENOENT); + use Time::HiRes qw(time); + +@@ -996,19 +997,41 @@ + + # the report charset + my $report_charset = "; charset=iso-8859-1"; +- if ($self->{conf}->{report_charset}) { +- $report_charset = "; charset=" . $self->{conf}->{report_charset}; +- } + + # the SpamAssassin report + my $report = $self->get_report(); ++ if ($self->{conf}->{report_charset}) { ++ $report_charset = "; charset=" . 
$self->{conf}->{report_charset}; ++ } + + # If there are any wide characters, need to MIME-encode in UTF-8 + # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then + # we could try converting to that charset if possible +- unless ($] < 5.008 || utf8::downgrade($report, 1)) { ++ my $is_utf8 = 0; ++ if ($self->{conf}->{normalize_charset}) { ++ $report = Encode::decode_utf8($report); ++ $is_utf8 = 1; ++ } ++ else { ++ if ($self->{msg}->{charset}) { ++ eval { ++ my $scratch = $report; ++ $report = Encode::decode($self->{msg}->{charset},$scratch,Encode::FB_CROAK); ++ $is_utf8 = 1; ++ }; ++ } ++ } ++ if ($is_utf8) { ++ $is_utf8 = 1; ++ eval { ++ my $scratch = $report; ++ $report = Encode::encode($self->{conf}->{report_charset},$scratch,Encode::FB_CROAK); ++ $is_utf8 = 0; ++ }; ++ if ($is_utf8) { ++ $report = Encode::encode_utf8($report); + $report_charset = "; charset=utf-8"; +- utf8::encode($report); ++ } + } + + # get original headers, "pristine" if we can do it +--- lib/Mail/SpamAssassin/Plugin/Bayes.pm.orig 2014-02-07 17:36:27.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Bayes.pm 2014-03-04 11:34:46.000000000 +0900 +@@ -223,6 +223,15 @@ + # will require a longer token than English ones.) + use constant MAX_TOKEN_LENGTH => 15; + ++# Skip if a token is too short. 
++our $SKIP_UTF8_SHORT_TOKENS_RE = qr{(?: ++ [\x00-\x7F] # 1 byte ++ | [\xC0-\xDF][\x80-\xBF] # 2 bytes ++ | [\xE0-\xEF][\x80-\xBF]{2} # 3 bytes ++ | [\xF0-\xF7][\x80-\xBF]{3} # 4 bytes ++ | (?:\xE3[\x81-\x83][\x80-\xBF]){2} # 2 characters of Hiragana and Katakana ++)}x; ++ + ########################################################################### + + sub new { +@@ -1039,9 +1048,28 @@ + $msgdata->{bayes_token_body} = $msg->{msg}->get_visible_rendered_body_text_array(); + $msgdata->{bayes_token_inviz} = $msg->{msg}->get_invisible_rendered_body_text_array(); + @{$msgdata->{bayes_token_uris}} = $msg->get_uri_list(); ++ if ($self->{conf}->{normalize_charset}) { ++ my $tokenizer = $self->get_tokenizer($msg); ++ if (ref($tokenizer)) { ++ $msgdata->{bayes_token_body} = $tokenizer->tokenize($msgdata->{bayes_token_body}); ++ $msgdata->{bayes_token_inviz} = $tokenizer->tokenize($msgdata->{bayes_token_inviz}); ++ } ++ } + return $msgdata; + } + ++sub get_tokenizer { ++ my ($self, $msg) = @_; ++ ++ my $tokenizer; ++ my @languages = split(/\s+/, $msg->{msg}->get_language()); ++ foreach my $lang (@languages) { ++ $tokenizer = $self->{'conf'}->{'tokenizer'}->{$lang}; ++ last if (ref($tokenizer)); ++ } ++ return $tokenizer; ++} ++ + ########################################################################### + + # The calling functions expect a uniq'ed array of tokens ... +@@ -1095,7 +1123,7 @@ + # include quotes, .'s and -'s for URIs, and [$,]'s for Nigerian-scam strings, + # and ISO-8859-15 alphas. Do not split on @'s; better results keeping it. + # Some useful tokens: "$31,000,000" "www.clock-speed.net" "f*ck" "Hits!" +- tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs; ++ tr/-A-Za-z0-9,\@\*\!_'"\$.\200-\377 / /cs; + + # DO split on "..." or "--" or "---"; common formatting error resulting in + # hapaxes. Keep the separator itself as a token, though, as long ones can +@@ -1124,6 +1152,11 @@ + # + next if ( defined $magic_re && $token =~ /$magic_re/ ); + ++ # Skip short UTF-8 tokens. 
++ if ($self->{conf}->{normalize_charset}) { ++ next if ($token =~ /^$SKIP_UTF8_SHORT_TOKENS_RE$/o); ++ } ++ + # *do* keep 3-byte tokens; there's some solid signs in there + my $len = length($token); + +@@ -1152,14 +1185,16 @@ + # the domain ".net" appeared in the To header. + # + if ($len > MAX_TOKEN_LENGTH && $token !~ /\*/) { +- if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) { +- # Matt sez: "Could be asian? Autrijus suggested doing character ngrams, +- # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan +- # to me! (jm) +- while ($token =~ s/^(..?)//) { +- push (@rettokens, "8:$1"); +- } +- next; ++ unless ($self->{conf}->{normalize_charset}) { ++ if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) { ++ # Matt sez: "Could be asian? Autrijus suggested doing character ngrams, ++ # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan ++ # to me! (jm) ++ while ($token =~ s/^(..?)//) { ++ push (@rettokens, "8:$1"); ++ } ++ next; ++ } + } + + if (($region == 0 && HDRS_TOKENIZE_LONG_TOKENS_AS_SKIPS) +diff -uNr /dev/null lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 2011-07-14 22:29:19.000000000 +0900 +@@ -0,0 +1,84 @@ ++# <@LICENSE> ++# Copyright 2004 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
++# </@LICENSE> ++ ++=head1 NAME ++ ++Tokenizer::MeCab - Japanese tokenizer with MeCab ++ ++=head1 SYNOPSIS ++ ++loadplugin Mail::SpamAssassin::Plugin::Tokenizer::MeCab ++ ++=head1 DESCRIPTION ++ ++This plugin tokenizes a Japanese string with MeCab that is ++the morphological analysis engine. ++ ++Text::MeCab 0.12 or over is required. ++ ++=cut ++ ++package Mail::SpamAssassin::Plugin::Tokenizer::MeCab; ++ ++use strict; ++use warnings; ++use Mail::SpamAssassin::Plugin::Tokenizer; ++ ++use vars qw(@ISA); ++@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); ++ ++# Have to do this so that RPM doesn't find these as required perl modules ++BEGIN { require MeCab; } ++our $language = 'ja'; ++our $mecab = new MeCab::Tagger(-Ochasen); ++ ++sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject, $language); ++ bless ($self, $class); ++ ++ return $self; ++} ++ ++sub tokenize { ++ my $self = shift; ++ my $text_array = shift; ++ ++ my @tokenized_array; ++ foreach my $text (@$text_array) { ++ next unless ($text); ++ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; ++ push(@tokenized_array, $text); ++ } ++ return \@tokenized_array; ++} ++ ++sub _tokenize { ++ my $text = shift; ++ ++ my @buf; ++ for (my $node = $mecab->parseToNode($text); $node->{next}; $node = $node->{next}) { ++ push(@buf, $node->{surface}); ++ } ++ my $tokenized = join(' ', @buf) . ' '; ++ return $tokenized; ++} ++ ++1; ++ +diff -uNr /dev/null lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 2011-07-14 22:29:19.000000000 +0900 +@@ -0,0 +1,111 @@ ++# <@LICENSE> ++# Copyright 2004 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. 
++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# </@LICENSE> ++ ++=head1 NAME ++ ++Tokenizer::SimpleJA - simple Japanese tokenizer ++ ++=head1 SYNOPSIS ++ ++loadplugin Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA ++ ++=head1 DESCRIPTION ++ ++This plugin simply tokenizes a Japanese string by characters other than ++the alphabet, the Chinese character, and the katakana. ++ ++=cut ++ ++package Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA; ++ ++use strict; ++use warnings; ++use Mail::SpamAssassin::Plugin::Tokenizer; ++ ++use vars qw(@ISA); ++@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); ++ ++our $language = 'ja'; ++ ++our $RE = qr{( ++ # Hiragana ++ (?: ++ \xE3\x81[\x80-\xBF] ++ | \xE3\x82[\x80-\x9F] ++ )+ ++ # Katakana ++ | (?: ++ \xE3\x82[\xA0-\xBF] ++ | \xE3\x83[\x80-\xBF] ++ )+ ++ # Kanji ++ | (?: ++ \xE3[\x90-\xBF][\x80-\xBF] ++ | [\xE4-\xE9][\x80-\xBF]{2} ++ | \xEF[\xA4-\xAB][\x80-\xBF] ++ )+ ++ # Fullwidth ++ | (?: ++ \xEF\xBC[\x80-\xBF] ++ | \xEF\xBD[\x80-\x9F] ++ )+ ++ # Others ++ | [\xC0-\xDF][\x80-\xBF] ++ | [\xE0-\xE2][\x80-\xBF]{2} ++ | \xE3\x80[\x80-\xBF] ++ | \xE3[\x84-\x8F][\x80-\xBF] ++ | [\xEA-\xEE][\x80-\xBF]{2} ++ | \xEF[\x80-\xA3][\x80-\xBF] ++ | \xEF[\xAC-\xBB][\x80-\xBF] ++ | \xEF\xBD[\xA0-\xBF] ++ | \xEF[\xBE-\xBF][\x80-\xBF] ++ | [\xF0-\xF7][\x80-\xBF]{3} ++)}x; ++ ++sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject, $language); ++ bless ($self, $class); ++ ++ return $self; ++} ++ ++sub tokenize { ++ my $self = shift; ++ my $text_array = shift; ++ ++ my 
@tokenized_array; ++ foreach my $text (@$text_array) { ++ next unless ($text); ++ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; ++ push(@tokenized_array, $text); ++ } ++ return \@tokenized_array; ++} ++ ++sub _tokenize { ++ my $text = shift; ++ ++ $text =~ s/$RE/$1 /og; ++ $text = ' ' . $text; ++ return $text; ++} ++ ++1; ++ +diff -uNr /dev/null lib/Mail/SpamAssassin/Plugin/Tokenizer.pm +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 2011-07-14 22:35:46.000000000 +0900 +@@ -0,0 +1,115 @@ ++# <@LICENSE> ++# Copyright 2004 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# </@LICENSE> ++ ++=head1 NAME ++ ++Mail::SpamAssassin::Plugin::Tokenizer - Tokenizer plugin base class ++ ++=head1 SYNOPSIS ++ ++=head2 SpamAssassin configuration: ++ ++ loadplugin MyTokenizerPlugin /path/to/MyTokenizerPlugin.pm ++ ++=head2 Perl code: ++ ++ use Mail::SpamAssassin::Plugin::Tokenizer; ++ use vars qw(@ISA); ++ @ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); ++ # language to use this plugin ++ our $language = 'ja'; ++ ++ # constructor: register language ++ sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ ++ # some boilerplate... ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject, $language); ++ bless ($self, $class); ++ ++ return $self; ++ } ++ ++ # tokenize function ++ sub tokenize { ++ my $self = shift; ++ my $text_array_ref = shift; ++ ++ ...... 
++ ++ return $tokenized_array_ref; ++ } ++ ++ ++=head1 DESCRIPTION ++ ++This plugin is the base class of tokenizer plugin. ++You must define tokenize() and $language ++ ++=head1 INTERFACE ++ ++ sub tokenize { ++ my $self = shift; ++ my $text_array_ref = shift; ++ ++ ...... ++ ++ return $tokenized_array_ref; ++ } ++ ++=cut ++ ++package Mail::SpamAssassin::Plugin::Tokenizer; ++ ++use Mail::SpamAssassin::Plugin; ++use Mail::SpamAssassin::Logger; ++use strict; ++use warnings; ++use bytes; ++ ++use vars qw(@ISA); ++@ISA = qw(Mail::SpamAssassin::Plugin); ++ ++sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ my $language = shift; ++ ++ # some boilerplate... ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject); ++ bless ($self, $class); ++ ++ if ($language) { ++ $self->{main}->{conf}->{tokenizer}->{$language} = $self; ++ } ++ else { ++ dbg("plugin: $self: \$language is not defined"); ++ } ++ ++ return $self; ++} ++ ++sub tokenize { ++ my ($self, $ref) = @_; ++ ++ return $ref; ++} ++ ++1; ++ +diff -uNr /dev/null lib/Mail/SpamAssassin/Util/Charset.pm +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Util/Charset.pm 2011-07-14 22:29:19.000000000 +0900 +@@ -0,0 +1,471 @@ ++# <@LICENSE> ++# Copyright 2006 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
++# </@LICENSE> ++ ++ ++=head1 NAME ++ ++ Mail::SpamAssassin::Util::Charset.pm - Utility for charset and language ++ ++=head1 SYNOPSIS ++ ++ my ($decoded, $detected) = Mail::SpamAssassin::Util::Charset::normalize_charset($str, $charset); ++ my $language = Mail::SpamAssassin::Util::Charset::get_language($str, $charset); ++ ++=head1 DESCRIPTION ++ ++This module implements utility methods for charset and language. ++ ++=cut ++ ++package Mail::SpamAssassin::Util::Charset; ++ ++use strict; ++use warnings; ++use Encode; ++use Encode::Guess; ++use Encode::Alias; ++ ++use vars qw ( ++ @ISA @EXPORT ++); ++ ++require Exporter; ++ ++@ISA = qw(Exporter); ++@EXPORT = qw(normalize_charset get_language); ++ ++########################################################################### ++ ++use constant HAS_ENCODE_DETECT => eval { require Encode::Detect::Detector; }; ++use constant HAS_ENCODE_HANEXTRA => eval { require Encode::HanExtra; }; ++use constant HAS_ENCODE_EUCJPMS => eval { require Encode::EUCJPMS; }; ++ ++########################################################################### ++ ++our $KANA_HAN_RE = qr{ ++ # Hiragana and Katakana ++ \xE3[\x81-\x83][\x80-\xBF] ++ # Han ++ | \xE3[\x90-\xBF][\x80-\xBF] ++ | [\xE4-\xE9][\x80-\xBF]{2} ++ | \xEF[\xA4-\xAB][\x80-\xBF] ++}x; ++ ++our %enc2lang; ++our %lang2enc; ++our %scr2lang; ++our %cjkscr2lang; ++our @scrorder; ++ ++BEGIN { ++ ++ # See the following URL about this map: ++ # http://czyborra.com/charsets/iso8859.html ++ # http://czyborra.com/charsets/codepages.html ++ # http://czyborra.com/charsets/cyrillic.html ++ # http://en.wikipedia.org/wiki/ISO_8859 ++ # http://www.w3.org/International/O-charset-lang.html ++ %enc2lang = ( ++ # buint-in Encodings and Encode::Byte ++ # N. America ++ 'ascii' => 'en', ++ 'cp437' => 'en', ++ 'cp863' => 'weurope', ++ ++ # W. 
Europe (Latin1, Latin9) ++ # fr es ca eu pt it sq rm nl de da sv no fi fo is ga gd en af ++ 'iso-8859-1' => 'weurope', ++ 'iso-8859-15' => 'weurope', ++ 'cp850' => 'weurope', ++ 'cp860' => 'weurope', ++ 'cp1252' => 'weurope', ++ 'MacRoman' => 'weurope', ++ ++ # Cntrl. Europe / Latin2 / Latin10 ++ # hr cs hu pl sr sk sl ++ 'iso-8859-2' => 'ceurope', ++ 'cp852' => 'ceurope', ++ 'cp1250' => 'ceurope', ++ 'MacCentralEurRoman' => 'ceurope', ++ 'MacCroatian' => 'ceurope', ++ 'iso-8859-16' => 'ceurope', ++ 'MacRomanian' => 'ceurope', ++ ++ # Latin3 (Esperanto, Maltese, and Turkish. Turkish is now on 8859-9.) ++ # eo mt ++ 'iso-8859-3' => 'seurope', ++ ++ # Baltics (Latin4, Latin7) ++ # lv lt ++ 'iso-8859-4' => 'neurope', ++ 'iso-8859-13' => 'baltic', ++ 'cp1257' => 'baltic', ++ ++ # Nordics (Latin6) ++ # et kl iu se ++ 'iso-8859-10' => 'nordic', ++ ++ # Cyrillics ++ # bg be uk sr mk ru ++ 'iso-8859-5' => 'ru', ++ 'cp855' => 'ru', ++ 'cp1251' => 'ru', ++ 'cp866' => 'ru', ++ 'MacCyrillic' => 'ru', ++ 'koi8-r' => 'ru', ++ 'MacUkrainian' => 'uk', ++ 'koi8-u' => 'uk', ++ ++ # Arabic ++ 'iso-8859-6' => 'ar', ++ 'cp864' => 'ar', ++ 'cp1256' => 'ar', ++ 'MacArabic' => 'ar', ++ 'cp1006' => 'fa', ++ 'MacFarsi' => 'fa', ++ ++ # Greek ++ 'iso-8859-7' => 'el', ++ 'cp1253' => 'el', ++ 'MacGreek' => 'el', ++ ++ # Hebrew ++ # he yi ++ 'iso-8859-8' => 'he', ++ 'cp862' => 'he', ++ 'cp1255' => 'he', ++ 'MacHebrew' => 'he', ++ ++ # Turkish ++ 'iso-8859-9' => 'tr', ++ 'cp857' => 'tr', ++ 'cp1254' => 'tr', ++ 'MacTurkish' => 'tr', ++ ++ # Thai ++ 'iso-8859-11' => 'th', ++ 'cp874' => 'th', ++ ++ # Celtics (Latin8) ++ # gd cy br ++ 'iso-8859-14' => 'celtic', ++ ++ # Vietnamese ++ 'viscii' => 'vi', ++ 'cp1258' => 'vi', ++ ++ # Encode::CN ++ 'euc-cn' => 'zh', ++ 'cp936' => 'zh', ++ 'hz' => 'zh', ++ ++ # Encode::TW ++ 'big5-eten' => 'zh', ++ 'big5-hkscs' => 'zh', ++ 'cp950' => 'zh', ++ ++ # Encode::JP ++ 'euc-jp' => 'ja', ++ 'shiftjis' => 'ja', ++ '7bit-jis' => 'ja', ++ 'iso-2022-jp' => 'ja', ++ 
'iso-2022-jp-1' => 'ja', ++ 'cp932' => 'ja', ++ ++ # Encode::KR ++ 'euc-kr' => 'ko', ++ 'cp949' => 'ko', ++ 'johab' => 'ko', ++ 'iso-2022-kr' => 'ko', ++ ++ # Encode::HanExtra ++ 'euc-tw' => 'zh', ++ 'gb18030' => 'zh', ++ ++ # Encode::JIS2K ++ 'euc-jisx0213' => 'ja', ++ 'shiftjisx0123' => 'ja', ++ 'iso-2022-jp-3' => 'ja', ++ ++ # Encode::EUCJPMS ++ 'eucJP-ms' => 'ja', ++ 'cp51932' => 'ja', ++ 'cp50220' => 'ja', ++ 'cp50221' => 'ja', ++ ++ ); ++ ++ %lang2enc = ( ++ # Latin1 ++ 'en' => ['ascii'], ++ 'weurope' => ['cp1252'], ++ ++ # Latin2 ++ 'ceurope' => ['cp1250'], ++ ++ # Latin3 ++ 'seurope' => ['iso-8859-3'], ++ ++ # Latin4 ++ 'neurope' => ['iso-8859-4'], ++ ++ # Latin5 ++ 'tr' => ['cp1254'], ++ ++ # Latin6 ++ 'nordic' => ['iso-8859-10'], ++ ++ # Latin7 ++ 'baltic' => ['cp1257'], ++ ++ # Latin8 ++ 'celtic' => ['iso-8859-14'], ++ ++ # Non Latin ++ 'ru' => ['koi8-r', 'cp1251'], ++ 'uk' => ['koi8-u'], ++ ++ 'ar' => ['cp1256'], ++ 'el' => ['cp1253'], ++ 'he' => ['cp1255'], ++ 'th' => ['cp874'], ++ 'vi' => ['viscii', 'cp1258'], ++ 'zh' => ['euc-cn', 'cp950'], ++ 'ja' => ['euc-jp', 'cp932'], ++ 'ko' => ['euc-kr', 'cp949'], ++ ++ ); ++ ++ %scr2lang = ( ++ 'InLatin1Supplement' => ['weurope'], ++ 'InLatinExtendedA' => [ ++ 'ceurope', ++ 'seurope', ++ 'tr', ++ 'vi' ++ ], ++ 'InLatinExtendedB' => [ ++ 'nordic', ++ 'baltic', ++ 'celtic' ++ ], ++ 'Thai' => ['th'], ++ 'Cyrillic' => ['ru', 'uk'], ++ 'Arabic' => ['ar'], ++ 'Greek' => ['el'], ++ 'Hebrew' => ['he'], ++ ); ++ ++ # better detection for CJK ++ @scrorder = ('Hiragana','Katakana','Hangul','Han',keys(%scr2lang)); ++ %cjkscr2lang = ( ++ 'Hiragana' => ['ja'], ++ 'Katakana' => ['ja'], ++ 'Hangul' => ['ko'], ++ 'Han' => ['zh', 'ja', 'ko'], ++ ); ++ ++ unless (HAS_ENCODE_HANEXTRA) { ++ Encode::Alias::define_alias( qr/^gb18030$/i => ' "euc-cn"' ); ++ } ++ Encode::Alias::define_alias( qr/^unicode-1-1-(.+)$/i => ' "$1"' ); ++ Encode::Alias::define_alias( qr/^TIS-620$/i => ' "iso-8859-11"' ); ++ Encode::Alias::define_alias( 
qr/^x-mac-(.+)$/i => ' "Mac$1"' ); ++ Encode::Alias::define_alias( qr/^Shift_JIS$/i => ' "cp932"' ); ++ if (HAS_ENCODE_EUCJPMS) { ++ Encode::Alias::define_alias( qr/^iso-2022-jp$/i => ' "cp50221"' ); ++ } ++} ++ ++sub get_language { ++ my $str = shift; # $str must be UTF-8 encoding ++ my $charset = shift; ++ ++ return 'en' unless $charset; ++ if ($charset !~ /^utf/i) { ++ return $enc2lang{$charset}; ++ } elsif (defined($str)) { ++ $str =~ s/[\x00-\x7F]//g; # remove ASCII characters ++ return 'en' if ($str eq ''); ++ ++ my %handled; ++ $str = Encode::decode_utf8($str) unless (Encode::is_utf8($str)); ++ foreach my $scr (@scrorder) { ++ next if ($str !~ /\p{$scr}/); ++ my $scrlangs = exists($cjkscr2lang{$scr}) ? $cjkscr2lang{$scr} : $scr2lang{$scr}; ++ foreach my $lang (@$scrlangs) { ++ next if (exists($handled{$lang})); ++ foreach my $enc (@{$lang2enc{$lang}}) { ++ my $scratch = $str; ++ Encode::encode($enc, $scratch, Encode::FB_QUIET); ++ return $lang if ($scratch eq ''); ++ } ++ $handled{$lang} = 1; ++ } ++ } ++ } ++ return 'en'; ++} ++ ++# TEST 1: try conversion to use the specified charset. ++# TEST 2: try conversion to use Encode::Detect. ++# TEST 3: try conversion to use Encode::Guess. ++sub normalize_charset { ++ my $str = shift; ++ my $charset = shift; ++ ++ return wantarray ? ($str, 'ascii') : $str unless ($str); ++ ++ my $decoded; ++ my $detected; ++ ++ if ($charset) { ++ ($decoded, $detected) = _specified_encoding($str, $charset); ++ } ++ unless ($detected) { ++ ($decoded, $detected) = _encode_detect($str); ++ } ++ unless ($detected) { ++ ($decoded, $detected) = _encode_guess($str); ++ } ++ unless ($detected) { ++ return ($str, undef); ++ } ++ $decoded =~ s/^\x{feff}//g; ++ $decoded = Encode::encode_utf8($decoded); ++ ++ # unfold hiragana, katakana and han ++ if ($detected =~ /^(?:UTF|EUC|BIG5|GB|SHIFTJIS|ISO-2022|CP969$|CP932$|CP949|CP50221$)/i) { ++ $decoded =~ s/($KANA_HAN_RE)\012($KANA_HAN_RE)/$1$2/og; ++ } ++ return wantarray ? 
($decoded, $detected) : $decoded; ++} ++ ++sub _specified_encoding { ++ my $str = shift; ++ my $encoding = shift; ++ ++ my $detected; ++ my $decoded; ++ ++ return (undef, undef) unless ($encoding); ++ ++ # note: ISO-2022-* is not deistinguish from US-ASCII ++ return (undef, undef) if ($str =~ /\e/ and $encoding !~ /^ISO-2022/i); ++ ++ # UTF-16|32 encoding without BOM cannot be trusted. ++ return (undef, undef) if ($encoding =~ /^UTF-32$/i and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/); ++ return (undef, undef) if ($encoding =~ /^UTF-16$/i and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/); ++ ++ #$encoding = _get_alias($encoding); ++ my $encoder = Encode::find_encoding($encoding); ++ if (ref($encoder)) { ++ $decoded = $encoder->decode($str,Encode::FB_QUIET); ++ $detected = $encoder->name if ($str eq ''); ++ } ++ return ($decoded, $detected); ++} ++ ++sub _encode_detect { ++ return undef unless HAS_ENCODE_DETECT; ++ my $str = shift; ++ ++ # UTF-16|32 encoding without BOM cannot be trusted. ++ return (undef, undef) if ($str =~ /\x00\x00/ and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/); ++ return (undef, undef) if ($str =~ /\x00/ and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/); ++ ++ my $decoded; ++ my $detected = Encode::Detect::Detector::detect($str); ++ if ($detected) { ++ $detected = _get_alias($detected); ++ my $encoder = Encode::find_encoding($detected); ++ if (ref($encoder)) { ++ $decoded = $encoder->decode($str); ++ $detected = $decoded ? $encoder->name : undef; ++ } ++ else { ++ $detected = undef; ++ } ++ } ++ return ($decoded, $detected); ++} ++ ++sub _encode_guess { ++ my $str = shift; ++ ++ my $detected; ++ my $decoded; ++ my $encoder; ++ ++ # Step 1: Examine ISO-2022-*. 
++ if ($str =~ /\e/) { ++ $Encode::Guess::NoUTFAutoGuess = 1; ++ $encoder = Encode::Guess::guess_encoding($str, ++ qw/cp50221 7bit-jis iso-2022-kr/); ++ $Encode::Guess::NoUTFAutoGuess = 0; ++ } ++ ++ # Step 2: Examine US-ASCII/UTF-(8|16|32) ++ unless (ref($encoder)) { ++ $Encode::Guess::NoUTFAutoGuess = 0; ++ $encoder = Encode::Guess::guess_encoding($str); ++ } ++ ++ # Step 3: Examine other encodings ++ unless (ref($encoder)) { ++ $Encode::Guess::NoUTFAutoGuess = 1; ++ eval { ++ if ($str =~ /[\x80-\xFF]{4}/) { ++ $encoder = Encode::Guess::guess_encoding($str, ++ qw/euc-cn big5-eten euc-jp cp932 euc-kr cp949/); ++ } ++ else { ++ $encoder = Encode::Guess::guess_encoding($str, ++ qw/iso-8859-1 cp1252/); ++ } ++ }; ++ $Encode::Guess::NoUTFAutoGuess = 0; ++ } ++ if (ref($encoder)) { ++ $detected = $encoder->name; ++ if ($detected) { ++ $decoded = $encoder->decode($str); ++ } ++ } ++ return ($decoded, $detected); ++} ++ ++sub _get_alias { ++ my $encoding = shift; ++ ++ unless (HAS_ENCODE_HANEXTRA) { ++ $encoding =~ s/^gb18030$/euc-cn/i; ++ } ++ $encoding =~ s/^unicode-1-1-(.+)$/$1/i; ++ $encoding =~ s/^TIS-620$/iso-8859-11/i; ++ $encoding =~ s/x-mac-(.+)$/Mac$1/i; ++ $encoding =~ s/^Shift_JIS$/cp932/i; ++ if (HAS_ENCODE_EUCJPMS) { ++ $encoding =~ s/^iso-2022-jp$/cp50221/i; ++ $encoding =~ s/^euc-jp$/cp51932/i; ++ } ++ ++ return $encoding; ++} ++ ++ ++1; ++ diff -ruN -ruN /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.4.0-ja.plist ./files/spamassassin-3.4.0-ja.plist --- /usr/ports/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.4.0-ja.plist 1970-01-01 09:00:00.000000000 +0900 +++ ./files/spamassassin-3.4.0-ja.plist 2014-03-04 05:48:15.000000000 +0900 @@ -0,0 +1,12 @@ +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm +@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer.pm +%%SITE_PERL%%/Mail/SpamAssassin/Util/Charset.pm 
+%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Tokenizer.3.gz +%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Tokenizer::MeCab.3.gz +%%PERL5_MAN3%%/Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA.3.gz +%%PERL5_MAN3%%/Mail::SpamAssassin::Util::Charset.3.gz +@unexec if cmp -s %D/%%ETCDIR%%/%%TOKENIZER_PRE%%.sample %D/%%ETCDIR%%/%%TOKENIZER_PRE%%; then rm -f %D/%%ETCDIR%%/%%TOKENIZER_PRE%%; fi +%%ETCDIR%%/%%TOKENIZER_PRE%%.sample +@exec if [ ! -f %B/%%TOKENIZER_PRE%% ]; then cp -p %B/%f %B/%%TOKENIZER_PRE%%; fi diff -ruN -ruN /usr/ports/japanese/p5-Mail-SpamAssassin/pkg-message ./pkg-message --- /usr/ports/japanese/p5-Mail-SpamAssassin/pkg-message 2014-01-23 00:52:06.000000000 +0900 +++ ./pkg-message 2014-03-04 11:12:06.000000000 +0900 @@ -1,42 +1,3 @@ -************************************************************************* -* _ _____ _____ _____ _ _ _____ ___ ___ _ _ * -* / \|_ _|_ _| ____| \ | |_ _|_ _/ _ \| \ | | * -* / _ \ | | | | | _| | \| | | | | | | | | \| | * -* / ___ \| | | | | |___| |\ | | | | | |_| | |\ | * -* /_/ \_\_| |_| |_____|_| \_| |_| |___\___/|_| \_| * -* * -* See PREFIX/share/doc/p5-Mail-SpamAssassin/INSTALL, * -* and PREFIX/share/doc/p5-Mail-SpamAssassin/UPGRADE, * -* or http://spamassassin.org/dist/INSTALL and * -* http://spamassassin.org/dist/UPGRADE BEFORE enabling * -* this version of SpamAssassin for important information * -* regarding changes in this version. * -* * -* SpamAssassin may require additional configuration in * -* PREFIX/etc/mail/spamassassin/init.pre depending on * -* the options you have installed. Otherwise, annoying * -* (but harmless) error messages may result. Read the * -* files listed above. * -* * -************************************************************************* -You may wish to run sa-update now to obtain the latest rules. - -NOTE: FREEBSD users: If you are updating from a version prior to 3.20. -sa-update now places state files in /var/db/spamassassin and not -/var/lib/spamassassin. 
This is to be consistant with Freebsd file -directory conventions. - -If you run sa-compile, you will notice that files are in -/var/db/spamassassin/compiled/<perlversion>/<version> instead of -/var/db/spamassassin/compiled/<version>. -No attempts have been made to move old versions over. You must recompile. - -If you are running with spamd, you must add the following to rc.conf: -spamd_enable="YES" - -Security Note: If you did NOT deselected AS_ROOT, spamd will be running -as root. To change this, also add this to rc.conf: -spamd_flags="-u spamd -H /var/spool/spamd" ************************************************************************ For Japanese users, see documents in --------------060504000802010404090809--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201403040410.s244A1IO042442>