~dricottone/mail-filters

9302fb18f5020d65665b8177610811c96f999880 — Dominic Ricottone 4 years ago
Initial commit
A  => LICENSE.md +31 -0
@@ 1,31 @@
BSD 3-Clause License
====================

_Copyright (c) 2020, Dominic Ricottone_  
_All rights reserved._

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


A  => Makefile +25 -0
@@ 1,25 @@
# Where the filters are installed. Both knobs may be overridden from the
# command line or the environment, e.g. `make SHARE_DIR=/usr/share install`.
SHARE_DIR?=/usr/local/share
INSTALL_DIR?=$(SHARE_DIR)/mail-filters

# Optional staging root prepended to every installed path (used by packagers);
# empty for a direct install.
DESTDIR?=

# Every filter shipped from src/. This is the single list used by both
# `install` and `uninstall`, so the two targets cannot drift apart.
FILTERS=ao3.awk debian.awk fanfiction.awk freebsd.awk googlegroups.awk html.sh mailman.awk ubuntu.awk

# Neither target produces a file of its own name; declare them phony so a
# stray file called `install` or `uninstall` cannot disable them.
.PHONY: install uninstall

# Copy every filter, executable, into the installation directory.
install:
	mkdir -m755 -p "$(DESTDIR)$(INSTALL_DIR)"
	for f in $(FILTERS); do \
		install -m755 "src/$$f" "$(DESTDIR)$(INSTALL_DIR)/$$f" || exit 1; \
	done

# Remove the installed filters. `rm -f` keeps the target usable after a
# partial install or a repeated uninstall; the directory is only removed if it
# exists, and rmdir still refuses to delete it when foreign files remain.
uninstall:
	for f in $(FILTERS); do \
		rm -f "$(DESTDIR)$(INSTALL_DIR)/$$f" || exit 1; \
	done
	if [ -d "$(DESTDIR)$(INSTALL_DIR)" ]; then rmdir "$(DESTDIR)$(INSTALL_DIR)"; fi


A  => README.md +89 -0
@@ 1,89 @@
# mail-filters

A set of text processing scripts that 'clean' plaintext email messages. This includes

 + removing cruft and repetitive text
 + removing non-plaintext MIME parts
 + inserting ANSI color codes
 + standardizing whitespace



## Installation

Run `sudo make install`. All it's going to do is copy the filters to `/usr/local/share/mail-filters` (set `INSTALL_DIR` to install elsewhere), though. I don't know why I even wrote this.



## Uninstallation

Run `sudo make uninstall`. Again, all we're doing is removing the files that were copied during installation... This is a bit much...



## Recommended aerc configuration

In `aerc.conf`:

```
[filters]
text/html                =path/to/html.sh
from,Archive of Our Own  =path/to/ao3.awk
from,FanFiction          =path/to/fanfiction.awk
to,~.@lists.ubuntu.com   =path/to/ubuntu.awk
to,~.@lists.debian.org   =path/to/debian.awk
from,~.@freebsd.org      =path/to/freebsd.awk
from,~.+@googlegroups.com=path/to/googlegroups.awk
from,~.+@archlinux.org   =path/to/mailman.awk
from,~.+@python.org      =path/to/mailman.awk
from,~.+@gnu.org         =path/to/mailman.awk
text/*                   =cat
```



## Recommended mutt configuration

In `muttrc`:

```
set display_filter = "path/to/filter.sh"
```

And in `filter.sh`:

```
#!/bin/sh

# Buffer the whole message so that it can be inspected first and then
# replayed through the matching filter. The variable name must match the
# "$TMP" used below (shell variable names are case-sensitive).
TMP=$(mktemp /tmp/filter.XXXXXXXX) || exit 1
cat > "$TMP"

# Dispatch on the sender/recipient headers; the first match wins.
if grep --quiet -e '^From: Archive of Our Own' "$TMP"; then
  path/to/ao3.awk < "$TMP"
elif grep --quiet -e '^From: FanFiction' "$TMP"; then
  path/to/fanfiction.awk < "$TMP"
elif grep --quiet -e '^To:.*@lists.ubuntu.com' "$TMP"; then
  path/to/ubuntu.awk < "$TMP"
elif grep --quiet -e '^To:.*@lists.debian.org' "$TMP"; then
  path/to/debian.awk < "$TMP"
elif grep --quiet -e '^From:.*@freebsd.org' "$TMP"; then
  path/to/freebsd.awk < "$TMP"
elif grep --quiet -e '^From:.*@googlegroups.com' "$TMP"; then
  path/to/googlegroups.awk < "$TMP"
elif grep --quiet -e '^From:.*@archlinux.org' "$TMP"; then
  path/to/mailman.awk < "$TMP"
elif grep --quiet -e '^From:.*@python.org' "$TMP"; then
  path/to/mailman.awk < "$TMP"
elif grep --quiet -e '^From:.*@gnu.org' "$TMP"; then
  path/to/mailman.awk < "$TMP"
else
  # no filter applies: pass the message through unchanged
  cat "$TMP"
fi

rm -f "$TMP"
```



## License

All materials of this repository are licensed under BSD-3. A copy of this license is included in the file LICENSE.md.


A  => src/ao3.awk +53 -0
@@ 1,53 @@
#!/bin/awk -f

# ao3.awk
# =======
# A filter (as for mutt or aerc) intended to clean & decorate plaintext mail
# from Archive of Our Own (AO3), highlighting descriptive information.
#
# Reads a message on standard input and prints the decorated text. Output
# stops at the first line that consists solely of dashes.

BEGIN {
  # ANSI escape sequences used for decoration
  highlight="\033[44m";
  dim="\033[2m";
  cyan="\033[36m";
  reset="\033[0m";

  # closes the highlight before a tag separator and reopens it after it
  comma_replacement=reset ", " highlight;
}
{
  # stop processing at end of mail, marked by dashed line; the pattern is
  # anchored at the end so that ordinary text which merely starts with "-"
  # does not truncate the message
  if ($0 ~ /^-+\s*$/) exit 0;

  # color the word count; "&" in the replacement stands for the matched text
  sub(/\([1-9][0-9]* words\)/, reset dim cyan "&" reset);

  if ($0 ~ /posted a (new|backdated)/) {
    # drop the profile URL and color the author (first field)
    sub(/\(http.*\) posted/,"posted");
    $1=dim cyan $1 reset;
  }
  else if ($0 ~ /^by/) {
    # drop the profile URL and color the author (second field)
    sub(/ \(http.*\)/,"");
    $2=dim cyan $2 reset;
  }
  else if ($0 ~ /^(Chapters|Fandom|Rating|Warnings):/) {
    # color everything after the label
    $1=$1 dim cyan;
    $0=$0 reset;
  }
  else if ($0 ~ /^(Relationships|Characters):/) {
    # highlight each comma-separated tag individually; values start at $2
    gsub(/, /,comma_replacement);
    $2=highlight $2;
    $0=$0 reset;
  }
  else if ($0 ~ /^Additional Tags:/) {
    # the label is two words long, so the values start at $3
    gsub(/, /,comma_replacement);
    $3=highlight $3;
    $0=$0 reset;
  }

  print $0;
}


A  => src/debian.awk +54 -0
@@ 1,54 @@
#!/bin/awk -f

# debian.awk
# ==========
# A filter (as for mutt or aerc) intended to clean & decorate plaintext mail
# to @lists.debian.org, highlighting header information.
#
# Reads a message on standard input and prints the decorated text; PGP
# signature blocks are dropped.

BEGIN {
  in_header=0;      # 1 while between the two "- -----" rules
  do_not_print=0;   # 1 while inside a PGP signature block

  # ANSI escape sequences used for decoration
  dim="\033[2m";
  yellow="\033[33m";
  cyan="\033[36m";
  reset="\033[0m";

  # Release names highlighted in the message body, as one alternation.
  # A single pattern guarantees that every occurrence is wrapped exactly once:
  # "stable" is a substring of "oldstable" and "unstable", so substituting the
  # names one at a time in unspecified array order could nest the escapes.
  # The longer names are listed first.
  releases="oldstable|unstable|stable|testing|squeeze|wheezy|jessie|stretch|buster|bullseye|bookworm|trixie";
}
{
  # hide blocks of non-plaintext
  if (do_not_print==1 && $0 ~ /END PGP SIGNATURE/) do_not_print=0;
  else if ($0 ~ /BEGIN PGP SIGNATURE/) do_not_print=1;

  else {
    # identify header section: each "- -----" rule toggles the state
    if (in_header==1 && $0 ~ /^-\s+-{5,}/) in_header=0;
    else if ($0 ~ /^-\s+-{5,}/) in_header=1;

    if (in_header==1) {
      # color the header text but leave the rule itself untouched
      if ($0 !~ /^-/) {
        $0=dim cyan $0 reset;
      }
    }
    else {
      if ($0 ~ /^(Package|Mailing list)\s*:/) {
        # color the remainder of the line from the third field onwards
        $3=dim cyan $3;
        $0=$0 reset;
      }
      else if ($0 ~ /^(CVE ID)\s*:/) {
        # color the remainder of the line from the fourth field onwards
        $4=dim cyan $4;
        $0=$0 reset;
      }

      # highlight release names; "&" stands for the matched name
      gsub(releases, yellow "&" reset);
    }

    if (do_not_print==0) print $0;
  }
}



A  => src/fanfiction.awk +48 -0
@@ 1,48 @@
#!/bin/awk -f

# fanfiction.awk
# ==============
# A filter (as for mutt or aerc) intended to clean & decorate plaintext mail
# from FanFiction.net, highlighting descriptive information.
#
# Reads a message on standard input and prints the decorated text. Output
# stops at the line starting with "FanFiction http".

BEGIN {
  # ANSI escape sequences used for decoration
  highlight="\033[44m";
  dim="\033[2m";
  cyan="\033[36m";
  reset="\033[0m";

  # close the highlight before a separator and reopen it afterwards
  comma_replacement=reset ", " highlight;
  brace_replacement=reset "] " highlight;
  # close the highlight before a final bracket without reopening it
  close_brace_replacement=reset "] ";
}
{
  # stop processing at end of mail, marked by website URL
  if ($0 ~ /^FanFiction http/) exit 0;

  if ($0 ~ /^New (story|chapter) from/) {
    # color the remainder of the line from the fourth field onwards
    $4=cyan $4;
    $0=$0 reset;
  }
  else if ($0 ~ /^(Words|Genre|Rated):/) {
    # color everything after the label
    $1=$1 dim cyan;
    $0=$0 reset;
  }
  else if ($0 ~ /^Character:/) {
    # start highlighting after an opening bracket, if there is one
    if ($2 ~ /^\[/) {
      $2="[" highlight substr($2,2);
    }
    else {
      $2=highlight $2;
    }
    gsub(/, /,comma_replacement);
    gsub(/\] /,brace_replacement);
    # A field never contains trailing whitespace, so the bracket that ends the
    # line must be matched with optional (not mandatory) whitespace; otherwise
    # this branch is unreachable and the final "]" stays highlighted.
    if ($NF ~ /\]\s*$/) {
      sub(/\]\s*$/,close_brace_replacement,$NF);
    }
    else {
      $0=$0 reset;
    }
  }

  print $0;
}


A  => src/freebsd.awk +90 -0
@@ 1,90 @@
#!/bin/awk -f

# freebsd.awk
# ===========
# A filter (as for mutt or aerc) intended to clean & decorate plaintext mail
# from @freebsd.org, highlighting header information.
#
# Reads a message on standard input and prints the decorated text; PGP
# signature blocks are dropped.

BEGIN {
  in_header=0;      # 1 while inside the advisory header
  do_not_print=0;   # 1 while inside a PGP signature block

  # ANSI escape sequences used for decoration
  dim="\033[2m";
  yellow="\033[33m";
  cyan="\033[36m";
  reset="\033[0m";
}
{
  # hide blocks of non-plaintext
  if (do_not_print==1 && $0 ~ /END PGP SIGNATURE/) do_not_print=0;
  else if ($0 ~ /BEGIN PGP SIGNATURE/) do_not_print=1;

  else {
    # identify header section: it starts at a "=====" rule or a known label
    # and ends at the first blank line
    if (in_header==1 && $0 ~ /^\s*$/) in_header=0;
    else if ($0 ~ /^(={5,}|(Topic|Category|Module|Announced|Credits|Affects|Corrected|CVE Name):)/) in_header=1;

    if (in_header==1) {
      # highlight header details while preserving whitespace: assigning to a
      # field collapses the column padding, so it is rebuilt by hand
      if ($0 ~ /^(Topic|Category|Module|Announced|Credits|Affects|Corrected):/) {
        whitespace=substr("                ",length($1)+2)
        $2=whitespace dim cyan $2;
        $0=$0 reset;
      }
      else if ($0 ~ /^CVE Name:/) {
        # the label is two words long, so the value starts at $3
        whitespace=substr("                ",length($1)+length($2)+3)
        $3=whitespace dim cyan $3;
        $0=$0 reset;
      }
      else if ($0 !~ /^=/) {
        $0=dim cyan $0 reset;
      }

      # header text is cyan, so return to cyan after a release name
      after_release=cyan;
    }
    else {
      # highlight section titles
      if ($0 ~ /^(I|II|III|IV|V|VI|VII)\.\s/) {
        $0=yellow $0 reset;
      }
      else if ($0 ~ /^(1|2|3|4|a|b|c|d)\)\s/) {
        $1=yellow $1 reset;
      }
      # color syntax sections
      else if ($1=="#") {
        $0=cyan $0 reset;
      }

      # body text is uncolored, so reset after a release name
      after_release=reset;
    }

    # highlight release names such as "12.1-STABLE" or "11.3-RELEASE-p10".
    # One pattern covers any MAJOR.MINOR pair and any patch level, and matches
    # the whole patch number; "&" stands for the matched release name.
    gsub(/[0-9]+\.[0-9]+-(STABLE|RELEASE-p[0-9]+)/, yellow "&" after_release);

    if (do_not_print==0) print $0;
  }
}



A  => src/googlegroups.awk +69 -0
@@ 1,69 @@
#!/bin/awk -f

# googlegroups.awk
# ================
# Display filter (for mutt, aerc and the like) for plaintext digests sent
# from @googlegroups.com: header information is colored and "Url:" lines are
# dropped from the output.

BEGIN {
  in_header=0;      # 1 while between a pair of "=====" rules
  do_not_print=0;   # number of lines, counting the current one, to suppress

  dim="\033[2m";
  yellow="\033[33m";
  cyan="\033[36m";
  reset="\033[0m";
}

# Colors everything that follows the first field of the current line.
function tint_after_label() {
  $1=$1 dim cyan;
  $0=$0 reset;
}

{
  # every "=====" rule flips the header state
  if ($0 ~ /^={5,}/) in_header=(in_header==1) ? 0 : 1;

  # lines that are being suppressed are not inspected at all
  if (do_not_print==0) {
    if (in_header==1) {
      if ($0 ~ /^Topic:/) tint_after_label();
      # drop this line
      else if ($0 ~ /^Url:/) do_not_print=1;
    }
    else if ($0 ~ /^(From|Date|Group):/) tint_after_label();
    else if ($0 ~ /^  - .* \[[1-9][0-9]? Updates?]/) {
      # make the update counter yellow ("&" is the matched text), print the
      # topic line here and now, and let the counter swallow the next line
      sub(/\[[1-9][0-9]? Updates?]/, reset yellow "&" reset);
      do_not_print=2;
      print dim cyan $0 reset;
    }
    else if ($0 ~ /^-+ [1-9][0-9]? of [1-9][0-9]? -/) {
      # yellow from the second through the fourth field ("N of M")
      $2=yellow $2;
      $4=$4 reset;
    }
    # drop this line only
    else if ($0 ~ /^Url: http/) do_not_print=1;
    # drop this line and the one after it
    else if ($0 ~ /^Url:/) do_not_print=2;
  }

  if (do_not_print==0) print $0;
  else do_not_print=do_not_print-1;
}



A  => src/html.sh +9 -0
@@ 1,9 @@
#!/bin/sh

# html.sh
# =======
# A filter (as for mutt or aerc) which runs w3m--vendored from a filter
# bundled with aerc
#
# Reads HTML on standard input and writes w3m's plaintext dump to standard
# output.

# tput fails when no usable terminal description is available (e.g. TERM is
# unset); an empty expansion would make w3m consume "-dump" as the column
# count, so fall back to a fixed width instead.
cols=$(tput cols) || cols=80

w3m -T text/html -cols "${cols:-80}" -dump -o display_image=false -o display_link_number=true


A  => src/mailman.awk +90 -0
@@ 1,90 @@
#!/bin/awk -f

# mailman.awk
# ===========
# A filter (as for mutt or aerc) intended to clean & decorate plaintext mail
# from mailman servers, highlighting header information.
#
# Reads a digest on standard input and prints the decorated text; HTML parts,
# PGP signatures and some per-message headers are dropped.

BEGIN {
  in_todays_topics=0;   # 1 while inside the "Today's Topics" listing
  in_header=0;          # 1 while inside a per-message header
  do_not_print=0;       # 1 while output is suppressed

  get_boundary=0;       # 1 when the boundary is expected on the next line
  boundary="";          # MIME delimiter ("--" + boundary), once known

  # ANSI escape sequences used for decoration
  dim="\033[2m";
  yellow="\033[33m";
  cyan="\033[36m";
  reset="\033[0m";
}

# Looks for a quoted boundary="..." parameter on the current line.
# Returns the MIME delimiter ("--" followed by the boundary), or "" when the
# line carries no such parameter. The match stops at the first closing quote
# so that later quoted parameters on the same line are not swallowed.
function parse_boundary() {
  if (match($0,/boundary="[^"]*"/)==0) return "";
  # skip the 10 characters of `boundary="` and drop the closing quote
  return "--" substr($0,RSTART+10,RLENGTH-11);
}

{
  # get boundary from the line following the Content-Type header
  if (get_boundary==1) {
    get_boundary=0;
    parsed=parse_boundary();
    if (parsed!="") boundary=parsed;
    # if failed to extract boundary, resume printing
    else do_not_print=0;
  }

  # skip blocks of non-text (HTML, PGP Signatures, non-text MIME parts)
  if (do_not_print==1 && $0 ~ /(<\/html>|END PGP SIGNATURE)/) do_not_print=0;
  # the boundary is arbitrary text, so compare it literally, not as a regex
  else if (boundary!="" && index($0,boundary)>0) do_not_print=0;

  else if ($0 ~ /(<html>|BEGIN PGP SIGNATURE)/) do_not_print=1;
  else if ($0 ~ /^Content-Type:/) {
    in_header=0;
    if ($0 ~ /multipart\/alternative/) {
      do_not_print=1;
      # the boundary parameter may sit on this very line or on the next one
      parsed=parse_boundary();
      if (parsed!="") boundary=parsed;
      else get_boundary=1;
    }
    else if (boundary!="" && $0 ~ /text\/html/) do_not_print=1;
    # for other content types, resume printing
    else do_not_print=0;
  }

  else {
    # identify "Today's Topics" section
    if (in_todays_topics==1 && $0 ~ /^-{5,}/) in_todays_topics=0;
    else if ($0 ~ /^Today's Topics:/) in_todays_topics=1;

    # identify header section
    if (in_header==1 && $0 ~ /^\s*$/) { in_header=0; do_not_print=0; }
    else if ($0 ~ /^(Message|Date|From|To|Subject|Message-ID):/) in_header=1;

    if (in_todays_topics==1) {
      # end the coloring before the parenthesised part. "&" stands for the
      # matched text; building the replacement from the message itself would
      # expand any "&" or backslash it happens to contain
      sub(/\(.+\)/,reset "&");
      if ($0 ~ /^ +[1-9][0-9]?\./) {
        # numbered topic: color everything after the number
        $1=$1 dim cyan;
        $0=$0 reset;
      }
      else if ($0 !~ /^Today's Topics:/) {
        $0=dim cyan $0 reset;
      }
    }
    else if (in_header==1) {
      if ($0 ~ /^(Date|From|Subject):/) {
        do_not_print=0;
        $1=$1 dim cyan;
        $0=$0 reset;
      }
      else if ($0 ~ /^Message:/) {
        do_not_print=0;
        $1=$1 yellow;
        $0=$0 reset;
      }
      else if ($0 ~ /^(To|Message-ID):/) {
        # hide these headers (and their continuation lines)
        do_not_print=1;
      }
      else {
        # continuation line of the preceding header
        $0=dim cyan $0 reset;
      }
    }

    if (do_not_print==0) print $0;
  }
}


A  => src/ubuntu.awk +54 -0
@@ 1,54 @@
#!/bin/awk -f

# ubuntu.awk
# ==========
# Display filter (for mutt, aerc and the like) for plaintext mail sent to
# @lists.ubuntu.com: the notice header and the detail labels are colored, and
# a blank line directly after a section title is dropped.

BEGIN {
  in_header=0;            # 1 while between a pair of "=====" rules
  in_references=0;        # 1 while inside the "References:" section
  do_not_print_blank=0;   # lines (counting the current one) whose blanks are dropped

  dim="\033[2m";
  cyan="\033[36m";
  reset="\033[0m";

  # ends the coloring right after the first ": " of a detail line
  colon_replacement=": " reset;
}
{
  # section tracking: a "=====" rule flips the header state; otherwise look
  # for the start of the references section or, once inside it, for the
  # whitespace-only line that closes it
  if ($0 ~ /^={5,}/) in_header=(in_header==1) ? 0 : 1;
  else if (in_references==1) {
    if ($0 ~ /^\s+$/) in_references=0;
  }
  else if ($0 ~ /^References:/) in_references=1;

  if (in_header==1) {
    # color the header text, leaving the rule lines as they are
    if ($0 !~ /={5,}/) $0=dim cyan $0 reset;
  }
  else {
    # a section title: drop the next line if it is blank
    if ($0 ~ /^(A security issue affects|Summary|Details|Software Description|Update instructions):/) do_not_print_blank=2;

    if ($0 ~ /^- [A-Za-z]/) {
      # "- label: value" keeps its color only up to the colon
      $2=dim cyan $2;
      $0=$0 reset;
      sub(/: /,colon_replacement);
    }
    else if (in_references==0 && $0 ~ /^  [A-Za-z]/) {
      # color the first word; the field assignment strips the two-space
      # indent, so put it back
      $1=dim cyan $1 reset;
      $0="  " $0;
    }
  }

  # blank lines are withheld for as long as the counter is running
  if (do_not_print_blank==0 || $0 !~ /^\s*$/) print $0;
  if (do_not_print_blank!=0) do_not_print_blank=do_not_print_blank-1;
}