#!/bin/bash

  # Cleans the referrer spammers' footprints out of the Apache log.
  # Call this script before running the site statistics program.

  # Configuration: both values are template placeholders. Replace each whole
  # <...> token (angle brackets included) with a real path before running;
  # left as-is, the shell treats the `<` and `>` as redirection operators.

  # Directory that holds the Apache access.log; clean.log is written there too.
  LOGS_PATH=<INSERT YOUR LOGS DIRECTORY HERE>
  # File of spamword patterns that is handed to `grep -f`.
  REFERRER_SPAMWORDS_FILE=<INSERT YOUR FILE WITH REFERRER SPAMWORDS HERE>

  # Filters referrer-spam hits out of the Apache access log.
  #
  # Theoretically, this will drop ANY log line with a spamword in it, not only
  # lines whose referrer field contains one. However, correctly chosen spamwords
  # have very little chance of being met elsewhere than in a referrer-spambot
  # hit. A version that looks only in the referrer field may be written later.
  #
  # Globals:  LOGS_PATH (read), REFERRER_SPAMWORDS_FILE (read)
  # Outputs:  writes $LOGS_PATH/clean.log; diagnostics go to stderr
  # Returns:  non-zero when the spamwords file or access.log is unreadable
  #           (clean.log is then left untouched), otherwise chmod's status
  clean_referrer_spam() {
    local access_log="$LOGS_PATH/access.log"
    local clean_log="$LOGS_PATH/clean.log"

    # Quoted tests: an unquoted empty path would make `[ -r ]` succeed, and a
    # path containing spaces would be split into several words.
    if [[ ! -r "$REFERRER_SPAMWORDS_FILE" || ! -r "$access_log" ]]; then
      printf '%s: spamwords file or access.log is not readable; clean.log not updated\n' \
        "${0##*/}" >&2
      return 1
    fi

    # Each line of the spamwords file is a grep pattern. Beware of blank lines
    # in it: an empty pattern matches every log line, so clean.log would end
    # up empty.
    grep -v -f "$REFERRER_SPAMWORDS_FILE" < "$access_log" > "$clean_log"

    # After this, clean.log has the ownership of the process that created it
    # (most probably root:root). This may be inconvenient for the statistics
    # program. Change the file owner or permissions if necessary; if not, just
    # omit this. It only runs when clean.log was actually (re)generated.
    # `owner:group` is the supported separator (the dot form is deprecated),
    # and a log file needs no execute bits.
    chown support:staff "$clean_log"
    chmod 644 "$clean_log"
  }

  clean_referrer_spam

  # Now, do not forget to re-set the configuration of your statistics program
  # to read not from access.log, but from clean.log! :-)

# end of file.
