import traceback, relaycheck, os, pop3trace, re, hamsterrules
import pop3config, relaycheck, fnmatch, brainfuck, cPickle, types

# Module-wide alias for the trace function; every check below reports
# through it.
trace = pop3trace.trace

# make sure the latest spam rules are read
# HACK to fool py2exe:
# The import and reload statements for the `spamtokens` rules module are kept
# as lists of characters and are only joined and exec'd at runtime, so no
# literal import statement for that module appears in this file.
# `reload_this` is exec'd at the start of CheckHeaderForSpam().
# NOTE(review): presumably this keeps py2exe from bundling the rules module so
# that it stays editable next to the executable -- confirm.
import_this = list('import spamtokens')
reload_this = list('reload(spamtokens)')
try:
    exec("".join(import_this))        
except:
    # NOTE: a failed import is silently ignored here; code that later refers
    # to `spamtokens` will then fail at that point instead.
    pass

def getSubjectWeight(subject):
    """Return the average character code (ord) of `subject`.

    isSpamSubject() treats an average above 128 as a spam indicator.

    Returns 0 for an empty (or None) subject and for input whose items are
    not single characters, instead of relying on a catch-all handler.
    """
    if not subject:
        return 0
    try:
        return float(sum(map(ord, subject))) / len(subject)
    except TypeError:
        # Not a character sequence (ord() or len() rejected it).
        return 0

def splitSubject(subject):
    """Split `subject` into its non-empty tokens.

    Tokens are separated by any of the characters
    ! : . space tab ' , ? " * +
    Runs of separators never produce empty tokens.
    """
    pieces = re.split("[!:. \t',?\"*+]", subject)
    return [piece for piece in pieces if len(piece)]

def isSpamSubject(subject):
    """Apply the subject heuristics to `subject`.

    Checks, in order:
      1. the average character code (getSubjectWeight) being above 128,
      2. a match against any rule in hamsterrules.SubjectRules,
      3. any token (lower-cased) listed in
         spamtokens.FILTER_SUBJECT_TOKENS,
      4. the last token being an integer.

    Returns a string naming the reason when the subject looks like spam,
    otherwise None.
    """
    trace("checking, if '%s' is a spam subject" % subject)
    weight = getSubjectWeight(subject)
    if weight > 128:
        return "weight %.2f indicates spam mail." % weight

    for rule in hamsterrules.SubjectRules:
        if rule.match(subject):
            return "subject '%s' is in hamster rules" % (subject)

    tokens = splitSubject(subject)
    for token in tokens:
        if token.lower() in spamtokens.FILTER_SUBJECT_TOKENS:
            return "token '%s' indicates spam mail." % token

    if tokens:
        try:
            int(tokens[-1])
        except ValueError:
            # Last token is not an integer.
            pass
        else:
            # Any integer counts, including "0"; the parsed value itself
            # must not be used as the truth flag.
            return "integer as last token indicates spam mail."

    return None

def getIpAddress(ipstr):
    """Return `ipstr` if it is a dotted-quad IPv4 address, else None.

    A valid address consists of exactly four '.'-separated integers, each
    in the range 0..255.  The checks are explicit (not `assert`), so they
    stay active when Python runs with -O.
    """
    try:
        octets = ipstr.split(".")
    except (AttributeError, TypeError):
        # Not a string at all.
        return None

    if len(octets) != 4:
        return None

    for octet in octets:
        try:
            value = int(octet)
        except (TypeError, ValueError):
            return None
        if value < 0 or value > 255:
            return None

    return ipstr

def findIpAddresses(line):
    """Return all IP addresses in `line` that are enclosed in (...) or [...].

    A candidate starts right after '(' or '[' and may contain only digits
    and dots.  It is accepted only when it is terminated by the matching
    closing bracket and getIpAddress() confirms it is a valid address.
    Any other character aborts the current candidate; if that character is
    itself an opening bracket, a new candidate starts behind it.

    Returns a list of address strings in order of appearance.
    """
    closers = {'(': ')', '[': ']'}
    result = []
    start = -1       # index of the first candidate character, -1 if none
    closer = None    # closing bracket expected for the current candidate

    for i, c in enumerate(line):
        if start >= 0 and c not in "0123456789.":
            if c == closer:
                ip = getIpAddress(line[start:i])
                if ip is not None:
                    result.append(ip)
            start = -1

        if start == -1 and c in closers:
            start, closer = i + 1, closers[c]

    return result

# In-memory DNSBL lookup cache, filled by ReadDSNBLCache()/optimizedDNSBL():
#   known_checks_list -- cached addresses, oldest first (eviction order)
#   known_checks_dict -- address -> result of the blacklist lookup
known_checks_list, known_checks_dict = [], {}

# Set to 1 by optimizedDNSBL() once it has tried to load the cache file.
CACHE_HAS_BEEN_READ = 0

def ReadDSNBLCache(filename):
    """Load the pickled DNSBL lookup cache from `filename`.

    The file is expected to contain the tuple
    ("DNSBL-CACHE", list_of_addresses, dict_of_results), the layout that
    WriteDNSBLCache() produces.  On success the module-level
    `known_checks_list` and `known_checks_dict` are replaced.  Data of any
    other shape is ignored and the current cache is kept.

    Errors (missing file, corrupt pickle, ...) are printed with
    traceback.print_exc() and never propagate to the caller.

    NOTE: unpickling can execute arbitrary code, so `filename` must only
    ever name a file written by this program.
    """
    global known_checks_list, known_checks_dict
    try:
        f = open(filename, "rb")
        try:
            trace("%s opened\n" % filename)
            data = cPickle.load(f)
        finally:
            # Close the file even when unpickling fails.
            f.close()
        trace("got data: %s\n" % str(data))

        if isinstance(data, tuple) and len(data) >= 3 and data[0] == "DNSBL-CACHE":
            trace("ok, is cache data.")
            # Validate explicitly; `assert` would be stripped under -O.
            if isinstance(data[1], list) and isinstance(data[2], dict):
                known_checks_list = data[1]
                known_checks_dict = data[2]
            else:
                trace("cache data has unexpected types, ignored.")
    except Exception:
        # todo: dump exception to tracefile
        traceback.print_exc()

def WriteDNSBLCache(filename):
    """Pickle the current DNSBL lookup cache to `filename`.

    Writes the tuple ("DNSBL-CACHE", known_checks_list, known_checks_dict)
    using pickle protocol 1.  Errors are printed with
    traceback.print_exc() and never propagate to the caller.
    """
    try:
        f = open(filename, "wb")
        try:
            data = ("DNSBL-CACHE", known_checks_list, known_checks_dict)
            cPickle.dump(data, f, 1)
        finally:
            # Close the file even when pickling fails.
            f.close()
    except Exception:
        traceback.print_exc()

def optimizedDNSBL(address):
    """Look up `address` on the relay blacklists, using a persistent cache.

    Order of evaluation:
      1. addresses in spamtokens.IGNORE_THESE_ADDRESSES are never reported
         (returns None);
      2. addresses in spamtokens.FILTER_THESE_ADDRESSES are always reported
         (returns "FILTER_THESE_ADDRESSES-rule");
      3. a previously cached lookup result is returned as is;
      4. otherwise relaycheck.checkRelayBlacklist() is queried, the result
         is cached (at most 1000 addresses, oldest evicted first) and the
         cache file is rewritten.

    Both local rule checks run before the cache lookup, so a change to the
    (reloaded) rules takes effect even for addresses that are already
    cached.

    Returns None when the address is not reported; callers treat any other
    value as the name of the rule/blacklist that reported it.

    CACHE_FILENAME is not defined in this part of the file; it is expected
    to be provided as a module-level name before the first call.
    """
    global CACHE_FILENAME, CACHE_HAS_BEEN_READ

    max_cached = 1000

    if not CACHE_HAS_BEEN_READ:
        ReadDSNBLCache(CACHE_FILENAME)
        CACHE_HAS_BEEN_READ = 1

    if relaycheck.addressInRange(address, spamtokens.IGNORE_THESE_ADDRESSES):
        return None

    if relaycheck.addressInRange(address, spamtokens.FILTER_THESE_ADDRESSES):
        return "FILTER_THESE_ADDRESSES-rule"

    if address in known_checks_dict:
        return known_checks_dict[address]

    server = relaycheck.checkRelayBlacklist(address)

    # Make room first.  `>=` also shrinks a cache that was loaded with more
    # entries than the limit, and a missing dict entry (inconsistent cache
    # file) must not abort the lookup.
    while len(known_checks_list) >= max_cached:
        oldest = known_checks_list[0]
        del known_checks_list[0]
        if oldest in known_checks_dict:
            del known_checks_dict[oldest]

    known_checks_dict[address] = server
    known_checks_list.append(address)

    WriteDNSBLCache(CACHE_FILENAME)
    return server

def getSender(sender):
    """Normalise a From/To header value to a bare, lower-case address.

    The value is stripped and lower-cased.  If it contains a '<' that is
    followed (anywhere later) by the first '>', only the text between the
    two is returned; otherwise the whole normalised value is returned.
    """
    address = sender.strip().lower()
    open_pos = address.find("<")
    close_pos = address.find(">")
    if 0 <= open_pos < close_pos:
        return address[open_pos + 1:close_pos]
    return address

def MatchRules(data, rules):
    """Return a description of the first rule in `rules` that matches `data`.

    Each rule is a string, interpreted by its form:
      * "$<regex>"    -- Python regular expression, matched at the start
                         of `data` (re.match semantics);
      * "!bf:<code>"  -- passed with `data` to brainfuck.brainfuck(); a
                         true result counts as a match;
      * contains "*" or "?" -- filename-style pattern (fnmatch.fnmatch);
      * anything else -- must be equal to `data`.

    An empty rule is treated as a plaintext rule, and a rule with an
    invalid regular expression is traced and skipped, so that one bad rule
    does not disable the rules that follow it.

    Returns the description string, or None if no rule matches.
    """
    for rule in rules:
        # Slicing (not indexing) keeps an empty rule from raising IndexError.
        if rule[:1] == '$':
            # this is an RE
            pattern = rule[1:]
            try:
                matched = re.compile(pattern).match(data)
            except re.error:
                trace("ignoring invalid RE rule '%s'" % pattern)
                continue
            if matched:
                return "data '%s' matches RE rule '%s'" % (data, pattern)

        elif rule[:4] == '!bf:':
            # this is a brainfuck expression
            if brainfuck.brainfuck(rule[4:], data):
                return "data '%s' matches BRAINFUCK rule '%s'" % (data, rule[4:])

        elif "*" in rule or "?" in rule:
            # this is a filename-style pattern matching rule
            if fnmatch.fnmatch(data, rule):
                return "data '%s' matches FN rule '%s'" % (data, rule)

        elif data == rule:
            return "data '%s' matches plaintext rule '%s'" % (data, rule)

    return None

def IgnoreThisMail(lines):
    """Return 1 if a header field matches an ignore rule, else 0.

    Each line up to the first blank one is stripped and split at its first
    ':' into keyword and value.  The lower-cased keyword selects a rule
    list from spamtokens.IGNORE_RULES; the lower-cased value (reduced to
    the bare address by getSender() for "from" and "to") is checked against
    it with MatchRules().  The first match is traced and ends the search.

    Keywords without a rule list, and any error while evaluating a rule,
    are skipped: the check is best effort and never raises for bad rules.
    """
    for line in lines:
        line = line.strip()
        if not line:
            # A blank line ends the header.
            break

        index = line.find(":")
        if index <= 0:
            continue

        keyword = line[:index].lower()
        try:
            rules = spamtokens.IGNORE_RULES[keyword]
            data = line[index+1:].strip().lower()
            if keyword in ("from", "to"):
                data = getSender(data)
            result = MatchRules(data, rules)
            if result:
                trace(result)
                return 1
        except Exception:
            # Deliberately best effort (e.g. no rules for this keyword), but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    return 0

           
def CheckHeaderForSpam(lines):
    """Check the lines of a mail against all spam rules.

    `lines` is a sequence of strings; every line is lower-cased and
    right-stripped before any check.  The checks run in this order and the
    first hit ends the search:

      1. every "keyword: value" field up to the first empty line is
         checked against spamtokens.FILTER_RULES[keyword] with
         MatchRules() ("from"/"to" values are reduced by getSender());
      2. the subject is checked with isSpamSubject();
      3. if pop3config.HAMSTER_RULES is not None, every bracketed IP
         address found in any line is matched against
         hamsterrules.IPAddressRules;
      4. every bracketed IP address found in any line is looked up with
         optimizedDNSBL().

    The spam rules module is reloaded on every call so that edited rules
    take effect immediately.

    Returns a string describing why the mail is spam, or None.
    """
    # A real list: the lines are iterated several times below.
    lines = [line.lower().rstrip() for line in lines]
    exec("".join(reload_this))
    trace("CheckHeaderForSpam() called")

    # check all header fields against the filter rules
    msg_subject = None
    for line in lines:
        if not line:
            # An empty line ends the header.
            break

        index = line.find(":")
        if index <= 0:
            continue

        keyword = line[:index]
        try:
            rules = spamtokens.FILTER_RULES[keyword]
            data = line[index+1:].strip()
            if keyword in ("from", "to"):
                data = getSender(data)
            if keyword == "subject":
                # NOTE(review): the subject is only remembered when
                # FILTER_RULES has a "subject" entry, so the heuristics
                # below are skipped without one -- confirm this is intended.
                msg_subject = data
            result = MatchRules(data, rules)
            if result:
                trace(result)
                return result
        except Exception:
            # Deliberately best effort (e.g. no rules for this keyword), but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    if msg_subject:
        isspam = isSpamSubject(msg_subject)
        if isspam:
            return "subject '%s' is spam, because %s" % (msg_subject, isspam)

    # Scan the lines for IP addresses once; both passes below reuse the
    # result and keep the original line order.
    found_addresses = []
    for line in lines:
        for address in findIpAddresses(line):
            found_addresses.append((line, address))

    if pop3config.HAMSTER_RULES is not None:
        trace("checking against hamster rules")
        for line, address in found_addresses:
            for ip in hamsterrules.IPAddressRules:
                if ip.match(address):
                    return "ip '%s' in line '%s' is in hamster rules" % (address, line)

    for line, address in found_addresses:
        trace( "checking IP address '%s' on blacklists" % address )
        server = optimizedDNSBL(address)
        if server is not None:
            return "ip '%s' in line '%s' is blacklisted by %s" % (address, line, server)

    return None
