import pdb, os, codecs, re, sys
from messaging import dbgMsg, errMsg, stdMsg

## ---------------------------------------------------------------
## Header-line patterns.  All are applied with .match() to one
## stripped input line at a time.
## ---------------------------------------------------------------

## A line starting with "From " (no colon).  initializeEMAIL() skips such
## lines and treats them as the start of a header block.
ignorableFromRE = re.compile( '(^From )(.*$)')
## "From:" header; group 2 is everything after "From: ".
FromRE = re.compile( '(^From: )(.*$)', re.I)
## "Date:" header; group 2 is handed to parseDate().
DateRE = re.compile( '(^Date: )(.*$)', re.I)
## Used by parseDate() on single tokens: exactly four digits is a year ...
yearRE = re.compile( '^[0-9][0-9][0-9][0-9]$')
## ... and exactly one or two digits is a day of the month.
dayRE = re.compile( '^[0-9][0-9]?$')
## "Subject:" header; group 2 is the subject text.
SubjectRE = re.compile( '(^Subject: )(.*$)', re.I)
## For the next three, group 2 is the text between the first "<" and the
## first following ">"; anything after that ">" is ignored.
MessageIdRE = re.compile( '(^Message-ID: <)([^>]+)(>.*$)', re.I)
InReplyToRE = re.compile( '(^In-Reply-To: <)([^>]+)(>.*$)', re.I)
ReferencesRE = re.compile( '(^References: <)([^>]+)(>.*$)', re.I)

## "=?ISO-8859-1?Q?...?=" encoded word: group 1 = text before, group 3 = the
## encoded payload, group 5 = text after.
has8859RE = re.compile( '(^.*)(=\?ISO-8859-1\?Q\?)([^\?]+)(\?=)(.*$)', re.I)
## Anchored "=XX" escape: requires at least one non-"=" character before the
## first "=", so a payload that STARTS with "=XX" does not match.
escapedHexRE = re.compile( '(^[^=]+)(=)([a-fA-F0-9][a-fA-F0-9])(.*$)')
## Unanchored "=XX" escape, used with .search(); group 2 is the two hex digits.
escapedHex2RE = re.compile( '(=)([a-fA-F0-9][a-fA-F0-9])')

## NOTE(review): the next two patterns are not referenced anywhere in the
## visible part of this file -- confirm whether they are still needed.
ignoreEMAdd1RE = re.compile( '(^sc34wg3@isotopicmaps.org \()(.*)(\)$)')
loginAtDomainRE = re.compile( '(^[^ ]+)( at )([^ ]+)( )(.*$)')

## Delimited-expression patterns: group 1 = text before, group 3 = contents,
## group 5 = text after.  Because the leading "(^.*)" is greedy, the match
## found is the LAST delimited expression on the line.
hasAngleBracketsRE = re.compile( '(^.*)(<)(.+?)(>)(.*$)')
hasBracketsRE = re.compile( '(^.*)(\[)(.+?)(\])(.*$)')
hasParenthesisRE = re.compile( '(^.*)(\()(.+?)(\))(.*$)')
hasQuotationRE = re.compile( '(^.*)(")(.+?)(")(.*$)')

## Whole-string test: letters, digits, "_", "-", "." on both sides of one "@".
isFromEmailAddressRE = re.compile( '^[A-Za-z_0-9\-\.]+@[A-Za-z_0-9\-\.]+$')
## Same shape, unanchored, used with .search() to pull addresses out of a name.
fromEmailAddressRE = re.compile( '[A-Za-z_0-9\-\.]+@[A-Za-z_0-9\-\.]+')

## NOTE(review): "A-z" (lower-case z) in the two patterns below also covers the
## six ASCII characters between "Z" and "a":  [ \ ] ^ _ `  -- confirm whether
## that is intended before tightening it to "A-Za-z"; tokens that stop
## matching would make initializeEMAIL() call sys.exit( 1).
beginsAndEndsAlphaRE = re.compile( '^[A-za-z0-9].*[A-za-z0-9,\.]$')  ## if ends with comma or period, it's OK, e.g. "Mason, James David" or "B. Tommie Usdin"
isAlphaCharRE = re.compile( '^[A-za-z0-9]$')  ## a single-character token (same "A-z" range as above)

## Obfuscated address separator ("login at domain"); initializeEMAIL() replaces
## each occurrence with "@".
recombinableAtRE = re.compile( ' at ')

## Message-ID string -> list of stripped lines collected for that message;
## filled by dumpMessage().
messages = {}

#######################################################
def initializeEMAIL( vsv, origPopTaskObject):
    """Parse every file under $INPUTSDIRPATH and flush each e-mail through dumpMessage().

    Every file found by os.walk() is read with the '8859' codec and scanned
    line by line (each line is stripped first):

      * a line starting with 'From ' (no colon) is skipped and switches the
        parser into "header" mode; the first empty line switches it off;
      * while in header mode, From:, Date:, Subject:, Message-ID:,
        In-Reply-To: and References: lines are parsed into the module-level
        working variables that dumpMessage() reads;
      * a header From: line first flushes the previously collected message
        through dumpMessage(), so this code expects From: to be the first
        header it recognises for a message;
      * the end of each file flushes the last message.

    A From: line directly preceded by a line containing 'original message'
    or 'forwarded message' is not treated as the start of a new message.

    Parameters:
        vsv               -- passed unchanged to dumpMessage().
        origPopTaskObject -- not used by this function.

    Returns None.  Calls sys.exit( 1) (after errMsg) when a From: line
    contains a token that is neither an e-mail address nor name-like; the
    angle-bracket helper exits the same way on unexpected contents.
    """

    global fromPersonNames, fromEmailAddresses, fromPersonNames2, fromEmailAddresses2
    global remainder, FromMO, hasAngleBracketsMO, hasBracketsMO, hasParenthesisMO, hasQuotationMO, has8859MO
    global messages, messageIdString, inReplyToString, subjectString, dateString, dateCode
    global thisMessage, filename, flineNumber, fline

    ## Corpus-wide collections of everything seen.  They are only written
    ## here; the commented-out debug listings after this function read them.
    allFromPersonNames = {}
    allFromEmailAddresses = {}
    allDates = {}
    allSubjects = {}
    allMessageIds = {}
    allInReplyTos = {}
    allReferences = {}

    messageIdString = ''
    inReplyToString = ''
    subjectString = ''
    dateString = ''
    dateCode = ''
    thisMessage = []

    ## NOTE: not reset per file, so header mode can carry over from the end
    ## of one file into the start of the next.
    inHeaderNow = False

    for root, dirnames, filenames in os.walk( os.environ[ 'INPUTSDIRPATH']):
        for filename in filenames:

            fromPersonNames = {}
            fromEmailAddresses = {}
            fromPersonNames2 = {}
            fromEmailAddresses2 = {}

            FO = codecs.open( os.path.join( root, filename), 'r', '8859')
            try:
                flines = FO.readlines()
            finally:
                ## close the file even if reading fails
                FO.close()
            thisMessage = []
            prevLine = ''
            flineNumber = 0
            for fline in flines:

                flineNumber += 1

                remainder = fline.strip()

                ignorableFromMO = ignorableFromRE.match( remainder)
                if ignorableFromMO:
                    inHeaderNow = True
                    continue

                if not len( remainder):
                    inHeaderNow = False

                thisMessage.append( remainder)

                FromMO = FromRE.match( remainder)
                DateMO = DateRE.match( remainder)
                SubjectMO = SubjectRE.match( remainder)
                MessageIdMO = MessageIdRE.match( remainder)
                InReplyToMO = InReplyToRE.match( remainder)
                ReferencesMO = ReferencesRE.match( remainder)

                ###
                ### From: lines parsed here
                ###
                if FromMO and inHeaderNow and ( 'original message' not in prevLine.lower()) and ( 'forwarded message' not in prevLine.lower()):

                    ## This From: line opens a NEW message but was already
                    ## appended to the buffer above.  Take it back off so the
                    ## previous message's content does not end with it, flush
                    ## the previous message, then put the line into the fresh
                    ## buffer that dumpMessage() leaves behind.
                    thisMessage.pop()
                    dumpMessage( vsv)
                    thisMessage.append( remainder)

                    removeFromColon()

                    ## Each helper below rewrites the global 'remainder' (and
                    ## records what it removed), so every loop re-matches
                    ## until nothing of that kind is left.
                    while True:
                        has8859MO = has8859RE.match( remainder)
                        if not has8859MO:
                            break
                        replace8859String()

                    while True:
                        hasAngleBracketsMO = hasAngleBracketsRE.match( remainder)
                        if not hasAngleBracketsMO:
                            break
                        removeAngleBracketExpression()

                    while True:
                        hasQuotationMO = hasQuotationRE.match( remainder)
                        if hasQuotationMO is None:
                            break
                        removeQuotationExpression()

                    while True:
                        hasParenthesisMO = hasParenthesisRE.match( remainder)
                        if hasParenthesisMO is None:
                            break
                        removeParenthesisExpression()

                    while True:
                        hasBracketsMO = hasBracketsRE.match( remainder)
                        if hasBracketsMO is None:
                            break
                        removeBracketExpression()

                    ## What is left is split on blanks.  Address tokens are
                    ## recorded; name-like tokens are collected separately
                    ## for "before the first address" and "after it".
                    tokens = remainder.split( ' ')
                    remainderLists = [ [], []]
                    foundEmail = False
                    for token in tokens:
                        if not len( token): continue
                        if token == '=20': continue
                        if token == '[': continue
                        if token == '=': continue
                        if token.lower() == 'mailto:': continue
                        isFromEmailAddressMO = isFromEmailAddressRE.match( token)
                        if isFromEmailAddressMO:
                            fromEmailAddresses[ token] = None
                            foundEmail = True
                        else:
                            beginsAndEndsAlphaMO = beginsAndEndsAlphaRE.match( token)
                            isAlphaCharMO = isAlphaCharRE.match( token)
                            if beginsAndEndsAlphaMO or isAlphaCharMO:
                                if foundEmail:
                                    remainderLists[ 1].append( token)
                                else:
                                    remainderLists[ 0].append( token)
                            else:
                                ## unknown token shape: stop so the input can be inspected
                                errMsg( 'token = "%s"; remainder = "%s"; fline = "%s"' % ( token, remainder, fline))
                                sys.exit( 1)
                    for j in range( 2):
                        name = ' '.join( remainderLists[ j])
                        if len( name):

                            ## "login at domain" -> "login@domain"
                            while True:
                                recombinableAtMO = recombinableAtRE.search( name)
                                if not recombinableAtMO: break
                                name = u'%s%s%s' % ( name[ :recombinableAtMO.start()], '@', name[ recombinableAtMO.end():])

                            ## move any address now embedded in the name to the address set
                            while True:
                                fromEmailAddressMO = fromEmailAddressRE.search( name)
                                if not fromEmailAddressMO: break
                                fromEmailAddresses[ fromEmailAddressMO.group()] = None
                                name = u'%s%s' % ( name[ :fromEmailAddressMO.start()], name[ fromEmailAddressMO.end():])

                            if len( name):
                                fromPersonNames[ name] = None

                    ## Normalise the collected names into fromPersonNames2.
                    for fromPersonName in fromPersonNames:

                        fromPersonName = fromPersonName.replace( u'\\(', u'(')
                        fromPersonName = fromPersonName.replace( u'\\)', u')')

                        tokenList = fromPersonName.split( u' ')
                        tokenList2 = []
                        for token in tokenList:
                            if len( token):
                                if token.lower() not in [ u'mailto:', u'[<a', u'sc34wg3@isotopicmaps.org']:
                                    tokenList2.append( token)
                        fromPersonName = ' '.join( tokenList2)

                        if fromPersonName.lower().startswith( u'on behalf of'):
                            ## Rebinding these names does not disturb the loop,
                            ## which keeps iterating the original dict object;
                            ## it does discard all addresses gathered so far.
                            fromPersonNames = {}
                            fromEmailAddresses = {}
                            fromPersonName = fromPersonName[ 12:].strip()
                            fromPersonNames[ fromPersonName] = None

                        ## decode any remaining "=XX" escapes
                        while True:
                            escapedHex2MO = escapedHex2RE.search( fromPersonName)
                            if not escapedHex2MO: break
                            fromPersonName = u'%s%s%s' % ( fromPersonName[ :escapedHex2MO.start()], Hex2Char( escapedHex2MO.group( 2)), fromPersonName[ escapedHex2MO.end():])

                        fromPersonName = fromPersonName.strip()
                        if not len( fromPersonName): continue

                        allFromPersonNames[ fromPersonName] = None
                        fromPersonNames2[ fromPersonName] = None

                    ## Normalise the collected addresses into fromEmailAddresses2.
                    for fromEmailAddress in fromEmailAddresses:

                        fromEmailAddress = fromEmailAddress.strip()

                        ## the list's own address is never a sender
                        if fromEmailAddress.lower() == 'sc34wg3@isotopicmaps.org': continue

                        if fromEmailAddress.lower().startswith( 'mailto:'):
                            fromEmailAddress = fromEmailAddress[ 7:].strip()

                        fromEmailAddress = fromEmailAddress.strip()

                        allFromEmailAddresses[ fromEmailAddress.strip()] = None
                        fromEmailAddresses2[ fromEmailAddress.strip()] = None


                ###
                ### Date: lines parsed here
                ###
                elif DateMO and inHeaderNow:
                    dateString = DateMO.group( 2)
                    dateCode = parseDate( dateString)
                    allDates[ dateCode] = None

                ###
                ### Subject: lines parsed here
                ###
                elif SubjectMO and inHeaderNow:
                    subjectString = SubjectMO.group( 2).strip()

                    ## strip any number of leading "Re: " and "[sc34wg3] "
                    ## prefixes, in any order
                    foundRe = foundSc34wg3 = True
                    while foundRe or foundSc34wg3:
                        if subjectString.lower().startswith( 're: '):
                            subjectString = subjectString[ 4:].strip()
                            foundRe = True
                        else:
                            foundRe = False
                        if subjectString.lower().startswith( '[sc34wg3] '):
                            subjectString = subjectString[ 10:].strip()
                            foundSc34wg3 = True
                        else:
                            foundSc34wg3 = False

                    allSubjects[ subjectString] = None

                ###
                ### MessageId: lines parsed here
                ###
                elif MessageIdMO and inHeaderNow:
                    messageIdString = MessageIdMO.group( 2).strip()
                    allMessageIds[ messageIdString] = None

                ###
                ### InReplyTo: lines parsed here
                ###
                elif InReplyToMO and inHeaderNow:
                    inReplyToString = InReplyToMO.group( 2).strip()
                    allInReplyTos[ inReplyToString] = None

                ###
                ### References: lines parsed here
                ###
                elif ReferencesMO and inHeaderNow:
                    referencesString = ReferencesMO.group( 2).strip()
                    allReferences[ referencesString] = None

                prevLine = fline

            ## flush the last message of this file
            dumpMessage( vsv)


##     fromPersonNameList = allFromPersonNames.keys()
##     fromPersonNameList.sort()
##     print
##     print
##     for fromPersonName in fromPersonNameList:
##         print repr( fromPersonName)
        
##     fromEmailAddressList = allFromEmailAddresses.keys()
##     fromEmailAddressList.sort()
##     print
##     for fromEmailAddress in fromEmailAddressList:
##         print fromEmailAddress
        
##     dateList = allDates.keys()
##     dateList.sort()
##     print
##     for date in dateList:
##         print date
        
##     subjectList = allSubjects.keys()
##     subjectList.sort()
##     print
##     for subject in subjectList:
##         print subject
        
##     messageIdList = allMessageIds.keys()
##     messageIdList.sort()
##     print
##     for messageId in messageIdList:
##         print messageId
        
##     inReplyToList = allInReplyTos.keys()
##     inReplyToList.sort()
##     print
##     for inReplyTo in inReplyToList:
##         print inReplyTo
        
##     referencesList = allReferences.keys()
##     referencesList.sort()
##     print
##     for references in referencesList:
##         print references
        

#######################################################
def parseDate( dateString):
    """Turn the text of a Date: header into a sortable 'YYYY/MM/DD' code.

    The text is split on single blanks and each token is classified on its
    own (one trailing comma is ignored):
      * a token whose first three letters name a weekday is skipped;
      * a token whose first three letters name a month sets the month;
      * a token of exactly four digits sets the year;
      * a token of one or two digits sets the day.
    When the same kind of token occurs more than once, the last one wins.

    If any of the three parts is still 0 at the end, the problem is reported
    through errMsg() and the code is returned anyway, with zeros in the
    missing positions.
    """
    weekdayPrefixes = ( 'mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
    monthPrefixes = ( 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec')

    dateString = dateString.strip()
    year = month = day = 0

    for rawToken in dateString.split( ' '):
        token = rawToken[ :-1] if rawToken.endswith( ',') else rawToken
        prefix = token[ :3].lower()

        if prefix in weekdayPrefixes:
            continue
        if prefix in monthPrefixes:
            ## months are numbered from 1
            month = monthPrefixes.index( prefix) + 1
        if yearRE.match( token):
            year = int( token)
        if dayRE.match( token):
            day = int( token)

    if 0 in ( year, month, day):
        errMsg( 'year: %d  month: %d  day: %d  "%s"' % ( year, month, day, dateString))
    return '%04d/%02d/%02d' % ( year, month, day)

#######################################################
def removeFromColon():
    """Load the value of the current From: header into the global 'remainder'.

    Reads the global match object 'FromMO' (a FromRE match) and stores its
    second group -- everything after 'From: ' -- with surrounding whitespace
    removed.
    """
    ## 'remainder' is the only global this function rebinds; FromMO is only read.
    global remainder

    headerValue = FromMO.group( 2)
    remainder = headerValue.strip()



#######################################################
def HexDigitChar2Int( c):
    """Return the value (0-15) of one hexadecimal digit character.

    Parameters:
        c -- a string; upper- and lower-case digits 'a'-'f' are both accepted.

    Returns the integer value, or None when 'c' is not exactly one
    hexadecimal digit.  The previous "c in '0123456789'" test was a
    substring test, so the empty string and multi-character strings such as
    '12' slipped through it and made ord() raise TypeError; they now get the
    same None as any other non-digit.
    """
    if len( c) != 1:
        return None
    if '0' <= c <= '9':
        return ord( c) - ord( '0')
    if 'a' <= c <= 'f':
        return ( ord( c) - ord( 'a')) + 10
    if 'A' <= c <= 'F':
        return ( ord( c) - ord( 'A')) + 10
    return None


#######################################################
def Hex2Char( hexString):
    """Return the one-character unicode string whose code point is given in hex.

    Parameters:
        hexString -- a string whose first two characters are hexadecimal
                     digits (either case); anything after them is ignored.

    Raises IndexError when fewer than two characters are supplied and
    ValueError when one of the first two is not a hexadecimal digit
    (previously that case surfaced as a TypeError from multiplying None).
    """
    ## convert digit by digit so signs, blanks and prefixes are rejected
    codePoint = ( int( hexString[ 0], 16) * 16) + int( hexString[ 1], 16)
    try:
        return unichr( codePoint)
    except NameError:
        ## Python 3 has no unichr(); its chr() already returns unicode text
        return chr( codePoint)



#######################################################
def replace8859String():
    """Replace one '=?ISO-8859-1?Q?...?=' encoded word in 'remainder' by its decoded text.

    Reads the global match object 'has8859MO' (a has8859RE match) and
    rewrites the global 'remainder' as: text before the encoded word +
    decoded payload + text after it.

    Inside the payload, '=XX' escapes are decoded one at a time with
    Hex2Char(), always starting again from the beginning of the payload.
    escapedHexRE needs at least one non-'=' character in front of the first
    '=', and that '=' must be followed by two hex digits; when that is not
    the case decoding stops and the rest of the payload is left as it is.
    """
    ## 'remainder' is the only global this function rebinds.
    global remainder

    payload = has8859MO.group( 3)

    escapedHexMO = escapedHexRE.match( payload)
    while escapedHexMO:
        head, _, hexDigits, tail = escapedHexMO.groups()
        payload = u''.join( ( head, Hex2Char( hexDigits), tail))
        escapedHexMO = escapedHexRE.match( payload)

    remainder = u''.join( ( has8859MO.group( 1), payload, has8859MO.group( 5)))
    

#######################################################
def removeAngleBracketExpression():
    """Cut one '<...>' expression out of 'remainder' and record it if it is an address.

    Reads the global match object 'hasAngleBracketsMO' (a hasAngleBracketsRE
    match) and rewrites the global 'remainder' as the text before the
    expression plus the text after it, stripped.

      * Contents starting with 'A ' or equal to '/A' are dropped without
        being recorded (presumably leftover HTML anchor tags -- confirm).
      * Otherwise the contents, after removing an optional leading
        'mailto:', must look like an e-mail address.  The ORIGINAL contents
        (unstripped, still carrying any 'mailto:') become a key of the
        global dict 'fromEmailAddresses'.
      * Anything else is reported through errMsg() and the program exits
        with status 1.
    """
    ## 'remainder' is the only global rebound here; the dict is mutated in place.
    global remainder

    before, _, rawContents, _, after = hasAngleBracketsMO.groups()
    withoutExpression = ( before + after).strip()
    contents = rawContents.strip()

    if contents.startswith( 'A ') or contents == '/A':
        remainder = withoutExpression
        return

    if contents.lower().startswith( 'mailto:'):
        contents = contents[ 7:]

    if isFromEmailAddressRE.match( contents) is None:
        ## 'remainder' still holds the text from before the removal here
        errMsg( 'angleBracketContents = "%s"; remainder = "%s"; fline = "%s"' % ( contents, remainder, fline))
        sys.exit( 1)

    remainder = withoutExpression
    fromEmailAddresses[ rawContents] = None


#######################################################
def removeBracketExpression():
    """Cut one '[...]' expression out of 'remainder' and record it if it is an address.

    Reads the global match object 'hasBracketsMO' (a hasBracketsRE match)
    and rewrites the global 'remainder' as the text before the expression
    plus the text after it, stripped.

      * Contents starting with 'A ' or equal to '/A' are dropped without
        being recorded, as in removeAngleBracketExpression().
      * Otherwise the contents, after removing an optional leading
        'mailto:', must look like an e-mail address.  The ORIGINAL contents
        (unstripped, still carrying any 'mailto:') become a key of the
        global dict 'fromEmailAddresses'.
      * Anything else is reported through errMsg() and the program exits
        with status 1.
    """
    ## 'remainder' is the only global rebound here; the dict is mutated in place.
    global remainder

    ## The parentheses around the whole formatting expression matter:
    ## "'%s%s' % ( a, b).strip()" would call .strip() on the tuple and raise
    ## AttributeError.
    withoutExpression = ( '%s%s' % ( hasBracketsMO.group( 1), hasBracketsMO.group( 5))).strip()
    bracketContents = hasBracketsMO.group( 3).strip()

    if bracketContents.startswith( 'A ') or bracketContents == '/A':
        remainder = withoutExpression
        return

    if bracketContents.lower().startswith( 'mailto:'):
        bracketContents = bracketContents[ 7:]

    isFromEmailAddressMO = isFromEmailAddressRE.match( bracketContents)
    if not isFromEmailAddressMO:
        ## three placeholders, three values ('remainder' is still the text
        ## from before the removal)
        errMsg( 'bracketContents = "%s"; remainder = "%s"; fline = "%s"' % ( bracketContents, remainder, fline))
        sys.exit( 1)

    remainder = withoutExpression
    fromEmailAddresses[ hasBracketsMO.group( 3)] = None

#######################################################
def removeParenthesisExpression():
    """Cut one '(...)' expression out of 'remainder' and record it as a person name.

    Reads the global match object 'hasParenthesisMO' (a hasParenthesisRE
    match) and rewrites the global 'remainder' as the text before the
    expression plus the text after it, stripped.

      * Contents that (after stripping) start with 'A ' or equal '/A' are
        dropped without being recorded, as in the bracket helpers.
      * All other contents become, verbatim, a key of the global dict
        'fromPersonNames'; initializeEMAIL() cleans the names up later.
    """
    ## 'remainder' is the only global rebound here; the dict is mutated in place.
    global remainder

    ## The parentheses around the whole formatting expression matter:
    ## "'%s%s' % ( a, b).strip()" would call .strip() on the tuple and raise
    ## AttributeError.
    withoutExpression = ( '%s%s' % ( hasParenthesisMO.group( 1), hasParenthesisMO.group( 5))).strip()
    parenthesisContents = hasParenthesisMO.group( 3).strip()

    remainder = withoutExpression
    if parenthesisContents.startswith( 'A ') or parenthesisContents == '/A':
        return

    fromPersonNames[ hasParenthesisMO.group( 3)] = None


#######################################################
def removeQuotationExpression():
    """Cut one double-quoted expression out of 'remainder' and record it as a person name.

    Reads the global match object 'hasQuotationMO' (a hasQuotationRE match).
    The quoted text becomes, verbatim, a key of the global dict
    'fromPersonNames'; the global 'remainder' is rewritten as the text
    before the opening quote plus the text after the closing quote, stripped.
    """
    ## 'remainder' is the only global rebound here; the dict is mutated in place.
    global remainder

    leading, _, quotedText, _, trailing = hasQuotationMO.groups()
    fromPersonNames[ quotedText] = None
    remainder = ( leading + trailing).strip()



#######################################################
def dumpMessage( vsv):
    """Emit the proxies for the message collected so far, then reset the working state.

    Reads the module-level working variables filled by initializeEMAIL()
    (messageIdString, inReplyToString, subjectString, dateCode, thisMessage,
    fromPersonNames2, fromEmailAddresses2).

    A message is emitted only when it has a Message-ID AND at least one
    sender name or sender address; otherwise the working state is simply
    cleared and nothing is sent to 'vsv'.

    For an emitted message the collected lines are stored in
    messages[ messageIdString], and proxies are created through
    vsv.newProxyObject() / vsv.newPropertyInstance() for the person, the
    e-mail, the subject line, the date, the thread and -- when an
    In-Reply-To id was seen -- the replied-to e-mail, followed by the
    properties that link them by proxyId.

    Parameters:
        vsv -- object providing newProxyObject() and
               newPropertyInstance( proxy, <'uod1' or 'uod2'>, name, value);
               the meaning of the 'uod1'/'uod2' argument is defined by that
               object and not visible here.

    Returns None.
    """
    global thisMessage

    if not ( len( messageIdString) and ( len( fromPersonNames2) or len( fromEmailAddresses2))):
        _resetMessageState()
        return

    messages[ messageIdString] = thisMessage
    thisMessage = []

    ## NOTE(review): this drops into the interactive debugger whenever a
    ## message has more than one sender name and none of them is one of the
    ## known multi-name senders below.  Kept as is; confirm whether a batch
    ## run should really stop here.
    knownMultiNameSenders = ( 'MXM', 'Morpheus', 'empolis KL', 'LNG-DAY', 'empolis DA', 'LNG-EWR')
    if len( fromPersonNames2) > 1 and not any( sender in fromPersonNames2 for sender in knownMultiNameSenders):
        pdb.set_trace()

    ## (The guard above guarantees at least one name or address from here
    ## on, so no further "nothing collected" check is needed.)

    ## SIPs

    personProxy = vsv.newProxyObject()
    vsv.newPropertyInstance(
        personProxy,
        'uod1', 'personNames',
        list( fromPersonNames2.keys()))
    vsv.newPropertyInstance(
        personProxy,
        'uod2', 'emailAddresses',
        list( fromEmailAddresses2.keys()))

    emailProxy = vsv.newProxyObject()
    vsv.newPropertyInstance(
        emailProxy,
        'uod1', 'emailID',
        messageIdString)

    if len( inReplyToString):
        inReplyToProxy = vsv.newProxyObject()
        vsv.newPropertyInstance(
            inReplyToProxy,
            'uod1', 'emailID',
            inReplyToString)

    subjectLineProxy = vsv.newProxyObject()
    vsv.newPropertyInstance(
        subjectLineProxy,
        'uod2', 'subjectLine',
        subjectString)

    dateProxy = vsv.newProxyObject()
    vsv.newPropertyInstance(
        dateProxy,
        'uod1', 'date',
        dateCode)

    threadProxy = vsv.newProxyObject()
    vsv.newPropertyInstance(
        threadProxy,
        'uod1', 'firstEmail',
        [ emailProxy.proxyId])


    ## OPs
    vsv.newPropertyInstance(
        personProxy,
        'uod1', 'emails',
        [ emailProxy.proxyId])

    vsv.newPropertyInstance(
        personProxy,
        'uod1', 'emailSubjectLines',
        [ subjectLineProxy.proxyId])

    vsv.newPropertyInstance(
        emailProxy,
        'uod1', 'sender',
        [ personProxy.proxyId])

    vsv.newPropertyInstance(
        emailProxy,
        'uod1', 'subjectLine',
        [ subjectLineProxy.proxyId])

    if len( inReplyToString):
        vsv.newPropertyInstance(
            emailProxy,
            'uod1', 'inReplyTo',
            [ inReplyToProxy.proxyId])

    vsv.newPropertyInstance(
        emailProxy,
        'uod1', 'content',
        '\n'.join( messages[ messageIdString]))

    vsv.newPropertyInstance(
        emailProxy,
        'uod2', 'date',
        [ dateProxy.proxyId])

    vsv.newPropertyInstance(
        subjectLineProxy,
        'uod1', 'emails',
        [ emailProxy.proxyId])

    vsv.newPropertyInstance(
        dateProxy,
        'uod1', 'emails',
        [ emailProxy.proxyId])

    ## clear working variables for next email
    _resetMessageState()


#######################################################
def _resetMessageState():
    """Clear the per-message working globals so the next e-mail starts empty."""
    global fromPersonNames, fromEmailAddresses, fromPersonNames2, fromEmailAddresses2
    global messageIdString, inReplyToString, subjectString, dateString, dateCode, thisMessage

    fromPersonNames2 = {}
    fromEmailAddresses2 = {}
    fromPersonNames = {}
    fromEmailAddresses = {}
    messageIdString = ''
    inReplyToString = ''
    subjectString = ''
    dateString = ''
    dateCode = ''
    thisMessage = []
