Python 读取并解析邮件

发布时间:2024年01月24日

这段代码是很久以前写的,基于 Python 2.7。

#-*- encoding: utf-8 -*-
#Read e-mails and import them into a MySQL database
import os
import imaplib
import email
import MySQLdb
import MySQLdb.cursors
import time
from datetime import datetime
#Make command-window output use a Chinese encoding (see sys.setdefaultencoding below)
import sys
import re
import logging
import socket
import base64
import chardet
import ssl

# Default timeout, in seconds, for sockets created after this call.
socket.setdefaulttimeout(60)
# Log destination; basicConfig below records ERROR and above only.
log_file='/log/maillog.log'
logging.basicConfig(format = '%(asctime)s  %(filename)s : %(levelname)s  %(message)s',datefmt = '%Y-%m-%d %A %H:%M:%S',filename=log_file,level=logging.ERROR)

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding reachable again so
# the interpreter-wide implicit str<->unicode codec can be switched to GBK.
reload(sys)
sys.setdefaultencoding('gbk')

import ConfigParser

# Settings come from 'my.conf', resolved relative to the current working
# directory; att_path is the 'att_path' option of section [email_att].
cp = ConfigParser.SafeConfigParser()
cp.read('my.conf')
att_path = cp.get('email_att', 'att_path')

def filter_html(s):
    """Blank out script, iframe and style elements and HTML comments in ``s``.

    ``None`` yields ``''``. Inputs of ten characters or fewer are returned
    untouched; in longer inputs every match (case-insensitive, spanning
    newlines) is replaced by a single space.
    """
    if s is None:
        return ''
    if len(s) <= 10:
        return s

    patterns = (
        r'<script(.*?)</script>',
        r'<iframe(.*?)</iframe>',
        r'<style(.*?)</style>',
        r'<!--(.*?)-->',
    )
    cleaned = s
    for pattern in patterns:
        cleaned = re.sub(pattern, ' ', cleaned, flags=re.S | re.IGNORECASE)
    return cleaned
#Save a file; the target is the plain concatenation of the given root and name
def savefile(filename, data, path):
    """Write ``data`` in binary mode to the file ``path + filename``.

    Args:
        filename: File name appended to ``path`` (no separator is inserted).
        data: Byte string to write.
        path: Directory prefix; callers pass it with a trailing separator.

    Raises:
        IOError/OSError: If the file cannot be opened or written.
    """
    filepath = path + filename
    # The context manager closes the handle even when write() raises.
    with open(filepath, 'wb') as f:
        f.write(data)

#Character-encoding conversion helper
def my_unicode(s, encoding):
    """Decode the byte string ``s`` to text, trying several encodings in turn.

    The candidates are, in order: ``encoding`` (or, when it is ``None``, the
    encoding guessed by ``chardet``), then ``'gbk'``, then ``'utf-8'``. The
    first strict decode that succeeds wins.

    Args:
        s: Byte string to decode. Text that is already decoded is returned
            unchanged.
        encoding: Codec name, or ``None`` to let ``chardet`` guess.

    Returns:
        The decoded text, or ``''`` when ``s`` is neither bytes nor text or
        when no candidate encoding can decode it.
    """
    text_type = type(u'')
    if isinstance(s, text_type):
        return s
    if not isinstance(s, bytes):
        return ''

    if encoding is None:
        # chardet.detect() returns a dict; the codec name sits under the
        # 'encoding' key and may itself be None when detection fails.
        encoding = chardet.detect(s).get('encoding')

    candidates = [encoding] if encoding else []
    candidates.extend(['gbk', 'utf-8'])
    for codec in candidates:
        try:
            return s.decode(codec)
        except (UnicodeError, LookupError, TypeError):
            # Wrong, unknown or non-string codec: fall through to the next.
            continue
    return ''

#Look up the charset recorded on a message object
def get_charset(message, default="ascii"):
    """Return whatever ``message.get_charset()`` reports.

    ``default`` is accepted for interface compatibility but is not consulted;
    a message without a charset therefore yields ``None``.
    """
    return message.get_charset()
def my_get_payload(part, charset):
    """Return the transfer-decoded payload of ``part``, optionally as text.

    Args:
        part: Message part exposing ``get_payload(decode=True)``.
        charset: ``None`` to get the raw decoded payload back, otherwise a
            codec name (or an object whose ``str()`` is the codec name, such
            as ``email.charset.Charset``) used to decode the payload with
            undecodable bytes dropped.

    Returns:
        The raw payload when ``charset`` is ``None``; otherwise the decoded
        text. ``''`` is returned when the payload cannot be read, is missing,
        or the codec is unknown.
    """
    try:
        payload = part.get_payload(decode=True)
    except Exception:
        # Best effort: a part that cannot be decoded contributes no content.
        return ''

    if charset is None:
        return payload

    try:
        # str() lets a Charset object stand in for its codec name.
        return payload.decode(str(charset), 'ignore')
    except (AttributeError, LookupError, TypeError, UnicodeError):
        # AttributeError: payload is None; LookupError: unknown codec.
        return ''


def _decode_attachment_name(filename, charset):
    """Return the first decoded chunk of a (possibly encoded-word) file name."""
    from email.header import Header, decode_header

    try:
        raw, enc = decode_header(Header(filename))[0]
    except UnicodeError:
        # Raw 8-bit name that Header cannot interpret: keep it as received,
        # only its extension is inspected by the caller.
        return filename

    if enc is None or not isinstance(raw, bytes):
        return raw
    # Prefer the charset announced in the header, then the part's own charset.
    for codec in (enc, charset):
        if codec is None:
            continue
        try:
            return raw.decode(str(codec), 'ignore')
        except (LookupError, TypeError, UnicodeError):
            continue
    return ''


def _body_to_utf8(body):
    """Return ``body`` as UTF-8 bytes, decoding byte input as GBK first."""
    if isinstance(body, bytes):
        # A byte body means no charset was known. GBK matches the default
        # encoding this script installs at import time; UTF-8 with errors
        # ignored is the fallback instead of raising.
        try:
            body = body.decode('gbk')
        except UnicodeError:
            body = body.decode('utf-8', 'ignore')
    return body.encode('utf-8', 'ignore')


#Parse an e-mail: separate the text body from the attachments
def parseEmail(msg, mychar, path):
    """Extract the text body of ``msg`` and save its image attachments.

    Every non-multipart part is visited. A part with a file name, or of main
    type ``image``, is treated as an attachment; only those whose extension is
    gif/png/jpg/bmp/jpeg/webp are written, under ``path/<YYYYMMDD>/`` with the
    name ``<HHMMSS><counter>.<ext>.att``. Any other ``text/plain`` or
    ``text/html`` part becomes the body; the last such part wins.

    Args:
        msg: Parsed message exposing ``walk()``.
        mychar: Fallback charset for body parts that declare none (may be
            ``None``).
        path: Root directory for attachments; a dated sub-directory is created
            on demand.

    Returns:
        ``(body, suffix, atts)`` where ``body`` is UTF-8 encoded bytes or
        ``None``, ``suffix`` is ``'.txt'``, ``'.htm'`` or ``None``, and
        ``atts`` is the ``';'``-joined list of saved attachment file names.

    Raises:
        OSError: If the dated attachment directory cannot be created.
    """
    image_exts = ('gif', 'png', 'jpg', 'bmp', 'jpeg', 'webp')
    mailContent = None
    suffix = None

    day = time.strftime('%Y%m%d', time.localtime(time.time()))
    mypath = path + "/" + str(day) + "/"
    if not os.path.isdir(mypath):
        try:
            os.makedirs(mypath)
        except OSError:
            # Another process may have created it in the meantime.
            if not os.path.isdir(mypath):
                raise

    attarr = []
    fnum = 0
    for part in msg.walk():
        if part.is_multipart():
            continue
        contenttype = part.get_content_type()
        mptype = part.get_content_maintype()
        filename = part.get_filename()
        charset = get_charset(part)

        #Attachment (named part or inline image)?
        if filename or mptype == 'image':
            if filename:
                fname = _decode_attachment_name(filename, charset)
            else:
                # Inline images carry no name; a missing Content-ID must not
                # abort the whole message.
                content_id = part.get('Content-ID')
                fname = (content_id or 'inline') + ".jpg"

            if not fname or "." not in fname:
                continue
            ext = fname.rsplit('.', 1)[-1].lower()
            #Only image attachments are downloaded
            if ext not in image_exts:
                continue

            data = part.get_payload(decode=True)
            stamp = str(time.strftime('%H%M%S', time.localtime(time.time())))
            # Advance the counter past names already on disk so mails handled
            # within the same second do not overwrite each other's files.
            while True:
                fnewname = stamp + str(fnum) + "." + ext + ".att"
                fnum = fnum + 1
                if not os.path.exists(mypath + fnewname):
                    break
            try:
                savefile(fnewname, data, mypath)
            except (IOError, OSError, TypeError):
                # TypeError covers a part without payload (data is None).
                logging.exception('failed to save attachment %s', fnewname)
            else:
                attarr.append(fnewname)

        elif contenttype in ('text/plain', 'text/html'):
            suffix = '.txt' if contenttype == 'text/plain' else '.htm'
            if charset is None:
                mailchar = part.get_content_charset()
                charset = mailchar if mailchar is not None else mychar
            mailContent = my_get_payload(part, charset)

    atts = ';'.join(attarr)
    if mailContent is not None:
        mailContent = _body_to_utf8(mailContent)
    return (mailContent, suffix, atts)
def get_line_char(s):
    """Return the second ``'?'``-separated field of ``s`` for a ``b`` word.

    ``s`` is split on ``'?'``. When that yields more than three fields and the
    third one is ``'b'`` (case-insensitive), the second field is returned;
    otherwise ``None``. Presumably used to pull the charset out of a
    ``=?charset?B?...?=`` header word.
    """
    pieces = s.split('?')
    is_b_word = len(pieces) > 3 and pieces[2].lower() == 'b'
    return pieces[1] if is_b_word else None
def _decode_subject(msg):
    """Return ``(subject as UTF-8 bytes, charset of its first chunk or None)``."""
    from email.header import decode_header

    raw_subject = msg["Subject"]
    if not raw_subject:
        return '', None

    chunks = decode_header(raw_subject.replace('\r\n', '\t'))
    subchar = chunks[0][1]
    if subchar is None:
        subchar = get_line_char(raw_subject)

    # Decode every chunk, not just the first, so subjects such as
    # "Re: =?gbk?B?...?=" keep their encoded part.
    pieces = []
    prev_plain = None
    for chunk, enc in chunks:
        text = my_unicode(chunk, enc or subchar)
        plain = enc in (None, 'us-ascii')
        needs_space = (
            pieces and text and prev_plain != plain
            and not pieces[-1][-1:].isspace() and not text[:1].isspace()
        )
        if needs_space:
            pieces.append(u' ')
        pieces.append(text)
        prev_plain = plain
    return u''.join(pieces).encode('utf-8', 'ignore'), subchar


def _cc_addresses(msg):
    """Return the CC addresses of ``msg`` joined with ``','`` (``''`` if none)."""
    from email.utils import getaddresses

    ccstr = msg["CC"]
    if ccstr is None:
        return ''
    return ','.join(addr for _name, addr in getaddresses([ccstr]) if addr)


def _sender_address(msg):
    """Return the bare address from the From header (``''`` if absent)."""
    from email.utils import parseaddr

    fromstr = msg["From"]
    if fromstr is None:
        return ''
    return parseaddr(fromstr)[1]


def _mail_timestamp(msg):
    """Return the mail time as ``'YYYY-MM-DD HH:MM:SS'``.

    The date after ``'>;'`` in the first Received header is preferred, then
    the Date header; the current local time is used when neither parses. The
    zone offset in the header is not applied.
    """
    from email.utils import parsedate

    fmt = "%Y-%m-%d %H:%M:%S"
    candidates = []
    received = msg["Received"]
    if received:
        rec_arr = received.split('>;')
        if len(rec_arr) == 2 and rec_arr[1]:
            candidates.append(rec_arr[1].strip())
    if msg["Date"] is not None:
        candidates.append(msg["Date"].strip())

    for datestr in candidates:
        if not datestr:
            continue
        try:
            parsed = parsedate(datestr)
            if parsed is not None:
                return time.strftime(fmt, parsed)
        except (ValueError, TypeError, IndexError, OverflowError):
            continue
    return time.strftime(fmt)


#Fetch mails
def getMail(cfgname, attpath):
    """Fetch up to ten of the newest unseen mails and save their attachments.

    Connection settings are read from section ``cfgname`` of the module-level
    config ``cp`` (options ``imaphost``, ``account``, ``psswd``, ``port`` and
    ``ssl``; ``ssl`` equal to 1 selects IMAP over SSL). For each mail the
    subject, CC list, timestamp, sender and body are extracted and
    ``parseEmail`` stores image attachments under ``attpath``.

    NOTE(review): the database write is commented out in this file, so the
    extracted fields are currently prepared but not persisted.
    NOTE(review): fetching RFC822 normally marks the message as seen on the
    server - confirm this is intended before re-running on failures.

    Args:
        cfgname: Config section name; the part after the first ``'_'`` is
            taken as the mail class (empty when there is no underscore).
        attpath: Root directory handed to ``parseEmail`` for attachments.

    Raises:
        imaplib.IMAP4.error: On login, select, search or fetch failures.
    """
    mailhost = cp.get(cfgname, 'imaphost')
    account = cp.get(cfgname, 'account')
    password = cp.get(cfgname, 'psswd')
    port = int(cp.get(cfgname, 'port'))
    # Named use_ssl so the imported ssl module is not shadowed.
    use_ssl = int(cp.get(cfgname, 'ssl')) == 1
    name_parts = cfgname.split('_')
    mailclass = name_parts[1] if len(name_parts) > 1 else ''

    #Use SSL or a plain connection
    if use_ssl:
        imapServer = imaplib.IMAP4_SSL(mailhost, port)
    else:
        imapServer = imaplib.IMAP4(mailhost, port)

    try:
        imapServer.login(account, password)
        imapServer.select()
        #Status filter; new mail is Unseen.
        #Possible statuses: All, Unseen, Seen, Recent, Answered, Flagged
        resp, items = imapServer.search(None, "Unseen")
        # Keep only the ten most recent ids, in their original order.
        midarr = items[0].split()[-10:] if items and items[0] else []

        for i in midarr:
            resp, mailData = imapServer.fetch(i, "(RFC822)")
            # A successful fetch yields a (envelope, body) tuple first.
            if not mailData or not isinstance(mailData[0], tuple) or not mailData[0][1]:
                continue
            msg = email.message_from_string(mailData[0][1])

            sub, subchar = _decode_subject(msg)
            cc = _cc_addresses(msg)
            mailtime = _mail_timestamp(msg)
            fromemail = _sender_address(msg)
            msgid = msg["X-QQ-mid"] or ''

            mailContent, suffix, atts = parseEmail(msg, subchar, attpath)
            atts = atts or ''
            mailContent = mailContent or ''
            suffix = suffix or ''

            #Store the mail body
            if suffix != '' and msgid != '':
                #Strip active HTML content
                mailContent = filter_html(mailContent)
                #Persist the record (disabled: save_to_db is not defined in this file)
                #save_to_db(cp,msgid,sub,mailContent,fromemail,atts,mailtime,mailclass,cc)
    finally:
        # Always release the connection. close() is only legal once a mailbox
        # is selected, so its state error is ignored here.
        for release in (imapServer.close, imapServer.logout):
            try:
                release()
            except (imaplib.IMAP4.error, socket.error):
                pass



#smtp发送邮件
import smtplib
from email.mime.text import MIMEText
from email.header import Header

def send_mail(cfgname,subject,msg,receiver,msgtype,cc):
    """Send a GBK-encoded mail through the SMTP host configured in ``cfgname``.

    Host, account and password come from options ``smtphost``, ``account``
    and ``psswd`` of section ``cfgname`` in the module-level config ``cp``.
    The connection uses port 25.

    Args:
        cfgname: Config section name.
        subject: Subject line.
        msg: Message body.
        receiver: Single recipient address.
        msgtype: ``'HTML'`` for an HTML body, anything else for plain text.
        cc: Comma-separated CC addresses; ``None`` or a string of at most one
            character means no CC.

    Returns:
        1 when the mail was handed to the server, 0 on any failure (the
        failure is logged).
    """
    from email.utils import formataddr

    mailhost = cp.get(cfgname, 'smtphost')
    password = cp.get(cfgname, 'psswd')
    sender = cp.get(cfgname, 'account')

    subtype = 'html' if msgtype == 'HTML' else 'plain'
    message = MIMEText(msg, subtype, 'gbk')
    # A From header needs an address, not just a display name; pair the
    # encoded display name with the sending account.
    display_name = Header(u'邮件收发测试'.encode('gbk',"ignore"), 'gbk')
    message['From'] = formataddr((display_name.encode(), sender))
    message['Subject'] = Header(subject, 'gbk')

    ccarr = []
    if cc and len(cc) > 1:
        message['Cc'] = cc
        # Drop blanks and surrounding spaces so "a@x, b@y" yields clean
        # envelope recipients.
        ccarr = [addr.strip() for addr in cc.split(',') if addr.strip()]
    receivers = [receiver] + ccarr  # envelope recipients: receiver plus CCs

    smtpObj = smtplib.SMTP()
    try:
        smtpObj.connect(mailhost, 25)    # 25 is the SMTP port
        smtpObj.login(sender, password)
        smtpObj.sendmail(sender, receivers, message.as_string())
        return 1
    except Exception:
        # Callers only get 0/1, so the reason is recorded in the log.
        logging.exception('send_mail failed for %s', receiver)
        return 0
    finally:
        # Release the connection on both success and failure.
        try:
            smtpObj.quit()
        except (smtplib.SMTPException, socket.error):
            smtpObj.close()

 

# Script entry point: process the mailbox described by the 'email_info'
# config section, using att_path as the attachment root.
if __name__ =="__main__":
    getMail('email_info', att_path)
文章来源:https://blog.csdn.net/dongtest/article/details/135820473
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。