Python抓取收件箱邮件写入MySQL数据库小脚本

#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Fetch mails from a POP3 inbox and store the matching ones in MySQL.

create_author : 蛙鳜鸡鹳狸猿
create_time   : 2019-01-21
program       : *_* mail handler *_*
"""
import os
import re
import sys
import poplib
import chardet
import datetime
import _mysql_connector
from email import parser, policy
from email.utils import parseaddr, parsedate_tz, mktime_tz

# MySQL connection parameters; unpacked as keyword arguments into connect().
CON = {
    "host": "......",  # MySQL host
    "port": 3306,
    "user": "......",  # MySQL user
    "password": "......",  # MySQL password
    "database": "......",  # MySQL database
}

# MySQL session settings applied right after connecting.
SET = {
    "charset": "utf8",  # character charset
    "use_unicode": True,  # use unicode
    "autocommit": False,  # transaction autocommit
}

# Target database/table and the INSERT template filled in per stored mail.
SQL = {
    "dbs": "......",  # MySQL database
    "tab": "......",  # MySQL table
    "sql": """INSERT INTO `{dbs}`.`{tab}` (`ts_send`, `sender`, `mail`, `subject`, `content`) VALUES('{time}', '{from}', '{addr}', '{name}', '{body}');""",
}

# POP3-over-SSL mailbox credentials.
MAIL = {
    "host": "pop3......",  # mail host
    "port": 995,
    "user": "......@.......com",  # mail user
    "pass": "......",  # mail password
}

# Location of the local checkpoint ("tag") file.
mail_dig_tag_path = os.getcwd()
mail_dig_tag_file = "mail_dig.tag"

# Keywords a mail's subject or body must match to be stored.
mail_dig_filter_list = ["Linux", "MySQL", "MongoDB", "Redis", "C", "Python", "Shell"]

# Codec
codec = ["gb2312", "gb18030", "gbk", "hz", "big5", "big5hkscs", "cp950", "cp932", "euc_jp", "euc_jis_2004", "euc_jisx0213","iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004", "iso2022_jp_3", "iso2022_jp_ext","shift_jis", "shift_jis_2004", "shift_jisx0213"]class MailProc(object):def __init__(self):self.mysql_con = self._mysqlcon()self.mail_con = self._mailcon()self.char_col = set()def _fileproc(self, path, text):"""Handler of writing file out to local.ローカルへファイルを書くハンドラ。:param path: stringfull path of the file to write out.ファイルを書き込むディレクトリ。:param text: stringcontent to write out.書き込む内容。:return: Python built-in exit code.Pythonの組み込みの終了コード。"""with open(path, 'w') as f:f.write(text)def _mysqlcon(self):"""Get MySQL connection.MySQLの接続を取得する。:return: MySQL connection object.MySQLの接続のオブジェクト。"""con = _mysql_connector.MySQL()con.connect(**CON)con.set_character_set(SET["charset"])con.use_unicode(SET["use_unicode"])con.autocommit(SET["autocommit"])con.query("SET NAMES utf8mb4;")con.query("SET CHARACTER SET utf8mb4;")con.query("SET character_set_connection=utf8mb4;")con.commit()return condef _mailcon(self):mail_con = poplib.POP3_SSL(host=MAIL["host"], port=MAIL["port"])mail_con.user(MAIL["user"])mail_con.pass_(MAIL["pass"])return mail_condef _get_mail_source(self, ino):mail_src = ''for mail_row in self.mail_con.retr(ino)[1]:if mail_row:char = chardet.detect(mail_row)["encoding"]self.char_col.add(char)try:mail_dec = mail_row.decode("utf-8")except UnicodeDecodeError:mail_dec = mail_row.decode(char)mail_src = mail_src + mail_dec + "\n"mail_par = parser.Parser(policy=policy.default).parsestr(mail_src)return mail_pardef _get_mail_id(self, mail_par):return mail_par.get("Message-ID")def _get_mail_time(self, mail_par):return datetime.datetime.fromtimestamp(mktime_tz(parsedate_tz(mail_par.get("Date"))))def _get_mail_from(self, mail_par):return parseaddr(mail_par.get("From"))[0]def _get_mail_addr(self, mail_par):return parseaddr(mail_par.get("From"))[1]def _get_mail_name(self, 
mail_par):return mail_par.get("Subject")def _get_mail_body(self, mail_par):def __body_man(is_decode=False, char=None):body = ''if not mail_par.is_multipart():if not is_decode:body = mail_par.get_payload()elif is_decode:mail_par.set_charset(char)body = mail_par.get_payload(decode=True).decode(char)elif mail_par.is_multipart():for part in mail_par.get_payload():if part.get_content_type() == part.get_default_type():if not is_decode:body += part.get_payload()elif is_decode:part.set_charset(char)body += part.get_payload(decode=True).decode(char)return bodychar = mail_par.get_charsets()[-1]if char == "utf-8":return __body_man()elif char != "utf-8":codec_copy = codec.copy()for char_detect in self.char_col:codec_copy.append(char_detect)for char_decode in codec_copy:try:return __body_man(is_decode=True, char=char_decode)except UnicodeDecodeError:continuefinally:self.char_col = set()def mail_caseinfo_dig(self):tag_id = ''tag_ts = datetime.datetime.strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")file_tags = mail_dig_tag_path + '/' + mail_dig_tag_fileif os.path.exists(file_tags):with open(file_tags, 'r') as f:tags_info = f.readlines()tag_id = tags_info[0].replace("\n", '')tag_ts = datetime.datetime.strptime(tags_info[1], "%Y-%m-%d %H:%M:%S")mail_cnt = self.mail_con.stat()[0]mail_par_ck = self._get_mail_source(ino=mail_cnt)mail_tag_id = self._get_mail_id(mail_par=mail_par_ck)mail_tag_ts = self._get_mail_time(mail_par=mail_par_ck)tags = "{id}\n{ts}".format(id=mail_tag_id, ts=mail_tag_ts)if mail_tag_id == tag_id:if not os.path.exists(file_tags):self._fileproc(path=file_tags, text=tags)self.mail_con.quit()sys.exit()elif mail_tag_id != tag_id:for mail_ino in range(mail_cnt):mail_par = self._get_mail_source(ino=mail_ino + 1)mail_time = self._get_mail_time(mail_par=mail_par)mail_id = self._get_mail_id(mail_par=mail_par)if mail_time >= tag_ts and mail_id != tag_id:mail_body = self._get_mail_body(mail_par=mail_par)if re.search("(\\\\u\d+\w?)+", mail_body):continueguide = 
Falsemail_name = self._get_mail_name(mail_par=mail_par)for filter_loop in mail_dig_filter_list:if re.search(filter_loop, mail_name):guide = Truebreakif not guide:for filter_loop in mail_dig_filter_list:if re.search(filter_loop, mail_body):guide = Truebreakif not guide:continueelif guide:mail_from = self._get_mail_from(mail_par=mail_par)mail_addr = self._get_mail_addr(mail_par=mail_par)MAIL = {"time": mail_time.__str__(),"from": mail_from,"addr": mail_addr,"name": mail_name,"body": mail_body,}print(MAIL)for column in MAIL:if column != "time":MAIL[column] = self.mysql_con.escape_string(MAIL[column]).decode("utf-8")try:self.mysql_con.query(SQL["sql"].format(**SQL, **MAIL))except _mysql_connector.MySQLInterfaceError as E:self.mysql_con.rollback()if mail_ino:mail_par = self._get_mail_source(ino=mail_ino)mail_time = self._get_mail_time(mail_par=mail_par)mail_id = self._get_mail_id(mail_par=mail_par)tags = "{id}\n{ts}".format(id=mail_id, ts=mail_time)self._fileproc(path=file_tags, text=tags)self.mysql_con.close()self.mail_con.quit()sys.exit(E)else:self.mysql_con.commit()self.mysql_con.close()self._fileproc(path=file_tags, text=tags)self.mail_con.quit()if __name__ == "__main__":MP = MailProc()MP.mail_caseinfo_dig()

 


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部