国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Python > 正文

python實現的一個火車票轉讓信息采集器

2020-02-23 05:32:20
字體:
來源:轉載
供稿:網友

好吧,我承認我是對晚上看到一張合適的票轉讓但打過電話去說已經被搞走了這件事情感到蛋疼。直接上文件吧。

#coding: utf-8
'''
Look up second-hand train ticket listings during the Spring Festival rush.

Author: piglei2007@gmail.com
Date: 2011.01.25

This is a Python 2 script (urllib2 / urlparse / BeautifulSoup 3).  It fetches
the listing pages in SOURCE for every configured train and date, e-mails the
sleeper-berth listings that have not been seen before, and remembers every
seen listing URL in RECORD_FILE.
'''
import re
import os
import time
import urlparse
import datetime
import traceback
import urllib2
import socket

# Global socket timeout, in seconds, for every connection the script opens.
socket.setdefaulttimeout(20)

# Runs of whitespace; removed from listing titles before they are mailed.
BLANK_RE = re.compile(r"\s+")

# Install a cookie-aware opener with browser-like headers for all urlopen calls.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
    ("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) Gecko/20090704 Firefox/3.5"),
    ("Accept", "*/*"),
]
urllib2.install_opener(opener)

from BeautifulSoup import BeautifulSoup

# Listing page URL templates, keyed by the source name parse_content expects.
SOURCE = {
    "58": "http://bj.58.com/huochepiao/?Num=%(train)s&StartTime=%(date)s00",
    "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}

# One already-seen listing URL per line.
RECORD_FILE = "/tmp/ticket_records.txt"

# Substrings that mark a listing as a sleeper berth.
# NOTE(review): the script originally tested only the traditional character;
# the simplified form is accepted as well on the assumption that the listing
# sites may serve Simplified Chinese -- confirm against live pages.
SLEEPER_MARKS = (u"臥", u"卧")


def parse_record():
    """
    Return the set of listing URLs recorded by earlier runs.

    Reads RECORD_FILE one URL per line, ignoring blank lines.  If the file
    cannot be read, it is (re)created empty and an empty set is returned.
    """
    try:
        with open(RECORD_FILE, "r") as record_file:
            stripped = (line.strip() for line in record_file)
            return set(line for line in stripped if line)
    except IOError:
        # First run: create the file so later reads succeed.
        open(RECORD_FILE, "w").close()
        return set()


def flush_record(records):
    """
    Overwrite RECORD_FILE with ``records``, one entry per line.

    The newline separator must match what parse_record() reads back,
    otherwise previously seen listings are reported again on every run.
    """
    with open(RECORD_FILE, "w") as record_file:
        record_file.write("\n".join(records))


def main(config):
    """
    Crawl every source for every configured train/date and mail new listings.

    ``config`` is a dict with the keys:
      * "trains" -- iterable of train numbers substituted into the URL templates
      * "dates"  -- iterable of date strings substituted into the URL templates
      * "people" -- sequence of recipient e-mail addresses

    Every listing URL found is recorded (sleeper or not) so it is only ever
    considered once; only unseen sleeper listings are mailed.
    """
    existed = parse_record()
    to_email = []
    for train in config["trains"]:
        for date in config["dates"]:
            for source, url_template in SOURCE.items():
                page_url = url_template % dict(train=train, date=date)
                content = urllib2.urlopen(page_url).read()
                soup = BeautifulSoup(content)
                for href, text in parse_content(source, soup, train):
                    # Resolve relative links against the page actually
                    # fetched, not against the unformatted template.
                    link = urlparse.urljoin(page_url, href)
                    # Sleeper berths only!
                    is_sleeper = any(mark in text for mark in SLEEPER_MARKS)
                    if link not in existed and is_sleeper:
                        to_email.append([text, link])
                    existed.add(link)
    if to_email:
        # The mail is sent as HTML, so separate listings with a line break.
        content = "<br />".join(
            " | ".join(item) for item in to_email
        ).encode("utf-8")
        simple_mail(config["people"], content)
    flush_record(existed)


def parse_content(type, soup, train):
    """
    Extract listings from a parsed page.

    ``type`` selects the page layout ("58" or "ganji"), ``soup`` is the
    BeautifulSoup document and ``train`` the train number being searched.
    Returns a list of ``[href, text]`` pairs; an unknown ``type`` yields an
    empty list.  Entries without a usable link are skipped.
    """
    result = []
    if type == "58":
        info_table = soup.find("table", id="infolist")
        if info_table is not None:
            # Match the train number unless it is immediately followed by
            # "timetable" (either script form), case-insensitively.
            wanted = re.compile(u"%s(?!時刻表|时刻表)" % train, re.I)
            for x in info_table.findAll("tr", text=wanted):
                # The match is a text node; its parent is expected to be
                # the listing's <a> element.
                a = x.parent
                href = a.get("href") if a is not None else None
                if not href:
                    continue
                _text = BLANK_RE.sub("", a.text)
                result.append([href, _text])
    if type == "ganji":
        for x in soup.findAll("dl", {"class": "list_piao"}):
            dt = x.dt
            a = dt.a if dt is not None else None
            href = a.get("href") if a is not None else None
            if not href:
                continue
            result.append([href, a.text])
    return result


# SMTP account used to send notifications; placeholders to be filled in.
EMAIL_HOST = 'smtp.sohu.com'
EMAIL_HOST_USER = 'yourname@sohu.com'
EMAIL_HOST_PASSWORD = 'yourpassword'
EMAIL_PORT = 25


def simple_mail(to, content):
    """
    Send ``content`` as a UTF-8 HTML e-mail to every address in ``to``.

    ``to`` is a sequence of recipient addresses; ``content`` is the
    UTF-8 encoded message body.  The SMTP connection is closed even when
    login or sending fails; the exception still propagates to the caller.
    """
    import smtplib
    from email.mime.text import MIMEText

    msgRoot = MIMEText(content, 'html', 'UTF-8')
    msgRoot['Subject'] = "[%s]有票來啦!!!!" % datetime.datetime.today().isoformat(" ")
    msgRoot['From'] = EMAIL_HOST_USER
    msgRoot['To'] = ", ".join(to)

    s = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
    try:
        s.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
        s.sendmail(EMAIL_HOST_USER, to, msgRoot.as_string())
    finally:
        s.close()


def switch_time_zone():
    """
    Switch the process time zone to Asia/Shanghai.

    Sets the TZ environment variable and calls time.tzset() so the
    timestamps in the mail subject and the log line use that zone.
    """
    os.environ["TZ"] = "Asia/Shanghai"
    time.tzset()


switch_time_zone()

if __name__ == '__main__':
    config = {
        "trains": ("k471",),
        "dates": ("20110129",),
        "people": (
            "youremail@sohu.com",
        )
    }

    try:
        main(config)
        print("%s: ok" % datetime.datetime.today())
    except Exception:
        # Top-level boundary: report the failure instead of dying silently.
        print(traceback.format_exc())
發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 台中县| 龙里县| 永和县| 泾阳县| 墨脱县| 宁南县| 乌拉特中旗| 南陵县| 浦县| 黄龙县| 祁东县| 玛纳斯县| 紫金县| 获嘉县| 文山县| 三都| 东兰县| 满洲里市| 武定县| 利津县| 安多县| 神池县| 成都市| 苍南县| 云南省| 合江县| 固始县| 蒙自县| 安陆市| 日照市| 余江县| 封丘县| 桦川县| 罗城| 富顺县| 舒城县| 兴安县| 阆中市| 尤溪县| 谢通门县| 扬中市|