爬虫技巧与正则表达式应用

from bs4 import BeautifulSoup

import requests

# Fetch the index page at `root`, collect every <li> under "div.colLM",
# and print the text of each entry's ".atime" element.

urlIndex = []

# NOTE(review): the User-Agent text contains typos ("Mozila", "Window").
# It is sent to the server verbatim, so it is left unchanged here --
# confirm with the owner before correcting it.
head = {"User-Agent":"Mozila/5.0(Window NT 6.1; AppleWebKit/537.36 ; Chrome/47.0.2526.106 Safari/537.36 )"}

# Passed to requests as the `timeout` argument.
TimeOut = 5

root = "http://money.163.com/special/002534M5/review.html"

# requests.get() manages its own short-lived session, so no Session object
# is left open after the call (the original created one and never closed it).
Page = requests.get(root, headers=head, timeout=TimeOut)

# Encoding reported by requests for the response; not used further below.
Coding = Page.encoding

# Raw response bytes; decoding is delegated to BeautifulSoup.
contens = Page.content

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed; the page is decoded as GBK.
soup = BeautifulSoup(contens, "html.parser", from_encoding="gbk")

urlIndex.extend(soup.select("div.colLM li"))

for result2 in urlIndex:
    span = result2.span
    link = result2.a
    atime = result2.select(".atime")

    # Skip list items that lack the expected children instead of aborting
    # the whole run on the first malformed entry.
    if span is None or link is None or not link.has_attr("href") or not atime:
        continue

    title = span.text
    wangzhi = link["href"]
    shijian = atime[0].text

    # Function-call form prints the same text on Python 2 and Python 3.
    print(shijian)

rar 文件大小:792.81KB