Python 爬取百度文库并下载(免费文章限定)
import json
import os
import re

import requests

# A single session object is created at import time and reused by fetch_url
# for every request.
session = requests.session()


def fetch_url(url: str) -> str:
    """Fetch *url* through the shared session and return the body as text.

    The raw response bytes are decoded as GBK.

    Raises:
        UnicodeDecodeError: if the response bytes are not valid GBK.
    """
    return session.get(url).content.decode('gbk')


def get_doc_id(url: str) -> str:
    """Return the part of *url* captured between ``view/`` and ``.html``.

    Raises:
        IndexError: if the pattern does not occur in *url*.
    """
    return re.findall('view/(.*).html', url)[0]


def parse_type(content: str) -> str:
    """Return the first single-quoted value that follows ``docType`` in *content*.

    The pattern looks for ``docType``, then a colon, then a value wrapped in
    single quotes and followed by a comma, and returns that value.

    Raises:
        IndexError: if no such ``docType`` entry is found in *content*.
    """
    # The pattern must be a (raw) string literal; without the quotes the
    # expression is not valid Python.
    return re.findall(r"docType.*?\:.*?\'(.*?)\'\,", content)[0]


# NOTE: the excerpt is truncated at this point -- the original text ends with a
# dangling "def" whose name and body are not available.
暂无评论