在浏览器的调试模式下,打开 Network 选项卡并选择 XHR 筛选,即可找出 Ajax 请求(这类请求的 Request Headers 中带有 X-Requested-With: XMLHttpRequest)。
# Scrape one user's Weibo timeline and store it in a CSV file
# (columns: creation time, likes, comments, reposts, text content).
import csv
import json

# Column order shared by the header row and every data row.
FIELDNAMES = ['created_at', 'attitudes_count', 'comments_count',
              'reposts_count', 'content']
CSV_PATH = 'weibo.csv'


def get_html(path, page):
    """Fetch one page of the timeline API and return the raw response body.

    Args:
        path: URL of the API endpoint.
        page: Page number sent as the ``page`` query parameter.

    Returns:
        The response text, or ``None`` when the request fails or the server
        answers with an error status.
    """
    # Imported here so the parsing/CSV helpers stay importable even when
    # the third-party HTTP client is not installed.
    import requests

    params = {
        'type': 'uid',
        'value': 2830678474,
        'containerid': 1076032830678474,
        'page': page,
    }
    headers = {
        'Referer': 'https://m.weibo.cn/u/2830678474',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    }
    try:
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url=path, params=params, headers=headers, timeout=10)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Covers connection errors and timeouts as well as non-2xx statuses.
        print('request failed:', exc)
        return None
    return r.text


def detail_html(txt, csv_path=CSV_PATH):
    """Append the posts found in one decoded API response to the CSV file.

    Args:
        txt: The decoded JSON response (a dict).
        csv_path: File the rows are appended to; defaults to ``weibo.csv``.

    Returns:
        The value of ``data.cardlistInfo.page`` from the response, or
        ``None`` when it is absent (the caller stops paging on a falsy value).
    """
    # Every level may be missing or null in the payload, so fall back to
    # empty containers instead of crashing on None.
    data = txt.get('data') or {}
    page = (data.get('cardlistInfo') or {}).get('page')
    cards = data.get('cards') or []

    # newline='' lets the csv module control line endings itself (avoids
    # blank rows on Windows and keeps embedded newlines intact).
    with open(csv_path, 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        for card in cards:
            # Only cards of type 9 are kept; the original author filters
            # the others out as ads.
            if card.get('card_type') != 9:
                continue
            mblog = card.get('mblog')
            if not mblog:
                continue
            writer.writerow({
                'created_at': mblog.get('created_at'),            # creation time
                'attitudes_count': mblog.get('attitudes_count'),  # likes
                'comments_count': mblog.get('comments_count'),    # comments
                'reposts_count': mblog.get('reposts_count'),      # reposts
                'content': mblog.get('text'),                     # text content
            })
    return page


def main():
    """Write the CSV header, then fetch and store pages until none remain."""
    path = 'https://m.weibo.cn/api/container/getIndex'
    page = 1
    # Start from a fresh file containing only the header row.
    with open(CSV_PATH, 'w', encoding='utf-8', newline='') as csvfile:
        csv.DictWriter(csvfile, fieldnames=FIELDNAMES).writeheader()

    while page:
        raw = get_html(path, page)
        if raw is None:
            # The request failed; retrying the same page forever is pointless.
            break
        try:
            txt = json.loads(raw)
        except ValueError:
            print('response is not valid JSON')
            break
        if not txt:
            # An empty payload would never advance `page`; stop instead of
            # looping endlessly.
            break
        page = detail_html(txt)
        print(page)


if __name__ == '__main__':
    main()