Application Introduction
#!/usr/bin/python
# -*- coding:utf-8 -*-
import requests
import pandas as pd


def sxs_crawl(pages=30, kw='数据挖掘', c='全国'):
    # Job-list endpoint discovered via mobile packet capture; iterate over result pages.
    list_urls = ["https://iosapi.shixiseng.com/app/interns/search?c={}&d=&ft=&i=&k={}"
                 "&m=&page={}&s=-0&st=&t=zj&x=&z=".format(c, kw, page) for page in range(pages)]
    job_list_data = []
    for url in list_urls:
        response = requests.get(url)
        if response.json()['msg']:
            job_list_data.extend(response.json()['msg'])
        else:
            # An empty page means there are no more results; stop early.
            break
    job_list = pd.DataFrame(job_list_data)
    job_list.to_csv('/Users/apple/Desktop/job_list.csv', index=False)

    # Crawl job details for each job uuid
    uuids = list(job_list['uuid'])
    job_detailed_url = ['https://iosapi.shixiseng.com/app/intern/info?uuid={}'.format(uuid) for uuid in uuids]
    job_detailed_data = []
    for url in job_detailed_url:
        response = requests.get(url)
        job_detailed_data.append(response.json()['msg'])
    job_detailed = pd.DataFrame(job_detailed_data)
    job_detailed.to_csv('/Users/apple/Desktop/job_detailed.csv', index=False)

    # Crawl company info for each company cuuid
    cuuids = list(job_detailed['cuuid'])
    com_detailed_url = ['https://iosapi.shixiseng.com/app/company/info?uuid={}'.format(cuuid) for cuuid in cuuids]
    com_detailed_data = []
    for url in com_detailed_url:
        response = requests.get(url)
        com_detailed_data.append(response.json()['msg'])
    com_detailed = pd.DataFrame(com_detailed_data)
    com_detailed.to_csv('/Users/apple/Desktop/com_detailed.csv', index=False)

    print('Successfully crawled {} jobs.'.format(job_list.shape[0]))


if __name__ == '__main__':
    sxs_crawl(pages=30, kw='数据挖掘', c='全国')
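The loops above issue bare requests.get calls with no timeout or pause, and index response.json()['msg'] directly, so a single slow or malformed response aborts the whole run. A hedged variation is sketched below; the helper name fetch_msg is illustrative (not part of the original script), and it assumes the same 'msg' payload field used above:

import time
import requests

def fetch_msg(url, pause=0.5, timeout=10):
    """Fetch one API URL and return its 'msg' payload, or None on failure."""
    time.sleep(pause)                      # be polite between requests
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.json().get('msg')
    except (requests.RequestException, ValueError):
        return None

# Example: collect job details while skipping URLs that fail.
# job_detailed_data = [m for m in (fetch_msg(u) for u in job_detailed_url) if m]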
This project is simple: the main work is capturing the mobile app's traffic and identifying the iOS client's data API endpoints; once the endpoints are known, the crawling code is straightforward to write (a quick way to probe a captured endpoint is sketched below).
It is suited to learners of Python web scraping and mobile packet capture.
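Before committing to a full crawl, it can help to request a single page of a captured endpoint and inspect the JSON it returns. A minimal sketch, assuming the same job-list URL and the 'msg' payload field that the script above relies on:

import json
import requests

# Probe one page of the job-list endpoint captured from the iOS app.
url = ("https://iosapi.shixiseng.com/app/interns/search"
       "?c=全国&d=&ft=&i=&k=数据挖掘&m=&page=0&s=-0&st=&t=zj&x=&z=")
resp = requests.get(url, timeout=10)
data = resp.json()

# Inspect the top-level keys and the first record to learn the field names.
print(list(data.keys()))
if data.get('msg'):
    print(json.dumps(data['msg'][0], ensure_ascii=False, indent=2))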
File list (partial)
Name | Size | Modified |
---|---|---|
com_detailed.csv | 103.16 KB | 2019-12-30 |
job_detailed.csv | 86.31 KB | 2019-12-30 |
job_list.csv | 11.35 KB | 2019-12-30 |
sxs_app.py | 0.66 KB | 2019-12-30 |