应用介绍
import requests
import pandas as pd
from pymongo import MongoClient
class DataCrawler(object):
    """Crawl Fliggy scenic-spot ticket listings city by city into MongoDB.

    The list of cities is read from the ``city`` column of ``city_data.csv``
    and every ticket record returned by the search endpoint is stored in the
    ``ticket`` collection of the ``Laborday`` database on the local MongoDB
    server, tagged with the city it was found under.
    """

    # Search endpoint queried for every (city, page) pair.
    SEARCH_URL = 'https://travelsearch.fliggy.com/async/queryItemResult.do'
    # Seconds to wait for the server; without a timeout a single stalled
    # connection would block the whole crawl forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Load the city list and open the target MongoDB collection."""
        self.cities = list(pd.read_csv('city_data.csv')['city'])
        # Keep the client so the connection can be released via close().
        self.client = MongoClient(host='localhost', port=27017)
        self.col = self.client.Laborday.ticket

    def close(self):
        """Close the underlying MongoDB connection."""
        self.client.close()

    def get_city_trip(self):
        """Fetch every result page for each city and store the tickets.

        A city whose first response carries no ``itemPagenum`` entry is
        skipped. Otherwise page 1 is stored and pages 2..count are fetched
        and stored in order.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-2xx HTTP status.
        """
        for city in self.cities:
            print('正在爬取城市:{}的数据!'.format(city))
            first_page = self._fetch_page(city, 1)
            page_count = self._page_count(first_page)
            if page_count is None:
                continue
            self._store_page(city, 1, first_page)
            # range() is empty when page_count <= 1, so no extra guard needed.
            for page in range(2, page_count + 1):
                self._store_page(city, page, self._fetch_page(city, page))

    def _fetch_page(self, city, page):
        """Return the decoded JSON payload of one search-result page."""
        res = requests.get(
            self.SEARCH_URL,
            # Passing params lets requests URL-encode the city keyword.
            params=[
                ('searchType', 'product'),
                ('keyword', city),
                ('category', 'SCENIC'),
                ('pagenum', page),
            ],
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        res.raise_for_status()
        return res.json()

    @staticmethod
    def _page_count(payload):
        """Return the page count from ``itemPagenum``, or None if absent."""
        item_pagenum = payload['data']['data'].get('itemPagenum')
        if item_pagenum is None:
            return None
        return item_pagenum['data']['count']

    @staticmethod
    def _extract_auctions(payload):
        """Return the ticket list of a payload, or [] when it has none."""
        try:
            auctions = payload['data']['data']['itemProducts']['data']['list'][0]['auctions']
        except (KeyError, IndexError, TypeError):
            # A page without products should not abort the whole crawl.
            return []
        return auctions or []

    def _store_page(self, city, page, payload):
        """Tag each ticket of one page with its city and insert the batch."""
        tickets = self._extract_auctions(payload)
        for ticket in tickets:
            ticket['city'] = city
        # insert_many rejects an empty document list, so guard against it.
        if tickets:
            self.col.insert_many(tickets)
        print('成功爬取城市:{}的第{}页数据!'.format(city, page))
if __name__ == '__main__':
    # Script entry point: build the crawler and run the full crawl
    # over every configured city.
    DataCrawler().get_city_trip()
通过分析景点门票销售数据,了解哪些景点会人挤人、哪些景点值得一去。
适合Python数据分析学习者、Python爬虫学习者、Pandas使用者、数据可视化学习者等等。
©版权声明:本文内容由互联网用户自发贡献,版权归原创作者所有,本站不拥有所有权,也不承担相关法律责任。如果您发现本站中有涉嫌抄袭的内容,欢迎发送邮件至: www_apollocode_net@163.com 进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。
转载请注明出处: apollocode » Python爬虫景点销售数据
文件列表(部分)
| 名称 | 大小 | 修改日期 |
|---|---|---|
| get_data.py | 0.69 KB | 2019-12-30 |
| city_data.csv | 3.72 KB | 2019-12-30 |
发表评论 取消回复