# How to locate the API endpoint that supplies the data you see on a page:
# copy the text you want to query, open DevTools (F12) and refresh the page,
# then search for that text under the Network tab; when a request matches,
# open it -- that request is the data-source endpoint.
import requests
import time
import pymysql

# Listing page visited once to prime session cookies before hitting the API.
MAIN_URL = 'https://www.lagou.com/jobs/list_java?labelWords=&fromSearch=true&suginput='
# JSON endpoint that actually returns the job postings.
API_URL = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'

# Browser-like UA for the cookie-priming GET request.
BOOT_HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}

# Headers for the POST to the JSON API.  NOTE(review): the cookie below was
# captured from a logged-in browser session and WILL expire -- refresh it
# before running.  (In the original file this string literal was illegally
# split across two physical lines; it is rejoined here via implicit
# concatenation, preserving the exact byte value.)
API_HEADERS = {
    "authority": "www.lagou.com",
    "cookie": ("user_trace_token=20210627151414-7baf440f-d4d8-4cf8-b2cd-9f8d094cba25; JSESSIONID=ABAAAECAAEBABII9B80B115A7B06EAB4727D0C8443CA7F3; LGUID=20210627151414-d68b5f81-6a4f-44b6-8364-a2a954037383; sajssdk_2015_cross_new_user=1; _ga=GA1.2.997903917.1624778056; _gid=GA1.2.1810296173.1624778056; WEBTJ-ID=20210627%E4%B8%8B%E5%8D%883:14:22151422-17a4c51b9354e-0cf41873464a2-6373267-1327104-17a4c51b936da6; RECOMMEND_TIP=true; privacyPolicyPopup=false; LGSID=20210627151421-35ebc2b6-6a91-4e62-9d01-89ff906d6cd3; sensorsdata2015session=%7B%7D; index_location_city=%E5%85%A8%E5%9B%BD; __lg_stoken__=d933d16a05691c24fa63ff3b430203a067d5d31f537dff3c88d798b73a06c8d672365afa132fc359df75c4260b52c2c082917561f9d985756c99339f0e6eb941518a3c6192ec; X_MIDDLE_TOKEN=a8b51f2cd6f6374bbd33a90d35fc9a99; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1624778396,1624778846,1624778857,1624778880; gate_login_token=ba2a2c73430323a943d25a84017ba53fc6db412cbe3e0515; _putrc=D7115C3DCB8E3D87; login=true; unick=%E8%A2%81%E5%8A%B2%E6%9D%BE; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=143; __SAFETY_CLOSE_TIME__9136890=1; TG-TRACK-CODE=index_checkmore; _gat=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%229136890%22%2C%22%24device_id%22%3A%2217a4c51a1d9353-0780f782aac12e-6373267-1327104-17a4c51a1da416%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24os%22%3A%22Windows%22%2C%22%24browser%22%3A%22Chrome%22%2C%22%24browser_version%22%3A%2291.0.4472.114%22%2C%22lagou_company_id%22%3A%22%22%7D%2C%22first_id%22%3A%2217a4c51a1d9353-0780f782aac12e-6373267-1327104-17a4c51a1da416%22%7D; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1624780861; LGRID=20210627160100-9b1967b3-d0c0-49f8-a8c6-3bbd773f3392; X_HTTP_TOKEN=0a92058e2570a6b907808742614489fb89a8fb2e8f; "
               "SEARCH_ID=23027ec4ba9246c79885084b4b4eae65"),
    "origin": "https://www.lagou.com",
    "referer": "https://www.lagou.com/jobs/list_java/p-city_0?&cl=false&fromSearch=true&labelWords=&suginput=",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}

# Parameterized INSERT -- values are bound by the driver, never interpolated.
INSERT_SQL = (
    "INSERT INTO resource_collection_job "
    "(platform,position_name,publish_time,city,company_full_name,company_size,"
    "education,salary,work_year,position_detail) "
    "VALUES ('拉勾', %s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _build_form(page, sid):
    """Return the form payload for one API page.

    The first page carries no session id; later pages echo the ``showId``
    returned by the previous response (field name ``sig`` as observed in the
    browser's request).
    """
    if page == 1:
        return {'first': 'true', 'pn': '1', 'kd': 'python'}
    return {'first': 'false', 'pn': str(page), 'kd': 'python', 'sig': sid}


def main():
    """Crawl 30 pages of 'python' postings and insert them into MySQL."""
    # NOTE(review): fill in real host/password/database before running.
    conn = pymysql.connect(host="", user="root", password="", database="", charset="utf8")
    try:
        cursor = conn.cursor()

        # Prime session cookies by visiting the listing page first.
        boot_response = requests.get(MAIN_URL, headers=BOOT_HEADERS)
        print(boot_response.cookies)

        sid = ''
        for page in range(1, 31):
            form = _build_form(page, sid)
            print(form)

            # Throttle: pause every 6 pages to reduce the chance of a ban.
            if page % 6 == 0:
                time.sleep(10)

            response = requests.post(API_URL, headers=API_HEADERS, data=form)
            # Renamed from `data` in the original, which shadowed the request
            # form dict with the response JSON.
            payload = response.json()
            result = payload['content']['positionResult']['result']
            sid = payload['content']['showId']

            for r in result:
                print(r['createTime'])
                # BUGFIX: 'positionDetail' is not guaranteed to be present in
                # every row (the original dict literal had it commented out yet
                # still indexed it here, risking KeyError) -- default to ''.
                cursor.execute(INSERT_SQL, (
                    r['positionName'], r['createTime'], r['city'],
                    r['companyFullName'], r['companySize'], r['education'],
                    r['salary'], r['workYear'], r.get('positionDetail', ''),
                ))

            # Commit once per page rather than once per row.
            conn.commit()
    finally:
        # The original leaked the connection; always release it.
        conn.close()
    print('完成')


if __name__ == "__main__":
    main()