Python Scraper: Fetching Job Listings from Lagou (Source Code)
2021-06-27
·
yuan
·
Original
·
Source Code
·
This article is about 1,015 characters; estimated reading time: 4 minutes.
How do you trace page content back to the API it comes from? Copy a piece of the content you want to look up, press F12 to open DevTools, refresh the page, then search for that content under the Network tab. When a request matches, click it: that request is the interface supplying the data.

```python
import time

import pymysql
import requests

# MySQL connection -- fill in host/password/database for your environment
conn = pymysql.connect(host="", user="root", password="", database="", charset="utf8")
cursor = conn.cursor()

# Hit the list page first so the site issues session cookies
main_url = 'https://www.lagou.com/jobs/list_java?labelWords=&fromSearch=true&suginput='
headers_0 = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}
cookies_response = requests.get(main_url, headers=headers_0)
print(cookies_response.cookies)

# The Ajax endpoint found via the Network-panel search described above
api_url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
sid = ''
for page in range(1, 31):
    if page == 1:
        data = {
            'first': 'true',
            'pn': '1',
            'kd': 'python'
        }
    else:
        data = {
            'first': 'false',
            'pn': str(page),
            'kd': 'python',
            'sig': sid  # showId returned by the previous response
        }
    header = {
        "authority": "www.lagou.com",
        # Cookie string copied from a logged-in browser session; replace with your own
        "cookie": "user_trace_token=20210627151414-7baf440f-d4d8-4cf8-b2cd-9f8d094cba25; JSESSIONID=ABAAAECAAEBABII9B80B115A7B06EAB4727D0C8443CA7F3; LGUID=20210627151414-d68b5f81-6a4f-44b6-8364-a2a954037383; sajssdk_2015_cross_new_user=1; _ga=GA1.2.997903917.1624778056; _gid=GA1.2.1810296173.1624778056; WEBTJ-ID=20210627%E4%B8%8B%E5%8D%883:14:22151422-17a4c51b9354e-0cf41873464a2-6373267-1327104-17a4c51b936da6; RECOMMEND_TIP=true; privacyPolicyPopup=false; LGSID=20210627151421-35ebc2b6-6a91-4e62-9d01-89ff906d6cd3; sensorsdata2015session=%7B%7D; index_location_city=%E5%85%A8%E5%9B%BD; __lg_stoken__=d933d16a05691c24fa63ff3b430203a067d5d31f537dff3c88d798b73a06c8d672365afa132fc359df75c4260b52c2c082917561f9d985756c99339f0e6eb941518a3c6192ec; X_MIDDLE_TOKEN=a8b51f2cd6f6374bbd33a90d35fc9a99; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1624778396,1624778846,1624778857,1624778880; gate_login_token=ba2a2c73430323a943d25a84017ba53fc6db412cbe3e0515; _putrc=D7115C3DCB8E3D87; login=true; unick=%E8%A2%81%E5%8A%B2%E6%9D%BE; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=143; __SAFETY_CLOSE_TIME__9136890=1; TG-TRACK-CODE=index_checkmore; _gat=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%229136890%22%2C%22%24device_id%22%3A%2217a4c51a1d9353-0780f782aac12e-6373267-1327104-17a4c51a1da416%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24os%22%3A%22Windows%22%2C%22%24browser%22%3A%22Chrome%22%2C%22%24browser_version%22%3A%2291.0.4472.114%22%2C%22lagou_company_id%22%3A%22%22%7D%2C%22first_id%22%3A%2217a4c51a1d9353-0780f782aac12e-6373267-1327104-17a4c51a1da416%22%7D; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1624780861; LGRID=20210627160100-9b1967b3-d0c0-49f8-a8c6-3bbd773f3392; X_HTTP_TOKEN=0a92058e2570a6b907808742614489fb89a8fb2e8f; SEARCH_ID=23027ec4ba9246c79885084b4b4eae65",
        "origin": "https://www.lagou.com",
        "referer": "https://www.lagou.com/jobs/list_java/p-city_0?&cl=false&fromSearch=true&labelWords=&suginput=",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
    }
    print(data)
    # Pause every six pages to avoid tripping anti-scraping limits
    if page % 6 == 0:
        time.sleep(10)
    response = requests.post(api_url,
                             headers=header,
                             # cookies=cookies_response.cookies,
                             data=data)
    resp_json = response.json()  # use a new name; `data` still holds the POST payload
    result = resp_json['content']['positionResult']['result']
    sid = resp_json['content']['showId']
    for r in result:
        # Fields kept for the optional CSV export commented out below
        d = {
            'city': r['city'],
            'companyFullName': r['companyFullName'],
            'companySize': r['companySize'],
            'education': r['education'],
            'positionName': r['positionName'],
            'salary': r['salary'],
            'workYear': r['workYear']
            # 'positionDetail': r['positionDetail']
        }
        print(r['createTime'])
        # SQL statement to execute
        sql = ("INSERT INTO resource_collection_job "
               "(platform,position_name,publish_time,city,company_full_name,"
               "company_size,education,salary,work_year,position_detail) "
               "VALUES ('拉勾',%s,%s,%s,%s,%s,%s,%s,%s,%s)")
        # Run the insert; the list API may omit positionDetail, so .get() avoids a KeyError
        cursor.execute(sql, (r['positionName'], r['createTime'], r['city'],
                             r['companyFullName'], r['companySize'], r['education'],
                             r['salary'], r['workYear'], r.get('positionDetail', '')))
        # with open('lagou.csv', mode='a', encoding='utf-8-sig') as f:
        #     f.write(",".join(list(d.values())) + '\n')
    # Commit the transaction once per page
    conn.commit()
conn.close()
print('Done')
```
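The script assumes a `resource_collection_job` table already exists. Here is a minimal sketch of a matching schema, inferred only from the column list in the `INSERT` statement above; all column types and lengths are assumptions, so adjust them to your data:

```python
import pymysql

# Hypothetical DDL matching the INSERT above; types/lengths are guesses, not
# the author's actual schema. Lagou returns salary/workYear as text ranges
# (e.g. "15k-25k"), so plain VARCHAR columns are used throughout.
DDL = """
CREATE TABLE IF NOT EXISTS resource_collection_job (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    platform VARCHAR(32),
    position_name VARCHAR(128),
    publish_time VARCHAR(32),
    city VARCHAR(64),
    company_full_name VARCHAR(128),
    company_size VARCHAR(64),
    education VARCHAR(32),
    salary VARCHAR(32),
    work_year VARCHAR(32),
    position_detail TEXT
) DEFAULT CHARSET=utf8mb4
"""

conn = pymysql.connect(host="", user="root", password="", database="", charset="utf8")
with conn.cursor() as cursor:
    cursor.execute(DDL)
conn.commit()
conn.close()
```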
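If you would rather skip MySQL, the commented-out CSV branch in the script can be replaced with the standard `csv` module, which quotes fields properly so commas inside values (company names, salary ranges) don't break the columns. A sketch, where `append_rows` is a hypothetical helper and the field list mirrors the `d` dict above:

```python
import csv
import os

# Same fields collected into `d` in the main script
FIELDS = ['city', 'companyFullName', 'companySize', 'education',
          'positionName', 'salary', 'workYear']

def append_rows(rows, path='lagou.csv'):
    """Append one page of API results to a CSV file, writing a header on first use."""
    new_file = not os.path.exists(path)
    with open(path, mode='a', newline='', encoding='utf-8-sig') as f:
        # extrasaction='ignore' skips the many extra keys in each API record
        writer = csv.DictWriter(f, fieldnames=FIELDS, extrasaction='ignore')
        if new_file:
            writer.writeheader()
        for r in rows:
            writer.writerow(r)

# usage inside the page loop, instead of the cursor.execute() call:
# append_rows(result)
```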