常用的 PHP 爬虫 POST 代码
这是一段常用的、循环采集某网站内容的 Python 代码:
import json
import os
import time

import requests
def down(id, timeout=10):
    """Download one catalogue page and save its body to a text file.

    Builds the page URL from ``id``, requests it with a Baiduspider
    User-Agent and, when the server answers with HTTP 200, writes the
    decoded response text to ``<id>.txt`` in the hard-coded output
    directory as UTF-8.  Any other status code, or a network-level
    failure, is reported on stdout and the function returns without
    writing a file, so a long crawl loop is not aborted by one bad page.

    Args:
        id: Page identifier appended to the base URL and used as the
            output file stem.  (The name shadows the builtin ``id`` but is
            kept because it is part of the existing call signature.)
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` can block forever on a stalled
            connection.

    Returns:
        None.
    """
    url = "https://www.test.com/mulu/" + str(id)
    print(url)

    # Alternative User-Agent strings that can be swapped in:
    #   "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36"
    #   "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)"
    headers = {"User-Agent": "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"}

    try:
        r = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc.: report and let the caller
        # carry on with the next page instead of crashing the whole crawl.
        print(f"Failed to retrieve the webpage: {exc}")
        return

    filename = 'D:\\test\\100000\\' + str(id) + '.txt'
    print(filename)

    if r.status_code != 200:
        print(f"Failed to retrieve the webpage: Status code {r.status_code}")
        return

    # Encoding detection scans the whole body, so only do it for responses
    # that are actually going to be decoded and saved.
    r.encoding = r.apparent_encoding

    # Make sure the target directory exists; dirname is empty on platforms
    # where the backslash is not a path separator, hence the guard.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(r.text)
# Crawl every page id from 50000 through 100000 (inclusive), one page per
# second so the target site is not hit with back-to-back requests.
first_page, last_page = 50000, 100000
for page_id in range(first_page, last_page + 1):
    print(page_id)
    down(page_id)
    time.sleep(1)