# -*- coding: utf-8 -*-
"""Collect PNG download-page URLs from paginated listing pages.

For each listing page the script fetches the HTML, finds every ``<article>``
inside the ``<ul class="list-four-ul">`` container, builds the download-page
URL for the linked item and appends it to a text file.
"""
import os
import re
import traceback
from time import sleep, time

import requests
from bs4 import BeautifulSoup
from requests.packages import urllib3
from selenium import webdriver

# Requests are sent with verify=False (certificate checking is on by
# default), so silence the warnings that urllib3 would otherwise emit.
urllib3.disable_warnings()

# Request headers sent with every page fetch.
headers = {
    "cookie": "varify_key=kisspng; fotCookie=1; __gads=ID=b691421f54f0f43e-22b06658ecd00005:T=1647050530:RT=1647050530:S=ALNI_Map1ltTfpamxwCcGyFh1N5NFdUUnw; __atuvc=2%7C10%2C1%7C11; _gid=GA1.2.1245356425.1647615079; _gat_gtag_UA_193347727_2=1; _ga_WR5JC9XF6P=GS1.1.1647615078.7.1.1647615100.0; _ga=GA1.2.748602746.1647050410",
    "referer": "https://www.****.com/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"
}


def getHTMLText(url: str) -> str:
    """Fetch *url* and return the response body as text.

    The request uses the module-level ``headers``, a 30 second timeout and
    disabled certificate verification. The text is decoded with the encoding
    that requests detects from the content.

    Raises:
        requests.HTTPError: if the response status indicates an error.
    """
    r = requests.get(url, headers=headers, timeout=30, verify=False)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    return r.text


def writeText(name: str = 'H:\\pngurl.txt', text: str = '\n') -> None:
    """Append *text* to the UTF-8 text file *name*.

    The folder in the storage path must already exist; it is not created here.
    """
    # The ``with`` statement closes the file, so no explicit close() is needed.
    with open(name, 'a+', encoding="utf-8") as txt:
        txt.write(text)


def formdata():
    """Send a multipart POST carrying the ``dwagain`` token.

    NOTE(review): the target URL is an empty string, so this call cannot
    succeed as written; the function is not called anywhere in this script.
    """
    params = {"dwagain": "ezL3icHsQsojeJ9sOp4rcGBub7ApWrIyP2onRJR9aX3oMou4VsQ4PmI9SaYEfR=="}
    requests.request("POST", url='', files=params)


def main():
    """Walk listing pages 1-60 and record each item's download-page URL."""
    #options=webdriver.ChromeOptions()
    # ignore useless logs
    #options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
    #driver=webdriver.Chrome(chrome_options=options)
    base_url = 'https://www.****.com'
    download_url = 'download-png.html'
    urls = ['https://www.***.com/free/watercolor,{}.html'.format(str(i)) for i in range(1, 61)]
    for url in urls:
        print(url)
        html = getHTMLText(url)
        soup_p = BeautifulSoup(html, 'lxml')
        tab = soup_p.find('ul', class_='list-four-ul')
        if tab is None:
            # Page layout differs or the page is empty: skip it, keep going.
            print('listing container not found, skipping: ' + url)
        else:
            for href in tab.find_all('article'):
                link = href.a
                one_url = link.get('href') if link is not None else None
                if not one_url:
                    # Article without a usable link; nothing to record.
                    continue
                print(base_url + one_url)
                downloadurl = base_url + one_url + download_url
                # Record the full download-page URL (previously only the
                # constant suffix was written).
                writeText(text=downloadurl + '\n')
                #driver.get(base_url + one_url + download_url)
                #sleep(20)
        # Pause between listing pages to throttle requests.
        sleep(20)
    #driver.quit()


if __name__ == '__main__':
    main()