# -*- coding: utf-8 -*-
from time import sleep, time
from selenium import webdriver
from requests.packages import urllib3
import requests, traceback
import re
import os
from bs4 import BeautifulSoup
# The `verify` argument of requests controls whether TLS certificates are
# checked (it defaults to True). The page fetches in this file pass
# verify=False, so the resulting urllib3 warnings are silenced here.
urllib3.disable_warnings()
# HTTP headers sent with every page request (cookie, referer and a desktop
# Chrome user-agent string).
# NOTE(review): the cookie value is hard-coded; presumably it was copied from
# a browser session and may expire — confirm before relying on it.
headers = {
"cookie": "varify_key=kisspng; fotCookie=1; __gads=ID=b691421f54f0f43e-22b06658ecd00005:T=1647050530:RT=1647050530:S=ALNI_Map1ltTfpamxwCcGyFh1N5NFdUUnw; __atuvc=2%7C10%2C1%7C11; _gid=GA1.2.1245356425.1647615079; _gat_gtag_UA_193347727_2=1; _ga_WR5JC9XF6P=GS1.1.1647615078.7.1.1647615100.0; _ga=GA1.2.748602746.1647050410",
"referer": "https://www.****.com/",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"
}
def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    The request sends the module-level ``headers``, uses a 30 second timeout
    and disables TLS certificate verification (``verify=False``).
    ``raise_for_status()`` is called on the response, so HTTP error statuses
    raise instead of returning text.

    Args:
        url: Address of the page to download.

    Returns:
        The response text, decoded with the encoding requests detects from
        the content (``apparent_encoding``).
    """
    request_options = {"headers": headers, "timeout": 30, "verify": False}
    response = requests.get(url, **request_options)
    response.raise_for_status()
    # Decode using the encoding detected from the body itself.
    response.encoding = response.apparent_encoding
    return response.text
def writeText(name='H:\\pngurl.txt', text='\n'):
    """Append ``text`` to the UTF-8 text file at ``name``.

    The file is created if it does not exist, but the folders in its path
    must be created beforehand.

    Args:
        name: Path of the file to append to.
        text: String to append; defaults to a single newline.
    """
    # The ``with`` block closes the file on exit, so no explicit close() call
    # is needed.
    with open(name, 'a+', encoding="utf-8") as txt:
        txt.write(text)
def formdata():
    """Send a POST request carrying a single ``dwagain`` field via ``files``.

    The response is discarded and nothing is returned.

    NOTE(review): the target ``url`` is an empty string, so this looks like an
    unfinished placeholder; requests presumably rejects such a URL — confirm
    the intended endpoint before calling this.
    """
    payload = {"dwagain": "ezL3icHsQsojeJ9sOp4rcGBub7ApWrIyP2onRJR9aX3oMou4VsQ4PmI9SaYEfR=="}
    requests.request("POST", url='', files=payload)
if __name__ == '__main__':
    # Crawl listing pages 1..60, and for every <article> found in the
    # 'list-four-ul' list append the article's download-page URL to the
    # text file written by writeText().
    #
    # NOTE: an earlier variant opened each download page in a Selenium Chrome
    # driver (with the 'enable-automation' / 'enable-logging' switches
    # excluded to suppress useless log output) and waited 20 s per page.
    # That step is disabled; only the URLs are collected here.
    base_url = 'https://www.****.com'
    download_url = 'download-png.html'
    urls = ['https://www.***.com/free/watercolor,{}.html'.format(i) for i in range(1, 61)]
    for url in urls:
        print(url)
        html = getHTMLText(url)
        soup_p = BeautifulSoup(html, 'lxml')
        tab = soup_p.find('ul', class_='list-four-ul')
        # find() returns None when the list is absent from the page; treat
        # that as "no articles" instead of crashing with AttributeError.
        hrefs = tab.find_all('article') if tab is not None else []
        for href in hrefs:
            link = href.a
            one_url = link.get('href') if link is not None else None
            if not one_url:
                # Article without a usable link: nothing to record.
                continue
            print(base_url + one_url)
            downloadurl = base_url + one_url + download_url
            # Write the full download URL; the original wrote the constant
            # suffix `download_url` here, so every line was identical.
            writeText(text=downloadurl + '\n')
        # Pause between listing pages to throttle requests to the site.
        sleep(20)