Using requests together with selenium in Python to visit pages and record their download URLs

python分享 (256) 2023-04-17 11:57:37
# -*- coding: utf-8 -*-

from time import sleep
from selenium import webdriver
import requests
import urllib3
from bs4 import BeautifulSoup


# verify=False skips certificate verification (requests verifies by default);
# suppress the InsecureRequestWarning that urllib3 would otherwise print.
urllib3.disable_warnings()

headers = {
"cookie": "varify_key=kisspng; fotCookie=1; __gads=ID=b691421f54f0f43e-22b06658ecd00005:T=1647050530:RT=1647050530:S=ALNI_Map1ltTfpamxwCcGyFh1N5NFdUUnw; __atuvc=2%7C10%2C1%7C11; _gid=GA1.2.1245356425.1647615079; _gat_gtag_UA_193347727_2=1; _ga_WR5JC9XF6P=GS1.1.1647615078.7.1.1647615100.0; _ga=GA1.2.748602746.1647050410",
"referer": "https://www.****.com/",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"
}

def getHTMLText(url):
    # Fetch the page with the browser-like headers and return its decoded text.
    r = requests.get(url, headers=headers, timeout=30, verify=False)
    r.raise_for_status()
    r.encoding = r.apparent_encoding  # let requests guess the real encoding
    return r.text

def writeText(name='H:\\pngurl.txt', text='\n'):
    # The directory in the target path must already exist.
    with open(name, 'a+', encoding="utf-8") as txt:
        txt.write(text)  # the with-block closes the file automatically

def formdata():
    # Not called from __main__; sketch of the multipart POST the download form expects.
    # The target URL was left blank in the original post and must be filled in.
    params = {"dwagain": "ezL3icHsQsojeJ9sOp4rcGBub7ApWrIyP2onRJR9aX3oMou4VsQ4PmI9SaYEfR=="}
    requests.request("POST", url='', files=params)
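
# A hedged sketch, not part of the original post: if the download endpoint accepts an
# ordinary form-encoded POST instead of multipart, the request could be written as
# below. post_url and token are hypothetical placeholders; the real endpoint and
# payload are not given in the post.
def formdata_urlencoded(post_url, token):
    params = {"dwagain": token}  # same form field name as in formdata() above
    return requests.post(post_url, data=params, headers=headers, timeout=30, verify=False)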

if __name__ == '__main__':
    #options = webdriver.ChromeOptions()
    # Hide the automation banner and suppress the noisy Chrome logging.
    #options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
    #driver = webdriver.Chrome(options=options)
    base_url = 'https://www.****.com'
    download_url = 'download-png.html'
    urls = ['https://www.***.com/free/watercolor,{}.html'.format(str(i)) for i in range(1,61)]
    for url in urls:
        print(url)
        html = getHTMLText(url)
        #print(html)
        soup_p = BeautifulSoup(html,'lxml')
        tab = soup_p.find('ul',class_='list-four-ul')
        hrefs = tab.find_all('article')
        #print(len(hrefs))
        num = 0
        for href in hrefs:
            num += 1
            one_url = href.a.get('href')
            print(base_url + one_url)
            downloadurl = base_url + one_url + download_url
            writeText(text=downloadurl + '\n')  # record the full download URL, not just the suffix
            #driver.get(base_url + one_url + download_url)
            #sleep(20)
            #if num>3:
            #    break
        sleep(20)
    #driver.quit()
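
The commented-out lines in __main__ only hint at the Selenium half of the workflow. Below is a minimal, separate sketch (not from the original post) of how the URLs recorded in H:\pngurl.txt could be fed back into a Chrome driver so that each page load triggers the PNG download; the file path and the 20-second wait are simply carried over from the script above.

# visit_recorded_urls.py -- hedged follow-up sketch
from time import sleep
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
driver = webdriver.Chrome(options=options)

with open('H:\\pngurl.txt', encoding='utf-8') as f:
    urls = [line.strip() for line in f if line.strip()]

for url in urls:
    driver.get(url)   # loading the download page is what starts the PNG download
    sleep(20)         # crude wait so the download can finish; adjust as needed

driver.quit()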

 
