A Python script that downloads all of the ebooks from Kan Kindle (kankindle.com). The program automatically walks the first 13 pages of the site's listing, saves each book into the ebook directory, and checks whether a book has already been downloaded so it is not fetched twice.
#!/usr/bin/env python
# coding=utf-8
from bs4 import BeautifulSoup
import urllib2
import re
import os


def download(url):
    """Fetch a book's detail page, find the .mobi link, and save the file."""
    print 'starting download %s' % url
    response = urllib2.urlopen(url, timeout=30)
    html_data = response.read()
    soup = BeautifulSoup(html_data, 'html.parser')
    print 'start to analyse ---------------'
    title_soup = soup.find_all(class_='yanshi_xiazai')  # the download button
    name_soup = soup.find_all('h1')                     # the book title
    tag_a = title_soup[0].a.attrs['href']
    link_name = name_soup[0].get_text().strip()
    filename = 'ebook/' + link_name + '.mobi'
    print 'filename is: %s' % filename
    print 'downloading with urllib2: %s' % tag_a
    if os.path.exists(filename):
        # downloaded on a previous run, skip it
        print 'already downloaded, ignoring'
    else:
        try:
            f = urllib2.urlopen(tag_a, timeout=60)
            data = f.read()
            with open(filename, 'wb') as book_file:
                book_file.write(data)
        except Exception as e:
            print e


def get_all_link(url):
    """Scan one listing page and download every linked book page."""
    print 'starting to fetch the listing page'
    response = urllib2.urlopen(url, timeout=30)
    html_data = response.read()
    soup = BeautifulSoup(html_data, 'html.parser')
    for each_link in soup.find_all('a'):
        # book detail pages contain 'view' in their markup
        if re.search('view', str(each_link)):
            print each_link.attrs['href']
            download(each_link.attrs['href'])


if __name__ == '__main__':
    # make sure the output directory exists before the first write
    if not os.path.isdir('ebook'):
        os.makedirs('ebook')
    # walk the first 13 pages of the listing
    for page in range(1, 14):
        url = 'http://kankindle.com/simple/page/' + str(page)
        print url
        get_all_link(url)
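The script above targets Python 2 (urllib2 and print statements). If you want to run the same crawl on Python 3, a minimal sketch of the download step might look like the following. The selectors (the yanshi_xiazai class and the h1 title) are taken from the original script; the urllib.request usage is a standard-library substitution for urllib2, not part of the original.

# A minimal Python 3 sketch of the download step, assuming the same page
# structure: class 'yanshi_xiazai' wraps the download link, <h1> holds
# the book title. urllib.request replaces urllib2; the logic is unchanged.
import os
import urllib.request

from bs4 import BeautifulSoup


def download_py3(url):
    with urllib.request.urlopen(url, timeout=30) as response:
        soup = BeautifulSoup(response.read(), 'html.parser')
    book_url = soup.find(class_='yanshi_xiazai').a['href']
    title = soup.find('h1').get_text().strip()
    os.makedirs('ebook', exist_ok=True)
    filename = os.path.join('ebook', title + '.mobi')
    if os.path.exists(filename):
        return  # already downloaded on a previous run
    with urllib.request.urlopen(book_url, timeout=60) as book:
        data = book.read()
    with open(filename, 'wb') as f:
        f.write(data)

Either version is run from the directory where you want the ebook/ folder to live, e.g. python download_kankindle.py (the filename here is illustrative).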