import urllib2
import urllib
import re
import os
def get_links(u):
    """Download the page at ``u`` and return its "files" links as HTML lines.

    Progress information (the URL, the size of the downloaded page and the
    number of regex matches) is printed to stdout.

    Args:
        u: URL of the listing page to download.

    Returns:
        A list of strings, one per matched anchor whose href contains
        "files". Each string is an ``<a>`` tag whose href is the part of
        ``u`` before "/files" followed by the matched href, with the stripped
        link text as its label, terminated by ``<br />`` and a newline.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise on failure; nothing is
        caught here.
    """
    response = urllib2.urlopen(u)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    print("now entering " + u)
    print(str(len(content)) + " bytes")

    # NOTE(review): both ``.*`` groups are greedy, so several anchors on one
    # line collapse into a single match. The pattern is kept unchanged because
    # it appears tuned to the target page's markup.
    links = re.findall(r'<a href="(.*)"(?:\W|>)>(.*)\W?</\w>', content)
    print(str(len(links)) + " links")

    # The matched hrefs are appended to everything in the page URL that
    # precedes "/files"; compute that prefix once.
    base = u.split('/files')[0]
    return [
        '<a href="' + base + href + '">' + text.strip() + '</a><br />\n'
        for href, text in links
        if "files" in href
    ]
# Listing URL without the page number; the page index is appended per request.
url = "http://www.demonoid.pw/files/?category=4&seeded=2&external=2&page="

# Scrape pages 1 through 10 and collect their links in a single HTML file.
# The ``with`` block closes the file even if one of the fetches raises.
with open("games.htm", "w") as ftest:
    for i in range(1, 11):
        pulled = get_links(url + str(i))
        # Every page section after the first is preceded by an empty
        # paragraph that acts as a visual separator.
        ftest.write(("\n<p></p>" if i > 1 else "") + "[ " + url + str(i) + " ]:<br />\n")
        ftest.writelines(pulled)

output_path = os.path.abspath(ftest.name)
# Print the path untouched: collapsing doubled backslashes would corrupt the
# leading "\\" of a UNC path and changes nothing for ordinary paths.
print("links disponibles en " + output_path)
# A file URL needs forward slashes, so convert each Windows backslash.
print("file:///" + output_path.replace("\\", "/"))
The URL can be de-hardcoded; as a test I only pulled game links (hint: the "category" query parameter). The script can be converted into a def, a class, or whatnot.
Be creative!
Be creative!
Be the first to comment
You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.