from selenium import webdriver
import time
import os
import datetime as dt
from BeautifulSoup import BeautifulSoup
from pyvirtualdisplay import Display
import json
import sys

# Run Chrome inside a virtual display so the script also works on a headless server.
chromedriver = "/usr/bin/chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
display = Display(visible=0, size=(800, 600))
display.start()
driver = webdriver.Chrome(chromedriver)
# driver = webdriver.Firefox()

url = "http://www.flashscore.com"
driver.implicitly_wait(10)  # wait up to 10s for elements to appear

list_of_sports = ['soccer', 'tennis', 'baseball', 'american-football', 'hockey', 'basketball']
def get_json_of_sport(sport, sport_web_driver, date, data):
    # Parse the rendered results table for one sport and append one dict per match to data.
    table_main = sport_web_driver.find_element_by_class_name('table-main')
    soup = BeautifulSoup(table_main.get_attribute('innerHTML'))
    for table in soup.findAll("table", {"class": sport}):
        league = table.find("span", {"class": 'country_part'}).text
        nation = table.find("span", {"class": 'tournament_part'}).text
        print "-------------------------------------------------------------------------------------------"
        print league, nation
        print "-------------------------------------------------------------------------------------------"
        # A table holds scheduled, finished or live rows; take whichever kind is present.
        trs = table.findAll("tr", {"class": lambda x: x and "stage-scheduled" in x.split()}) or \
              table.findAll("tr", {"class": lambda x: x and "stage-finished" in x.split()}) or \
              table.findAll("tr", {"class": lambda x: x and "stage-live" in x.split()})
        for index, tr in enumerate(trs):
            match_json = {}
            try:
                time_of_match = tr.find("td", {"class": lambda x: x and "time" in x.split()}).text
            except AttributeError:
                continue  # row without a kick-off time, skip it
            team_home = tr.find("td", {"class": lambda x: x and "team-home" in x.split()}).text
            try:
                team_away = tr.find("td", {"class": lambda x: x and "team-away" in x.split()}).text
            except AttributeError:
                # The away team occasionally sits in the next row.
                team_away = trs[index + 1].find("td", {"class": lambda x: x and "team-away" in x.split()}).text
            match_json['homeTeam'] = team_home
            match_json['awayTeam'] = team_away
            match_json['Date'] = date
            match_json['Hour'] = time_of_match
            match_json['League'] = league
            match_json['Nation'] = nation
            # Append the dict itself; set(match_json) would keep only the keys and is not JSON-serializable.
            data.append(match_json)
            print time_of_match, team_home, team_away
for sport in list_of_sports:
    driver.get(url + '/' + sport + '/')
    time.sleep(10)

    days = 7
    # Start date: a YYYY/MM/DD argument on the command line, or today by default.
    if sys.argv[1:]:
        current_date = dt.datetime.strptime(sys.argv[1], '%Y/%m/%d')
    else:
        current_date = dt.datetime.now()

    for i in range(0, days):
        data = []
        get_json_of_sport(sport, driver, current_date.strftime("%Y/%m/%d"), data)
        json_data = json.dumps(data)
        print json_data

        # One file per sport and day, e.g. soccer_2016_05_01.txt
        filename = sport + "_" + current_date.strftime("%Y_%m_%d") + '.txt'
        if os.path.isfile(filename):
            os.remove(filename)
        with open(filename, 'w') as outfile:
            outfile.write(json_data)  # json_data is already a JSON string, so write it directly
        time.sleep(10)
        print "\n"

        # Switch the site to the next day; stop when there is no "tomorrow" tab left.
        try:
            driver.find_element_by_css_selector('span.day.tomorrow').click()
        except Exception:
            break
        current_date = current_date + dt.timedelta(days=1)

driver.close()
display.stop()
Imagine what you could do if you were able to automate all the repetitive and boring activities you perform on the internet, like checking the first results of Google for a given keyword every day, or downloading a bunch of files from different websites.
In this code you'll learn to use Selenium with Python, a web scraping tool that simulates a user surfing the internet. For example, you can check your social accounts, simulate a user to test your web application, and automate anything repetitive in your daily life. The possibilities are infinite! :-)
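Stripped to its essentials, driving a browser from Python looks like the minimal sketch below. It reuses the chromedriver path and the flashscore page from the script above and just prints a slice of the rendered HTML:

from selenium import webdriver

driver = webdriver.Chrome("/usr/bin/chromedriver")       # same chromedriver path as in the script
driver.implicitly_wait(10)                               # wait up to 10s for elements to render
driver.get("http://www.flashscore.com/soccer/")          # open the page like a real user
table = driver.find_element_by_class_name('table-main')  # the element the scraper parses
print table.get_attribute('innerHTML')[:200]             # the HTML that gets handed to BeautifulSoup
driver.close()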
Here is my example code for scraping data from a sports website. It grabs all the match data and filters it by category (soccer, tennis, basketball and so on). This code will help you understand in detail how Selenium works with Python and how to scrape data with it.
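Each scraped match ends up as a small dict in the output JSON. The keys below are the ones the script fills in; the values are made-up examples:

# One scraped match (illustrative values; the keys match the script above)
match_json = {
    'homeTeam': 'Arsenal',
    'awayTeam': 'Chelsea',
    'Date': '2016/05/01',
    'Hour': '20:45',
    'League': 'ENGLAND',        # text of the country_part span
    'Nation': 'Premier League'  # text of the tournament_part span
}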
Requirements:
Step 1 : Create a virtual environment
You need virtualenv on your local machine. If it is not installed yet, install it first: sudo pip install virtualenv. Then create an environment with: virtualenv scrapy. Activate it with: source scrapy/bin/activate.
Step 2 : Install the dependencies in your environment.
BeautifulSoup==3.2.1
EasyProcess==0.1.9
PyVirtualDisplay==0.1.5
argparse==1.2.1
beautifulsoup4==4.4.1
selenium==2.47.3
wsgiref==0.1.2
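If you save this list as a file called requirements.txt, you can install everything in one go with: pip install -r requirements.txt. Note that the script uses the old BeautifulSoup 3 import (from BeautifulSoup import BeautifulSoup), so the BeautifulSoup==3.2.1 pin is the one the code actually exercises; the beautifulsoup4 pin is unused here.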
Step 3 : Download the code from GitHub and run it. You will see the script download the match details category by category and write each day's matches to a txt file (named sport_YYYY_MM_DD.txt, for example soccer_2016_05_01.txt).
You can run this code in two ways: with a date argument or without any arguments.
If you run it as python filename.py, the script collects the details for today and the following days; if you pass a date in YYYY/MM/DD format, it uses that date as the starting point instead.
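For example, assuming you saved the script as scraper.py (the name is up to you):

python scraper.py                # start from today
python scraper.py 2016/05/01     # start from the given date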