import requests
from bs4 import BeautifulSoup as bs
import datetime
import csv
# Page requested once per date by the scraping loop in this script; the date
# to look up is passed in the "ldate" field of each request.
url = "http://www.chennaimetrowater.tn.nic.in/reserve.asp"
def daterange(start_date: datetime.date, end_date: datetime.date):
    """Yield every day from ``start_date`` to ``end_date``, both inclusive.

    Consecutive values are exactly one day apart. When ``start_date`` is
    later than ``end_date`` the sequence runs backwards in time; it always
    begins with ``start_date``.

    Args:
        start_date: First date to yield.
        end_date: Date at which the sequence stops.

    Yields:
        ``start_date`` shifted by 0, 1, 2, ... whole days towards
        ``end_date``.
    """
    # The day count is taken from whichever subtraction is non-negative so
    # that ``.days`` is never computed on a negative timedelta.
    if start_date <= end_date:
        span = (end_date - start_date).days
        step = 1
    else:
        span = (start_date - end_date).days
        step = -1

    # ``span + 1`` makes the far endpoint inclusive.
    for n in range(span + 1):
        yield start_date + datetime.timedelta(days=step * n)
# Inclusive range of dates to request from the site.
start = datetime.date(year=2017, month=2, day=27)
end = datetime.date(year=2017, month=11, day=3)

x = []  # dates, in the order they were requested
y = []  # text of the scraped cell for the date at the same index in ``x``

for date in daterange(start, end):
    # One request per day; the date is sent as dd/mm/YYYY in the "ldate" field.
    form_data = {
        "ldate": date.strftime('%d/%m/%Y')
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    # verify=False turns off TLS certificate checks; it is kept from the
    # original call and only has an effect if the request ends up on HTTPS.
    r = requests.get(url, data=form_data, verify=False, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    soup = bs(r.content, "lxml")

    # Purely positional lookup: 7th <tr> in the document, 5th <td> in that
    # row, first <font> tag inside that cell.
    # NOTE(review): presumably this is the total-volume cell (per the CSV
    # header below) - confirm against the live page; any layout change on
    # the site will surface here as an IndexError.
    row = soup.find_all('tr')[6]
    td = row.find_all('td')[4]
    volume = td.find_all('font')[0]

    x.append(date)
    y.append(volume.text)
    print(date)
    print(volume.text)

with open('chennai-metro-2017.csv', 'w', newline='') as fp:
    a = csv.writer(fp, delimiter=',')
    a.writerow(['Date', 'Total Volume'])
    # One (date, volume) row per day so the data lines up with the two-column
    # header; writing ``x`` and ``y`` as rows would put every date on one line
    # and every volume on the next.
    a.writerows(zip(x, y))

print(x)
print(y)
A Python script that extracts data from a website using Beautiful Soup and saves the extracted data to a CSV file.
Be the first to comment
You can use [html][/html], [css][/css], [php][/php], and other tags to embed code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.