Data extraction with Beautiful Soup - Python

"""Scrape one value per day from the Chennai Metro Water page and save it as CSV.

For every date between ``start`` and ``end`` (inclusive) the script requests
``url`` with that date as the ``ldate`` form field, pulls a single cell out of
the returned HTML, and finally writes all (date, value) pairs to
``chennai-metro-2017.csv``.
"""

import csv
import datetime

url = "http://www.chennaimetrowater.tn.nic.in/reserve.asp"

# Without a timeout a single stalled request would hang the whole run.
REQUEST_TIMEOUT_SECONDS = 30
OUTPUT_PATH = "chennai-metro-2017.csv"
CSV_HEADER = ["Date", "Total Volume"]


def daterange(start_date, end_date):
    """Yield every date from *start_date* to *end_date*, both inclusive.

    Counts upwards when ``start_date <= end_date`` and downwards otherwise,
    so the first value yielded is always *start_date*.
    """
    step = 1 if start_date <= end_date else -1
    for offset in range(abs((end_date - start_date).days) + 1):
        yield start_date + datetime.timedelta(days=step * offset)


def fetch_volume(date):
    """Return the text of the scraped cell for *date*.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        IndexError: if the page does not contain the expected row/cell/font.
    """
    # Imported here so the pure helpers (daterange, write_csv) stay importable
    # on machines that do not have the scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    form_data = {"ldate": date.strftime("%d/%m/%Y")}
    # NOTE(review): form data sent with GET travels in the request body; if the
    # site expects a form submission this may need to be requests.post - confirm.
    # NOTE(review): verify=False disables TLS certificate checks; it only
    # matters if the http:// URL redirects to https - confirm it is still needed.
    response = requests.get(
        url,
        data=form_data,
        verify=False,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "lxml")
    # Positional lookup: 7th <tr>, its 5th <td>, the first <font> inside it.
    # Presumably the "Total Volume" figure - verify against the live page.
    row = soup.find_all("tr")[6]
    cell = row.find_all("td")[4]
    return cell.find_all("font")[0].text


def write_csv(path, dates, volumes):
    """Write a header line followed by one ``date,volume`` row per entry."""
    with open(path, "w", newline="") as fp:
        writer = csv.writer(fp, delimiter=",")
        writer.writerow(CSV_HEADER)
        # Pair the columns up; writing the two lists as-is would produce one
        # row holding every date and one row holding every volume.
        writer.writerows(zip(dates, volumes))


start = datetime.date(year=2017, month=2, day=27)
end = datetime.date(year=2017, month=11, day=3)


def main():
    """Scrape every day in the configured range and save the results."""
    x = []
    y = []
    for date in daterange(start, end):
        volume = fetch_volume(date)
        x.append(date)
        y.append(volume)
        print(date)
        print(volume)

    write_csv(OUTPUT_PATH, x, y)
    print(x)
    print(y)


if __name__ == "__main__":
    main()
A Python script that extracts data from a website using Beautiful Soup and saves the extracted data to a CSV file.

Be the first to comment

You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.