-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape_hennepin_library_meeting_rooms.py
54 lines (41 loc) · 1.51 KB
/
scrape_hennepin_library_meeting_rooms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import requests
from bs4 import BeautifulSoup
import csv
# Scheme and host prepended to every site-relative path or link the scraper
# encounters (listing page, image sources, detail-page links).
BASE_URL = 'http://www.hclib.org'
# Path of the CSV file the script writes when run as a program.
OUTPUT_FILE = 'hennepin-library-rooms.csv'
def get_page(url):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A BeautifulSoup object built from the response body.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    # Without a timeout a stalled server would block the scrape forever.
    response = requests.get(url, timeout=30)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # crashing later on a missing element.
    response.raise_for_status()
    # Name the parser explicitly: otherwise bs4 picks whichever parser happens
    # to be installed (and warns), so results could differ between machines.
    return BeautifulSoup(response.text, 'html.parser')
def get_room_list():
    """Return the meeting-room listing elements from the index page.

    Fetches BASE_URL + '/about/meeting-rooms' through get_page and returns
    every <div> whose class is 'meeting-room-listing'.
    """
    listing_page = get_page(BASE_URL + '/about/meeting-rooms')
    return listing_page.find_all('div', class_='meeting-room-listing')
def parse_room(room):
    """Build one output row from a single meeting-room listing element.

    Reads the image, location heading, list items and detail link out of
    *room*, then fetches the linked detail page to pick up the description.

    Args:
        room: A listing element as returned by get_room_list().

    Returns:
        A tuple (location, room_name, group_size, description, image_url,
        detail_url). The description is UTF-8 encoded bytes; the other
        fields are text.
    """
    thumbnail = room.find('div', class_='listing__img').find('img')
    image_url = BASE_URL + thumbnail.get('src')

    summary = room.find('div', class_='listing__content')
    location = summary.find('h3').text
    bullets = summary.find_all('li')
    # The first list item is used as the room name, the third as group size.
    room_name, group_size = bullets[0].text, bullets[2].text

    # The second anchor in the listing is the link to the room's detail page.
    detail_url = BASE_URL + summary.find_all('a')[1].get('href')

    detail_body = get_page(detail_url).find('div', class_='main-content')
    # The second paragraph of the detail page's main content is the description.
    description = detail_body.find_all('p')[1].text.encode('utf-8')

    return (
        location,
        room_name,
        group_size,
        description,
        image_url,
        detail_url,
    )
if __name__ == '__main__':
    # Scrape every meeting-room listing and write one CSV row per room to
    # OUTPUT_FILE, echoing each parsed row to stdout as it goes.
    header_row = ['location', 'room_name', 'group_size', 'description',
                  'image_url', 'detail_url']
    # On Python 3 the csv module writes str, so the file must be opened in
    # text mode ('wb' raises TypeError on the first writerow). newline='' lets
    # csv control line endings; utf-8 keeps non-ASCII room text intact.
    with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header_row)
        for room in get_room_list():
            row = parse_room(room)
            print(row)
            # parse_room may hand back UTF-8 encoded bytes (the description);
            # decode them so the CSV holds the text rather than a b'...' repr.
            writer.writerow([
                field.decode('utf-8') if isinstance(field, bytes) else field
                for field in row
            ])