-
Notifications
You must be signed in to change notification settings - Fork 179
/
bs_scraper.py
81 lines (63 loc) · 2.61 KB
/
bs_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import urllib2
from email.MIMEText import MIMEText
import smtplib
from bs4 import BeautifulSoup
# Gmail account used to authenticate against the SMTP server in send_email();
# the login also serves as send_email()'s default sender and recipient.
# NOTE(review): credentials are hard-coded in the source — consider loading
# them from the environment or a config file kept out of version control.
GMAIL_LOGIN = '[email protected]'
GMAIL_PASSWORD = 'YOU NO CAN HAZ'
def send_email(subject, message, from_addr=GMAIL_LOGIN, to_addr=GMAIL_LOGIN):
    """Send a text email through smtp.gmail.com using the module's Gmail login.

    Args:
        subject: Value for the ``Subject`` header.
        message: Body text, wrapped in a ``MIMEText`` part.
        from_addr: Sender address; defaults to ``GMAIL_LOGIN``.
        to_addr: Recipient address; defaults to ``GMAIL_LOGIN``.

    Raises:
        Nothing is caught here: any error raised by ``smtplib`` while
        connecting, authenticating or sending propagates to the caller.
        The connection is closed in every case.
    """
    msg = MIMEText(message)
    msg['Subject'] = subject
    msg['From'] = from_addr
    msg['To'] = to_addr
    msg['Reply-To'] = '[email protected]'

    server = smtplib.SMTP('smtp.gmail.com', 587)  # port 465 or 587
    try:
        server.ehlo()
        server.starttls()
        # Greet the server again now that the session has been upgraded.
        server.ehlo()
        server.login(GMAIL_LOGIN, GMAIL_PASSWORD)
        server.sendmail(from_addr, to_addr, msg.as_string())
    finally:
        # Release the socket even when the handshake, login or send fails.
        server.close()
def get_site_html(url):
    """Fetch ``url`` and return the response body exactly as read.

    Args:
        url: Address passed straight to ``urllib2.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields.

    Raises:
        Nothing is caught here: errors from ``urllib2.urlopen`` or from
        reading the response propagate to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Close the response explicitly instead of waiting for it to be
        # garbage-collected.
        response.close()
def get_tree(url):
    """Download ``url`` via get_site_html() and parse it with BeautifulSoup.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``BeautifulSoup`` object built from the downloaded markup.
    """
    # NOTE(review): no parser argument is passed; presumably bs4 falls back
    # to a default parser — confirm that is the intended behaviour.
    return BeautifulSoup(get_site_html(url))
if __name__ == '__main__':
    # Keywords that mark a happy hour as interesting (matched case-sensitively
    # as plain substrings).
    stuff_i_like = ['burger', 'wine', 'sushi', 'sweet potato fries', 'BBQ']

    # Step 1: download and parse the happy-hour listing page.
    page = get_tree(
        'http://www.downtownla.com/3_10_happyHours.asp?action=ALL')

    # Step 2: for every event-title paragraph, collect its own text followed
    # by the text of each sibling element that comes after it, one per line.
    found_happy_hours = []
    for title in page.findAll('p', {'class': 'calendar_EventTitle'}):
        pieces = [title.text]
        pieces.extend(sibling.text for sibling in title.findNextSiblings())
        found_happy_hours.append('\n'.join(pieces))

    print("The scraper found %d happy hours!" % len(found_happy_hours))

    # Step 3: keep each description that mentions a liked food, recording it
    # only the first time it matches so the list has no duplicates.
    my_happy_hours = []
    for food in stuff_i_like:
        for listing in found_happy_hours:
            if food not in listing or listing in my_happy_hours:
                continue
            print("YAY! I found some %s!" % food)
            my_happy_hours.append(listing)

    print("I think you might like %d of them, yipeeeee!" % len(my_happy_hours))

    # Step 4: assemble the email body. The matches are joined with a divider
    # line and the result is encoded to UTF-8 before further cleanup.
    # To read more about encoding:
    # http://diveintopython.org/xml_processing/unicode.html
    greeting = 'Hey Katharine,\n\n\n'
    intro = 'OMG, I found some stuff for you in Downtown, take a look.\n\n'
    listings = '==============================\n'.join(my_happy_hours)
    message = (greeting + intro + listings).encode('utf-8')

    # Drop tab and carriage-return characters from the scraped text.
    for unwanted in ('\t', '\r'):
        message = message.replace(unwanted, '')
    message += '\n\nXOXO,\n Your Py Script'

    # Step 5: mail the digest.
    email = '[email protected]'
    send_email('Happy Hour Update', message, from_addr=GMAIL_LOGIN,
               to_addr=email)