forked from jseabold/pycon-ds-2018
-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.py
34 lines (29 loc) · 826 Bytes
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import numpy as np
import pandas as pd
def float_to_zip(zip_code):
    """Convert a raw zip-code cell into a zero-padded 5-character string.

    The value is first parsed with ``float`` (the file stores zip codes in a
    float-like form such as ``"60601.0"``) and then rendered without a
    decimal part, left-padded with zeros to width 5.

    Parameters
    ----------
    zip_code : str
        The cell text as read from the file.

    Returns
    -------
    str or float
        The 5-character zip string, or ``np.nan`` when the cell is empty,
        not numeric, or parses to a non-finite float (``nan``/``inf``).
    """
    # convert from the string in the file to a float
    try:
        zip_code = float(zip_code)
    except ValueError:  # some of them are empty
        return np.nan
    # float() happily parses "nan"/"inf"; formatting those would yield
    # strings such as "00nan", so treat them as missing as well
    if not np.isfinite(zip_code):
        return np.nan
    # 0 makes sure to left-pad with zero
    # zip codes have 5 digits
    # .0 means, we don't want anything after the decimal
    # f is for float
    return "{:05.0f}".format(zip_code)
# Load the inspection CSV into a DataFrame indexed by inspection_id.
dta = pd.read_csv(
    'data/health_inspection_chi.csv',
    # read every column except 'location'
    usecols=lambda name: not name == 'location',
    index_col='inspection_id',
    # parse the inspection date column as datetimes
    parse_dates=['inspection_date'],
    # zip codes are normalised cell-by-cell through float_to_zip
    converters={'zip': float_to_zip},
    # these columns are loaded as pandas categoricals
    dtype=dict.fromkeys(
        ('results', 'risk', 'inspection_type', 'facility_type'),
        'category',
    ),
)