-
Notifications
You must be signed in to change notification settings - Fork 1
/
31-Redshift Lab SQL.txt
132 lines (112 loc) · 3.92 KB
/
31-Redshift Lab SQL.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
Create tables:
create table users(
userid integer not null distkey sortkey,
username char(8),
firstname varchar(30),
lastname varchar(30),
city varchar(30),
state char(2),
email varchar(100),
phone char(14),
likesports boolean,
liketheatre boolean,
likeconcerts boolean,
likejazz boolean,
likeclassical boolean,
likeopera boolean,
likerock boolean,
likevegas boolean,
likebroadway boolean,
likemusicals boolean);
create table venue(
venueid smallint not null distkey sortkey,
venuename varchar(100),
venuecity varchar(30),
venuestate char(2),
venueseats integer);
create table category(
catid smallint not null distkey sortkey,
catgroup varchar(10),
catname varchar(10),
catdesc varchar(50));
create table date(
dateid smallint not null distkey sortkey,
caldate date not null,
day character(3) not null,
week smallint not null,
month character(5) not null,
qtr character(5) not null,
year smallint not null,
holiday boolean default('N'));
create table event(
eventid integer not null distkey,
venueid smallint not null,
catid smallint not null,
dateid smallint not null sortkey,
eventname varchar(200),
starttime timestamp);
create table listing(
listid integer not null distkey,
sellerid integer not null,
eventid integer not null,
dateid smallint not null sortkey,
numtickets smallint not null,
priceperticket decimal(8,2),
totalprice decimal(8,2),
listtime timestamp);
create table sales(
salesid integer not null,
listid integer not null distkey,
sellerid integer not null,
buyerid integer not null,
eventid integer not null,
dateid smallint not null sortkey,
qtysold smallint not null,
pricepaid decimal(8,2),
commission decimal(8,2),
saletime timestamp);
Load data into tables:
copy users from 's3://go-lambda-bucket/tickit/allusers_pipe.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '|' region 'us-east-1';
copy venue from 's3://go-lambda-bucket/tickit/venue_pipe.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '|' region 'us-east-1';
copy category from 's3://go-lambda-bucket/tickit/category_pipe.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '|' region 'us-east-1';
copy date from 's3://go-lambda-bucket/tickit/date2008_pipe.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '|' region 'us-east-1';
copy event from 's3://go-lambda-bucket/tickit/allevents_pipe.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '|' timeformat 'YYYY-MM-DD HH:MI:SS' region 'us-east-1';
copy listing from 's3://go-lambda-bucket/tickit/listings_pipe.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '|' region 'us-east-1';
copy sales from 's3://go-lambda-bucket/tickit/sales_tab.txt'
credentials 'aws_iam_role=arn:aws:iam::001178231653:role/myRedshiftRole'
delimiter '\t' timeformat 'MM/DD/YYYY HH:MI:SS' region 'us-east-1';
Query the Redshift cluster:
-- Find total sales on a given calendar date.
SELECT sum(qtysold)
FROM sales, date
WHERE sales.dateid = date.dateid
AND caldate = '2008-01-05';
-- Find top 10 buyers by quantity.
SELECT firstname, lastname, total_quantity
FROM (SELECT buyerid, sum(qtysold) total_quantity
FROM sales
GROUP BY buyerid
ORDER BY total_quantity desc limit 10) Q, users
WHERE Q.buyerid = userid
ORDER BY Q.total_quantity desc;
-- Find events in the 99.9 percentile in terms of all time gross sales.
SELECT eventname, total_price
FROM (SELECT eventid, total_price, ntile(1000) over(order by total_price desc) as percentile
FROM (SELECT eventid, sum(pricepaid) total_price
FROM sales
GROUP BY eventid)) Q, event E
WHERE Q.eventid = E.eventid
AND percentile = 1
ORDER BY total_price desc;