-- Snowflake clustering-key demo: build three copies of TPC-H ORDERS with
-- different (or no) cluster keys, then compare query behaviour on each.
// Step-1
// Create a baseline table with NO clustering key.
create or replace table t1_no_cluster (
o_orderkey number(38,0),
o_custkey number(38,0),
o_orderstatus varchar(1),
o_totalprice number(12,2),
o_orderdate date,
o_orderpriority varchar(15),
o_clerk varchar(15),
o_shippriority number(38,0),
o_comment varchar(79)
);
// Explicit column lists protect against column-order drift in either table.
// NOTE: LIMIT without ORDER BY returns an arbitrary sample — fine for this demo.
insert into t1_no_cluster (
    o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate,
    o_orderpriority, o_clerk, o_shippriority, o_comment
)
select
    o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate,
    o_orderpriority, o_clerk, o_shippriority, o_comment
from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS
limit 500000000;
// Create table t2 clustered on order date (a column we will NOT filter on,
// so pruning is not expected to help the priority query).
create or replace table t2_order_dt (
o_orderkey number(38,0),
o_custkey number(38,0),
o_orderstatus varchar(1),
o_totalprice number(12,2),
o_orderdate date,
o_orderpriority varchar(15),
o_clerk varchar(15),
o_shippriority number(38,0),
o_comment varchar(79)
)
cluster by (o_orderdate);
// Explicit column lists protect against column-order drift in either table.
insert into t2_order_dt (
    o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate,
    o_orderpriority, o_clerk, o_shippriority, o_comment
)
select
    o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate,
    o_orderpriority, o_clerk, o_shippriority, o_comment
from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS
limit 500000000;
// Create a table clustered on order priority (the column the test query
// filters on, so micro-partition pruning SHOULD kick in here).
create or replace table t3_order_priority (
o_orderkey number(38,0),
o_custkey number(38,0),
o_orderstatus varchar(1),
o_totalprice number(12,2),
o_orderdate date,
o_orderpriority varchar(15),
o_clerk varchar(15),
o_shippriority number(38,0),
o_comment varchar(79)
)
cluster by (o_orderpriority);
// Explicit column lists protect against column-order drift in either table.
insert into t3_order_priority (
    o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate,
    o_orderpriority, o_clerk, o_shippriority, o_comment
)
select
    o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate,
    o_orderpriority, o_clerk, o_shippriority, o_comment
from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS
limit 500000000;
// Step-2
// Populate the data (fast-forwarded in the recording, since loading
// 500M rows into each table takes some time).
// Step-3
// List the tables and their associated cluster keys.
// In the LIKE pattern, '%' matches any run of characters and '_' matches
// exactly one character, so this matches all three demo tables.
show tables like 't%_%_';
// Step-4
// Verify the row count is the same in all three tables,
// so the timing comparison below is apples-to-apples.
select count(*) from t1_no_cluster;
select count(*) from t2_order_dt;
select count(*) from t3_order_priority;
// Step-5
// Now run the same query against each table and compare the behaviour.
// The always-true current_timestamp() predicate makes every statement text
// unique, so the result cache is never reused; the cache can also be
// disabled with a session-level parameter setting instead.
// We will also switch to different warehouses of the same size below,
// so warm data caches do not skew the timings.
// Step-6
// Validate the first query in our default warehouse,
// filtering on order priority '3-MEDIUM'.
select * from t1_no_cluster where O_ORDERPRIORITY = '3-MEDIUM' and current_timestamp() > '2020-01-01 08:08:49.426 -0800' limit 10;
select * from t2_order_dt where O_ORDERPRIORITY = '3-MEDIUM' and current_timestamp() > '2020-01-01 08:08:49.426 -0800' limit 10;
select * from t3_order_priority where O_ORDERPRIORITY = '3-MEDIUM' and current_timestamp() > '2020-01-01 08:08:49.426 -0800' limit 10;
// Step-7
// ===================
-- Choose a fresh warehouse and resume it up front, so the measured time
-- reflects only query execution (no warehouse start-up, no warm cache).
use warehouse wh_01;
alter warehouse wh_01 RESUME IF SUSPENDED;
select * from t1_no_cluster where O_ORDERPRIORITY = '3-MEDIUM' and current_timestamp() > '2020-01-02 08:08:49.426 -0800' limit 10;
-- baseline: t1_no_cluster has no cluster key
// Step-8
// ===================
-- Choose a fresh warehouse and resume it up front, so the measured time
-- reflects only query execution (no warehouse start-up, no warm cache).
use warehouse wh_02;
alter warehouse wh_02 RESUME IF SUSPENDED;
select * from t2_order_dt where O_ORDERPRIORITY = '3-MEDIUM' and current_timestamp() > '2020-01-01 08:08:49.426 -0800' limit 10;
-- t2_order_dt is clustered on order date, not on the filtered column
// Step-9
// ===================
-- Choose a fresh warehouse and resume it up front, so the measured time
-- reflects only query execution (no warehouse start-up, no warm cache).
use warehouse wh_03;
alter warehouse wh_03 RESUME IF SUSPENDED;
select * from t3_order_priority where O_ORDERPRIORITY = '3-MEDIUM' and current_timestamp() > '2020-01-01 08:08:49.426 -0800' limit 10;
-- t3_order_priority is clustered on the filtered column, order priority