Snowflake Time Travel Feature

Snowflake’s Time Travel feature is one of its most powerful and impactful utilities; used correctly and designed appropriately, it can save a great deal of time and effort and simplify many complex operational activities in a data project.

Time travel is not limited to viewing the past state of your data; it opens up a whole range of new possibilities for the way you design and operate data projects.

This episode is a quick 30-minute introduction to the time travel feature and helps you answer the following questions:

  1. What is time travel and how does it work (conceptually)?
  2. Do all editions of Snowflake support time travel?
  3. Are Time Travel and Fail-safe the same thing?
  4. How does time travel work for transient and temporary tables?
  5. Does time travel cost a lot?
  6. Are time travel and the data retention parameter the same thing?
  7. What is the extended SQL for time travel? (See the sketch just below.)
  8. Is time travel applicable only to tables, or to databases and schemas too?
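
To preview question 7: time travel is exposed through the at() and before() clauses, each of which accepts a timestamp, an offset in seconds, or a query ID. Here is a minimal sketch (my_table, the timestamp and the offset are placeholders for this example; the clause syntax itself is standard Snowflake SQL):

    -- query the table as of a specific point in time
    select * from my_table at(timestamp => '2021-12-20 08:00:00 -0800'::timestamp_tz);
    
    -- query the table as it was 60 minutes ago (offset is in seconds)
    select * from my_table at(offset => -60*60);
    
    -- query the table as it was immediately before a given statement ran
    select * from my_table before(statement => '<query_id>');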

This 30-minute video tutorial provides complete coverage of Snowflake's time travel feature.

SQL Script

Creating Tables Using the Time Travel Feature

//let's create a table with a 3-day retention period
    create or replace table tt_3_days (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=3
    ;
    
    -- desc table does not show the time travel retention period
    desc table tt_3_days;    
    
    -- to see the retention period, you have to use the show tables command
    show tables like 'tt_3_days';
    
    -- alternatively, you can see it via information schema
    select * from "TIPSDB"."INFORMATION_SCHEMA"."TABLES"
    where table_name = 'TT_3_DAYS';
    
    -- load data from a sample data set
    insert into tt_3_days select * from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS limit 1000000; 
    
    -- check the data
    select count(*) from tt_3_days;
    
    -- see first few records
    select *  from tt_3_days limit 10;
    
    -- let's pick some order keys and change the priority from High to Urgent
    select * from tt_3_days;
    
        -- check the min/max order key and what the order keys look like 
        select min(O_ORDERKEY), max(O_ORDERKEY)  from tt_3_days;
        select O_ORDERKEY from tt_3_days order by O_ORDERKEY;
        
        -- let's update rows with an order key smaller than a threshold
        update tt_3_days set O_ORDERPRIORITY = '1-URGENT' where O_ORDERKEY < 100000 and O_ORDERPRIORITY <> '1-URGENT';
        
        
        -- let's update another field as well
        update tt_3_days set O_ORDERSTATUS = 'F' where O_ORDERKEY < 100000 and O_ORDERSTATUS <> 'F';

        -- now let's delete those same records
        delete from tt_3_days where O_ORDERKEY < 100000 and O_ORDERPRIORITY = '1-URGENT';
        
        select count(*) from tt_3_days; -- 999946
        
        -- so what was the row count before that delete statement?
        -- (the statement ID below is the delete's query ID, taken from the query history)
        select count(*) from tt_3_days before(statement => '01a151b8-0000-6427-0001-3cfe0001ab96');
        
        -- extended time travel SQL: at(statement => ...) includes the changes made by that statement, while before(...) excludes them
        select count(*) from tt_3_days at(statement => '01a151b8-0000-6427-0001-3cfe0001ab96');
        
        -- minus operation: list the rows that were deleted
        select * from tt_3_days before(statement => '01a151b8-0000-6427-0001-3cfe0001ab96')
        minus
        select * from tt_3_days;
        
        -- not exists approach: the same result using a correlated subquery
        select * from tt_3_days before(statement => '01a151b8-0000-6427-0001-3cfe0001ab96') tt_past
        where not exists 
        (select 1
                  from tt_3_days tt_now
                  where tt_past.O_ORDERKEY = tt_now.O_ORDERKEY);
        

        -- you can also use the offset keyword
        
        -- the following query selects historical data from the table as of 60 seconds ago:
        select count(*) from tt_3_days at(offset => -60);
        
        -- The following query selects historical data from a table as of the date and time represented by the specified timestamp:
        select count(*) from tt_3_days at(timestamp => 'Mon, 20 Dec 2021 07:51:03.389 -0800'::timestamp_tz); -- 100000
        select count(*) from tt_3_days at(timestamp => 'Mon, 20 Dec 2021 07:59:03.389 -0800'::timestamp_tz); -- 999981
        
        
        -- altering the table to change the retention period
        -- changing the value from 3 to 5 days
        alter table tt_3_days 
            set data_retention_time_in_days=5;
        
        -- let's validate
        show tables like 'tt_3_days';
            
        -- the retention period can also be changed from 5 to 2 days
        alter table tt_3_days 
            set data_retention_time_in_days=2;
            
       -- let's validate
       show tables like 'tt_3_days';
       
       -- decreasing the retention period can lead to loss of historical data:
       -- changes older than the new retention period will no longer be accessible via time travel.
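
        -- to confirm the effective retention period after such changes, the
        -- standard show parameters command can be used (a minimal sketch):
        show parameters like 'DATA_RETENTION_TIME_IN_DAYS' in table tt_3_days;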
        
        

    /* -----------------------------------------    
    Let's create transient and temporary tables and see how they work
    */

    //the following will work, as a 0-day retention period (time travel disabled) is allowed
    create or replace transient table tt_0_days_tr (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=0
    ;
    
    //the following will also work, as transient tables support up to 1 day of retention
    create or replace transient table tt_1_days_tr (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=1
    ;
    
    //the following will fail, as a retention period of more than 1 day is not supported for transient tables. 
    create or replace transient table tt_2_days_tr (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=2
    ;
    
    //the same limit applies to temporary tables

    create or replace temporary table tt_2_days_tmp (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=2
    ;
    
    //but strangely, Snowflake does not throw any error here,
    //so should we assume that it has accepted this value?
    
    //let's check the show tables command
    show tables like 'tt_2_days_tmp';
    
    //what if we define a permanent table with a 100-day retention period?
    create or replace table tt_100_days (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=100
    ;
    
    // it ends with an error:
    //SQL compilation error: Exceeds maximum allowable retention time (90).
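
    // the 90-day figure is the maximum for permanent tables on Enterprise edition
    // and higher; on Standard edition the maximum is 1 day. As a sketch, the
    // account-level default can be inspected with the standard show parameters command:
    show parameters like 'DATA_RETENTION_TIME_IN_DAYS' in account;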

Drop & Undrop Time Travel Objects

// table with 3 days retention
    create or replace table time_travel_undrop_y (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=3
    ;
    show tables like 'time_travel_undrop_y';
    
    

    insert into time_travel_undrop_y select * from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS limit 100; 
    
    select * from time_travel_undrop_y;
    
    -- Let's drop this table 
    drop table time_travel_undrop_y;
    
    -- now the table is dropped; if we query it, snowflake says "object does not exist"
    select * from time_travel_undrop_y;
    
    --so let's use the undrop feature
    undrop table time_travel_undrop_y;
    
    -- now I can query it again
    select * from time_travel_undrop_y;
    
    
    /* 
    ===============================================
    Let's try it with database- and schema-level objects
    ===============================================
    */
    
        -- Change role
        use role sysadmin;
        
        -- create a database, a schema and a table
        create or replace database tt_db_undrop;
        create or replace schema tt_schema_undrop;
        create or replace table tt_undrop (field1 varchar);
        insert into tt_undrop (field1) values ('a'),('b'),('c'),('d'),('e');
        
        -- I have not specified any time travel period, so it has taken the default (which is 1 day)
        
        -- now let's look at these objects one by one
        
        show databases like 'tt%';
        -- the retention period is 1 day (the default value)
        -- and this can be changed to a new value using the parameter data_retention_time_in_days=3
        
        show schemas like 'tt%';
        -- the retention period is 1 day (the default value)
        -- and this can be changed to a new value using the parameter data_retention_time_in_days=3
        
        show tables like 'tt%';
        -- the retention period is 1 day (the default value)
        -- and it can be changed using the parameter data_retention_time_in_days, as sketched below
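
        -- as a sketch, this is how the retention period can be changed at each
        -- level (the value 3 is arbitrary; child objects inherit the parameter
        -- unless they override it explicitly):
        alter database tt_db_undrop set data_retention_time_in_days = 3;
        alter schema tt_db_undrop.tt_schema_undrop set data_retention_time_in_days = 3;
        alter table tt_db_undrop.tt_schema_undrop.tt_undrop set data_retention_time_in_days = 3;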
        
        -- now select from table and see result
        select * from tt_undrop;
        
        -- now let's drop the database itself
        drop database tt_db_undrop;
        
        -- show does not return any results, as the object no longer exists
        show databases like 'tt%';
        
        undrop database tt_db_undrop;
        -- if you rerun the undrop, it says the object already exists.
        
        
        -- now lets drop the schema
        drop schema tt_db_undrop.tt_schema_undrop;
        show schemas like 'tt%';
        
        undrop schema tt_db_undrop.tt_schema_undrop;
        
        -- so this is how it works.
        select * from tt_db_undrop.tt_schema_undrop.tt_undrop;
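
        -- note: undrop fails if an object with the same name already exists;
        -- a minimal sketch of the workaround (rename the current object first):
        drop table tt_db_undrop.tt_schema_undrop.tt_undrop;
        create table tt_db_undrop.tt_schema_undrop.tt_undrop (field1 varchar); -- a new table takes the old name
        alter table tt_db_undrop.tt_schema_undrop.tt_undrop rename to tt_db_undrop.tt_schema_undrop.tt_undrop_v2; -- move it aside
        undrop table tt_db_undrop.tt_schema_undrop.tt_undrop; -- the original (with its data) comes back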

Time Travel & Cloning Feature

//let's create a base table
    create or replace table tt_before_clone (
        o_orderkey number(38,0),
        o_custkey number(38,0),
        o_orderstatus varchar(1),
        o_totalprice number(12,2),
        o_orderdate date,
        o_orderpriority varchar(15),
        o_clerk varchar(15),
        o_shippriority number(38,0),
        o_comment varchar(79)
    )
    data_retention_time_in_days=3
    ;
    
    -- insert 100 records
    insert into tt_before_clone select * from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS limit 100; 
    
    -- let's see what this table looks like 
    select * from tt_before_clone;
    
    -- let's perform some delete, update & insert statements
    
    delete from tt_before_clone where O_ORDERPRIORITY = '5-LOW'; -- 01a151d1-0000-6425-0001-3cfe00019dda
    select * from tt_before_clone; -- 77 rows (23 records deleted)
    
    -- let's update a few records
    update tt_before_clone set O_ORDERSTATUS = 'X' where O_ORDERPRIORITY='3-MEDIUM'; -- 01a151d2-0000-6427-0001-3cfe0001ae3e
    select * from tt_before_clone; -- 77 rows, 20 records updated
    
    -- let's insert 50 records
    insert into tt_before_clone select * from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1000.ORDERS where O_ORDERPRIORITY = '5-LOW' limit 50;  
    -- 01a151d2-0000-6425-0001-3cfe00019e5a
    
    select * from tt_before_clone; -- 127 rows

    -- we can use the clone feature to create a backup or snapshot 
    create or replace table tt_after_clone clone tt_before_clone before (statement => '01a151d2-0000-6425-0001-3cfe00019e5a');
    
    -- the retention period is also cloned
    show tables like 'tt%clone%';
    
    select count(*) from tt_before_clone; -- 127
    select count(*) from tt_after_clone; -- 77
    
    -- time travel data is not available to the cloned table 
    select * from tt_before_clone before (statement => '01a151d1-0000-6425-0001-3cfe00019dda'); -- should return 100 rows
    select * from tt_after_clone before (statement => '01a151d1-0000-6425-0001-3cfe00019dda'); -- this statement fails
    
    -- none of the source table's time travel history is preserved for the clone
    show tables like 'tt%clone%'; 
    select * from tt_after_clone before (offset => -900); -- this statement fails
    
    -- when you clone a db or schema using time travel, the retention period really matters:
    -- if the retention periods differ across the contained objects, the clone may fail for some of them.
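
    -- as a sketch, time travel also combines with cloning at the database level;
    -- create ... clone ... at/before is standard syntax (tt_db_clone is a name
    -- chosen for this example, and the source must have existed 10 minutes ago):
    create or replace database tt_db_clone clone tt_db_undrop at (offset => -600);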
    
    

Time Travel & Create Table As Select (CTAS)

    -- we can create a table using the before or at extended SQL keywords
    create table tt_as_select 
        as select * from tt_before_clone before(statement => '01a151d1-0000-6425-0001-3cfe00019dda');
        
    select * from tt_as_select;
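
    -- the at() form works the same way; a minimal sketch (tt_as_select_at is a
    -- name chosen for this example, and the offset assumes the source table
    -- existed an hour ago):
    create table tt_as_select_at 
        as select * from tt_before_clone at(offset => -3600);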

Time Travel Storage Cost

//set the context    
    use role accountadmin;
    
    -- the table storage metrics view shows per-table storage usage;
    -- pay attention to the fail-safe & time travel bytes columns.
    select * from "SNOWFLAKE"."ACCOUNT_USAGE"."TABLE_STORAGE_METRICS" 
    where table_schema ='CH14' and table_name in ('TT_AFTER_CLONE','TT_AS_SELECT')
    and table_dropped is null
    limit 10;
    
    --how to calculate the time travel cost vs the table storage cost
    --let's assume that we are paying $40 per TB per month
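
    -- a quick worked example under that assumption: a schema holding 0.5 TB active,
    -- 0.2 TB time travel and 0.3 TB fail-safe bytes stores 1.0 TB in total,
    -- so it costs roughly 1.0 * 40 / 30 = $1.33 per day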
    
    select table_schema, 
        sum(ACTIVE_BYTES)/(1024*1024*1024*1024)  as "Active(TB)", 
        sum(TIME_TRAVEL_BYTES)/(1024*1024*1024*1024) as "TT(TB)", 
        sum(FAILSAFE_BYTES)/(1024*1024*1024*1024) as  "FS(TB)",
        ((sum(ACTIVE_BYTES)+sum(TIME_TRAVEL_BYTES)+sum(FAILSAFE_BYTES))/(1024*1024*1024*1024))*(40/30) as "Cost Per Day ($)"
    from 
    "SNOWFLAKE"."ACCOUNT_USAGE"."TABLE_STORAGE_METRICS" 
    where table_schema ='CH14'
    group by table_schema;
    
    -- for all my schemas
    select table_schema, 
        sum(ACTIVE_BYTES)/(1024*1024*1024*1024)  as "Active(TB)", 
        sum(TIME_TRAVEL_BYTES)/(1024*1024*1024*1024) as "TT(TB)", 
        sum(FAILSAFE_BYTES)/(1024*1024*1024*1024) as  "FS(TB)",
        ((sum(ACTIVE_BYTES)+sum(TIME_TRAVEL_BYTES)+sum(FAILSAFE_BYTES))/(1024*1024*1024*1024))*(40/30) as "Cost Per Day ($)"
    from 
    "SNOWFLAKE"."ACCOUNT_USAGE"."TABLE_STORAGE_METRICS" 
    group by table_schema;