Loading Date & Timestamp Formats In Snowflake
Summary
This blog explains how to handle date and time formats while loading data into Snowflake tables from CSV files, and how to make sure that millisecond and nanosecond values are also loaded for timestamp data types.
Date/Time Data Handling In Snowflake
Handling date & timestamp formats while loading data into Snowflake needs special consideration, and many of us are not fully aware of it, especially with CSV data file loading. There are default settings which we don't have to touch, but we must know how to use them while creating file formats so the COPY command works as expected and data loading happens without any error or any data loss.
This episode will describe different scenarios with date and timestamp text formats in CSV data files, and how the date/timestamp parameters in the file format object control this conversion. Once you complete this episode, you will be able to answer the following questions:
- Date data type handling considerations in Snowflake with CSV data files.
- Upper and lower bound values for the date data type in Snowflake.
- Timestamp data type handling considerations in Snowflake with CSV data files.
- Millisecond and nanosecond precision applicability while loading data.
- Parameters used to configure different file formats while loading CSV data into Snowflake.
- Data loading performance with date and timestamp fields.
Sample Data Set
Here is the data set which is used in this guide.
SQL Scripts Used for Date/Time Data Formats
Part-01
Loading Date & Timestamp Formats without Any Special Parameters
-- Step-01: create the bookings table used throughout this guide.
-- Five columns covering the date, time, datetime and timestamp data types.
CREATE OR REPLACE TRANSIENT TABLE bookings (
    booking_id        NUMBER,
    booking_dt        DATE,
    booking_time      TIME,
    booking_dt_time   DATETIME,   -- DATETIME is an alias for TIMESTAMP_NTZ
    booking_timestamp TIMESTAMP
);
-- File format csv_ff with standard parameters only.
-- No date/time parameter is set, so the session defaults (AUTO detection)
-- decide how date and timestamp text is parsed during the load.
CREATE OR REPLACE FILE FORMAT csv_ff
    TYPE                         = 'csv'
    COMPRESSION                  = 'none'
    FIELD_DELIMITER              = ','
    RECORD_DELIMITER             = '\n'
    SKIP_HEADER                  = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '\042';  -- octal 042 = double quote (")
-- Use SnowSQL to upload and load a sample csv file.
-- PUT uploads the local file into the user stage; it runs from a client
-- such as SnowSQL, not from the web UI worksheet.
put
file:///tmp/ch-10/01-bookings-std.csv
@~/ch10/csv
auto_compress=false;
-- list the user stage to confirm the upload succeeded.
list @~/ch10/csv/01-bookings-std.csv;
-- copy the staged file into the bookings table;
-- on_error = 'continue' loads the valid rows and skips bad ones.
copy into bookings
from @~/ch10/csv/01-bookings-std.csv
file_format = csv_ff
on_error = 'continue'
;
-- review the loaded data.
select * from bookings;
Part-02
Upper & Lower Bound for the Year Value in a Date Field in Snowflake
-- Part-02 reuses the same bookings table and the same csv_ff file format.
-- empty the table before loading the new data set.
truncate table bookings;
-- put command to upload the year-range sample file into the user stage.
put
file:///tmp/ch-10/02-bookings-year-range.csv
@~/ch10/csv
auto_compress=false;
-- list the user stage to confirm the upload.
list @~/ch10/csv/02-bookings-year-range.csv;
-- truncate once more and confirm the table is empty before the copy.
truncate table bookings;
select * from bookings;
-- copy the staged file; force = true reloads the file even if it was
-- loaded before, and on_error = 'continue' skips rows whose year value
-- falls outside the supported date range instead of failing the load.
copy into bookings
from @~/ch10/csv/02-bookings-year-range.csv
file_format = csv_ff
force = true
on_error = 'continue'
;
-- review which rows survived the year-range boundaries.
select * from bookings;
Part-03
Nanosecond Precision in the Timestamp Data Type in Snowflake
-- Part-03: empty the booking table before loading the nanosecond data.
truncate table bookings;
select * from bookings;
-- put command to upload the nanosecond sample file into the user stage.
put
file:///tmp/ch-10/03_bookings_nano_seconds.csv
@~/ch10/csv
auto_compress=false;
-- list the user stage to confirm the upload.
list @~/ch10/csv/03_bookings_nano_seconds.csv;
-- copy the staged file into the bookings table.
copy into bookings
from @~/ch10/csv/03_bookings_nano_seconds.csv
file_format = csv_ff
force = true
on_error = 'continue';
select * from bookings;
-- the result does not DISPLAY any nanosecond values: the session's default
-- output format truncates the fractional seconds. Setting FF9 in the
-- output format makes all nine fractional digits visible.
alter session set TIMESTAMP_NTZ_OUTPUT_FORMAT = 'YYYY-MM-DD HH24:MI:SS.FF9';
Part-04
Date & Timestamp Formats in File Format Objects
-- Part-04: empty the booking table before loading the new data.
truncate table bookings;
-- put command to upload the mixed-date-format sample file into the user stage.
put
file:///tmp/ch-10/04-bookings-mixed-date-formats.csv
@~/ch10/csv
auto_compress=false;
-- list the user stage to confirm the upload.
list @~/ch10/csv/04-bookings-mixed-date-formats.csv;
-- copy the staged file; on_error = 'continue' loads the good rows and
-- skips the rows whose date/timestamp text does not match the defaults.
copy into bookings
from @~/ch10/csv/04-bookings-mixed-date-formats.csv
file_format = csv_ff
force = true
on_error = 'continue';
select * from bookings;
-- some rows were rejected because their date/timestamp text does not
-- match the default (AUTO) formats; inspect the row-level errors.
-- job_id => '_last' refers to the most recent COPY INTO executed in this
-- session, so the script works in any account (no hard-coded query id).
select * from table(validate(bookings, job_id=>'_last')) order by 3;
-- File format csv_ff_v2: same CSV options as csv_ff, plus explicit
-- date/timestamp parsing formats matching the text in the data file.
CREATE OR REPLACE FILE FORMAT csv_ff_v2
    TYPE                         = 'csv'
    COMPRESSION                  = 'none'
    FIELD_DELIMITER              = ','
    RECORD_DELIMITER             = '\n'
    SKIP_HEADER                  = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
    DATE_FORMAT                  = 'DD-MM-YYYY'
    TIMESTAMP_FORMAT             = 'DD-MM-YYYY HH24:MI:SS.FF9';  -- FF9 keeps nanoseconds
-- Alternative timestamp formats to experiment with:
--   'DD-MM-YYYY HH24:MI:SS.FF3'   (millisecond precision)
--   'DD-MM-YYYY HH24:MI:SS'       (no fractional seconds)
--   'YYYY-MM-DD HH24:MI:SS.FF3'
DESC FILE FORMAT csv_ff_v2;
-- rerun the copy with the new file format after clearing the table.
truncate table bookings;
copy into bookings
from @~/ch10/csv/04-bookings-mixed-date-formats.csv
file_format = csv_ff_v2
force = true
on_error = 'continue';
-- inspect any remaining row-level errors from the COPY just executed;
-- job_id => '_last' avoids hard-coding a query id that only exists in
-- the original author's account.
select * from table(validate(bookings, job_id=>'_last')) order by 3;
Part-05
Warehouse Performance While Loading Timestamp
-- Part-05 setup: customer table to be loaded with the millisecond (FF3)
-- file format, so its load time can be compared with the FF9 variant below.
create or replace transient table customer_f3 (
customer_pk number(38,0),
salutation varchar(10),
first_name varchar(20),
last_name varchar(30),
gender varchar(1),
marital_status varchar(1),
day_of_birth date,
birth_country varchar(60),
email_address varchar(50),
city_name varchar(60),
zip_code varchar(10),
country_name varchar(20),
gmt_timezone_offset number(10,2),
preferred_cust_flag boolean,
registration_time timestamp
);
-- alternative column definition to try: registration_time timestamp_ltz(9)
-- Gzip CSV file format parsing timestamps at millisecond (FF3) precision.
CREATE OR REPLACE FILE FORMAT csv_gzip_ff_f3
    TYPE                         = 'csv'
    COMPRESSION                  = 'gzip'
    FIELD_DELIMITER              = ','
    RECORD_DELIMITER             = '\n'
    SKIP_HEADER                  = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
    DATE_FORMAT                  = 'YYYY-MM-DD'
    TIMESTAMP_FORMAT             = 'YYYY-MM-DD HH24:MI:SS.FF3';
-- Customer table with the same schema as customer_f3; it is loaded with
-- the FF9 (nanosecond) file format so the two load timings can be compared.
CREATE OR REPLACE TRANSIENT TABLE customer_f9 (
    customer_pk         NUMBER(38,0),
    salutation          VARCHAR(10),
    first_name          VARCHAR(20),
    last_name           VARCHAR(30),
    gender              VARCHAR(1),
    marital_status      VARCHAR(1),
    day_of_birth        DATE,
    birth_country       VARCHAR(60),
    email_address       VARCHAR(50),
    city_name           VARCHAR(60),
    zip_code            VARCHAR(10),
    country_name        VARCHAR(20),
    gmt_timezone_offset NUMBER(10,2),
    preferred_cust_flag BOOLEAN,
    registration_time   TIMESTAMP
);
-- Gzip CSV file format parsing timestamps at nanosecond (FF9) precision.
CREATE OR REPLACE FILE FORMAT csv_gzip_ff_f9
    TYPE                         = 'csv'
    COMPRESSION                  = 'gzip'
    FIELD_DELIMITER              = ','
    RECORD_DELIMITER             = '\n'
    SKIP_HEADER                  = 1
    FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
    DATE_FORMAT                  = 'YYYY-MM-DD'
    TIMESTAMP_FORMAT             = 'YYYY-MM-DD HH24:MI:SS.FF9';
-- upload all gzipped sample files to the user stage, 50 threads in parallel.
put
file:///tmp/ch-10/0*.gz
@~/ch10/gzip
parallel = 50;
-- confirm the uploads.
list @~/ch10/gzip;
select * from customer_f3;
-- load the FF3 table; force = true reloads files even if previously
-- loaded, and the pattern restricts the load to .csv.gz files only.
copy into customer_f3
from @~/ch10/gzip/
file_format = csv_gzip_ff_f3
on_error = 'continue'
force = true
pattern='.*[.]csv[.]gz'
;
-- NOTE(review): the warehouse is resumed only here, between the two COPY
-- commands; for a fair performance comparison both copies should run on a
-- warehouse in the same state -- confirm the intended ordering.
use warehouse compute_wh;
alter warehouse compute_wh resume;
-- load the FF9 table with the same files for the timing comparison.
copy into customer_f9
from @~/ch10/gzip/
file_format = csv_gzip_ff_f9
on_error = 'continue'
force = true
pattern='.*[.]csv[.]gz'
;
-- spot-check a few rows from each table.
select * from customer_f3 limit 10;
select * from customer_f9 limit 10;