Month: April 2020

Oracle to Snowflake Table DDL Conversion

Oracle to Snowflake Table DDL Conversion

Jason Trewin at FreshGravity provided this Oracle to Snowflake Table DDL conversion script. FreshGravity is a great Snowflake partner, and Jason is working on his second Snowflake deployment for our shared customers.

He shared a great approach to DDL conversion from Oracle to Snowflake. I thought it was so useful that after some discussion he agreed to let me write up and post on it.

You can modify the where clauses to select the tables you need to convert, and modify the mappings from Oracle table owners to Snowflake owners.

Thanks and credit to Jason Trewin for contributing this useful script to the Snowflake community.

-- Script contributed to the Snowflake community courtesy of Jason Trewin at FreshGravity.

SELECT 'CREATE OR REPLACE TABLE '  
        || NEW_SCHEMA_NAME
        || '.' ||  TABLE_NAME || ' (' || LISTAGG(COLUMN_NAME || ' ' || CONVERTED_DATA_TYPE || CASE WHEN NULLABLE = 'N' THEN ' NOT NULL' ELSE ''END , ' , ') WITHIN GROUP (ORDER BY COLUMN_ID)  || ');' --AS COLUMN_LIST
   FROM (      
 SELECT DATA_TYPE, COLUMN_NAME, ALL_TABLES.TABLE_NAME, ALL_TABLES.OWNER,DATA_LENGTH, DATA_PRECISION, DATA_SCALE, NULLABLE,
        CASE WHEN INSTR(DATA_TYPE,'WITH LOCAL TIME ZONE') > 0  THEN 'TIMESTAMP_TZ'
             WHEN INSTR(DATA_TYPE,'TIMESTAMP') > 0 THEN 'TIMESTAMP_NTZ'
             WHEN DATA_TYPE = 'DATE' THEN 'DATE'
             WHEN DATA_TYPE = 'XMLTYPE' THEN 'VARIANT'
             WHEN DATA_TYPE IN ('BINARY_DOUBLE','NUMBER') THEN 'NUMBER'||
                  CASE WHEN DATA_PRECISION IS NOT NULL THEN  '('|| DATA_PRECISION || ',' || NVL(DATA_SCALE,0) || ')'
                            ELSE '' END
             WHEN DATA_TYPE IN ('NVARCHAR2','VARCHAR2','VARCHAR','NVARCHAR') THEN 'TEXT(' || DATA_LENGTH  || ')'
             WHEN DATA_TYPE IN ('NCHAR','CHAR') THEN 'TEXT(1)'
             WHEN DATA_TYPE = 'CLOB' THEN 'TEXT(16777216)'
             ELSE NULL END AS CONVERTED_DATA_TYPE
             ,CASE WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_1' THEN 'SNOWFLAKE_OWNER_1'  -- Map Oracle owners to Snowflake owners.
                   WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_2' THEN 'SNOWFLAKE_OWNER_2'
                   WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_3' THEN 'SNOWFLAKE_OWNER_3'                                      
                   WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_4' THEN 'SNOWFLAKE_OWNER_4'
				   WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_5' THEN 'SNOWFLAKE_OWNER_5'  
                   WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_6' THEN 'SNOWFLAKE_OWNER_6'                                      
               ELSE NULL END AS NEW_SCHEMA_NAME
             ,COLUMN_ID
  FROM ALL_TAB_COLUMNS
       INNER JOIN ALL_TABLES   
          ON ALL_TABLES.TABLE_NAME  =  ALL_TAB_COLUMNS.TABLE_NAME
         AND ALL_TABLES.OWNER       =  ALL_TAB_COLUMNS.OWNER
WHERE	-- Insert a where clause to decide which tables to include or exclude. This is just part of one approach: 
	ALL_TABLES.TABLE_NAME NOT IN ('TABLE1','TABLE2','TABLE3')
  )
GROUP BY NEW_SCHEMA_NAME, TABLE_NAME



SELECT TEXT_LINE FROM (
SELECT 1 AS INDEXING_VALUE,
     TABLE_NAME ,
     OWNER,
        'CREATE OR REPLACE TABLE '  
        || CASE WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_1' THEN 'SNOWFLAKE_OWNER_1'  		-- Map Oracle table ownwers to Snowflake table owners
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_2' THEN 'SNOWFLAKE_OWNER_2'
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_3' THEN 'SNOWFLAKE_OWNER_3'                                      
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_4' THEN 'SNOWFLAKE_OWNER_4'
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_5' THEN 'SNOWFLAKE_OWNER_5'  
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_6' THEN 'SNOWFLAKE_OWNER_6'                                      
           ELSE NULL END         || '.' ||  TABLE_NAME || '(' AS TEXT_LINE, 1 AS COLUMN_ID
  FROM ALL_TABLES   
WHERE ALL_TABLES.OWNER LIKE 'ORACLE_OWNER_%'	-- Set where condition for tables to process.
AND ALL_TABLES.TABLE_NAME IN ('TABLE1', 'TABLE2', 'TABLE3')
UNION ALL
SELECT 2  AS INDEXING_VALUE, ALL_TABLES.TABLE_NAME , ALL_TABLES.OWNER,
        CASE WHEN COLUMN_ID = 1 THEN '' ELSE ',' END || COLUMN_NAME || ' ' ||
            CASE WHEN INSTR(DATA_TYPE,'WITH LOCAL TIME ZONE') > 0  THEN 'TIMESTAMP_TZ'
             WHEN INSTR(DATA_TYPE,'TIMESTAMP') > 0 THEN 'TIMESTAMP_NTZ'
             WHEN DATA_TYPE = 'DATE' THEN 'DATE'
             WHEN DATA_TYPE = 'XMLTYPE' THEN 'VARIANT'
             WHEN DATA_TYPE IN ('BINARY_DOUBLE','NUMBER') THEN 'NUMBER'||
                  CASE WHEN DATA_PRECISION IS NOT NULL THEN  '('|| DATA_PRECISION || ',' || NVL(DATA_SCALE,0) || ')'
                            ELSE '' END
             WHEN DATA_TYPE IN ('NVARCHAR2','VARCHAR2','VARCHAR','NVARCHAR') THEN 'TEXT(' || DATA_LENGTH  || ')'
             WHEN DATA_TYPE IN ('NCHAR','CHAR') THEN 'TEXT(1)'
             WHEN DATA_TYPE = 'CLOB' THEN 'TEXT(16777216)'
             ELSE NULL END ||  CASE WHEN NULLABLE = 'N' THEN ' NOT NULL' ELSE ''END  AS CONVERTED_DATA_TYPE
           ,COLUMN_ID
  FROM ALL_TAB_COLUMNS
       INNER JOIN ALL_TABLES   
          ON ALL_TABLES.TABLE_NAME  =  ALL_TAB_COLUMNS.TABLE_NAME
         AND ALL_TABLES.OWNER       =  ALL_TAB_COLUMNS.OWNER
WHERE ALL_TABLES.OWNER LIKE 'ODS_%'
AND ALL_TABLES.TABLE_NAME IN ('TABLE1', 'TABLE2', 'TABLE3') 
 UNION ALL SELECT 3 AS INDEXING_VALUE,
     TABLE_NAME , OWNER,
        ');', 1 AS COLUMN_ID
  FROM ALL_TABLES   
WHERE ALL_TABLES.OWNER LIKE 'ORACLE_OWNER_%'
AND ALL_TABLES.TABLE_NAME IN ('TABLE1', 'TABLE2', 'TABLE3')
)
ORDER BY TABLE_NAME, OWNER, INDEXING_VALUE, COLUMN_ID



SELECT  'ALTER TABLE '
        || CASE WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_1' THEN 'SNOWFLAKE_OWNER_1'  		-- Map Oracle table ownwers to Snowflake table owners
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_2' THEN 'SNOWFLAKE_OWNER_2'
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_3' THEN 'SNOWFLAKE_OWNER_3'                                      
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_4' THEN 'SNOWFLAKE_OWNER_4'
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_5' THEN 'SNOWFLAKE_OWNER_5'  
                WHEN ALL_TABLES.OWNER = 'ORACLE_OWNER_6' THEN 'SNOWFLAKE_OWNER_6'                                       
           ELSE NULL END         || '.' ||  TABLE_NAME
        || ' ADD PRIMARY KEY (' ||PK_COLUMMN_LIST || ');'
FROM 
 (
SELECT --'ALTER TABLE ' 
       COLS.OWNER, COLS.TABLE_NAME, LISTAGG(cols.COLUMN_NAME , ',') WITHIN GROUP (ORDER BY POSITION) AS PK_COLUMMN_LIST
--       cols.table_name, cols.column_name, cols.POSITION, cons.owner
FROM all_constraints cons, all_cons_columns cols
WHERE cols.OWNER LIKE 'ORACLE_OWNER_%'
AND cons.constraint_type = 'P'
AND cons.constraint_name = cols.constraint_name
AND cons.owner = cols.owner
GROUP BY  COLS.OWNER, COLS.TABLE_NAME
)


Overloading JavaScript UDFs in Snowflake

Overloading JavaScript UDFs in Snowflake

A Base Function with Two Overloads

Snowflake supports overloading user defined functions. It’s a great way to handle function calls with parameters of different data types or different numbers of parameters. Developers often overload functions to let users send only relevant parameters.

Consider the common SUBSTRING function. You can call it using one of two overloads:

select substr('Hello, world.', 8);     --This returns "world."

select substr('Hello, world.', 8, 5);  --This returns "world"

In the first statement, the caller sent the string to use and the start position. Omitting the final parameter uses the overload with default behavior, returning to the end of the string.

In the second statement, the caller decided to get rid of the final period. Adding the third parameter for length used the other overload to return five characters instead of the default behavior.

This is a common design with overloaded functions. Mandatory parameters go on the left and optional parameters follow. Each allowable combination of parameters becomes an overload of the function. In this design, developers typically write one base function with all parameters. For the overloads with missing parameters, they’ll call the base function using a default value for the missing parameter(s).

This design ensures that there’s only one place to maintain and debug code. The problem is Snowflake JavaScript UDFs cannot call other UDFs. While one way to deal with this is to write the same code in all overloads, it means three places to maintain, improve, and debug code. Fortunately, there’s a way to write once base function and call it from overloaded functions using defaults.

The solution is to write the base UDF with all parameters in JavaScript. For the overloads that simply call the base function using defaults for missing parameters, call the base JavaScript UDF using an overloaded SQL UDF. This works because SQL UDFs can call other UDFs, which JavaScript UDFs cannot do.

In this example, the JavaScript UDF returns a column displaying a graphical progress bar. The opens are typical for progress bars: percent completion, number of decimal places to display on the percentage, and number of segments to display.

The only one that can’t be defaulted is the percent complete. It’s okay to default to two decimal points and ten segments.

-- Make a progress bar function that looks like this:   ⬛⬛⬜⬜⬜⬜⬜⬜⬜⬜ 24.53%

-- This is the main JavaScript function with all parameters.
create or replace function PROGRESS_BAR(PERCENTAGE float, DECIMALS float, SEGMENTS float)
returns string
language javascript
as
$$

    var percent = PERCENTAGE;
    
    if (isNaN(percent)) percent =   0;
    if (percent < 0)    percent =   0;
    if (percent > 100)  percent = 100;

    percent        = percent.toFixed(DECIMALS);

    var filledSegments = Math.round(SEGMENTS * (percent / 100));
    var emptySegments  = SEGMENTS - filledSegments;

    var bar = '⬛'.repeat(filledSegments) + '⬜'.repeat(emptySegments);
 
    return bar + " " + percent + "%";

$$;

-- This is an overload with only the percentage, using defaults for 
-- number of segments and decimal points to display on percentage.
create or replace function PROGRESS_BAR(PERCENTAGE float)
returns string
language sql
as
$$
    select progress_bar(PERCENTAGE, 2, 10)
$$;

-- This is an overload with the percentage and the option set for the
-- number of decimals to display. It uses a default for number of segments.
create or replace function PROGRESS_BAR(PERCENTAGE float, DECIMALS float)
returns string
language sql
as
$$
    select progress_bar(PERCENTAGE, DECIMALS, 10)
$$;

-- Call the main JavaScript function by sending all three parameters:
select progress_bar(24.5293, 0, 100) as PROGRESS;

-- Call the overloaded SQL function by omitting the number of segments (segments defaults to 10):
select progress_bar(24.5293, 1) as PROGRESS;

-- Call the overloaded SQL function specifying only the percentage 
-- (segments defaults to 10 and decimals to 2)
-- It should display like this:   ⬛⬛⬜⬜⬜⬜⬜⬜⬜⬜ 24.53%
select progress_bar(24.5293) as PROGRESS;

By the way, this UDF progress bar is fully functional. If you have a long-running process such as loading a large number of files, you can use it to monitor progress by refreshing the query periodically. Here’s an example using a long progress bar and 24.41%:

select progress_bar(24.41, 2, 100) as PROGRESS;
Progress Bar from Overloaded Snowflake UDF
Theme: Overlay by Kaira