LEARN ASTER IN 30 MINUTES: Data - Scripts - Links

Learn Data Science
Teradata Employee

This blog post contains some of the content from the Webinar delivered Oct 25, 2016.

IF YOU WANT TO PERFORM THESE EXERCISES:

1.  Download Aster Express

https://aster-community.teradata.com/community/download 

About the data Used:

Read the following blog post which details where the data originated from and how to load data files in Aster:

VIDEO: HOW TO REDUCE LOAD TIME: Using nCluster_Loader in Parallel 

Table DDL for initial load:

/*
DROP TABLE IF EXISTS public.aa_airlines;

CREATE TABLE public.aa_airlines
(
Year INTEGER
,Month INTEGER
,DayofMonth INTEGER
,DayOfWeek INTEGER
,DepTime VARCHAR
,CRSDepTime VARCHAR
,ArrTime VARCHAR
,CRSArrTime VARCHAR
,UniqueCarrier VARCHAR
,FlightNum VARCHAR
,TailNum VARCHAR
,ActualElapsedTime VARCHAR
,CRSElapsedTime VARCHAR
,AirTime VARCHAR
,ArrDelay VARCHAR
,DepDelay VARCHAR
,Origin VARCHAR
,Dest VARCHAR
,Distance VARCHAR
,TaxiIn VARCHAR
,TaxiOut VARCHAR
,Cancelled VARCHAR
,CancellationCode VARCHAR
,Diverted VARCHAR
,CarrierDelay VARCHAR
,WeatherDelay VARCHAR
,NASDelay VARCHAR
,SecurityDelay VARCHAR
,LateAircraftDelay VARCHAR
)
DISTRIBUTE BY HASH(deptime)
STORAGE ROW
COMPRESS LOW;

*/

How to perform a cFilter (affinity analytic)

-----------------------------------------------
-- AFFINITY ANALYTICS - ALL 22 YEARS OF DATA
-----------------------------------------------

SELECT *
FROM cfilter (
ON (SELECT 1)
PARTITION BY 1
INPUTTABLE('aa_airlines')
OUTPUTTABLE('aa_cf_airlines_origin')
INPUTCOLUMNS('origin')
JOINCOLUMNS('dest')
DROPTABLE('true')
);

How to perform Graph analytics

-----------------------------------------------------------
--GRAPH
-----------------------------------------------------------

--INITIALIZE TABLES
drop table if exists airline_edge
drop table if exists airline_nodes;
drop table if exists airline_unordered_pairs;

--BUILD EDGE TABLE


CREATE TABLE airline_edge distribute by replication as
select origin, dest from aa_airlines_nc_nd_f_2007
group by origin, dest;

select * from airline_vert limit 100;

--DATA PREPARATION:
--BUILD NODE

create table airline_nodes
distribute by hash(nodeid)
as
select origin as nodeid
from aa_airlines_nc_nd_f_2007
union
select dest as nodeid
from aa_airlines_nc_nd_f_2007
;

-- "Verify distinct nodes:"
SELECT count(*), count(distinct nodeid) FROM airline_nodes;

-- "Distinct source nodes:"
SELECT count(distinct origin) FROM aa_airlines_nc_nd_f_2007;

-- "Distinct target nodes:"
SELECT count(distinct dest) FROM aa_airlines_nc_nd_f_2007;

--------------------------------
--PAGERANK: (PLANES LANDING AND TAKING OFF)
--------------------------------

CREATE TABLE airline_gr_pagerank distribute by replication as
SELECT * FROM pagerank (
ON airline_nodes AS vertices PARTITION BY nodeid
ON aa_airlines_nc_nd_f_2007 AS edges PARTITION BY origin
TARGETKEY('dest')
-- [EDGEWEIGHT('column_name')]
ACCUMULATE('nodeid')
DAMPFACTOR('0.85')
THRESHOLD('0.0001')
MAXITERNUM('20')
)
order by nodeid;

SELECT * FROM airline_gr_pagerank order by pagerank desc;

SELECT origin FROM public.aa_airlines_nc_nd_f_2007 WHERE 1=1

--------------------------------
--ALL PAIRS SHORTEST PATH (NUMBER OF LAYOVERS FROM AIRPORT TO AIRPORT)
--------------------------------

DROP TABLE IF EXISTS public.airline_pg_apsp_out;
CREATE TABLE public.airline_pg_apsp_out
DISTRIBUTE BY REPLICATION
AS
SELECT *
FROM AllPairsShortestPath(
ON public.airline_nodes AS vertices PARTITION BY nodeid
ON public.airline_edge AS edges PARTITION BY origin
targetKey('dest')
Directed('true')
MaxDistance('-1') -- default: 10 (vertrices further apart are not output), -1 means unbounded
)
order by source, target;

SELECT * FROM airline_pg_apsp_out order by distance desc LIMIT 100;

--------------------------------
--BETWEENESS (CONNECTION AIRPORTS - NOT FINAL DESTINATIONS)
--------------------------------

-- INFO:
-- finds bridges between different airports
-- identify potential bottlenecks


DROP TABLE IF EXISTS public.airline_gr_betweenness;

CREATE TABLE public.airline_gr_betweenness
DISTRIBUTE BY HASH(nodeid)
AS
SELECT *
FROM betweenness (
ON public.airline_nodes AS vertices PARTITION BY nodeid
ON public.airline_edge AS edges PARTITION BY origin
targetKey('dest')
directed('t')
maxdistance('5') -- kdegree=5
accumulate('nodeid')
samplerate('1') -- value between 0 and 1
)
order by nodeid;

SELECT *
FROM public.airline_gr_betweenness
ORDER BY nodeid
;

Scripts to perform the TEXT analytics:

---------------------------------------------------
--TEXT PARSER STOP WORD REMOVAL AND STEMMING
---------------------------------------------------


DROP TABLE IF EXISTS public.tweets_tparser

CREATE FACT TABLE public.tweets_tparser(
guid VARCHAR,
comments VARCHAR
)
DISTRIBUTE BY HASH( guid )
COMPRESS LOW;

TRUNCATE TABLE public.tweets_tparser;
INSERT INTO public.tweets_tparser VALUES
('2.80069275417473e+17','Between LOTR and Star Trek FEEEEEELS my heart is now gone and replaced by pure sad emotions! '),
('2.80088531194495e+17','Just discovered Star Trek:TNG is on Netflix Canada, unleash the #nerd! '),
('2.80427365979542e+17','@WilliamShatner Im saving up for Vegas2016 :) really hope to join Star Trek Anniv, will be traveling from across the ocean! '),
('2.79927125501432e+17','@sashaofficial Thats not Walter Scott. Its William Shatner! Tender note at Star Trek Monopoly Limited Edition!x '),
('2.79870723835703e+17','They showed pure awesome movie previews in hobbit. Superman star trek after earth oblivion pacific rin etc lol '),
('2.80427610259984e+17','The IRON MAN 3 trailer is NUTS!!!! '),
('2.80071228474147e+17','Excited for Iron Man 3. From the trailer though, i feel disappointment. Oh well, as long as it ends. '),
('2.80251744225735e+17','Captain Kirk reminds me of @LalaDelaoCortez when she was little ?? '),
('2.8083145056625e+17','Star Trek is one of my new favorite movies. I want a truffle so bad! #socute #greatflick '),
('2.79909791910998e+17','love superhero movies; Captain America 2, Man of Steel, Iron Man 3, Thor 2 and Avengers 2 all out soonish woooooooo '),
('2.80489754787738e+17','Love that line from Star Trek “Your father was captain of a starship for 12 minutes & saved 800 lives….. I dare you to do better '),
('2.80410410484634e+17','Somebody come watch Star Trek with me ?? ' ),
('2.80688990292697e+17','@ilanaDyoung You have to bring it to birminghaaam :) Sameee! That and star trek are like my faaaves '),
('2.81028786269606e+17','@markdanielalea Blame Star Trek and The Hobbit for delaying Series 3. Hahaha. '),
('2.8081005643237e+17','i spotted a spelling mistake in the summary of a star trek TNG episode on netflix and it has completely screwed me up mentally and physically'),
('2.81250231520219e+17','The sole purpose of my Twitter is to follow porn stars and the cast of Star Trek '),
('2.80792021214573e+17','"Someone get me the ship from Star Trek, for Christmas. And I will actually kiss you.. On the face! '),
('2.81594389707497e+17','"#mustwatchmoviesin2013 Oz the great & powerful, Iron man 3,Monster Inc 2, G.I.Joe 2, WWZ, Epic, A good day to die hard '),
('2.80849888462402e+17','Someone watch Star Trek the movie that came out 2 years ago so I know it is real '),
('2.80852872114094e+17','Star Trek is easily one of my favorite movies ? '),
('2.8121308053545e+17','New Star Trek trailer: 5 scenes of people jumping/falling great heights. '),
('2.81232984588304e+17','@sarhoyt think of me everytime you watch Star Trek :* #bestmovie '),
('2.81233712975344e+17','so pumped for the new star trek. maybe we will see some Khan? '),
('2.798061644626e+17','Taun depan pengen nntn Man Of Steel,Iron Man 3,Wolverine,dan The Dark World-Thor '),
('2.79776948513825e+17','2013. Tenemos: Iron Man 3, Pacific Rim, Oblivion, Superman: The Man Of Steel, After Earth... Mayas, porfis *n* '),
('2.79795622662529e+17','The Hobbit was awesome!!! But my favorite part has to be the 10 minute sneak peek of Star Trek 2! Basically had a nerdgasm ?? ');

SELECT * FROM public.tweets_tparser LIMIT 100;

DROP TABLE public.tweets_tparser_out;

CREATE TABLE public.tweets_tparser_out
DISTRIBUTE BY HASH (guid) AS
SELECT * FROM Text_Parser (ON public.tweets_tparser
TEXT_COLUMN('comments')
CASE_INSENSITIVE('true')
STEMMING('true')
PUNCTUATION('\[.,?\!\]')
LIST_POSITIONS('false')
REMOVE_STOP_WORDS('true')
ACCUMULATE('guid','comments'))
ORDER BY guid;

SELECT * FROM public.tweets_tparser_out order by guid, position limit 100;


-- --------------------------------------------------------------------------------
-- LDA Functions
-- --------------------------------------------------------------------------------


------------------------------------
--LDA USING TWEETS
------------------------------------

select * from ldatrainer(
on (select 1) partition by 1
inputtable('tweets_tparser_out')
modeltable('ldatweetmodel')
outputtable('ldatweetout1')
topicnumber(5)
docidcolumn('guid')
wordcolumn('token')
countcolumn('frequency')
maxiterate(30)
convergencedelta(1e-3)
seed(2)
);

select * from ldainference(
on (select 1) partition by 1
inputtable('tweets_tparser_out')
modeltable('ldatweetmodel')
outputtable('ldatweetout2')
docidcolumn('guid')
wordcolumn('token')
outputtopicnumber(2)
outputtopicwordnumber(3)
);

SELECT * FROM ldatweetout2 LIMIT 100;


------------------------------------------------------
--SENTIMENT ANALYSIS
------------------------------------------------------

CREATE FACT TABLE public.tweets (
guid VARCHAR,
comments VARCHAR
)
DISTRIBUTE BY HASH( guid )
COMPRESS LOW;

------------------------------------------------------
--SENTIMENT ANALYSIS - RUN ANALYSIS
------------------------------------------------------

TRUNCATE TABLE public.tweets;

INSERT INTO public.tweets VALUES
('2.80069275417473e+17','Between LOTR and Star Trek FEEEEEELS my heart is now gone and replaced by pure sad emotions! '),
('2.80088531194495e+17','Just discovered Star Trek:TNG is on Netflix Canada, unleash the #nerd! '),
('2.80427365979542e+17','@WilliamShatner Im saving up for Vegas2016 :) really hope to join Star Trek Anniv, will be traveling from across the ocean! '),
('2.79927125501432e+17','@sashaofficial Thats not Walter Scott. Its William Shatner! Tender note at Star Trek Monopoly Limited Edition!x '),
('2.79870723835703e+17','They showed pure awesome movie previews in hobbit. Superman star trek after earth oblivion pacific rin etc lol '),
('2.80427610259984e+17','The IRON MAN 3 trailer is NUTS!!!! '),
('2.80071228474147e+17','Excited for Iron Man 3. From the trailer though, i feel disappointment. Oh well, as long as it ends. '),
('2.80251744225735e+17','Captain Kirk reminds me of @LalaDelaoCortez when she was little ?? '),
('2.8083145056625e+17','Star Trek is one of my new favorite movies. I want a truffle so bad! #socute #greatflick '),
('2.79909791910998e+17','love superhero movies; Captain America 2, Man of Steel, Iron Man 3, Thor 2 and Avengers 2 all out soonish woooooooo '),
('2.80489754787738e+17','Love that line from Star Trek “Your father was captain of a starship for 12 minutes & saved 800 lives….. I dare you to do better '),
('2.80410410484634e+17','Somebody come watch Star Trek with me ?? ' ),
('2.80688990292697e+17','@ilanaDyoung You have to bring it to birminghaaam :) Sameee! That and star trek are like my faaaves '),
('2.81028786269606e+17','@markdanielalea Blame Star Trek and The Hobbit for delaying Series 3. Hahaha. '),
('2.8081005643237e+17','i spotted a spelling mistake in the summary of a star trek TNG episode on netflix and it has completely screwed me up mentally and physically'),
('2.81250231520219e+17','The sole purpose of my Twitter is to follow porn stars and the cast of Star Trek '),
('2.80792021214573e+17','"Someone get me the ship from Star Trek, for Christmas. And I will actually kiss you.. On the face! '),
('2.81594389707497e+17','"#mustwatchmoviesin2013 Oz the great & powerful, Iron man 3,Monster Inc 2, G.I.Joe 2, WWZ, Epic, A good day to die hard '),
('2.80849888462402e+17','Someone watch Star Trek the movie that came out 2 years ago so I know it is real '),
('2.80852872114094e+17','Star Trek is easily one of my favorite movies ? '),
('2.8121308053545e+17','New Star Trek trailer: 5 scenes of people jumping/falling great heights. '),
('2.81232984588304e+17','@sarhoyt think of me everytime you watch Star Trek :* #bestmovie '),
('2.81233712975344e+17','so pumped for the new star trek. maybe we will see some Khan? '),
('2.798061644626e+17','Taun depan pengen nntn Man Of Steel,Iron Man 3,Wolverine,dan The Dark World-Thor '),
('2.79776948513825e+17','2013. Tenemos: Iron Man 3, Pacific Rim, Oblivion, Superman: The Man Of Steel, After Earth... Mayas, porfis *n* '),
('2.79795622662529e+17','The Hobbit was awesome!!! But my favorite part has to be the 10 minute sneak peek of Star Trek 2! Basically had a nerdgasm ?? ');

------------------------------------------------------
--SENTIMENT ANALYSIS - RUN ANALYSIS
------------------------------------------------------
DROP TABLE IF EXISTS public.es_tweets;

CREATE TABLE public.es_tweets
DISTRIBUTE BY HASH(guid) AS

SELECT * FROM ExtractSentiment
(
ON public.tweets
TEXT_COLUMN('comments')
MODEL('dictionary')
LEVEL('document')
ACCUMULATE('guid', 'comments')
) ORDER BY GUID;

SELECT * FROM es_tweets LIMIT 100;