Need suggestions to optimize the complex query (below) involving several Joins and Aggregation..

Database

Need suggestions to optimize the complex query (below) involving several Joins and Aggregation..

Hi,

I have below query which is failing in dev environment due to spool issue:

INSERT INTO D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS(MATERIAL_ID,

VALUATION_AREA_ID, PLANT_ID, STORAGE_LOCATION_ID, CUSTOMER_ID,

VENDOR_ID, WBS_ELEMENT_ID, FISCAL_PERIOD_CODE, BEGIN_DATE, END_DATE,

UNRESTRICTED_STOCK, STOCK_IN_TRANSFER, QA_STOCK, BLOCKED_STOCK,

RESTRICTED_STOCK, RETURNS_STOCK, TAS_CREATE_DATE, TAS_CHANGE_DATE,

TAS_EXTRACTION_DATE, DWH_DELETION_FLAG)

SELECT

VW_STG_TEMP_STOCK_DETAILS_C.MATERIAL_ID, VW_STG_TEMP_STOCK_DETAILS_C.VALUATION_AREA_ID,

VW_STG_TEMP_STOCK_DETAILS_C.PLANT_ID, VW_STG_TEMP_STOCK_DETAILS_C.STORAGE_LOCATION_ID,

VW_STG_TEMP_STOCK_DETAILS_C.CUSTOMER_ID, VW_STG_TEMP_STOCK_DETAILS_C.VENDOR_ID,

VW_STG_TEMP_STOCK_DETAILS_C.WBS_ELEMENT_ID, VW_STG_TEMP_STOCK_DETAILS_C.FISCAL_PERIOD_CODE,

CAST(CAST(VW_STG_TEMP_STOCK_DETAILS_C.BEGIN_DATE AS TIMESTAMP(6)) AS DATE),

CAST(CAST(VW_STG_TEMP_STOCK_DETAILS_C.END_DATE AS TIMESTAMP(6)) AS DATE),

VW_STG_TEMP_STOCK_DETAILS_C.UNRESTRICTED_STOCK, VW_STG_TEMP_STOCK_DETAILS_C.STOCK_IN_TRANSFER,

VW_STG_TEMP_STOCK_DETAILS_C.QA_STOCK, VW_STG_TEMP_STOCK_DETAILS_C.BLOCKED_STOCK,

VW_STG_TEMP_STOCK_DETAILS_C.RESTRICTED_STOCK, VW_STG_TEMP_STOCK_DETAILS_C.RETURNS_STOCK,

CAST(VW_STG_TEMP_STOCK_DETAILS_C.TAS_CREATE_DATE AS TIMESTAMP(0)),

CAST(VW_STG_TEMP_STOCK_DETAILS_C.TAS_CHANGE_DATE AS TIMESTAMP(0)),

CAST(VW_STG_TEMP_STOCK_DETAILS_C.TAS_EXTRACTION_DATE AS TIMESTAMP(0)),

VW_STG_TEMP_STOCK_DETAILS_C.DWH_DELETION_FLAG

FROM

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C VW_STG_TEMP_STOCK_DETAILS_C

SHOW SEL * FROM D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C;

SELECT

Q1.MATERIAL_ID,

T9.VALUATION_AREA_ID,

Q1.PLANT_ID,

Q1.STORAGE_LOCATION_ID,

Q1.CUSTOMER_ID,

Q1.VENDOR_ID,

Q1.WBS_ELEMENT_ID,

'FP' || T9.FISCAL_PERIOD AS FISCAL_PERIOD_CODE,

MAX(T9.BEGIN_DATE) AS BEGIN_DATE,

MAX(T9.END_DATE) AS END_DATE,

SUM(Q1.UNRESTRICTED_STOCK) AS UNRESTRICTED_STOCK,

SUM(Q1.STOCK_IN_TRANSFER) AS STOCK_IN_TRANSFER,

SUM(Q1.QA_STOCK) AS QA_STOCK,

SUM(Q1.BLOCKED_STOCK) AS BLOCKED_STOCK,

SUM(Q1.RESTRICTED_STOCK) AS RESTRICTED_STOCK,

SUM(Q1.RETURNS_STOCK) AS RETURNS_STOCK,

MIN(Q1.TAS_CREATE_DATE) AS TAS_CREATE_DATE,

MAX(Q1.TAS_CHANGE_DATE) AS TAS_CHANGE_DATE,

MAX(Q1.TAS_CHANGE_DATE) AS TAS_EXTRACTION_DATE,

MAX(Q1.DWH_DELETION_FLAG) AS DWH_DELETION_FLAG

FROM

(SELECT

T1

.TAS_SOURCE_ID,

T1.MATERIAL_ID,

T1.PLANT_ID,

CAST('*' AS CHAR(4)) AS STORAGE_LOCATION_ID,

CAST('*' AS VARCHAR(10)) AS CUSTOMER_ID,

COALESCE(T1.VENDOR_ID,CAST('*' AS VARCHAR(10))) AS VENDOR_ID,

CAST('*' AS VARCHAR(8)) AS WBS_ELEMENT_ID,

T1.PERIOD_END_DATE,

T1.FISCAL_PERIOD,

T1.UNRESTRICTED_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS STOCK_IN_TRANSFER,

T1.QA_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS BLOCKED_STOCK,

T1.RESTRICTED_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS RETURNS_STOCK,

T1.TAS_CREATE_DATE,

T1.TAS_CHANGE_DATE,

T1.TAS_EXTRACTION_DATE,

T1.DWH_DELETION_FLAG

FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_VENDOR T1

/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T2

ON T2.PARAMETER_VALUE = T1.TAS_SOURCE_ID

WHERE T2.PARAMETER_NAME = 'SOURCE_ID'*/

UNION ALL

SELECT

T3

.TAS_SOURCE_ID,

T3.MATERIAL_ID,

T3.PLANT_ID,

T3.STORAGE_LOCATION_ID,

CAST('*' AS VARCHAR(10)) AS CUSTOMER_ID,

CAST('*' AS VARCHAR(10)) AS VENDOR_ID,

T3.WBS_ELEMENT_ID,

T3.PERIOD_END_DATE,

T3.FISCAL_PERIOD,

T3.UNRESTRICTED_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS STOCK_IN_TRANSFER,

T3.QA_STOCK,

T3.BLOCKED_STOCK,

T3.RESTRICTED_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS RETURNS_STOCK,

T3.TAS_CREATE_DATE,

T3.TAS_CHANGE_DATE,

T3.TAS_EXTRACTION_DATE,

T3.DWH_DELETION_FLAG

FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_PROJ T3

/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T4

ON T4.PARAMETER_VALUE = T3.TAS_SOURCE_ID

WHERE T4.PARAMETER_NAME = 'SOURCE_ID'*/

UNION ALL

SELECT

T5

.TAS_SOURCE_ID,

T5.MATERIAL_ID,

T5.PLANT_ID,

CAST('*' AS CHAR(4)) AS STORAGE_LOCATION_ID,

T5.CUSTOMER_ID,

CAST('*' AS VARCHAR(10)) AS VENDOR_ID,

CAST('*' AS VARCHAR(8)) AS WBS_ELEMENT_ID,

T5.PERIOD_END_DATE,

T5.FISCAL_PERIOD,

T5.UNRESTRICTED_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS STOCK_IN_TRANSFER,

T5.QA_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS BLOCKED_STOCK,

T5.RESTRICTED_STOCK,

CAST('0.000' AS DECIMAL(13,3)) AS RETURNS_STOCK,

T5.TAS_CREATE_DATE,

T5.TAS_CHANGE_DATE,

T5.TAS_EXTRACTION_DATE,

T5.DWH_DELETION_FLAG

FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_CUSTOMER T5

/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T6

ON T6.PARAMETER_VALUE = T5.TAS_SOURCE_ID

WHERE T6.PARAMETER_NAME = 'SOURCE_ID'*/

UNION ALL

SELECT

T7

.TAS_SOURCE_ID,

T7.MATERIAL_ID,

T7.PLANT_ID,

T7.STORAGE_LOCATION_ID,

CAST('*' AS VARCHAR(10)) AS CUSTOMER_ID,

CAST('*' AS VARCHAR(10)) AS VENDOR_ID,

CAST('*' AS VARCHAR(8)) AS WBS_ELEMENT_ID,

T7.PERIOD_END_DATE,

T7.FISCAL_PERIOD,

T7.UNRESTRICTED_STOCK,

T7.STOCK_IN_TRANSFER,

T7.STOCK_IN_QA,

T7.BLOCKED_STOCK,

T7.RESTRICTED_STOCK,

T7.RETURNS_STOCK,

T7.TAS_CREATE_DATE,

T7.TAS_CHANGE_DATE,

T7.TAS_EXTRACTION_DATE,

T7.DWH_DELETION_FLAG

FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_STLOC T7

/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T8

ON T8.PARAMETER_VALUE = T7.TAS_SOURCE_ID

WHERE T8.PARAMETER_NAME = 'SOURCE_ID'*/

) Q1

INNER

JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T8

ON

T8.PARAMETER_VALUE = Q1.TAS_SOURCE_ID

-- This join sets all stock records on the timeline so there are no gaps

INNER

JOIN D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST T9

ON T9.MATERIAL_ID = Q1.MATERIAL_ID

AND T9.PLANT_ID = Q1.PLANT_ID

AND T9.END_DATE >= Q1.PERIOD_END_DATE

WHERE

T8.PARAMETER_NAME = 'SOURCE_ID'

GROUP

BY

Q1.MATERIAL_ID,

T9.VALUATION_AREA_ID,

Q1.PLANT_ID,

Q1.STORAGE_LOCATION_ID,

Q1.CUSTOMER_ID,

Q1.VENDOR_ID,

Q1.WBS_ELEMENT_ID,

T9.FISCAL_PERIOD

;

Explain Plan: 

1) FIRST, we LOCK a DISTINCT D0_IM_RSTG_TAS_MD4."pseudo table" FOR

WRITE ON a RowHash TO prevent GLOBAL deadlock FOR

D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS.

2) NEXT, we LOCK D0_IM_CORE_T.T1 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK

D0_IM_CORE_T.T7 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK

D0_IM_CORE_T.T3 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK

D0_IM_CORE_T.T5 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK

D0_IM_RSTG_TAS_MD4.T9 IN VIEW VW_STG_TEMP_STOCK_DETAILS_C FOR

ACCESS, we LOCK D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS FOR

WRITE, AND we LOCK D0_IM_RSTG_TAS_MD4.T8 IN VIEW

VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS.

3) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T1 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an

ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1 (all_amps),

which IS built locally ON the AMPs. The SIZE OF SPOOL 1 IS

estimated WITH HIGH confidence TO be 29,112 ROWS (4,308,576 BYTES).

The estimated TIME FOR this step IS 0.03 seconds.

4) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T3 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an

ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1 (all_amps),

which IS built locally ON the AMPs. The SIZE OF SPOOL 1 IS

estimated WITH HIGH confidence TO be 705,824 ROWS (104,461,952

BYTES). The estimated TIME FOR this step IS 0.38 seconds.

5) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T5 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an

ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1 (all_amps),

which IS built locally ON the AMPs. The SIZE OF SPOOL 1 IS

estimated WITH HIGH confidence TO be 3,813,063 ROWS (564,333,324

BYTES). The estimated TIME FOR this step IS 1.49 seconds.

6) We EXECUTE the FOLLOWING steps IN parallel.

1) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T7 IN VIEW

D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an

ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1

(all_amps), which IS built locally ON the AMPs. The SIZE OF

SPOOL 1 IS estimated WITH HIGH confidence TO be 90,202,368

ROWS (13,349,950,464 BYTES). The estimated TIME FOR this

step IS 49.75 seconds.

2) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_RSTG_TAS_MD4.T8 IN

VIEW VW_STG_TEMP_STOCK_DETAILS_C BY way OF an ALL-ROWS scan

WITH a CONDITION OF ("D0_IM_RSTG_TAS_MD4.T8 in view

VW_STG_TEMP_STOCK_DETAILS_C.PARAMETER_NAME = 'SOURCE_ID'"

)

INTO SPOOL 5 (all_amps), which IS duplicated ON ALL AMPs.

The SIZE OF SPOOL 5 IS estimated WITH LOW confidence TO be 48

ROWS (8,880 BYTES). The estimated TIME FOR this step IS 0.03

seconds.

7) We DO an ALL-AMPs JOIN step FROM SPOOL 5 (LAST USE) BY way OF an

ALL-ROWS scan, which IS joined TO SPOOL 1 (LAST USE) BY way OF an

ALL-ROWS scan. SPOOL 5 AND SPOOL 1 are joined USING a product

JOIN, WITH a JOIN CONDITION OF ("PARAMETER_VALUE =

Q1.TAS_SOURCE_ID"

). The RESULT goes INTO SPOOL 6 (all_amps),

which IS redistributed BY the HASH code OF (Q1.MATERIAL_ID,

Q1.PLANT_ID) TO ALL AMPs. THEN we DO a SORT TO ORDER SPOOL 6 BY

ROW HASH. The SIZE OF SPOOL 6 IS estimated WITH LOW confidence TO

be 22,550,592 ROWS (3,157,082,880 BYTES). The estimated TIME FOR

this step IS 52.49 seconds.

8) We DO an ALL-AMPs JOIN step FROM SPOOL 6 (LAST USE) BY way OF a

RowHash match scan, which IS joined TO D0_IM_RSTG_TAS_MD4.T9 IN

VIEW VW_STG_TEMP_STOCK_DETAILS_C BY way OF a RowHash match scan

WITH a CONDITION OF ("NOT (D0_IM_RSTG_TAS_MD4.T9 in view

VW_STG_TEMP_STOCK_DETAILS_C.MATERIAL_ID IS NULL)"

). SPOOL 6 AND

D0_IM_RSTG_TAS_MD4.T9 are joined USING a MERGE JOIN, WITH a JOIN

CONDITION OF ("(D0_IM_RSTG_TAS_MD4.T9.MATERIAL_ID = MATERIAL_ID)

AND ((D0_IM_RSTG_TAS_MD4.T9.PLANT_ID = PLANT_ID) AND

(D0_IM_RSTG_TAS_MD4.T9.END_DATE >= PERIOD_END_DATE ))"

). The

INPUT TABLE D0_IM_RSTG_TAS_MD4.T9 will NOT be cached IN memory.

The RESULT goes INTO SPOOL 4 (all_amps), which IS built locally ON

the AMPs. The RESULT SPOOL file will NOT be cached IN memory.

The SIZE OF SPOOL 4 IS estimated WITH INDEX JOIN confidence TO be

3,277,116,351 ROWS (544,001,314,266 BYTES). The estimated TIME

FOR this step IS 28 minutes AND 2 seconds.

9) We DO an ALL-AMPs SUM step TO AGGREGATE FROM SPOOL 4 (LAST USE) BY

way OF an ALL-ROWS scan , GROUPING BY field1 ( Q1.MATERIAL_ID

,D0_IM_RSTG_TAS_MD4.T9.VALUATION_AREA_ID ,Q1.PLANT_ID

,Q1.STORAGE_LOCATION_ID ,Q1.CUSTOMER_ID ,Q1.VENDOR_ID

,Q1.WBS_ELEMENT_ID ,D0_IM_RSTG_TAS_MD4.T9.FISCAL_PERIOD).

AGGREGATE Intermediate Results are computed locally, THEN placed

IN SPOOL 7. The AGGREGATE SPOOL file will NOT be cached IN memory.

The SIZE OF SPOOL 7 IS estimated WITH LOW confidence TO be

2,457,837,264 ROWS (757,013,877,312 BYTES). The estimated TIME

FOR this step IS 1 HOUR AND 58 minutes.

10) We DO an ALL-AMPs RETRIEVE step FROM SPOOL 7 (LAST USE) BY way OF

an ALL-ROWS scan INTO SPOOL 2 (all_amps), which IS redistributed

BY the HASH code OF (Q1.MATERIAL_ID,

D0_IM_RSTG_TAS_MD4.T9.VALUATION_AREA_ID) TO ALL AMPs. THEN we DO

a SORT TO ORDER SPOOL 2 BY ROW HASH. The RESULT SPOOL file will

NOT be cached IN memory. The SIZE OF SPOOL 2 IS estimated WITH

LOW confidence TO be 2,457,837,264 ROWS (437,495,032,992 BYTES).

The estimated TIME FOR this step IS 1 HOUR AND 58 minutes.

11) We DO an ALL-AMPs MERGE INTO

D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS FROM SPOOL 2 (LAST USE).

The SIZE IS estimated WITH LOW confidence TO be 2,457,837,264 ROWS.

The estimated TIME FOR this step IS 7 hours AND 2 minutes.

12) We spoil the parser's dictionary cache for the table.

13) Finally, we send out an END TRANSACTION step to all AMPs involved

in processing the request.

-> No rows are returned to the user as the result of statement 1.

ROWCOUNT:

SELCOUNT(*) FROM D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS

54

SEL COUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_STLOC

86,389,305

SELCOUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_CUSTOMER;

3,107,239

SELCOUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_PROJ;

676,712

SELCOUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_VENDOR;

29,112

SELCOUNT(*) FROM D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST

1,475,950,187

 

STATISTICS:

I have collected statistics on all the tables..

HELP STATISTICS D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST

Date Time Unique Values Column Names Column Dictionary Names Column SQL Names Column Names UEscape

1 15/03/18 07:36:37        1,475,950,187 * * "*" ?

2 15/03/18 07:34:50           10,156,353 MATERIAL_ID,PLANT_ID MATERIAL_ID,PLANT_ID MATERIAL_ID,PLANT_ID ?

3 15/03/18 07:35:15                  204 END_DATE END_DATE END_DATE ?

4 15/03/18 07:35:53                  272 FISCAL_PERIOD FISCAL_PERIOD FISCAL_PERIOD ?

5 15/03/18 07:36:37                  121 VALUATION_AREA_ID VALUATION_AREA_ID VALUATION_AREA_ID ?

T9 table has 1.4 billion records which is taking long time in joining with other tables... Also, I see aggregation of these more than 1.4 billion records as a challenge.. I tried several things like obviously statistics, changing PI of T9 table to improve JOIN of T9 with other tables.. But I cannot think of anything which can improve aggregation performance.

Please suggest if we can tune this query somehow..

 

1 REPLY
Enthusiast

Re: Need suggestions to optimize the complex query (below) involving several Joins and Aggregation..

Hi Sumit,

1) Can you try collecting stats individually for the below columns. I mean single column stats. Multi column stats if collected already let it be.

T9.MATERIAL_ID

T9.PLANT_ID

T9.END_DATE

2) Also make sure stats in collected on the below JOIN columns and all the base tables index & join columns.

T8.PARAMETER_VALUE = Q1.TAS_SOURCE_ID

 T8.PARAMETER_NAME = 'SOURCE_ID'

3) Can you try changing the join as below and rerun:

D0_IM_CORE_T.CORE_MATERIAL_STOCK_VENDOR T1 

UNION ALL

D0_IM_CORE_T.CORE_MATERIAL_STOCK_PROJ T3 

UNION ALL

D0_IM_CORE_T.CORE_MATERIAL_STOCK_CUSTOMER T5 

UNION ALL

D0_IM_CORE_T.CORE_MATERIAL_STOCK_STLOC T7 

INNER JOIN D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST T9 

ON T9.MATERIAL_ID = Q1.MATERIAL_ID

AND T9.PLANT_ID = Q1.PLANT_ID

AND T9.END_DATE >= Q1.PERIOD_END_DATE

INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T8 -- 54

ON

T8.PARAMETER_VALUE = Q1.TAS_SOURCE_ID

-- This join sets all stock records on the timeline so there are no gaps

WHERE

 T8.PARAMETER_NAME = 'SOURCE_ID'

Let me know if it helps?