SAS Vs SQL
SAS Vs SQL
Rick Andrews
Office of the Actuary
Centers for Medicare and Medicaid Services
Background
• SAS 9 WHERE
GROUP BY
HAVING
ORDER BY
• ANSI SQL: 2003
• Ordered Analytical Functions OVER
PARTITION BY
• a.k.a. Window Functions ORDER BY
• a.k.a. OLAP Functions ROWS
QUALIFY
SQL SAS
Table Data Set
Row Observation
Column Variable
Join Merge
Query Program
CREATING TABLES
SAS is Great 1
SAS is Good 2
Let us Thank 4
Jim we Should 8
SUB-SETTING TABLES
WHERE WHERE
<= var <= BETWEEN
PROC SQL;
DATA table2a; CREATE TABLE table2b AS
SET table1a; SELECT * FROM table1b
WHERE 2 <= numvar <= 4; WHERE numvar BETWEEN 2 AND 4;
RUN; QUIT;
PROC SQL;
PROC SORT DATA= table1a; CREATE TABLE table1b AS
BY var1; SELECT *
RUN; FROM table1b
ORDER BY var1;
QUIT;
ELIMINATING DUPLICATES
NODUPS DISTINCT
PROC SQL;
PROC SORT CREATE TABLE table2b AS
DATA= table1a SELECT DISTINCT *
OUT= table2a FROM table1b
NODUPS ; ORDER BY var1;
BY var1; QUIT;
RUN;
ELIMINATING DUPLICATE KEYS
NODUPKEY GROUP BY
Similar to FIRST (dot) No equivalent in SQL:1999
PROC SQL;
PROC SORT CREATE TABLE table3b AS
DATA= table1a SELECT MIN (var1) AS var1,
OUT= table3a MIN (var2) AS var2,
NODUPKEY ; MIN (var3) AS var3,
BY var1; MIN (numvar) AS numvar
RUN; FROM table1b
GROUP BY var1
ORDER BY var1;
QUIT;
GROUPING
Table A Table B
Var1 Var2 Var1 Var3
A 1 A W
B 2 B X
C 3 C Y
D 4 E Z
Inner Join Left Outer Join Right Outer Join Full Outer Join
Var1 Var2 Var3 Var1 Var2 Var3 Var1 Var2 Var3 Var1 Var2 Var3
A 1 W A 1 W A 1 W A 1 W
B 2 X B 2 X B 2 X B 2 X
C 3 Y C 3 Y C 3 Y C 3 Y
D 4 E Z D 4
E Z
INNER JOIN using Comma
MERGE BY FROM
IN= left IN= right Notice the comma
IF left AND right WHERE
IN= FROM
IF left LEFT JOIN
ON
PROC SQL;
DATA table1n; CREATE TABLE table2n AS
SET table1a; SELECT *,
IF itis = 'Snowing' CASE
THEN life = 'Good'; WHEN itis = 'Snowing'
ELSE life = 'Ok'; THEN 'Good'
RUN; ELSE 'Ok'
END AS life
FROM table1b;
QUIT;
SUB-QUERY Example
PROC SQL;
DATA t2 ; CREATE TABLE t4 AS
SET t1; SELECT *
WHERE life = 'Good'; FROM t3
KEEP var1; INNER JOIN
RUN; (
SELECT DISTINCT var1
PROC SORT DATA= t2 NODUPS ; BY var1; FROM t1
PROC SORT DATA= t3 ; BY var1; WHERE life = 'Good'
) AS t2
DATA t4; ON t2.var1 = t3.var1;
MERGE t2 ( IN= left ) QUIT;
t3 ( IN= right) ;
BY var1;
IF left AND right;
RUN;
MACRO VARIABLES
%PUT &mylist;
*RESULT = '21081','21082','21083','21084','21085';
Implicit Pass-thru Query using a DATA Step
/* Builds hcis1: one row per bene_state holding the state name and the   */
/* total of bene_cnt over the year-2004 rows of enroll.bene_smry.        */
/* NOTE(review): MERGE ... BY expects both inputs ordered by bene_state; */
/* confirm the libname engine delivers them in that order.               */
DATA hcis1;
   MERGE enroll.bene_smry (IN=left WHERE=(year='2004'))
         ref.state_tbl    (RENAME=(state_cd=bene_state));
   BY bene_state;
   KEEP bene_state state_name bene_cnt_tot;
   /* Reset the running total, not the incoming bene_cnt, at the start   */
   /* of each state. The sum statement below retains bene_cnt_tot across */
   /* rows, so without this reset totals would carry over between states.*/
   IF FIRST.bene_state THEN bene_cnt_tot = 0;
   bene_cnt_tot + bene_cnt;
   /* Write one row per state, and only for states that have summary     */
   /* rows (IN=left), which gives left-join behaviour.                   */
   IF left AND LAST.bene_state THEN OUTPUT;
RUN;
Implicit Pass-thru Query using SQL Procedure
/* Builds hcis2: total bene_cnt per state for year 2004. Every matching  */
/* enroll.bene_smry row is kept; state_name is looked up in              */
/* ref.state_tbl and is NULL when no state code matches.                 */
PROC SQL;
   CREATE TABLE hcis2 AS
   SELECT smry.bene_state,
          st.state_name,
          SUM(smry.bene_cnt) AS bene_cnt_tot
     FROM enroll.bene_smry AS smry
          LEFT JOIN ref.state_tbl AS st
            ON smry.bene_state = st.state_cd
    WHERE smry.year = '2004'
    GROUP BY smry.bene_state,
             st.state_name;
QUIT;
Explicit Pass-thru Query using SQL Procedure
/* Builds hcis3 through explicit pass-through: the inner query is sent   */
/* to Oracle as written and its result set is stored as a SAS table.     */
/* It totals bene_cnt per state for year 2004, keeping every matching    */
/* enroll.bene_smry row even when ref.state_tbl has no matching state.   */
/* The Oracle outer-join marker (+) goes on the optional table's column  */
/* (T2.state_cd). Placing it on T1, combined with the unmarked T1.year   */
/* filter, would reduce the query to an inner join.                      */
PROC SQL;
   CONNECT TO ORACLE ( PATH='hcisprd.world' ... );
   CREATE TABLE hcis3 AS
   SELECT * FROM CONNECTION TO ORACLE
   (
      SELECT T1.bene_state, T2.state_name,
             SUM(T1.bene_cnt) AS bene_cnt_tot
        FROM enroll.bene_smry T1,
             ref.state_tbl    T2
       WHERE T1.bene_state = T2.state_cd (+)
         AND T1.year = '2004'
       GROUP BY T1.bene_state, T2.state_name
   );
   DISCONNECT FROM ORACLE;
QUIT;
Ordered Analytical Functions
30
Ordered Analytical Functions (cont.)
A PARTITION BY column_reference B
,
B ORDER BY value_expression C
ASC
DESC
C )
31
OVER ( ) vs. “End Of File”
32
OVER ( ) vs. PROC SUMMARY
33
OVER ( ) vs. Derived Table
34
BY-Group Example by Claim Type
Example 1 Example 2
• Similar to Partition By Clm First Last
Type (dot) (dot)
Clm First Last
Type (dot) (dot)
HHA 1 1 DME 1 0
PROC SORT DATA=Mdcr_Clm;
IP 1 0 DME 0 0
BY Clm_Type;
RUN; IP 0 1 DME 0 0
PHY 1 0 DME 0 1
DATA Derived_Table; PHY 0 1 HOS 1 0
SET Mdcr_Clm; SNF 1 0 HOS 0 0
BY Clm_Type; SNF 0 1 HOS 0 1
IF FIRST.Clm_Type THEN Total_Paid=0;
Total_Paid + Paid_Amt; Bene Thru Clm Paid Total
IF LAST.Clm_Type THEN OUTPUT; ID Date Type Amt Paid
RUN; 2 17-Mar-11 HHA $150 $150
1 01-Jan-11 IP $500 $1,250
DATA Final_Table;
MERGE Mdcr_Clm 2 02-Feb-11 IP $750 $1,250
Derived_Table; 1 30-Jan-11 PHY $175 $300
BY Clm_Type; 2 21-Feb-11 PHY $125 $300
RUN;
1 05-Jan-11 SNF $300 $600
2 14-Feb-11 SNF $300 $600
35
PARTITION BY Phrase by Claim Type
36
Without ORDER BY Phrase
• Partition by Bene_SK
• Thru_Dt out of order This Window is by “Bene ID”
37
With ORDER BY Phrase
• Partition by Bene ID
• Order by Thru_Dt
Window is now Ordered By Thru Dt
38
ROWS Phrase
• ROWS
– starting point for the partition always first record in the group
– aggregation group end always current row
• ROWS BETWEEN
– specifies both the aggregation group start and the group end
– defines a set of rows relative to the current row
– the group start must precede the row specified by the group end
• UNBOUNDED PRECEDING – entire partition preceding current row
• UNBOUNDED FOLLOWING – entire partition following current row
• CURRENT ROW – start or end of aggregation group as the current row
• value PRECEDING – number of rows preceding current row
• value FOLLOWING – number of rows following current row
39
Cumulative Summary
40
Cumulative Summary by Bene
41
Previous Row Example
43
HAVING Clause Example
• Keep beneficiaries with 12 months of coverage
Cumulative Count by “Bene ID”
45
SAS By-Group Example
PROC SORT DATA=work.Sample_Data;
BY Bene_SK State_Cd;
RUN; Twelve Months
Bene ID Mth Cnt
DATA work.Twelve_Months ( 1 12
KEEP= Bene_SK Mth_Cnt )
work.State_Counts ( State Counts
Bene ID State Cd State Cnt
KEEP= Bene_SK State_Cd State_Cnt );
1 FL 4
SET work.Sample_Data; 1 NY 8
BY Bene_SK State_Cd; 2 CA 2
IF FIRST.Bene_SK THEN Mth_Cnt = 0;
Mth_Cnt + 1;
IF FIRST.State_Cd THEN State_Cnt = 0;
State_Cnt + 1;
IF LAST.Bene_SK AND Mth_Cnt = 12
THEN OUTPUT work.Twelve_Months;
IF LAST.State_Cd
THEN OUTPUT work.State_Counts;
RUN; 46
Create Final Output with SAS
PROC SORT DATA=work.State_Counts; State Counts
BY BENE_SK Elig_Mth State_Cnt; Bene ID State Cd Elig Mth State Cnt
1 FL 01-OCT... 4
RUN;
1 NY 01-DEC... 8
DATA work.Max_State; 2 CA 01-FEB... 2
SET work.State_Counts;
BY BENE_SK Elig_Mth State_Cnt; Max State
Bene ID State Cd State Cnt
IF LAST.BENE_SK;
1 NY 8
RUN; 2 CA 2
DATA work.Final;
Twelve Months
MERGE
Bene ID Mth Cnt
work.Max_State 1 12
work.Twelve_Months (IN=Keep_It);
BY BENE_SK; Final
KEEP BENE_SK State_Cd Mth_Cnt; Bene ID State Cd Mth Cnt
1 NY 12
IF Keep_It THEN OUTPUT;
RUN; 47
SQL Qualify Clause Example
Window by Bene ID & State Cd
• Keep “Bene ID” with Window by Bene ID
12 months of coverage
Bene State Elig Mth State
SK Cd Mth Cnt Cnt
SELECT 1 NY Jan-2011.. 12 8
Bene_SK, State_Cd, Elig_Mth, 1 NY Feb-2011.. 12 8
COUNT(*) OVER ( PARTITION BY Bene_SK ) 1 NY Mar-2011.. 12 8
AS Mth_Cnt, 1 NY Apr-2011.. 12 8
1 NY May-2011.. 12 8
COUNT(*) OVER ( PARTITION BY Bene_SK, State_Cd ) 1 NY Jun-2011.. 12 8
AS State_Cnt 1 NY Jul-2011.. 12 8
FROM Bene_Fact 1 NY Aug-2011.. 12 8
QUALIFY Mth_Cnt = 12 1 FL Sep-2011.. 12 4
1 FL Oct-2011.. 12 4
1 FL Nov-2011.. 12 4
1 FL Dec-2011.. 12 4
2 CA Jan-2011.. 2 2
2 CA Feb-2011.. 2 2
RANK()
OVER (
PARTITION BY Bene_SK Bene State State
Id Cd Cnt
ORDER BY State_cnt DESC ) = 1
1 NY 8
MAX(State_Cnt)
OVER (
PARTITION BY Bene_SK ) = State_Cnt Bene State State
Id Cd Cnt
1 NY 8