/* SAS Main */
/*A SAS library is used to store SAS datasets. There are two types
of libraries used in SAS.
<Libname>.<Datasetname>
6) Click Browse and provide the path of the folder, including the
folder name.
7) Click OK.*/
/*Environments where SAS Runs
Data Bank;
Accno = 101;
CName= 'Sindhu';
Deposit= 50000;
Run;
proc print data=bank;
run;
3.linesize=n -
a) Data step boundaries - Data and run are the data step boundaries
b) Proc Step boundaries - proc and run or quit are the proc step
boundaries. */
/* Build a one-row dataset BANK from assignment statements, then list it. */
data Bank;
  Accno   = 101;
  CName   = 'Sindhu';
  Deposit = 50000;
run;

proc print data=Bank;
run;
/*RUN GROUP PROCESSING - It is used to execute the procedures with the
help of run groups.
The MODEL statements below must sit inside an interactive procedure;
PROC REG stays active after each RUN, so every MODEL ... RUN pair is one
run group and QUIT ends the procedure. */
data MARKS;
  input SCI MATH;
  datalines;
56 78
78 90
52 45
70 80
39 65
;
run;

proc print data=MARKS;
run;

proc reg data=MARKS;
  /* Run group 1: regress MATH on SCI. */
  model MATH = SCI;
run;
  /* Run group 2: regress SCI on MATH, same procedure invocation. */
  model SCI = MATH;
run;
quit;
/* Rebuild MARKS with two different subjects and write the
   PROC CORR statistics to dataset M1 through OUTP=. */
data MARKS;
  input Soc GK;
  datalines;
56 78
78 90
52 45
70 80
39 65
;

proc corr data=MARKS outp=M1;
run;
/* One-row BANK dataset created without any raw data. */
data Bank;
  Accno   = 101;
  CName   = 'Sindhu';
  Deposit = 50000;
run;
/*NAME=RAVI
AGE=27
PLACE=CHENNAI
QUALIFICATION=MBA
STATE=TAMIL NADU*/
/*RAVI,27,CHENNAI,MBA,TAMIL NADU*/
/*INTERNAL RAW DATA - The data which is directly available in the sas
program
EXTERNAL RAW DATA - The raw data which is available outside the SAS
program and is invoked and read */
/*DATA <LIBNAME>.<DSN>;
INFILE DATALINES <OPTIONS>;
INPUT <VAR1> $ <VAR2> .....;
DATALINES;
----------
---------
---------
;
RUN;
*/
/*INFILE statement options:
a) DSD
b) DLM
c) MISSOVER
d) TRUNCOVER
e) FIRSTOBS=m
f) OBS=n
*/
/* Internal raw data read with list input: two character fields
   and one numeric field per record. */
data EMP;
  infile datalines;
  input EMPID $ DESIGN $ SAL;
  datalines;
A001 ANALYST 40000
A002 HR 60000
A003 MANAGER 90000
;
run;
/* The same three records stored in library SATHYA, written four ways. */

/* EMP1: INFILE DATALINES with a DATALINES block. */
data SATHYA.EMP1;
  infile datalines;
  input EMPID $ DESIGN $ SAL;
  datalines;
A001 ANALYST 40000
A002 HR 60000
A003 MANAGER 90000
;
run;

/* EMP2: CARDS used in place of DATALINES. */
data SATHYA.EMP2;
  infile cards;
  input EMPID $ DESIGN $ SAL;
  cards;
A001 ANALYST 40000
A002 HR 60000
A003 MANAGER 90000
;
run;

proc print data=SATHYA.EMP2;
run;

/* EMP3: no INFILE statement. */
data SATHYA.EMP3;
  input EMPID $ DESIGN $ SAL;
  cards;
A001 ANALYST 40000
A002 HR 60000
A003 MANAGER 90000
;
run;

proc print data=SATHYA.EMP3;
run;

/* EMP4: no RUN statement; the following PROC statement ends the step. */
data SATHYA.EMP4;
  input EMPID $ DESIGN $ SAL;
  cards;
A001 ANALYST 40000
A002 HR 60000
A003 MANAGER 90000
;
proc print data=SATHYA.EMP4;
run;
/*SYNTAX OF DATA STEP(EXTERNAL RAW DATA)*/
/*DATA <LIBNAME>.<DSN>;*/
/*INFILE DATALINES <OPTIONS>;*/
/*INPUT <VAR1> $ <VAR2> .....;*/
/*DATALINES;*/
/*----------*/
/*---------*/
/*---------*/
/*; */
/*RUN;*/
/* External raw data: INFILE names a text file on disk instead of
   in-stream lines. */
data SATHYA.EMP5;
  infile "C:\Users\system1\Desktop\EMPL.TXT";
  input EMPID $ DESIGN $ SAL;
run;

proc print data=SATHYA.EMP5;
run;
/* BMP1: in-stream records read with list input. */
data SATHYA.BMP1;
  infile cards;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001 ANALYST 40000
BB002 HR 60000
BB003 MANAGER 90000
;
run;

/* BMP2: the same INPUT layout applied to an external text file. */
data SATHYA.BMP2;
  infile "C:\Users\system1\Documents\NOTE.1.TXT";
  input EMPID $ DESIGN $ SAL;
run;

proc print data=SATHYA.BMP2;
run;
/*INPUT STYLES
1.LIST INPUT
2.FORMATTED INPUT
3.COLUMN INPUT
4.MIXED INPUT*/
/*1.LIST INPUT*/
/* DSD option: the records are comma separated. */
data SATHYA.BMP5;
  infile cards dsd;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001,ANALYST,40000
BB002,HR,60000
BB003,MANAGER,90000
;
run;

/* Same layout, with blanks around some of the comma-separated values. */
data SATHYA.BMP6;
  infile cards dsd;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001, ANALYST ,40000
BB002,HR, 60000
BB003,MANAGER,90000
;
run;
/*2.DLM (delimiter)*/
/* DLM= lists every character that separates values:
   comma, colon, dollar sign and blank. */
data SATHYA.BMP9;
  infile cards dlm=',:$ ';
  input EMPID $ DESIGN $ SAL;
  cards;
BB001,ANALYST:40000
BB002$HR, 60000
BB003,MANAGER,,90000
;
run;
/*3.MISSOVER*/
/* Records 1 and 2 are short; MISSOVER is set on the INFILE statement. */
data SATHYA.BMP10;
  infile cards missover;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001 ANALYST
BB002
BB003 MANAGER 90000
;
run;

proc print data=SATHYA.BMP10;
run;

/*4.TRUNCOVER*/
/* The same short records read with TRUNCOVER instead. */
data SATHYA.BMP11;
  infile cards truncover;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001 ANALYST
BB002
BB003 MANAGER 90000
;
run;

proc print data=SATHYA.BMP11;
run;
/*5.FIRSTOBS=m*/
/* FIRSTOBS=3 on the INFILE statement. */
data SATHYA.BMP1;
  infile cards firstobs=3;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001 ANALYST 40000
BB002 HR 60000
BB003 MANAGER 90000
jjoo1 ghj 89000
jhk90 klj 70000
;
run;

/*6.OBS=n*/
/* OBS=4 on the INFILE statement. */
data SATHYA.BMP1;
  infile cards obs=4;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001 ANALYST 40000
BB002 HR 60000
BB003 MANAGER 90000
jjoo1 ghj 89000
jhk90 klj 70000
;
run;

proc print data=SATHYA.BMP1;
run;

/* FIRSTOBS=2 and OBS=4 together. */
data SATHYA.BMP1;
  infile cards firstobs=2 obs=4;
  input EMPID $ DESIGN $ SAL;
  cards;
BB001 ANALYST 40000
BB002 HR 60000
BB003 MANAGER 90000
jjoo1 ghj 89000
jhk90 klj 70000
;
run;

proc print data=SATHYA.BMP1;
run;
/*2.Formatted Input Style*/
/* Each field is read from a fixed start column (@n) with an explicit
   informat, so every record must be padded to the same layout:
   cols 1-5 Trno, 6-14 Trname, 15-23 Category, 24-28 Arr, 29-33 Dep,
   34 Pf. The first record was one blank short after "Express", which
   shifted Arr/Dep/Pf out of their columns; the padding is restored. */
data Railways;
  input @1  Trno      5.
        @6  Trname    $9.
        @15 Category  $9.
        @24 Arr       time5.
        @29 Dep       time5.
        @34 Pf        1.;
  datalines;
12021Rajdhani Express  07:1507:201
32021Kakinada Superfast09:2009:223
78901GorakhpurGaribRath11:1011:155
;

proc print data=Railways;
  /* Arr and Dep are stored as time values; show them as hh:mm. */
  format arr dep time5.;
run;
/* Formatted input with fixed columns: 1-5 trno, 6-15 trname,
   16-22 category, 23-27 arr, 28-32 dep, 33 pf.
   Records 1 and 2 had lost the blanks that pad trname to 10 columns,
   so later fields were read from the wrong positions; the padding is
   restored. */
data railways_1;
  input @1  trno      5.
        @6  trname    $10.
        @16 category  $7.
        @23 arr       time5.
        @28 dep       time5.
        @33 pf        1.;
  datalines;
12304dehradun  express17:1511:251
12303chennai   express19:2019:252
14032cormonadelexpress11:2511:303
;

proc print data=Railways_1;
  format arr dep time5.;
run;
/* List input with a date column in dd/mm/yyyy form.
   Fixes: the informat name was DDMMYYYY10., which SAS does not provide;
   the correct informat is DDMMYY10. The informat is also given on the
   INPUT statement with the colon modifier, so DOJ is read as a date
   regardless of where the INFORMAT statement sits. */
data student_details;
  input studid stud_name $ course $ DOJ :ddmmyy10.;
  informat DOJ ddmmyy10.; /* Informat <variable_name> <informat>;*/
  datalines;
101 Arpita SAS 10/10/2016
102 Shashi SAS 11/10/2016
103 Sam SPSS 12/10/2016
;

proc print data=student_details;
  format DOJ ddmmyy10.;
run;
/*Plus pointer (+n): it is used to read the raw data when an equal
number of blanks separates the values. Here n indicates the number of
blank columns to skip between the values. It is used in formatted,
column and mixed input styles*/
/* Fixes: the explanatory text had no opening comment delimiter, and
   the INPUT statement mixed +1 with absolute @ columns that did not
   match records separated by one blank. Every field is now followed by
   +1 and the records are padded to fixed widths:
   trno 5, trname 10, category 7, arr 5, dep 5, pf 1. */
data railways_2;
  input trno     5.      +1
        trname   $10.    +1
        category $7.     +1
        arr      time5.  +1
        dep      time5.  +1
        pf       1.;
  datalines;
12304 dehradun   express 17:15 11:25 1
12303 chennai    express 19:20 19:25 2
14032 cormonadel express 11:25 11:30 3
;

proc print data=Railways_2;
  format arr dep time5.;
run;
/*It is used to read the raw data if the values are non delimited and
standard*/
/* Fixed columns: 1-5 trno, 6-15 trname, 16-22 category, 23 pf.
   Records 1 and 2 had lost the blanks that pad trname to 10 columns;
   the padding is restored so each @n pointer lands on its field.
   Informats are written without an embedded blank ($10., $7.). */
data railways_3;
  input @1  trno      5.
        @6  trname    $10.
        @16 category  $7.
        @23 pf        1.;
  datalines;
12304dehradun  express1
12303chennai   express2
14032cormonadelexpress3
;

proc print data=Railways_3;
run;
/*Range Method*/
/* Column input: each variable is followed by its start-end columns.
   Records 1 and 2 had lost the blanks that pad trname to columns 6-15,
   so the ranges cut across the wrong text; the padding is restored. */
data railways_4;
  input trno        1-5
        trname   $  6-15
        category $ 16-22
        pf         23;
  datalines;
12304dehradun  express1
12303chennai   express2
14032cormonadelexpress3
;

proc print data=Railways_4;
run;
/*It is used to read the raw data if the raw data is in the form of
list input, formatted input and column input styles together*/
/* mode is read with list input; dist, place and dt are read from fixed
   columns: 7-13 dist, 14-22 place, 23-31 dt.
   Records 1 and 2 had lost their padding blanks, which put dist, place
   and dt in the wrong columns; the padding is restored. Informats are
   written without an embedded blank ($7.). */
data transport;
  input mode $
        @7  dist   $7.
        @14 place  $9.
        @23 dt     date9.;
  datalines;
bus   7600kmspondy    31aug2005
lorry 5400kmssikkim   26apr2003
tempo 3900kmsrajasthan15mar2007
;

proc print data=transport;
  format dt date9.;
run;
/* Slash pointer (/): after metal and Symbol are read, the trailing /
   advances to the next record, so each iteration consumes two records. */
data metals;
  input metal $ Symbol $ /;
  datalines;
Gold AU
Silver AG
Copper CU
Alumini AL
Iron FE
Lead PB
;

proc print data=Metals;
run;
/* 4.Hash pointer (#n)*/
/* Four variations on the same six records. Each step replaces
   WORK.Metals and prints the result. */

/* #2 placed before the variables. */
data Metals;
  input #2 Metal $ Symbol $;
  datalines;
Gold Au
Silver Ag
Copper Cu
Alumini Al
Iron Fe
Lead Pb
;

proc print data=metals;
run;

/* #3 placed before the variables. */
data Metals;
  input #3 Metal $ Symbol $;
  datalines;
Gold Au
Silver Ag
Copper Cu
Alumini Al
Iron Fe
Lead Pb
;

proc print data=metals;
run;

/* #2 placed after the variables. */
data Metals;
  input Metal $ Symbol $ #2;
  datalines;
Gold Au
Silver Ag
Copper Cu
Alumini Al
Iron Fe
Lead Pb
;

proc print data=metals;
run;

/* #3 placed after the variables. */
data Metals;
  input Metal $ Symbol $ #3;
  datalines;
Gold Au
Silver Ag
Copper Cu
Alumini Al
Iron Fe
Lead Pb
;

proc print data=metals;
run;
/*Multiple datalines and Multiple input statements*/
/* Each observation spans two records; a separate INPUT statement
   reads each record. */
data Metals1;
  input Metal $;
  input Symbol $;
  datalines;
Gold
Au
Silver
Ag
Copper
Cu
Alumini
Al
Iron
Fe
Lead
Pb
;

proc print data=metals1;
run;

/* Same idea, with the second record carrying two values. */
data Metals2;
  input Metal $;
  input Symbol $ code;
  datalines;
Gold
Au 45
Silver
Ag 62
Copper
Cu 57
Alumini
Al 78
Iron
Fe 85
Lead
Pb 55
;

proc print data=metals2;
run;
/* Double trailing @@ holds the current record across iterations; the
   three values of each observation are spread unevenly over the lines. */
data Metals3;
  input Metal $ Symbol $ code @@;
  datalines;
Gold
Au 45
Silver Ag
62
Copper Cu 57
Alumini
Al
78
Iron
Fe 85
Lead Pb
55
;

proc print data=metals3;
run;
/*1.SAS FUNCTIONS*/
/*2.ARRAY*/
/*1.SAS FUNCTIONS*/
/*1.MAX*/
/*2.MIN*/
/*3.SUM*/
/*4.MEAN*/
/*5.INT*/
/*6.CEIL*/
/*7.FLOOR*/
/*8.ROUND*/
/*9.LOG*/
/*10.LOG10*/
/*11.FACT*/
/* Numeric function demos. Each step builds a one-row dataset and
   prints it. NF2 is created twice (INT, then CEIL); the second step
   replaces the first. */

/* MAX, MIN, SUM and MEAN over three values. */
data NF1;
  X = 5;
  Y = 12;
  Z = -10;
  MAX  = max(X, Y, Z);
  MIN  = min(X, Y, Z);
  SUM  = sum(X, Y, Z);
  MEAN = mean(X, Y, Z);
run;

proc print data=NF1;
run;

/* INT on a positive and a negative value. */
data NF2;
  X = 3.2;
  Y = -2.9;
  INT1 = int(X);
  INT2 = int(Y);
run;

proc print data=NF2;
run;

/* CEIL on the same two values. */
data NF2;
  X = 3.2;
  Y = -2.9;
  CEIL1 = ceil(X);
  CEIL2 = ceil(Y);
run;

proc print data=NF2;
run;

/* FLOOR on the same two values. */
data NF3;
  X = 3.2;
  Y = -2.9;
  FLOOR1 = floor(X);
  FLOOR2 = floor(Y);
run;

proc print data=NF3;
run;

/* ROUND with rounding units from 0.1 up to 1000. */
data NF4;
  X = 1937.56;
  R1 = round(X, 0.1);
  R2 = round(X, 1);
  R3 = round(X, 10);
  R4 = round(X, 100);
  R5 = round(X, 1000);
run;

proc print data=NF4;
run;

/* LOG and LOG10 of the same value. */
data NF5;
  X = 1937.56;
  LOG   = log(X);
  LOG10 = log10(X);
run;

proc print data=NF5;
run;

/* FACT of three integers. */
data NF6;
  X = 5;
  Y = 15;
  Z = 12;
  FACT1 = fact(X);
  FACT2 = fact(Y);
  FACT3 = fact(Z);
run;

proc print data=NF6;
run;
/* One small dataset per numeric function, each printed straight away. */

data MAXIMUM;
  A = 567;
  B = 657;
  C = 239;
  D = max(A, B, C);
run;

proc print data=MAXIMUM;
run;

/* Clear any title in effect. */
title;
title1;

data MINIMUM;
  A = 567;
  B = 657;
  C = 239;
  D = min(A, B, C);
run;

proc print data=MINIMUM;
run;

/* Title for the listings that follow. */
title 'SAS FUNCTIONS';

data TOTAL;
  A = 190;
  B = 23;
  C = 7689;
  D = sum(A, B, C);
run;

proc print data=TOTAL;
run;

data AVERAGE;
  X = 35;
  Y = 70;
  Z = 105;
  G = mean(X, Y, Z);
run;

proc print data=AVERAGE;
run;

data INTEGER;
  U = -23.4;
  Y = int(U);
run;

proc print data=INTEGER;
run;

/* CEIL on a positive value, then on a negative value. */
data CEILING;
  J = 238.02;
  K = ceil(J);
run;

proc print data=CEILING;
run;

data CEILING1;
  J = -238.62;
  K = ceil(J);
run;

proc print data=CEILING1;
run;

/* FLOOR on a positive value, then on a negative value. */
data FLOORING;
  D = 25.9;
  M = floor(D);
run;

proc print data=FLOORING;
run;

data FLOORING1;
  D = -25.9;
  M = floor(D);
run;

proc print data=FLOORING1;
run;

/* ROUND with four different rounding units. */
data ROUNDING;
  K = 69.765;
  L = round(K, .01);
  M = round(K, .1);
  N = round(K, 1);
  O = round(K, 10);
run;

proc print data=ROUNDING;
run;

data NATLOG;
  V = 234.34;
  C = log(V);
run;

proc print data=NATLOG;
run;

data LOGTEN;
  V = 234.34;
  C = log10(V);
run;

proc print data=LOGTEN;
run;
/* FACT with a variable argument. S was assigned but never used because
   the call repeated the literal 5; the call now takes S. */
data FACTORIAL;
  S = 5;
  D = fact(S);
run;

proc print data=FACTORIAL;
run;

/* FACT with a literal argument. */
data FACTORIAL1;
  D = fact(5);
run;

proc print data=FACTORIAL1;
run;
/* Case conversion: UPCASE, LOWCASE, PROPCASE. */
data C1;
  A = 'umbrella';
  B = 'BINOMIAL';
  C = 'tHeoREm';
  upcase   = upcase(A);
  lowcase  = lowcase(B);
  propcase = propcase(C);
run;

proc print data=C1;
run;

/*LENGTH*/
/*LENGTHC*/
data C2;
  X = 'ANIMATION';
  Y = ' VELOCITY ';
  LEN1 = length(X);
  LEN2 = length(Y);
  LENC = lengthc(Y);
run;

proc print data=c2;
run;

/*Substring function and Scan function*/
data C3;
  A = 'POTENTIAL';
  B = 'THE ECONOMIC COUNCIL';
  Substr1 = substr(A, 3, 3);
  Substr2 = substr(A, 3);
  SCAN1   = scan(B, -1);
  SCAN2   = scan(B, 2);
run;

proc print data=c3;
run;

/*Index,Indexw,Indexc*/
data c4;
  x = 'Finland is called land of thousand lakes';
  index  = index(x, 'land');
  indexw = indexw(x, 'land');
  indexc = indexc(x, 'land');
run;

proc print data=c4;
run;
/*Find function*/
/* Builds one row holding a sentence X and three FIND results, then
   prints it.
   NOTE(review): the literal assigned to X continues onto a second
   source line; how that line break ends up in the stored value depends
   on how SAS reads the source record - confirm, and consider keeping
   the literal on one line. */
data C5;
X='India is the COUNTRY known for its peace, courage and stability.
peace is the main motto';
/* F1: search for 'pea' with no modifier. */
F1=Find(X,'pea');
/* F2: search for 'Pea' with the 'I' modifier (ignore case, per the SAS
   FIND documentation). */
F2=Find(X,'Pea','I');
/* F3: same search with 41 passed as the fourth argument (start
   position, per the SAS FIND documentation). */
F3=Find(X,'Pea','i',41);
run;
proc print data=c5;
run;
/* INDEX, INDEXW and INDEXC on three different strings.
   Fix: PROC PRINT named dataset EXRC6, which is never created here;
   it now prints C6, the dataset built by this step. */
data C6;
  A = 'INTERPOLATION IN NUMERICAL ANALYSIS';
  B = 'THE SUN RISES IN THE EAST';
  C = 'RINY DAY FOR ALL';
  D = index(A, 'IN');    /* first occurrence of the substring IN */
  E = indexw(B, 'IN');   /* first occurrence of IN as a whole word */
  F = indexc(C, 'DAY');  /* first occurrence of any of D, A, Y */
run;

proc print data=C6;
run;
/* COMPRESS with and without a character list, and COMPBL. */
data C7;
  x = ' KER ALA TUSKERS ';
  compress1 = compress(x);
  y = 'Ma**k1 e';
  compress2 = compress(y, '*1 ');
  z = 'SUN RISES IN THE EAST';
  COMPBL = compbl(z);
run;

proc print data=C7;
run;
/*CAT*/
/*CATT*/
/*CONCATENATION OPERATOR*/
/* CAT: the five pieces carry their own trailing blanks. */
data c8;
  x = 'Sun ';
  y = 'rises ';
  z = 'in ';
  a = 'the ';
  b = 'east';
  cat = cat(x, y, z, a, b);
run;

proc print data=C8;
run;

/*CATX*/
/* Delimiter held in variable k ('_'). */
data c9;
  k = '_';
  x = 'Sun';
  y = 'rises';
  z = 'in';
  a = 'the';
  b = 'east';
  catx = catx(k, x, y, z, a, b);
run;

proc print data=C9;
run;

/* Delimiter variable k assigned ''. */
data c10;
  k = '';
  x = 'Sun';
  y = 'rises';
  z = 'in';
  a = 'the';
  b = 'east';
  catx = catx(k, x, y, z, a, b);
run;

proc print data=C10;
run;

/* Delimiter passed as the literal ''. */
data c11;
  x = 'Sun';
  y = 'rises';
  z = 'in';
  a = 'the';
  b = 'east';
  catx = catx('', x, y, z, a, b);
run;

proc print data=C11;
run;

/*CATS*/
data c12;
  x = ' intel ';
  y = 'lig ';
  z = ' ent';
  cats = cats(x, y, z);
run;

proc print data=c12;
run;

/*CATT*/
/* Pieces with trailing blanks. */
data c13;
  x = 'intel ';
  y = 'lig ';
  z = 'ent ';
  catt = catt(x, y, z);
run;

proc print data=c13;
run;

/* Pieces with leading blanks. */
data c14;
  x = ' intel';
  y = ' lig';
  z = ' ent';
  catt = catt(x, y, z);
run;

proc print data=c14;
run;

/* Concatenation operator ||. */
data c15;
  x = 'honesty ';
  y = 'is ';
  z = 'the best policy';
  proverb = x || y || z;
run;

proc print data=c15;
run;
/*Left*/
/*Right*/
/*Trim*/
/* Each result is wrapped in colons so the blanks show in the listing. */
data c16;
  x = ' Georgia ';
  left  = ':' || left(x)  || ':';
  right = ':' || right(x) || ':';
  trim  = ':' || trim(x)  || ':';
run;

proc print data=c16;
run;

/*translate*/
/*tranword*/
/* TRANSLATE swaps single characters; TRANWRD swaps a whole substring. */
data c17;
  x = 'unbrekka';
  y = 'Indus is the longest sea in India';
  translate = translate(x, 'ml', 'nk');
  tranword  = tranwrd(y, 'sea', 'river');
run;

proc print data=c17;
run;
/*input*/
/*put*/
/*syntax :input(source,informat)*/
/* INPUT reads the character value '195' with the numeric informat 3. */
data c18;
  x = '195';
  input = input(x, 3.);
run;

proc print data=c18;
run;

/*syntax :put(source,format)*/
/* PUT writes the numeric value 195 as text. Fix: x is numeric, so the
   format has to be a numeric one (3.); the original passed the
   character format $3. */
data c19;
  x = 195;
  put = put(x, 3.);
run;

proc print data=c19;
run;
/*1.Day*/
/*2.Week*/
/*3.Month*/
/*4.QTR*/
/*5.Year*/
/*6.Datejul*/
/*7.Date*/
/*8.Today*/
/*9.Datetime*/
/*10.Datepart*/
/*11.Timepart*/
/*12.time*/
/*13.INTCK*/
/*14.INTNX*/
/*15.WEEKDAY*/
/*1.Day*/
/*2.Week*/
/*3.Month*/
/*4.QTR*/
/*5.Year*/
/*SYNTAX : 'DDMMMYY<YY>'D*/
/**/
/*X='02MAR1992'D*/
/**/
/*Y='2mar92'd*/
/* Extract day, week, month, quarter and year from one date literal. */
data DT1;
  x = '15sep96'd;
  Day   = day(x);
  WEEK  = week(x);
  MONTH = month(x);
  QTR   = qtr(x);
  YEAR  = year(x);
run;

proc print data=DT1;
  format x date9.;
run;
/*6.Datejul*/
/*JULIAN DATE*/
/*SYNTAX : YY<YY>DDD (a number)
e.g. 1992095
e.g. 92085 */
/* Fixes: the two "e.g." lines sat outside any comment, and x was a
   character string, which forces an implicit character-to-numeric
   conversion before DATEJUL can use it. x is now the number 92085. */
data dt2;
  x = 92085;
  datejul = datejul(x);
run;

proc print data=dt2;
  format datejul ddmmyy10.;
run;
/*7.Date*/
/*8.Today*/
/*9.Datetime*/
/*10.Datepart*/
/*11.Timepart*/
/*12.time*/
/* Capture the current date, datetime and time, and split the datetime
   into its date and time parts. */
data dt3;
  date     = date();
  today    = today();
  datetime = datetime();
  datepart = datepart(datetime);
  timepart = timepart(datetime);
  time     = time();
run;

proc print data=dt3;
  format date today datepart date9.
         time timepart       time8.
         datetime            datetime17.;
run;
/*13.INTCK*/
/* Interval counts between two dates for five interval types. */
data dt4;
  x = '7jul2003'd;
  y = '18nov2009'd;
  days     = intck('day',   x, y);
  weeks    = intck('week',  x, y);
  months   = intck('month', x, y);
  quarters = intck('qtr',   x, y);
  years    = intck('year',  x, y);
run;

proc print data=dt4;
  format x y date9.;
run;

/*14.INTNX*/
/* Advance one date by a number of intervals of each type. */
data dt5;
  x = '7jul2003'd;
  d1 = intnx('day',   x, 793);
  d2 = intnx('week',  x, 27);
  d3 = intnx('month', x, 15);
  d4 = intnx('qtr',   x, 7);
  d5 = intnx('year',  x, 3);
run;

proc print data=dt5;
  format x d1 d2 d3 d4 d5 date9.;
run;

/*15.WEEKDAY*/
data dt6;
  x = '17jun2006'd;
  y = '14jan2017'd;
  weekday1 = weekday(x);
  weekday2 = weekday(y);
run;

proc print data=dt6;
  format x y date9.;
run;
/*15JAN2017*/
/*ARRAYS*/
/*An array is a DATA step construct used to group similar variables
and manipulate those variables simultaneously.*/
/* Replace every missing mark in m1-m3 with 25 by looping over an
   array; the loop index is dropped from the output. */
data trial1(drop=i);
  infile datalines missover;
  input sno m1 m2 m3;
  array MAS{3} m1-m3;
  do i = 1 to dim(MAS);
    if missing(MAS{i}) then MAS{i} = 25;
  end;
  datalines;
231 12 110 40
232 . . 35
233 54 90 .
234 . 35 .
235 90 37 88
;

proc print data=trial1;
run;
/* Quarterly interest q1-q4 at 2%, 3%, 4% and 5% of the deposit.
   The rates live in a temporary array, so they are not written to the
   output dataset. */
data trial29(drop=j);
  input accno customer $ deposit;
  array Interest{4} q1-q4;
  array rate{4} _temporary_ (0.02 0.03 0.04 0.05);
  do j = 1 to dim(Interest);
    Interest{j} = deposit * rate{j};
  end;
  datalines;
321 minu 15000
322 vinu 20000
323 dimu 18000
;

proc print data=trial29;
run;
/* Array PRODUCT groups cost and sales; sales (element 2) is overwritten
   with cost (element 1) plus 500. The assignment does not depend on the
   loop index, so both passes store the same value. */
data doubletesting1(drop=i);
  input pno prodname $ cost sales;
  array product{2} cost sales;
  do i = 1 to 2;
    product{2} = product{1} + 500;
  end;
  datalines;
121 table 300 350
122 chair 450 500
123 bed 600 650
;

proc print data=doubletesting1;
run;
/* Array STATUS groups profit and loss. When sp exceeds cp the
   difference goes to profit; when cp exceeds sp it goes to loss. The
   two conditions cannot both hold, so they are chained with ELSE. */
data tr3(drop=i);
  input pcode cp sp;
  array status{2} profit loss;
  do i = 1 to 2;
    if sp > cp then status{1} = sp - cp;
    else if cp > sp then status{2} = cp - sp;
  end;
  datalines;
5011 521 365
5012 234 567
5013 678 345
5014 234 231
5015 123 786
;

proc print data=tr3;
run;
/* Read ten names into l1-L10 and store their upper-case versions in
   u1-u10 through two parallel character arrays. */
data text(drop=k);
  input l1 $ l2 $ l3 $ L4 $ L5 $ L6$ L7 $ L8$ L9 $ L10 $;
  array lower{10} $ l1-l10;
  array upper{10} $ u1-u10;
  do k = 1 to dim(lower);
    upper{k} = upcase(lower{k});
  end;
  datalines;
smithers michaels gonzalez hurth frank bleigh
rounder joseph peters sam
;

proc print data=text;
  title 'Names Changed from Lowercase to Uppercase';
run;
/* Same conversion with the arrays declared first and the whole NAMES
   array read in one INPUT statement. */
data text(drop=i);
  array names{*}    $ n1-n10;
  array capitals{*} $ c1-c10;
  input names{*};
  do i = 1 to 10;
    capitals{i} = upcase(names{i});
  end;
  datalines;
smithers michaels gonzalez hurth frank bleigh
rounder joseph peters sam
;

proc print data=text;
  title 'Names Changed from Lowercase to Uppercase';
run;