0% found this document useful (0 votes)
115 views6 pages

Theil - Sen Multivariate - SAS

This document contains SAS code that: 1) Imports stock market and financial data from WRDS into SAS. 2) Cleans and formats the data. 3) Generates combinations of firms and estimates time-series regressions to obtain coefficients. 4) Repeats the process for different numbers of firms in combinations and saves the results.

Uploaded by

Filipe Duarte
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
115 views6 pages

Theil - Sen Multivariate - SAS

This document contains SAS code that: 1) Imports stock market and financial data from WRDS into SAS. 2) Cleans and formats the data. 3) Generates combinations of firms and estimates time-series regressions to obtain coefficients. 4) Repeats the process for different numbers of firms in combinations and saves the results.

Uploaded by

Filipe Duarte
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 6

/*
 Sign on to the WRDS server through SAS/CONNECT, copy the required tables
 into the remote WORK library and download them to the local session.
 The host/port value must be a single token sequence on one line; it was
 previously split in the middle of the host name.
*/
%let wrds=wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;
  /* Table later joined on permno, supplying the start/ending dates. */
  data dsp500list;
    set crspa.dsp500list;
  run;
  proc download data=dsp500list; run;

  data msp500;
    set crspa.msp500;
  run;
  proc download data=msp500; run;

  /* Fundamentals: restrict to datadate years 1998-2013 and to the
     INDL / STD / D / C record format, keeping only the needed columns. */
  data funda;
    set compm.funda;
    if 1998<=year(datadate)<=2013;
    if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
    keep gvkey conm datadate fyear fyr cik cusip tic
         at ib lt seq csho prcc_f sich au ajex;
  run;
  proc download data=funda; run;

  /* Link table used later to map gvkey to permno. */
  data CCMXPF_LINKTABLE;
    set crspa.CCMXPF_LINKTABLE;
  run;
  proc download data=CCMXPF_LINKTABLE; run;
endrsubmit;

/*
 Build FUNDA_1: add SDATE and next fiscal year's income (LEADIB).
 The data are sorted by gvkey with fyear DESCENDING, so the previous
 observation of the same gvkey is the following fiscal year.
*/
proc sort data=funda; by gvkey DESCENDING fyear; run;

data funda_1;
  set funda;
  format sdate datadate yymmddn8.;

  /* First day of the month that lies 11 months before datadate. */
  sdate = intnx("month", datadate, -11, "beg");

  /* LAG queues advance only when the function call is executed, so the
     calls are made unconditionally on every row rather than inside a
     compound IF condition, where their evaluation is not guaranteed. */
  _prev_gvkey = lag1(gvkey);
  _prev_fyear = lag1(fyear);
  _prev_ib    = lag1(ib);

  /* Accept the previous row's ib only when it belongs to the same firm
     and to fiscal year fyear+1; otherwise the lead value is missing. */
  if gvkey = _prev_gvkey and fyear = _prev_fyear - 1 then leadib = _prev_ib;
  else leadib = .;

  drop _prev_gvkey _prev_fyear _prev_ib;
run;
/*
 Build FUNDA_2: derive market value, scaled variables and per-share
 measures, then drop observations with any missing model variable.
 The data are sorted by gvkey and ascending fyear, so the previous
 observation of the same gvkey is the prior fiscal year.
*/
proc sort data=funda_1; by gvkey fyear; run;

data funda_2;
  set funda_1;

  /* Evaluate every LAG call unconditionally on each row so the queues
     always advance by exactly one observation. */
  _prev_gvkey  = lag1(gvkey);
  _prev_fyear  = lag1(fyear);
  _prev_ajex   = lag(ajex);
  _prev_prcc_f = lag(prcc_f);

  /* lag1 = 1 when the previous row is the same firm one fiscal year
     earlier, missing otherwise (multiplying by it blanks invalid lags). */
  if gvkey = _prev_gvkey and fyear = _prev_fyear + 1 then lag1 = 1;
  else lag1 = .;

  mve       = prcc_f*csho;
  leadib_s  = leadib/at;
  mve_s     = mve/at;
  bv        = seq;
  bv_s      = bv/at;
  ib_s      = ib/at;
  lt_s      = lt/at;
  leadeps   = leadib/csho;
  price     = prcc_f;
  adj       = ajex/(_prev_ajex*lag1); /* adjustment factor for stock splits */
  lagprice  = (_prev_prcc_f*lag1)*adj;
  eps       = ib/csho;
  leadeps_s = leadeps/lagprice;
  eps_s     = eps/lagprice;
  price_s   = price/lagprice;

  /* Remove rows where any required variable is missing. Each raw input
     listed here also feeds a derived ratio in the list, so this removes
     the same rows as the original chain of "if x=. then delete" tests. */
  if nmiss(at, lt, bv, bv_s, lt_s, ib_s,
           price, mve, lagprice,
           eps, leadeps, leadeps_s, eps_s, price_s) > 0 then delete;

  drop _prev_gvkey _prev_fyear _prev_ajex _prev_prcc_f;
run;
/*
 Attach permno (lpermno from the link table) to every FUNDA_2 row.
 A link qualifies when its type, primacy and used flag match the lists
 below and its date range covers the sdate..datadate window; .B and .E
 are treated as open-ended link start and end dates respectively.
*/
proc sql;
  create table funda_3 as
  select f.*,
         lnk.lpermno as permno
  from funda_2 as f
       left join
       ccmxpf_linktable as lnk
    on  f.gvkey = lnk.gvkey
    and lnk.LINKTYPE in ("LU","LC","LD","LN","LS","LX")
    and lnk.LINKPRIM in ("C","P")
    and lnk.USEDFLAG = 1
    and (lnk.LINKDT <= f.sdate or lnk.LINKDT = .B)
    and (f.datadate <= lnk.LINKENDDT or lnk.LINKENDDT = .E);
quit;

/* Keep only the rows that obtained a permno. */
data funda_4;
  set funda_3;
  if permno ne .;
run;
/*
 Build FUNDA_6, the final estimation sample:
   1. attach the start/ending dates from DSP500LIST by permno;
   2. keep rows whose datadate falls inside start..ending and whose
      fyear is in 2001-2010;
   3. strip all variable labels and sort by fyear, gvkey.
*/
proc sql;
  create table funda_5 as
  select a.*, b.start, b.ending
  from funda_4 as a left join dsp500list as b
    on a.permno = b.permno;
quit;

data funda_6;
  set funda_5;
  *if start <= sdate <= datadate <= ending;
  if start <= datadate <= ending;
  if 2001<=fyear<=2010;
  keep gvkey fyear at lt ib ib_s bv bv_s lt_s mve mve_s price lagprice
       eps leadeps leadeps_s eps_s price_s leadib leadib_s;
run;

/* Blank out every variable label in FUNDA_6. */
proc datasets lib=work memtype=data;
  modify funda_6;
  attrib _all_ label=' ';
run;
quit;

proc sort data=funda_6; by fyear gvkey; run;

/*
 The following generates all possible combinations of GVKEYs of size 2
 for each FYEAR.

 %combination expects the macro variable I (fiscal year) to be set by the
 caller (%LOOP). For year &I it:
   - enumerates every pair of firms (gvkey1 < gvkey2) and numbers the
     pairs with REPLICATE;
   - fits, for each pair, the no-intercept regressions
         leadib = at lt      (estimates in ts_1_est_&I)
         mve    = at lt      (estimates in ts_2_est_&I);
   - stores the per-year median of each coefficient in ts_1_&I / ts_2_&I.
 The SAS log is redirected to a dummy file while the regressions run.
*/
%MACRO combination;
  data comb_1;
    set funda_6;
    keep fyear gvkey;
    if fyear=&I;
  run;

  /* An inner join on a.gvkey < b.gvkey yields each unordered pair once
     and never produces a pair with a missing second member, so no
     follow-up filter on gvkey2 is needed. */
  proc sql;
    create table comb_2 as
    select a.fyear, a.gvkey as gvkey1, b.gvkey as gvkey2
    from comb_1 as a inner join comb_1 as b
      on a.gvkey < b.gvkey;
  quit;

  /* REPLICATE is the running pair identifier. */
  data comb_3;
    set comb_2;
    replicate=_n_;
  run;

  /* Long format: one row per (replicate, gvkey). */
  proc transpose data=comb_3 out=comb_4 (drop=_NAME_ rename=(COL1=gvkey));
    var gvkey1-gvkey2;
    by replicate fyear;
  run;

  /* Bring in the regression variables for each member of each pair. */
  proc sql;
    create table comb_5 as
    select a.*, b.leadib, b.mve, b.at, b.lt, b.ib, b.bv,
           b.leadeps, b.price, b.eps, b.lagprice
    from comb_4 as a left join funda_6 as b
      on a.fyear = b.fyear and a.gvkey = b.gvkey
    order by fyear, replicate, gvkey;
  quit;

  /* Send the log to a dummy file for the duration of the regressions. */
  filename templog dummy;
  proc printto log=templog; run;

  /* Model 1: leadib on at and lt, one regression per pair. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_1_est_&I (keep=fyear replicate at lt _rsq_);
    by fyear replicate;
    model leadib = at lt / noint noprint;
  run;
  quit;
  data ts_1_est_&I;
    set ts_1_est_&I;
    tse_1a=at;
    tse_1b=lt;
  run;
  proc means data=ts_1_est_&I noprint;
    by fyear;
    var tse_1a tse_1b;
    output out=ts_1_&I (drop= _TYPE_ _FREQ_)
           median(tse_1a)=ts_1a median(tse_1b)=ts_1b;
  run;

  /* Model 2: mve on at and lt, one regression per pair. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_2_est_&I (keep=fyear replicate at lt _rsq_);
    by fyear replicate;
    model mve = at lt / noint noprint;
  run;
  quit;
  data ts_2_est_&I;
    set ts_2_est_&I;
    tse_2a=at;
    tse_2b=lt;
  run;
  proc means data=ts_2_est_&I noprint;
    by fyear;
    var tse_2a tse_2b;
    output out=ts_2_&I (drop= _TYPE_ _FREQ_)
           median(tse_2a)=ts_2a median(tse_2b)=ts_2b;
  run;

  /* Restore the default log destination. */
  proc printto; run;
%MEND combination;

/* Run %combination for every fiscal year 2001-2010. I is declared local
   to %LOOP; %combination resolves it from this enclosing scope. */
%MACRO LOOP;
  %local I;
  %DO I=2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly medians into one table per model. */
data ts_est_1; set ts_1_2001-ts_1_2010; run;
data ts_est_2; set ts_2_2001-ts_2_2010; run;

/*
 The following is an alternative quicker but dirtier way to generate
 100,000 combinations of GVKEYs of size 2 for each FYEAR.
 This may result in the same combination to enter the sample multiple times.
 However, this does not affect the results materially in this example.

 %combination expects the macro variable I (fiscal year) to be set by the
 caller (%LOOP). For year &I it draws 100,000 simple random samples of
 2 firms with PROC SURVEYSELECT (fixed seed 3965), fits the no-intercept
 regressions leadib = at lt and mve = at lt within each sample, and stores
 the per-year median coefficients in ts_1_&I / ts_2_&I.
*/
%MACRO combination;
  data funda_6_&I;
    set funda_6;
    if fyear=&I;
  run;

  /* OUT= carries a Replicate variable identifying each drawn sample. */
  proc surveyselect data=funda_6_&I method=srs n=2 seed=3965
                    out=comb_5 rep=100000 noprint;
  run;

  /* Send the log to a dummy file for the duration of the regressions. */
  filename templog dummy;
  proc printto log=templog; run;

  /* Model 1: leadib on at and lt, one regression per sample. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_1_est_&I (keep=fyear replicate at lt _rsq_);
    by fyear replicate;
    model leadib = at lt / noint noprint;
  run;
  quit;
  data ts_1_est_&I;
    set ts_1_est_&I;
    tse_1a=at;
    tse_1b=lt;
  run;
  proc means data=ts_1_est_&I noprint;
    by fyear;
    var tse_1a tse_1b;
    output out=ts_1_&I (drop= _TYPE_ _FREQ_)
           median(tse_1a)=ts_1a median(tse_1b)=ts_1b;
  run;

  /* Model 2: mve on at and lt, one regression per sample. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_2_est_&I (keep=fyear replicate at lt _rsq_);
    by fyear replicate;
    model mve = at lt / noint noprint;
  run;
  quit;
  data ts_2_est_&I;
    set ts_2_est_&I;
    tse_2a=at;
    tse_2b=lt;
  run;
  proc means data=ts_2_est_&I noprint;
    by fyear;
    var tse_2a tse_2b;
    output out=ts_2_&I (drop= _TYPE_ _FREQ_)
           median(tse_2a)=ts_2a median(tse_2b)=ts_2b;
  run;

  /* Restore the default log destination. */
  proc printto; run;
%MEND combination;

/* Run %combination for every fiscal year 2001-2010. I is declared local
   to %LOOP; %combination resolves it from this enclosing scope. */
%MACRO LOOP;
  %local I;
  %DO I=2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly medians into one table per model. */
data ts_est_1; set ts_1_2001-ts_1_2010; run;
data ts_est_2; set ts_2_2001-ts_2_2010; run;

/*
 The following generates all possible combinations of GVKEYs of size 3
 for each FYEAR.

 %combination expects the macro variable I (fiscal year) to be set by the
 caller (%LOOP). For year &I it:
   - enumerates every triple of firms (gvkey1 < gvkey2 < gvkey3) and
     numbers the triples with REPLICATE;
   - fits, for each triple, the no-intercept regressions
         leadib = at lt ib   (estimates in ts_7_est_&I)
         mve    = at lt ib   (estimates in ts_8_est_&I);
   - stores the median of each coefficient in ts_7_&I / ts_8_&I.
 The SAS log is redirected to a dummy file while the regressions run.
*/
%MACRO combination;
  data comb_1;
    set funda_6;
    keep fyear gvkey;
    if fyear=&I;
  run;
  data funda_6_&I;
    set funda_6;
    keep fyear gvkey leadib mve at lt ib;
    if fyear=&I;
  run;

  /* Inner joins on strict "<" yield each unordered pair / triple once and
     never produce a missing member, so no follow-up filter is needed. */
  proc sql;
    create table comb_2 as
    select a.fyear, a.gvkey as gvkey1, b.gvkey as gvkey2
    from comb_1 as a inner join comb_1 as b
      on a.gvkey < b.gvkey;
  quit;
  proc sql;
    create table comb_3 as
    select a.*, b.gvkey as gvkey3
    from comb_2 as a inner join comb_1 as b
      on a.gvkey2 < b.gvkey;
  quit;

  /* REPLICATE is the running triple identifier. */
  data comb_3;
    set comb_3;
    replicate=_n_;
  run;

  /* Long format: one row per (replicate, gvkey). */
  proc transpose data=comb_3 out=comb_4 (drop=_NAME_ rename=(COL1=gvkey));
    var gvkey1-gvkey3;
    by fyear replicate;
  run;

  /* Bring in the regression variables for each member of each triple. */
  proc sql;
    create table comb_5 as
    select a.*, b.leadib, b.mve, b.at, b.lt, b.ib
    from comb_4 as a left join funda_6_&I as b
      on a.fyear = b.fyear and a.gvkey = b.gvkey
    order by fyear, replicate;
  quit;

  /* Send the log to a dummy file for the duration of the regressions. */
  filename templog dummy;
  proc printto log=templog; run;

  /* Model 7: leadib on at, lt and ib, one regression per triple. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_7_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model leadib = at lt ib / noint noprint;
  run;
  quit;
  data ts_7_est_&I;
    set ts_7_est_&I;
    tse_7a=at;
    tse_7b=lt;
    tse_7c=ib;
    drop at lt ib _RSQ_;
  run;
  /* Median (50th percentile) of each coefficient across all triples. */
  proc univariate data=ts_7_est_&I noprint;
    var tse_7a;
    output out=ts_7a_&I pctlpts=50 pctlpre=ts_7a_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7b;
    output out=ts_7b_&I pctlpts=50 pctlpre=ts_7b_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7c;
    output out=ts_7c_&I pctlpts=50 pctlpre=ts_7c_;
  run;
  data ts_7_&I;
    merge ts_7a_&I ts_7b_&I ts_7c_&I;
    fyear=&I;
    ts_7a=ts_7a_50; ts_7b=ts_7b_50; ts_7c=ts_7c_50;
    drop ts_7a_50 ts_7b_50 ts_7c_50;
  run;

  /* Model 8: mve on at, lt and ib, one regression per triple. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_8_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model mve = at lt ib / noint noprint;
  run;
  quit;
  data ts_8_est_&I;
    set ts_8_est_&I;
    tse_8a=at;
    tse_8b=lt;
    /* The third regressor is ib. The OUTEST= data set has no variable
       named ni, so referencing it made tse_8c entirely missing. */
    tse_8c=ib;
    drop at lt ib _RSQ_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8a;
    output out=ts_8a_&I pctlpts=50 pctlpre=ts_8a_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8b;
    output out=ts_8b_&I pctlpts=50 pctlpre=ts_8b_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8c;
    output out=ts_8c_&I pctlpts=50 pctlpre=ts_8c_;
  run;
  data ts_8_&I;
    merge ts_8a_&I ts_8b_&I ts_8c_&I;
    fyear=&I;
    ts_8a=ts_8a_50; ts_8b=ts_8b_50; ts_8c=ts_8c_50;
    drop ts_8a_50 ts_8b_50 ts_8c_50;
  run;

  /* Restore the default log destination. */
  proc printto; run;
%MEND combination;

/* Run %combination for every fiscal year 2001-2010. I is declared local
   to %LOOP; %combination resolves it from this enclosing scope. */
%MACRO LOOP;
  %local I;
  %DO I=2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly medians into one table per model. */
data ts_est_7; set ts_7_2001-ts_7_2010; run;
data ts_est_8; set ts_8_2001-ts_8_2010; run;
/*
 The following is an alternative quicker but dirtier way to generate
 100,000 combinations of GVKEYs of size 3 for each FYEAR.

 %combination expects the macro variable I (fiscal year) to be set by the
 caller (%LOOP). For year &I it draws 100,000 simple random samples of
 3 firms with PROC SURVEYSELECT (fixed seed 3965), fits the no-intercept
 regressions leadib = at lt ib and mve = at lt ib within each sample, and
 stores the median coefficients in ts_7_&I / ts_8_&I.
*/
%MACRO combination;
  data funda_6_&I;
    set funda_6;
    if fyear=&I;
  run;

  /* OUT= carries a Replicate variable identifying each drawn sample. */
  proc surveyselect data=funda_6_&I method=srs n=3 seed=3965
                    out=comb_5 rep=100000 noprint;
  run;

  /* Send the log to a dummy file for the duration of the regressions. */
  filename templog dummy;
  proc printto log=templog; run;

  /* Model 7: leadib on at, lt and ib, one regression per sample. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_7_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model leadib = at lt ib / noint noprint;
  run;
  quit;
  data ts_7_est_&I;
    set ts_7_est_&I;
    tse_7a=at;
    tse_7b=lt;
    tse_7c=ib;
    drop at lt ib _RSQ_;
  run;
  /* Median (50th percentile) of each coefficient across all samples. */
  proc univariate data=ts_7_est_&I noprint;
    var tse_7a;
    output out=ts_7a_&I pctlpts=50 pctlpre=ts_7a_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7b;
    output out=ts_7b_&I pctlpts=50 pctlpre=ts_7b_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7c;
    output out=ts_7c_&I pctlpts=50 pctlpre=ts_7c_;
  run;
  data ts_7_&I;
    merge ts_7a_&I ts_7b_&I ts_7c_&I;
    fyear=&I;
    ts_7a=ts_7a_50; ts_7b=ts_7b_50; ts_7c=ts_7c_50;
    drop ts_7a_50 ts_7b_50 ts_7c_50;
  run;

  /* Model 8: mve on at, lt and ib, one regression per sample. */
  proc reg data=comb_5 noprint rsquare
           outest=ts_8_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model mve = at lt ib / noint noprint;
  run;
  quit;
  data ts_8_est_&I;
    set ts_8_est_&I;
    tse_8a=at;
    tse_8b=lt;
    /* The third regressor is ib. The OUTEST= data set has no variable
       named ni, so referencing it made tse_8c entirely missing. */
    tse_8c=ib;
    drop at lt ib _RSQ_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8a;
    output out=ts_8a_&I pctlpts=50 pctlpre=ts_8a_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8b;
    output out=ts_8b_&I pctlpts=50 pctlpre=ts_8b_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8c;
    output out=ts_8c_&I pctlpts=50 pctlpre=ts_8c_;
  run;
  data ts_8_&I;
    merge ts_8a_&I ts_8b_&I ts_8c_&I;
    fyear=&I;
    ts_8a=ts_8a_50; ts_8b=ts_8b_50; ts_8c=ts_8c_50;
    drop ts_8a_50 ts_8b_50 ts_8c_50;
  run;

  /* Restore the default log destination. */
  proc printto; run;
%MEND combination;

/* Run %combination for every fiscal year 2001-2010. I is declared local
   to %LOOP; %combination resolves it from this enclosing scope. */
%MACRO LOOP;
  %local I;
  %DO I=2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly medians into one table per model. */
data ts_est_7; set ts_7_2001-ts_7_2010; run;
data ts_est_8; set ts_8_2001-ts_8_2010; run;

You might also like