/* Output layout: 112 characters per line (LS) and 106 lines per page (PS). */
options ls=112 ps=106;
/* MPRINT writes the SAS statements generated by macro execution to the log, which makes
   the code produced by the SHUFFLE macro visible. SPOOL asks SAS to keep submitted
   statements in a utility file in the WORK library. */
options mprint spool;
/* CYCLING refers to the current working directory; the input and output data sets of
   this program are read from and written to that library. */
libname cycling '.';
**** This program estimates regression coefficients from all firms in an industry group
except firm i, and then applies those coefficients to firm i to obtain a predicted value.
Industry groups are numbered 1 to 599 in the variable groupnum. Each industry group has n
observations, and that count is stored in the variable dumb1. For each industry group
(1-599) the program repeats the regression dumb1 times, holding out a different
observation each time. The first part of the program is the macro. Further down is where
the data is read in and where the results are written out. The input file is currently
called feedall.sas7bdat and the output file is currently called uerefpred.sas7bdat **;
%macro shuffle;
  /*
   * SHUFFLE: leave-one-out prediction of OM within each industry group.
   *
   * Reads   : WORK.ALL (must contain groupnum, dumb1, om and the six regressors
   *           pyom nonsiacc pynonsiacc chgsales chglow ato).
   * Writes  : WORK.SAVEDATA, one appended row per held-out observation, carrying the
   *           original variables plus UE_TEST (the out-of-sample predicted value).
   * Scratch : WORK.INDGROUP1 - WORK.INDGROUP599, WORK.MOST, WORK.LONE, WORK.WORK.
   *
   * For every group number 1..599 the group rows are copied to INDGROUPi and the row
   * count B is taken from dumb1. The inner loop then runs B times; each pass fits the
   * regression on the first B-1 rows, scores the B-th row, and moves that row to the
   * front of INDGROUPi so that a different row is held out on the next pass.
   */
  %local i loop b;

  %do i = 1 %to 599;

    /* Reset the count for every group: CALL SYMPUT below only fires when at least one
       row belongs to the group, so without this an empty group would reuse the count
       left over from the previous group. */
    %let b = 0;

    /* Subset to the current group in a single pass over ALL. */
    data indgroup&i;
      set all;
      if groupnum ne &i then delete;
      /* B = dumb1 of the group (10. format, so at most 9999999999). */
      call symput('b', put(dumb1, 10.));
    run;  /* explicit step boundary: B must be set before the inner loop bound is read */

    proc means data=indgroup&i;
    run;

    %do loop = 1 %to &b;  /* b is the number of industry-year observations */

      /* MOST: the first B-1 rows, used to estimate the regression. */
      data most;
        set indgroup&i;
        if _n_ = &b then delete;
      run;

      /* LONE: the held-out B-th row. */
      data lone;
        set indgroup&i;
        if _n_ < &b then delete;
      run;

      /* Start from all-missing coefficients so that, whenever no regression can be
         fitted, the held-out row gets a missing prediction instead of one built from
         coefficients left behind by an earlier pass or another industry group. */
      data work;
        intercept  = .;
        pyom       = .;
        nonsiacc   = .;
        pynonsiacc = .;
        chgsales   = .;
        chglow     = .;
        ato        = .;
      run;

      /* With a single row in the group MOST is empty, so there is nothing to fit. */
      %if &b > 1 %then %do;
        /* Fit on MOST; the estimates replace the placeholder in WORK. */
        proc reg data=most outest=work noprint;
          model om = pyom nonsiacc pynonsiacc chgsales chglow ato;
        run;
        quit;
      %end;

      /* Rename the estimates so they do not collide with the regressor columns of
         LONE when the two data sets are merged. */
      data work;
        set work;
        pyom_coeff        = pyom;
        nonsiacc_coeff    = nonsiacc;
        pynonsiacc_coeff  = pynonsiacc;
        chgsales_coeff    = chgsales;
        negchgsales_coeff = chglow;
        ato_coeff         = ato;
        keep intercept pyom_coeff nonsiacc_coeff pynonsiacc_coeff
             chgsales_coeff negchgsales_coeff ato_coeff;
      run;

      /* Attach the coefficients to the held-out row and compute its predicted value. */
      data lone;
        merge lone work;
        ue_test = intercept
                + (pyom_coeff        * pyom)
                + (nonsiacc_coeff    * nonsiacc)
                + (pynonsiacc_coeff  * pynonsiacc)
                + (chgsales_coeff    * chgsales)
                + (negchgsales_coeff * chglow)
                + (ato_coeff         * ato);
        drop intercept pyom_coeff nonsiacc_coeff pynonsiacc_coeff
             chgsales_coeff negchgsales_coeff ato_coeff;
      run;

      /* Rotate: the row just scored becomes row 1, so the previous row B-1 is the one
         held out on the next pass. */
      data indgroup&i;
        set lone most;
        /* NOTE(review): only has an effect if the input carries a variable named b;
           otherwise SAS just logs a warning. Kept to preserve existing behaviour. */
        drop b;
      run;

      /* Collect the scored row. */
      proc append base=savedata data=lone;
      run;

    %end;  /* inner loop over held-out observations */
  %end;    /* outer loop over industry groups */
%mend shuffle;
/* Driver: load the input, run the leave-one-out macro, and save the predictions. */

/* The main working data set is called ALL; it is a copy of CYCLING.FEEDALL. */
data all;
  set cycling.feedall;
run;

/* Appends one scored row per observation to WORK.SAVEDATA. */
%shuffle;

/* Summary statistics of the collected results, for a quick sanity check. */
proc means data=savedata;
run;

/* Write the results to the permanent library as UEREFPRED. */
data cycling.uerefpred;
  set savedata;
run;

endsas;