假如变量School有1000个观测值,其中unique的观测值有50个,也就是有50个不同的学校。在linear regression中要把School这个变量变成50个dummy variable才可以(实际用49个,因为有一个是reference)。怎么做?一个SAS Macro如下:
/*
  DUMMY - expand a categorical variable into one 0/1 indicator per level.

  Parameters
    dsn    = data set to read. It is replaced in place by a copy that
             carries the extra indicator columns.
    var    = variable whose distinct non-missing values become indicators.
    prefix = text put in front of each level value to form the name of
             its indicator variable (&prefix.<value>).

  Side effects
    Creates/overwrites data set X (one row per distinct level of &var)
    and prints it.

  Notes
    - All levels get an indicator; dropping a reference level is left to
      the caller.
    - NOTE(review): level values are used verbatim as variable-name
      suffixes, so they are assumed to contain only characters that are
      legal in a SAS name - confirm for your data.
*/
%macro DUMMY ( dsn = ,
               var = ,
               prefix = ) ;
  /* Keep the loop index and level count out of the caller's scope. */
  %local i num ;

  /* One row per distinct non-missing level of &var. */
  proc summary data = &dsn nway ;
    class &var ;
    output out = x ( keep = &var ) ;
  run ;

  proc print data = x ;
  run ;

  data _null_ ;
    /* NOBS= is filled in at compile time, so reading it before SET
       still yields the level count when X has no rows at all. */
    if _n_ = 1 then call symputx ( 'num', n_levels, 'L' ) ;
    set x nobs = n_levels ;
    /* c1..cN hold the level values; 'L' forces them into this macro's
       local symbol table. */
    call symputx ( cats ( 'c', _n_ ), &var, 'L' ) ;
  run ;

  /* Nothing to expand: leave &dsn untouched instead of failing on a
     zero-length array. */
  %if &num = 0 %then %do ;
    %put WARNING: DUMMY found no non-missing levels of &var in &dsn - no indicator variables were created. ;
    %return ;
  %end ;

  data &dsn ;
    set &dsn ;
    /* The extra semicolon after the %END closes the ARRAY statement. */
    array ct ( &num )
      %do i = 1 %to &num ;
        &prefix&&c&i
      %end ; ;
    /* A comparison evaluates to 1 or 0, which is the indicator value. */
    %do i = 1 %to &num ;
      ct ( &i ) = ( &var = "&&c&i" ) ;
    %end ;
  run ;
%mend DUMMY ;
/* Expand variable NAME of data set NAME into indicator columns named
   sic_<value>, then list the most recently created data set. */
%DUMMY ( dsn = name, var = name, prefix = sic_ )
proc print data = _last_ ;
run ;
quit ;
更直接一点,源程序分解:
/* Demo input: 12 observations of a single character variable NAME,
   8 distinct values, some repeated. */
data name ;
  length name $ 8 ;
  input name $ ;
  datalines ;
zhao
qian
sun
li
zou
wu
zeng
wang
zhao
wang
sun
wu
;
run ;
/* Step-by-step version of the level-collection stage for data set NAME. */

/* X receives one row per distinct value of NAME. */
proc summary data = name nway ;
  class name ;
  output out = x ( keep = name ) ;
run ;

/* List the distinct values. */
proc print data = x ;
run ;

/* Store the level count in macro variable NUM and the i-th level value
   in macro variable C<i>. */
data _null_ ;
  set x nobs = n_levels ;
  if _n_ = 1 then
    call symput ( 'num', strip ( put ( n_levels, best. ) ) ) ;
  call symput ( cats ( 'c', _n_ ), strip ( name ) ) ;
run ;
/* Write the level count and then each of the eight level values to the
   log. The first statement had been mangled to "%put #" with no
   terminating semicolon, which swallowed the following %put into its
   text; it is restored to print &num. */
%put &num;
%put &c1;
%put &c2;
%put &c3;
%put &c4;
%put &c5;
%put &c6;
%put &c7;
%put &c8;
No comments:
Post a Comment