Monday, March 12, 2012

Sampling from a data set so that the sample data sets are similar to each other

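Purpose: draw 5 sub data sets (each with 1,000 obs) from a larger data set (test, a working copy of test0, which has 100,000 obs). The goal is to make sure that the mean of variable x in each sample is similar to the mean in the other samples. In this example, a set of samples is accepted when the mean of x in every group differs from the overall mean of x by less than .05 in relative terms (i.e. less than 5%); otherwise the sampling is repeated, up to 15 times.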




/* Build the source population test0: 100000 rows, each with a
   sequential key i (1..100000) and a pseudo-random value x taken
   from ranuni with seed 1. */
data test0;
  i = 0;
  do until (i = 100000);
    i + 1;            /* advance the row key */
    x = ranuni(1);
    output;
  end;
run;

/* Number of sub-samples to draw; used as the upper bound of the
   sampling loop inside the m_sample macro. */
%let n_dataset=5;
/* Intended number of observations per sub-sample (the post describes
   each sample as having 1000 obs). */
%let m_size=1000;

/*
  m_sample: draw &n_dataset systematic samples of &m_size observations
  each from WORK.TEST0 so that the mean of x in every group is close to
  the overall mean of x.

  Parameters
    max_tries  maximum number of complete sampling attempts (default 15).

  Inputs (defined outside the macro)
    WORK.TEST0   source data with key variable i and analysis variable x
    &n_dataset   number of samples to draw
    &m_size      observations per sample

  Outputs
    WORK.SAMPLE_1 ... WORK.SAMPLE_&n_dataset   the samples of the last attempt
    WORK.TEST      TEST0 plus GROUP (1..&n_dataset = sample number,
                   &n_dataset+1 = observations not selected by any sample)
    WORK.OUT_SUM   PROC SUMMARY means of x overall and per group
    WORK.SUMMARY   per-group mean, overall mean, relative difference, flag

  An attempt is accepted when every group, including the remainder
  group, has abs(group mean / overall mean - 1) < .05. On acceptance
  the macro returns immediately, leaving the data sets in WORK. If no
  attempt is accepted a WARNING is written to the log.
*/
%macro m_sample(max_tries=15);
  %local i j flag n_groups;

  /* Groups compared = the samples plus one remainder group. */
  %let n_groups = %eval(&n_dataset + 1);

  %do j = 1 %to &max_tries;

    /* Fresh working copy for this attempt; GROUP starts missing so the
       same WHERE filter can be used for every sample, the first included. */
    data test;
      set test0;
      group = .;
    run;

    /* TEST stays ordered by i after each merge below, so a single sort
       per attempt is enough. */
    proc sort data=test;
      by i;
    run;

    %do i = 1 %to &n_dataset;

      /* Sample only observations not yet assigned to a group (missing
         compares lower than 1). GROUP is dropped from the sample because
         it is always missing there. */
      proc surveyselect data=test(where=(group<1)) out=sample_&i(drop=group) method=sys sampsize=&m_size;
      run;

      proc sort data=sample_&i;
        by i;
      run;

      /* Tag the selected observations with the sample number. Only the
         key is read from the sample so no other variable is overwritten. */
      data test;
        merge test sample_&i(in=in1 keep=i);
        by i;
        if in1=1 then group=&i;
      run;

    %end;

    /* Everything never selected forms the remainder group. */
    data test;
      set test;
      if group=. then group=&n_groups;
    run;

    /* NOTE(review): nomissing is kept from the original program; GROUP
       has no missing values at this point. */
    proc summary data=test nomissing;
      class group;
      var x;
      output out=out_sum mean(x) = total;
    run;

    proc print data=out_sum width=min;
    run;

    /* Constant key so the overall row can be merged onto each group row. */
    data out_sum;
      set out_sum;
      id=1;
    run;

    /* The overall row (_type_=0) is listed first so that the per-group
       rows, read last, supply GROUP, _TYPE_ and _FREQ_; all_total is
       carried across the whole BY group. */
    data summary;
      merge out_sum(where=(_type_=0) rename=(total=all_total))
            out_sum(where=(_type_=1));
      by id;
      pct_diff=abs(total/all_total-1);
      if pct_diff<.05 then flag=1;
    run;

    proc print data=summary width=min;
    run;

    /* Count the groups that passed the tolerance check. */
    proc sql;
      select sum(flag) into :flag from summary;
    quit;

    /* Accept when every group passed. Leave the macro rather than ending
       the SAS session, so the WORK data sets remain available. */
    %if &flag = &n_groups %then %do;
      %put NOTE: m_sample accepted the samples drawn on attempt &j..;
      %return;
    %end;

  %end;

  %put WARNING: m_sample found no acceptable set of samples in &max_tries attempts.;

%mend;

/* Run the sampling macro; it reads test0 and the n_dataset macro
   variable and writes sample_1 ... sample_N to the WORK library. */
%m_sample;


No comments:

Post a Comment