Thursday, January 13, 2011

Creating cummulative sum using DATA STEP and PROC SQL


options formdlim='-' nocenter nodate;

/*********************************************************
*Here is the original data ;                             *
*We want to calculate the cummulative sum for each famid;*
*********************************************************/
data kids;
  length kidname $ 4;
  input famid kidname birth_order wt;
cards;
1 Beth 1  60
1 Barb 3  20
4 Sam  1 100
4 Stu  2  90
1 Bob  2  40
3 Pete 1  60
3 Phil 3  20
2 Andy 1  80
3 Pam  2  40
2 Al   2  50
2 Ann  3  20
;
run;

/* First we need to sort the data */
proc sort data=kids;
by famid birth_order;
run;

/* Method 1 is using DATA STEP with first. */
data new;
   set kids;
   by famid;
   retain count cum_wt;
   if first.famid then do;
      count=0;    /* Reset count to */
         cum_wt=0;   /* Reset cumsum to */
       end;
       count+1;
       cum_wt+wt;
run;


/* The second method is using PROC SQL */
proc sql;
   select
       data1.famid as famid,
          data1.birth_order as birth_order,
       data1.wt as wt,
       (
        select sum(wt)
        from kids
        where famid        = data1.famid  and
                    birth_order >= data2.min_order and
                       birth_order <= data1.birth_order
       )   as cum_wt,
          (
           select count(famid)
              from kids
              where famid        = data1.famid  and
                    birth_order >= data2.min_order and
                       birth_order <= data1.birth_order
       ) as count
   from (select * from kids) as data1,
        (select famid, min(birth_order) as min_order from kids group by famid) as data2
   where data1.famid=data2.famid;
quit;


proc print data=&syslast;
run;

No comments:

Post a Comment