Wednesday, January 12, 2011

Reshape data set from WIDE to LONG


The original data is multi_wide as below. There are 10 variables each with three observation. What we want to do is to reshape the data from wide to long. Three ways are given to fulfill this job. The first one is using data step, which is available for a small number of variables; The second is ARRAY method, which is easy to write and understand; The last one is MACRO method.

data multi_wide;
  input famid faminc96 faminc97 faminc98 spend96 spend97 spend98
        debt96 $ debt97 $ debt98 $ ;
cards;
1 40000 40500 41000 38000 39000 40000 yes yes no
2 45000 45400 45800 42000 43000 44000 yes no  no
3 75000 76000 77000 70000 71000 72000 no  no  no
;
run;

First method, using DATA STEP:
data multi_long (keep=famid year faminc spend debt);
   set multi_wide;
   year=96;
   faminc=faminc96;
   spend=spend96;
   debt=debt96;
   output;
   year=97;
   faminc=faminc97;
   spend=spend97;
   debt=debt97;
   output;
   year=98;
   faminc=faminc98;
   spend=spend98;
   debt=debt98;
   output;
run;

Second, ARRAY method:
data multi_long (keep=famid year faminc spend debt);
   set multi_wide;
   array afaminc(96:98) faminc96-faminc98;
   array aspend(96:98) spend96-spend98;
   array adebt(96:98) debt96-debt98;
   do year = 96 to 98;
      faminc=afaminc[year];
         spend=aspend[year];
         debt=adebt[year];
         output;
       end;
run;

Third, MACRO:
%macro multilong(olddata, year1,year2);
   data multi_long (keep=famid year faminc spend debt);
      set &olddata;
         %do year=&year1 %to &year2;
            year=&year;
            faminc=faminc&year;
               spend=spend&year;
               debt=debt&year;
               output;
               %end;
%mend;

%multilong(multi_wide,96,98)


proc print data=&syslast;
run;

1 comment:

  1. data char_long (keep= id time name inc);
    set character;
    array aname(3) name_old name_now name_future;
    array ainc(3) inc_old inc_now inc_future;
    do time = 1 to 3;
    name = aname[time];
    inc=ainc[time];
    output;
    end;
    run;

    proc format;
    value t_format 1='old'
    2='now'
    3='future';
    run;

    proc print data=char_long;
    var id time name inc;
    format time t_format.;
    run;

    ReplyDelete