Saturday, January 22, 2011

Get the different chars appearance frequency

/* FORMDLIM= replaces page breaks in the listing output with a delimiter line */
/* built from the given character (here a dash), so all results print together. */
options formdlim='-';

************* Another problem from MITBBS *****************;
* The question: data set test has one variable x that contains only 1 and 2 *;
* We want the frequency of each consecutive run length of 1 and of 2 *;
* In detail: how many times does 1 appear exactly once in a row *;
* how many times does 1 appear exactly twice in a row *;
* and so on *;
* up to how many times 2 appears n times in a row *;
* That is, the result should be: *;
/**************************************************************
                         x   counter      freq
                      ----------------------------
                         1         1         3
                         1         4         1
                         2         2         2
                         2         3         1
                         2         5         1
**************************************************************/


/* Sample input: a single numeric variable x holding a sequence of 1 and 2. */
data test;
  /* The trailing @@ holds the input line so every value on it becomes its own observation. */
  input x @@;
  datalines;
1 2 2 1 1 1 1 2 2 2 2 2 1 2 2 1 2 2 2
;
run;


/* Reduce TEST to one observation per run of identical consecutive x values; */
/* counter holds the length of that run.                                      */
data new;
   set test;
   /* NOTSORTED: BY groups are formed from adjacent equal values, not sorted order. */
   by x notsorted;
   /* Restart the run length at the first observation of every run. */
   if first.x then counter = 0;
   /* Sum statement: counter is retained across observations automatically. */
   counter + 1;
   /* Subsetting IF: only the last observation of a run (carrying the full length) is written. */
   if last.x;
run;

proc print data=new;
run;

/* Tally how many runs exist for every (x, run length) combination in NEW. */
proc sql;
   select x,
          counter,
          count(counter) as freq   /* number of runs of this length for this x */
      from new
      group by x, counter
      order by x, counter;
quit;


***  Without PROC SQL, the same result can be produced with these data steps: **;
/* Reduce TEST to one observation per run of identical consecutive x values; */
/* counter holds the length of that run.                                      */
data new;
   set test;
   /* NOTSORTED: BY groups are formed from adjacent equal values, not sorted order. */
   by x notsorted;
   /* Restart the run length at the first observation of every run. */
   if first.x then counter = 0;
   /* Sum statement: counter is retained across observations automatically. */
   counter + 1;
   /* Subsetting IF: only the last observation of a run (carrying the full length) is written. */
   if last.x;
run;

proc print data=new;
run;

/* Sort the run records in place so equal (x, counter) pairs become adjacent. */
proc sort data=new;
  by x counter;
run;

/* Collapse every (x, counter) group into one observation whose freq is the group size. */
data want;
  set new;
  by x counter;
  /* first.counter is also set whenever x changes, so it alone marks the start of a group. */
  if first.counter then freq = 0;
  /* Sum statement: freq is retained across observations automatically. */
  freq + 1;
  /* Subsetting IF: write only the last observation of the group, which carries the total. */
  if last.counter;
run;

proc print data=want;
run;

No comments:

Post a Comment