** **

** **

**The output is like:**

** **

** **

**In SAS, a loop is required to do this because of cumulative sum. **

** **

**data** test;

i=**1**;

p=**.99**;

output;

do i=**2** to **98**;

p=**.8**;

output;

end;

i=**99**;

p=**.2**;

output;

**run**;

**proc** **print** data=test;

**run**;

* if use proc rank, it cannot group data evenly because of ties;

**proc** **rank** data=test out=t group=**10**;

var p;

ranks rank;

**run**;

**proc** **print** data=t;

**run**;

* so need to mannuly do it;

%let dsid=%sysfunc(open(test)); *open the file;

%let nobs=%sysfunc(attrn(&dsid,nobs)); *count the obs in file; %let ngroup=10; %let overall_pct=.5; %put &nobs;

* data n_per_group only has one obs;

**data** n_per_group;

n_per_grp=int(&nobs/&ngroup.); * get quotient;

remainder=mod(&nobs,&ngroup.); * get remainder;

array ps {&ngroup} ps1-ps&ngroup;

keep ps1-ps&ngroup;

do i=**1** to &ngroup;

if remainder>**0** then do;

ps{i}=n_per_grp+**1**;

remainder=remainder-**1**;

end;

else ps{i}=n_per_grp;

end;

output;

**run**;

**proc** **print** data=n_per_group;

**run**;

* read in the only one obs, and keep it in PVM until the end by using if _n_=1 then do statement;

**data** out(drop=freq _count_ i p);

if _n_=**1** then do;

set n_per_group;

index=**1**;

end;

retain freq _count_ **0** index ;

array ps(&ngroup) ps1-ps&ngroup;

set test end=last;

* a liitle tricky: keep on adding p together unitl the # of added obs = n_per_group as expected;

* if the # of added obs = n_per_group, calculate the stats we want, otherwise, keep on adding;

if _count_=ps(index) then do;

num_obs=ps(index);

avg_pred_p=sum_p/num_obs;

lift=avg_pred_p/&overall_pct;

output;

index+**1**;

_count_=**0**;

sum_p=**0**;

end;

sum_p+p;

_count_+**1**;

if last then do;

num_obs=ps(index);

avg_pred_p=sum_p/num_obs;

lift=avg_pred_p/&overall_pct;

output;

end;

**run**;

**proc** **print** data=out;

**run**;

## It is very easy to do this in R

## a simple way

rm(list=ls())

x=c(.9,rep(.8,97),.2)

ngrp=10

nobs=rep(length(x)%/%ngrp, ngrp)+c(rep(1,length(x)%%ngrp), rep(0,ngrp-length(x)%%ngrp))

levl=rep(1:ngrp, nobs)

df=data.frame(cbind(x,levl))

aggregate(x~levl, df, mean)