The output is like:
In SAS, a loop is required to do this because of cumulative sum.
data test;
i=1;
p=.99;
output;
do i=2 to 98;
p=.8;
output;
end;
i=99;
p=.2;
output;
run;
proc print data=test;
run;
* if use proc rank, it cannot group data evenly because of ties;
proc rank data=test out=t group=10;
var p;
ranks rank;
run;
proc print data=t;
run;
* so need to mannuly do it;
%let dsid=%sysfunc(open(test)); *open the file;
%let nobs=%sysfunc(attrn(&dsid,nobs)); *count the obs in file; %let ngroup=10; %let overall_pct=.5; %put &nobs;
* data n_per_group only has one obs;
data n_per_group;
n_per_grp=int(&nobs/&ngroup.); * get quotient;
remainder=mod(&nobs,&ngroup.); * get remainder;
array ps {&ngroup} ps1-ps&ngroup;
keep ps1-ps&ngroup;
do i=1 to &ngroup;
if remainder>0 then do;
ps{i}=n_per_grp+1;
remainder=remainder-1;
end;
else ps{i}=n_per_grp;
end;
output;
run;
proc print data=n_per_group;
run;
* read in the only one obs, and keep it in PVM until the end by using if _n_=1 then do statement;
data out(drop=freq _count_ i p);
if _n_=1 then do;
set n_per_group;
index=1;
end;
retain freq _count_ 0 index ;
array ps(&ngroup) ps1-ps&ngroup;
set test end=last;
* a liitle tricky: keep on adding p together unitl the # of added obs = n_per_group as expected;
* if the # of added obs = n_per_group, calculate the stats we want, otherwise, keep on adding;
if _count_=ps(index) then do;
num_obs=ps(index);
avg_pred_p=sum_p/num_obs;
lift=avg_pred_p/&overall_pct;
output;
index+1;
_count_=0;
sum_p=0;
end;
sum_p+p;
_count_+1;
if last then do;
num_obs=ps(index);
avg_pred_p=sum_p/num_obs;
lift=avg_pred_p/&overall_pct;
output;
end;
run;
proc print data=out;
run;
## It is very easy to do this in R
## a simple way
rm(list=ls())
x=c(.9,rep(.8,97),.2)
ngrp=10
nobs=rep(length(x)%/%ngrp, ngrp)+c(rep(1,length(x)%%ngrp), rep(0,ngrp-length(x)%%ngrp))
levl=rep(1:ngrp, nobs)
df=data.frame(cbind(x,levl))
aggregate(x~levl, df, mean)
ReplyDeleteAll are saying the same thing repeatedly, but in your blog I had a chance to get some useful and unique information, I love your writing style very much, I would like to suggest your blog in my dude circle, so keep on updates.
Manual testing training in Chennai
Selenium training in Chennai
Software testing training in Chennai
thanks for sharing this information
ReplyDeleteaws training in sholinganallur
azure training in sholinganallur
data science training in siruseri
best data science training in omr
devops training in omr
best devops training institute in omr
java training in omr
python training in omr
Thanks a very sharing informative information
ReplyDeleteadvanced sas training in delhi
advanced sas training in noida
Thanks For Sharing nice information Its Very Much Use full to all Data Science Aspirants
ReplyDeletehttps://www.analyticspath.com/
Thanks for sharing this valuable post its very informative apart from that if anyone looking for e accounting institute in delhi so Contact Here-+91-9311002620 Or Visit Website- https://www.htsindia.com/Courses/Tally/e-accounting-training-course
ReplyDeleteThanks for sharing this kind of post its very helpful for me, hey if anyone looking for best ms office or advance excel training institute in delhi so join with us for further more details Contact Here-+91-9311002620 Or Visit Website- https://htsindia.com/Courses/business-analytics/adv-excel-training-course
ReplyDelete