1

I have a dataset with IDs, and each ID has multiple dates (actually datetime). I want to use PROC SQL to get the minimum datetime and also add 1 year to the minimum. I'm trying to do this all in one PROC SQL but have been fumbling and can't get this to work. Below are two attempts. Would appreciate any advice.

*** GENERATE RANDOM DATES AFTER JAN 1, 2012 AND CREATE DATE/TIME VARIABLE ***;
data have ;
    format date mmddyy10.  dt datetime15.;
    do person_id=100, 200, 300, 400, 500;
    do i = 1 to 100;
        jdate = int(1000 * ranuni(123987));
        date = mdy(1,1,2012) + jdate;
        dt = dhms(date, 0,0,0);
        output;
    end;
    end;
run;

*** TRY1: THIS DOES NOT WORK - GETS MIN DATE/TIME AND REMERGES WITH EVERY RECORD***;
proc sql;
    create table try1 as
    select min(dt) as index_dt  format=datetime15. ,
        (dt + 365*24*60*60) as followup_date  format=datetime15.  
    from have
;
quit;


*** TRY2: USE MIN() IN "HAVING" STATEMENT ***;
*** PROBLEMATIC IF PERSON_ID HAS MIN(DT) OCCUR MULTIPLE TIMES ***;
proc sql;
    create table try2 as
    select person_id,
        dt as index_dt format=datetime15.,  
        (dt + 365*24*60*60) as followup_date  format=datetime15.  
    from have
    group by person_id
    having dt=min(dt) 
;
quit;
SunnyRJ
  • 383
  • 1
  • 7

3 Answers3

0

Try using "select distinct person_id" instead of "select person_id" - that should help with your issue with duplicates. I'm not sure if SAS treats 365*24*3600 as the correct number of seconds per year, so that may be a contributing factor as well.

Sean
  • 1,120
  • 1
  • 8
  • 14
0

i don't think that you can do in only proc sql. I think to do that in this way:

*** GENERATE RANDOM DATES AFTER JAN 1, 2012 AND CREATE DATE/TIME VARIABLE ***;
data have ;
format date mmddyy10.  dt datetime15.;
do person_id=100, 200, 300, 400, 500;
do i = 1 to 100;
    jdate = int(1000 * ranuni(123987));
    date = mdy(1,1,2012) + jdate;
    dt = dhms(date, 0,0,0);
    output;
end;
end; 
run;

%macro do_elaboration(ds=);
/*count how many rows has my table */
%let dataset=&ds.;
%let DSID = %sysfunc(open(&dataset., IS));
%let nobs = %sysfunc(attrn(&DSID., NLOBS));
%let rc=%sysfunc(close(&DSID.));
/*loop over the number of rows*/
%do i=1 %to &nobs.;
    /*at each loop get one id*/
    data _NULL_;
       set &ds. (OBS=&i OBS=&i);
       call symputx("id", person_id);
    run;  
    /*with proc sql get the min_dt*/
    proc sql noprint;
       select min(dt) into:min_dt
       from &ds.
       where person_id=&id.
    ;
    quit;
    /*increment the min_dt with the function sas intnx*/
    data have_final_tmp;
        person_id = &id.;
        followup_date = intnx('dtyear',&min_dt,1);
        format followup_date datetime15.;
    run;
    /*put all id with the followup_date in only one dataset*/
    proc append base=have_final data=have_final_tmp force; 
    run;
%end;
%mend do_elaboration;
/*call the macro*/
%do_elaboration(ds=have);

I write the code very quickly and i don't test it so you should check it, but the concept is clear.

0

Try this:

proc sql;
create table try1 as
select 
 min(dt) as index_dt  format=datetime15. ,
 calculated index_dt + 365*24*60*60 as followup_date format=datetime15.
from have
;
quit;

The trick here is using the "calculated" keyword.

Also you may want to do the following to add a year on instead of your multiplications:

proc sql;
create table try1 as
select 
 min(dt) as index_dt  format=datetime15. ,
 input(compress(
  put(intnx('YEAR', datepart(calculated index_dt),1,'SAMEDAY'),date9.)||":"||
  put(timepart(calculated index_dt),time5.)),datetime15.) as followup_date format=datetime15.
from have
;
quit;
paul frith
  • 551
  • 2
  • 4
  • 21
  • Thanks - the "calculated" statement is what I needed! And a second Thanks for the INTNX code to add a year. – SunnyRJ May 03 '16 at 18:32