**************************************************************************** * This macro reads a data dictionary file (Excel) and creates a DATA step * to read the data and (optionally) check for outliers.) * * See www.alanelliott.com/DDENTRY ****************************************************************************; %macro DDENTRY(DDNAME=, OUTNAME=, ENAME=, SHEET=, DSET=); %* Create a random name for temporary imported file; %LET TMPDICT="DICT"||compress( hour(time())|| minute(time()) || int(second(time())) ); %LET TMPFMT="OFMT"||compress( hour(time())|| minute(time()) || int(second(time())) ); data tmp1; x=&TMPDICT; call symput('WORKDICT',x); x=&TMPFMT ; call symput('OUTFMT',x); run; PROC IMPORT OUT= &WORKDICT DATAFILE= "&DDNAME" DBMS=EXCEL REPLACE; SHEET="data_dictionary"; GETNAMES=YES; MIXED=YES; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES; RUN; QUIT; filename outfile "&outname" ; data tmp &OUTFMT ; set &WORKDICT end=DICTLAST; format finfo $100.;format tmp $40. tmp2 $40. dot $1. x $200.; dot="."; if did_format=. then DID_FORMAT=0; retain DID_FORMAT; tmp2=0; file outfile ; if _N_ eq 1 then do; PUT "**********************************************************************"; PUT "* This code was created by the READ_DD.SAS program *"; PUT "* See www.alanelliott.com/DDENTRY *"; PUT "**********************************************************************"; PUT " "; end; *----------------------------------------------PROC FORMATS; if outformat ne "" then do; if substr(upcase(outformat),1,5)="MAKE:" then do; outformat=substr(outformat,6); iformat=1; if did_format=0 then do; PUT "***************** CREATE A SAS FORMAT USING PROC FORMAT, IF DEFINED;"; PUT "PROC FORMAT;"; did_format=1; end; finfo=scan(outformat,1,","); * FIRST INSTANCE OF FORMAT INFO; if upcase(data_type)="NUMBER" or upcase(data_type)="NUMERIC" then do; if anydigit(SUBSTR(variable,LENGTH(variable))) gt 0 then do; tmp= CATS("FMT", variable,"_"); end; else do; tmp= CATS("FMT", variable); end; U=CATT(cat(variable," ", Tmp),dot); put "VALUE " tmp @30 finfo; end; if upcase(data_type)="TEXT" or upcase(data_type)="STRING" then do; if anydigit(SUBSTR(variable,LENGTH(variable))) gt 0 then do; tmp= CATS("$FMT", variable,"_"); end; else do; tmp= CATS("$FMT", variable); end; U=CATT(cat(variable," ", TMP),dot); put "VALUE " tmp @30 finfo; end; iformat=2; tmp=compress(scan(outformat,iformat,",")," ") ; do while (tmp ne ""); finfo=scan(outformat,iformat,","); iformat+1; tmp=scan(outformat,iformat,",") ; PUT @30 finfo; end; PUT ";"; fmt_knt+1; x="FORMAT " || catt(u,";"); output &OUTFMT; end; end; run; %*-----------------------------------------------------------LABELS; data _NULL_;set &WORKDICT; %let dsid = %sysfunc(open(&WORKDICT)); %let nobs = %sysfunc(attrn(&dsid,nobs)); file outfile mod; if _N_=1 then do; PUT "****************** IMPORT THE DATA: &SHEET FROM &ENAME ;"; PUT "PROC IMPORT OUT= &DSET"; PUT "DATAFILE= '&ENAME'"; PUT "DBMS=EXCEL REPLACE;"; PUT "RANGE='&SHEET$';"; PUT "GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES;"; put "RUN;"; put "DATA &DSET; SET &DSET ;"; put "FORMAT RANGE_ERROR $100.;"; put "***************** CREATE LABEL STATEMENTS, IF DEFINED ;"; put "LABEL "; end; * do not output blank labels; * remove single quotes from label; itmp= find(label,"'"); if itmp gt 0 then do; label=cats(substr(label,1,itmp-1), "_", substr(label,itmp+1)); end; if trim(label) ne ' ' then do; put @10 variable @30 " = " "'" label"'" ; end; if _N_=&nobs then do; put ';' ; end; run; %*-----------------------------------------------------------INFORMAT; data _NULL_;set &WORKDICT; %let dsid = %sysfunc(open(&WORKDICT)); %let nobs = %sysfunc(attrn(&dsid,nobs)); file outfile mod; if _N_=1 then do; put "***************** CREATE INFORMAT STATEMENTS, IF DEFINED ;"; end; if informat ne "" then do; put "INFORMAT " variable informat ";" ; end ; run; %*--------------------------------------------------------------OUT FORMATS; data _NULL_;set &WORKDICT; %let dsid = %sysfunc(open(&WORKDICT)); %let nobs = %sysfunc(attrn(&dsid,nobs)); file outfile mod; if _N_=1 then do; put "***************** CREATE FORMAT STATEMENTS, IF DEFINED ;"; end; if (outformat ne "" ) and (substr(upcase(outformat),1,5) NE "MAKE:") then do; put "FORMAT " variable outformat ";" ; end ; run; %* IF THE OUTPUT FORTMAT DATA SET EXISTS, THEN PUT DEFINED FORMATS; %if %sysfunc(exist(&OUTFMT)) ne 0 %then %do; DATA _NULL_;set &OUTFMT; file outfile mod; if _N_=1 then PUT "******************************DEFINED FORMATS, IF ANY;"; put x; run; %END; %*---------------------------------------------------------MISSING VALUES; data _NULL_;set &WORKDICT; %let dsid = %sysfunc(open(&WORKDICT)); %let nobs = %sysfunc(attrn(&dsid,nobs)); file outfile mod; if _N_=1 then do; put "********************** CREATE SAS MISSING VALUES ASSIGNMENTS IF DEFINED ;"; end; if missing_values ne "" then do; if upcase(data_type)="NUMBER" then do; put "IF " variable " = " missing_values "then " variable " = .;"; end; if upcase(data_type)="TEXT" then do; tmp=CATS("'",missing_values,"'"); cnull=CATS("'","'"); put "IF " variable " = " missing_Values: $quote. "then " variable " = " '""' ";"; end; end; ; run; %* ---------------------------------------------------------CODES AND RANGES; data aaa;set &WORKDICT; format tmp $20. outstring $100.; file outfile mod; prefx="AR_"; blanks =repeat(" ",22); array listarray(1:1000) $ ; if _N_=1 then do; put "********************** RANGE CHECKING;"; end; if check_range ne "" then do; %*------------------CREATE A CHECK FOR A RANGE OF VALUES; if substr(upcase(check_range),1,6)="RANGE:" then do; RANGE=substr(check_range,7); minrange=scan(range,1," "); maxrange=scan(range,3," "); put "IF " variable " LT " minrange " OR " variable " GT " maxrange " THEN RANGE_ERROR = CATS(RANGE_ERROR, '/'," VARIABLE: $QUOTE. ",'/');"; end; %*------------------CREATE A CHECK FOR A LIST OF VALUES; if substr(upcase(check_range),1,5)="LIST:" then do; put "********************** LIST CHECKING for variable:" variable ";"; DLIST=substr(check_range,6); ilist=1; do while (scan(dlist,ilist) ne ""); listarray(ilist)=scan(dlist,ilist); ilist=ilist+1; end; ilist=ilist-1; if upcase(data_type)="NUMBER" then do; outstring=CAT("IF ",variable," NOT IN("); tmp=""; do L=1 to ilist; tmp=cats(tmp,listarray(L)); if L lt ilist then tmp=cats(tmp,","); end; outstring=cats(outstring,tmp); outstring=cats(outstring, ") THEN RANGE_ERROR = CATS(RANGE_ERROR, '" , "/",variable ,"/", "');"); put outstring; end; if upcase(data_type)="TEXT" then do; outstring=CAT("IF ",variable," NOT IN("); tmp=""; do L=1 to ilist; tmp2=listarray(L); tmp=cats(tmp,"'",tmp2,"'"); if L lt ilist then tmp=cats(tmp,","); end; outstring=cats(outstring,tmp); outstring=cats(outstring, ") THEN RANGE_ERROR = CATS(RANGE_ERROR, '","/",variable,"/","');"); put outstring; end; end; end; run; data _null_; file outfile mod; put "********************** END AUTOMATIC DATA STEP SETUP;"; put "RUN;"; PUT "TITLE 'FIRST 20 records from the &DSET file';"; PUT "PROC PRINT DATA=&DSET (OBS=20) LABEL ;"; PUT "RUN;"; *..............................NOTE APPLY FORMATS --ALAN; run; quit; %mend;