Date: Mon, 14 Jul 2008 07:38:55 -0700
Reply-To: Bill McKirgan <Bill.McKirgan@GMAIL.COM>
Sender: "SAS(r) Discussion" <SAS-L@LISTSERV.UGA.EDU>
From: Bill McKirgan <Bill.McKirgan@GMAIL.COM>
Organization: http://groups.google.com
Subject: Re: 20 character limit for "proc compare"
Content-Type: text/plain; charset=ISO-8859-1
On Jul 14, 8:09 am, RolandRB <rolandbe...@hotmail.com> wrote:
> Is it possible to override the 20 character limit for "proc compare"
> text fields?
Hi Roland,
I don't think there's a way to override that limit. I use compare for
checking double-entered data. This is easy if it is all numeric, but
I've had to work on some projects where we have mixture of character
and numeric. Some of these have long character variables...medication
names, etc. I've found that proc compare does not do all I need it to
do in these situations, and so I output the comparison results to
other datasets. Of course it's not simple and easy.
Not included, below are the steps for using contents to obtain lists
of character and numeric variables in the datasets to be compared.
The compare program example I include below begins with DE1 and DE2
ready for comparision. INCLUDE files are used to pass the variable
lists into succeeding steps to make a final dataset 'TEST' that has
the re-shaped proc contents output.
I made this program because of the text handling limitations of
compare, and also because compared dates would be listed as
unformatted sas dates. The use of the PDF output destination prevents
the long character variable from being truncated by wrapping lines
within the cell. Now, when I compare subject data for one or a dozen
different datasets they all get rolled up in to this report that lists
data entry discrepancies for each subject entry, by data table, and by
variable within each table. All data were combined into DE1 and DE2
prior to the steps below.
If you'd like to see the entire program I can email it to you. What
I've provided below is probably confusing enough, but I'm hoping with
my explainations some of it might be helpful.
Bill
/* RUN THE BASIC COMPARISION AND OUTPUT TO 'COMPARED' DATASET
*/
PROC COMPARE
noprint
BASE = DE1
COMPARE = DE2
out = compared outnoequal outbase outcomp OUTDIF
;
BY &whatid.;
RUN;
/* SPLIT-OUT THE CHARARACTER VARIABLES FOR SEPARATE REPORT
PROCESSING */
data comp_txt; set compared;
%include "&progpath\varlist_chr.sas";
;
run;
/* prepare to report differences in text variables */
proc transpose
data = comp_txt
out = transcomp_txt
;
by &whatid. ;
var
%include "&progpath\varlist_chr2.sas";
;
run;
/* keep the records with diffs between de1 and de2 */
data drpt_txt;
set transcomp_txt
( rename=
( col1 = DE1
col2 = DE2
col3 = DIFF
_NAME_ = NAME
_LABEL_ = LABEL
));
if index(DIFF,'X');
order=_n_;
run;
/* OVERWRITE SAME DATASET TO ADJUST LENGTH AND FORMAT OF
'NAME' */
DATA DRPT_TXT; LENGTH NAME $32;
SET DRPT_TXT;
format NAME $32.;
RUN;
proc sort; by name order; run;
/* INSERT the VARNUM variable next as it will be needed for
final report */
data drpt_txt_varnum;
merge drpt_txt (IN=INRPT)
de1cons_chr
;
by name;
IF INRPT ;
RENAME DIFF=DIFF_STR
DE1 = DE1_CHR
DE2 = DE2_CHR
;
run;
PROC SORT;
BY &whatid. VARNUM ORDER;
RUN;
/* SPLIT-OUT THE NUMERIC VARIABLES FOR SEPARATE REPORT PROCESSING
*/
data comp_num; set compared;
%include "&progpath\varlist_num.sas";
;
run;
/* prepare to report differences in NUMERIC variables */
proc transpose
data = comp_num
out = transcomp_num
;
by &whatid. ;
var
%include "&progpath\varlist_num2.sas";
;
run;
/* keep the records with diffs between de1 and de2 */
data drpt_num;
set transcomp_num
( rename=
( col1 = DE1
col2 = DE2
col3 = DIFF
_NAME_ = NAME
_LABEL_ = LABEL
));
IF DIFF NE .E;
if name = "&WHATID." then delete; /* removing header
variable (not a comparision) */
order=_n_;
run;
/* OVERWRITE SAME DATASET TO ADJUST LENGTH AND FORMAT OF
'NAME' */
DATA DRPT_num; LENGTH NAME $32;
SET DRPT_num;
format NAME $32.;
RUN;
proc sort; by name order; run;
/* INSERT the VARNUM variable next as it will be needed for
final report */
data drpt_num_varnum;
merge drpt_num (IN=INRPT)
de1cons_num
;
by name;
IF INRPT ;
RENAME DIFF=DIFF_num
DE1 = DE1_NUM
DE2 = DE2_NUM
;
run;
PROC SORT;
BY &whatid. VARNUM ORDER;
RUN;
options ls=200;
/* need to insert section letter as new variable for sorting */
data sections; set bcs.z_varinfo;
keep name section label;
section=substr(label,1,1);
*** put letter name @20 label;
run;
proc sort;
by name;
run;
proc sort data= drpt_num_varnum out= tmp_dnv;
by name;
run;
proc sort data= drpt_txt_varnum out= tmp_dtv;
by name;
run;
data tmp_dnv_labs;
merge
tmp_dnv (in=int)
sections;
by name;
if int;
run;
data tmp_dtv_labs;
merge
tmp_dtv (in=int)
sections;
by name;
if int;
run;
data drpt_num_varnum; set tmp_dnv_labs; run;
proc sort; by &whatid. section varnum order; run;
data drpt_txt_varnum; set tmp_dtv_labs; run;
proc sort; by &whatid. section varnum order; run;
data test;
merge drpt_num_varnum
drpt_txt_varnum
;
by &whatid. section varnum order;
if lowcase(name)="&WHATID." then delete;
/* make string variables to report date or time
differences with formats instead of SAS date time numbers */
if index(LOWCASE(name),'time') then do;
de1_CHR = put(de1_NUM,time5.);
de2_CHR = put(de2_NUM,time5.);
end;
if index(LOWCASE(name),'date') then do;
de1_CHR = put(de1_NUM,mmddyy10.);
de2_CHR = put(de2_NUM,mmddyy10.);
end;
format de1_num de2_num 5.0;
count = 1;
/* get rid of &WHATID. lines (NUM VARNUM part of merge is the
source of this... */
if name = "&WHATID." then delete;
run;
/* now, we can use TEST to filter-out
all records that are not exactly equal. */
data goodones;
merge test (in=inerr)
de2 (in=in2)
;
by &whatid.;
if inerr then delete;
COUNT=1;
/* make variables needed in reports that follow */
/* here data entry is done by the same person on different
days
we will just report the same number and use the
inclusion criteria date */
&whatlib._de_1 = 1;
&whatlib._de_2 = 1;
&whatlib._date = datepart(incl_date);
format &whatlib._date mmddyy10.;
run;
/* BELOW, Notice that &WHATLIB. is used to populate part of filenames
*/
ODS PDF FILE="&RPTPATH\&WHATLIB._compare_de1_de2_&DATESTAMP..PDF";
OPTIONS PAGENO=1;
footnote1 "DE1 and DE2 are done by the same person on different days;
therefore, the DE ID number is always equal";
TITLE2 "LISTING OF CASES ALREADY COMPARED AND SAVED TO FINAL
DATASET" ;
DATA CURRENT; set goodones; /* *** started CURRENT on 6/3/08 (bm); */
*** SET HERE.&WHATLIB._Q_DATA;
COUNT=1;
RUN;
data newones;
merge current (in=inc)
goodones (in=ing);
by &whatid.;
if ing and not inc;
run;
PROC PRINT DATA=CURRENT;
ID &whatid.;
var &WHATLIB._de_1 &WHATLIB._de_2 &WHATLIB._date;
SUM COUNT;
run;
TITLE2 'COMPARISON OF 1ST- VS. 2ND- PUNCH DATA ENTRY' ;
title2 "ALL &WHATLIB. Data that compare exactly equal (INCLUDES data
already in Final Data: file=&WHATLIB._q_data)";
proc print data=goodones;
ID &whatid.;
var &WHATLIB._de_1 &WHATLIB._de_2 &WHATLIB._date;
SUM COUNT;
run;
title2 "New &WHATLIB. Data that compare exactly equal";
title3 "(New data to be added to Final Data with this update:
file=&WHATLIB._q_data)";
proc print data=newones;
ID &whatid.;
var &WHATLIB._de_1 &WHATLIB._de_2 &WHATLIB._date;
SUM COUNT;
run;
/
**********************************************************************/
/
**********************************************************************/
/
**********************************************************************/
title2 "&WHATLIB. Data that have at least one difference between DE1
and DE2";
title3 "Review coded instrument and determine what the correct value
should be and correct";
title4 "Re-run comparison report and repeat review until all
discrepancies are resolved";
PROC PRINT DATA=TEST UNIFORM;
ID &whatid.;
BY &whatid.;
PAGEBY &whatid.;
VAR NAME label DE1_NUM DE2_NUM DE1_CHR DE2_CHR;
sum count;
sumby &whatid.;
RUN;
ODS PDF CLOSE;
RUN; QUIT;
/* reorient output */
options orientation= portrait ;
run;