Date: Fri, 9 Dec 2005 03:08:32 -0800
Reply-To: Anonymous user <Number_42@CARAMAIL.COM>
Sender: "SAS(r) Discussion" <SAS-L@LISTSERV.UGA.EDU>
From: Anonymous user <Number_42@CARAMAIL.COM>
Organization: http://groups.google.com
Subject: How to find duplicate records
Content-Type: text/plain; charset="iso-8859-1"
Hello,
i have no question this time. I found out myself a solution, but since
the problem of finding duplicates is so basic and i have not found how
to do it on this group, i post my source code for the next one:
option mprint;
/*
** tested on SAS 8.02
** Description:
** - set_to_search: the set where to search for duplicates
** - key: the key to search duplicates for, can be a list of column
names!!!!
** - duplicates_set: the result set, to put only the duplicates
**
** "key" can be composed of several columns (no first/last)
** "set_to_search" does not need a numeric column (like for proc means)
*/
%macro find_duplicates(set_to_search, key, duplicates_set);
%local sorted_set filter_set;
%local i key_column;
%let sorted_set = __sorted_set;
%let filter_set = __filter_set;
proc sort data = &set_to_search out = &sorted_set; by &key; run;
data &filter_set (keep=&key);
set &sorted_set;
if
%let i = 1;
%let key_column = %scan(&key, &i);
%do %while ("&key_column" ne "");
/* first test ? */
%if &i > 1 %then %do;
and
%end;
(&key_column = lag(&key_column))
%let i = %eval(&i + 1);
%let key_column = %scan(&key, &i);
%end;
then output;
run;
/* remove duplicates "key of duplicates" */
proc sort data = &filter_set nodup; by &key; run;
/* get the data with keys */
data &duplicates_set;
merge
&sorted_set (in=in1)
&filter_set (in=in2);
by
&key;
if in1 and in2;
run;
proc datasets nolist;
delete &sorted_set;
delete &filter_set;
run;
%mend find_duplicates;
/* what i have */
data data;
input a $ b $ c $;
datalines;
a1 b1 cat
a1 b2 dog
a1 b1 .
a2 b1 dog
;
/* what i want */
/*
a b c
a1 b1 cat
a1 b1 .
*/
%find_duplicates(data, a b, dups);
HTH,
regards