Date: Wed, 12 May 2010 14:20:52 -0400
Reply-To: Gerhard Hellriegel <gerhard.hellriegel@T-ONLINE.DE>
Sender: "SAS(r) Discussion" <SAS-L@LISTSERV.UGA.EDU>
From: Gerhard Hellriegel <gerhard.hellriegel@T-ONLINE.DE>
Subject: Re: DI Studio algorithm to derive optimal hashexp value
Step source for a lookup:
/*=========================================================================
===*
* Step: Lookup
A5LNPBVO.AM003ZJE *
* Transform:
Lookup *
*
Description:
*
*
*
* Source Tables: uni_partner_04 - basedwh.uni_partner_04
A5LNPBVO.AC002Q05 *
* Lookup Target - work.W5SKP4ZP
A5LNPBVO.AU002MX3 *
* Target Table: Lookup Target - work.W5SJ58DM
A5LNPBVO.AU002M5M *
*==========================================================================
==*/
/* Access the data for basedwh_wwk */
LIBNAME basedwh BASE "/sasdata/prod/ww_project/wwk/dwhbase_wwk" ;
%rcSet(&syslibrc);
%let trans_rc = 0;
/* make the runTime macro variable to store the run time of the job */
%let runTime = %sysfunc(datetime(), );
proc datasets lib = work nolist nowarn memtype = (data view);
delete W5SJ58DM;
quit;
/*---- Begin DATA step to perform lookups ----*/
DATA work.W5SJ58DM
(keep = AEND_KEZ ANR ANREDE ANREDE_ID anrede_n ANSCHRIFTENART
BIRTH_DATE BOX_NUMBER CITY COUNTRY DAT_STND
EFFECTIVE_FROM_DATE
EFFECTIVE_TO_DATE EXTERNAL_REFERENCE FLOOR_NUMBER
FULL_NAME
HOUSE_NUMBER HSH_NR ID_IM_QUELLSYSTEM KUNDENNUMMER
NACHNAME
NATIONALITY p_anzparthh PAR_NR PLZ POSTAL_CODE
QUELLE_KUNDE
QUELLSYSTEM ROLE_PLAYER_ID STAAT STREET TYPE_ID VORNAME
xxx
ZUSTELLVERMERK POPULATION_INFO_ID)
;
attrib AEND_KEZ length=$6
ANR length=8 format=6. label="ANR"
ANREDE length=$6 format=$6. label="ANREDE"
ANREDE_ID length=8 format=11. label="ANREDE_ID"
anrede_n length=$6
ANSCHRIFTENART length=$6 format=$6. label="ANSCHRIFTENART"
BIRTH_DATE length=8 format=DATE9. label="BIRTH_DATE"
BOX_NUMBER length=$60 format=$60. label="BOX_NUMBER"
CITY length=$60 format=$60. label="CITY"
COUNTRY length=$60 format=$60. label="COUNTRY"
DAT_STND length=8 format=DATE9.
EFFECTIVE_FROM_DATE length=8 format=DATE9.
label="EFFECTIVE_FROM_DATE"
EFFECTIVE_TO_DATE length=8 format=DATE9.
label="EFFECTIVE_TO_DATE"
EXTERNAL_REFERENCE length=$60 format=$60.
label="EXTERNAL_REFERENCE"
FLOOR_NUMBER length=$60 format=$60. label="FLOOR_NUMBER"
FULL_NAME length=$60 format=$60. label="FULL_NAME"
HOUSE_NUMBER length=$60 format=$60. label="HOUSE_NUMBER"
HSH_NR length=$60 format=$60. label="HSH_NR"
ID_IM_QUELLSYSTEM length=$60 format=$60.
label="ID_IM_QUELLSYSTEM"
KUNDENNUMMER length=$60 format=$60. label="KUNDENNUMMER"
NACHNAME length=$60
NATIONALITY length=$60 format=$60. label="NATIONALITY"
p_anzparthh length=8
PAR_NR length=$11 format=$60. label="PAR_NR"
PLZ length=$6 format=$6. label="PLZ"
POSTAL_CODE length=$60 format=$60. label="POSTAL_CODE"
QUELLE_KUNDE length=$6
QUELLSYSTEM length=$6 format=$6. label="QUELLSYSTEM"
ROLE_PLAYER_ID length=8 format=11. label="ROLE_PLAYER_ID"
STAAT length=$3 format=$3. label="STAAT"
STREET length=$60 format=$60. label="STREET"
TYPE_ID length=8 format=11. label="TYPE_ID"
VORNAME length=$60
xxx length=8
ZUSTELLVERMERK length=$60 format=$60. label="ZUSTELLVERMERK"
POPULATION_INFO_ID length=8 format=11. label="POPULATION_INFO_ID"
role_player_id_kunde length= 8
PART_NR length= $11
;
retain missing0-missing0 0;
/* Build hash objects from lookup tables before reading first source
row */
if (_n_ = 1) then
do;
/* Build hash h0 from lookup table basedwh.uni_partner_04 */
nlobs = .;
dsid = open("basedwh.uni_partner_04");
if (dsid > 0) then
do;
if ( attrc(dsid, 'MTYPE') = 'DATA' ) then
nlobs = attrn(dsid, 'NLOBS');
else
nlobs = -1;
dsid = close(dsid);
if (nlobs ^= 0) then
do;
if (nlobs > 0) then
exponent = ceil(log2(nlobs));
else
exponent = 8;
declare hash h0(dataset: "basedwh.uni_partner_04", hashexp:
exponent);
h0.defineKey( "PART_NR");
h0.defineData( "role_player_id_kunde");
h0.defineDone();
if (nlobs = -1) then
do;
if (h0.Num_Items < 1) then
do;
put "NOTE: Lookup table is empty:
basedwh.uni_partner_04";
put "NOTE: Abort action indicated, condition= Lookup
table is empty:"
" basedwh.uni_partner_04";
abort 3;
end;
end;
end;
else
do;
put "NOTE: Lookup table is empty: basedwh.uni_partner_04";
put "NOTE: Abort action indicated, condition= Lookup table is
empty:"
" basedwh.uni_partner_04";
abort 3;
end;
end;
else
do;
put "NOTE: Lookup table does not exist or cannot be opened:"
" basedwh.uni_partner_04";
put "NOTE: Abort action indicated, condition= Lookup table
missing:"
" basedwh.uni_partner_04";
abort 3;
end;
call missing (PART_NR, role_player_id_kunde);
end; /* All hash objects have been defined */
/* Read a row from the source table */
set work.W5SKP4ZP end = eof;
/* Is the current key value stored in hash h0? */
PART_NR = PAR_NR;
rc0 = h0.find();
xxx = role_player_id_kunde;
/* Examine success of lookups */
if ( rc0=0 ) then
do;
/* Write row to target */
output work.W5SJ58DM;
return;
end;
else
do;
error_total + 1;
if (rc0 ^= 0) then
do;
exception_total + 1;
/* Check: Lookup value not found-Set target columns to value */
xxx = 0;
request_write_target = 1;
end;
/* Set target columns to value/missing requested? */
if (request_write_target eq 1) then
/* Write row to target */
output work.W5SJ58DM;
end; /* One or more lookups failed */
if (eof = 1) then
do;
put "Source records with errors: " error_total ;
put "Total lookup exceptions: " exception_total ;
end;
run;
%rcSet(&syserr);
Gerhard
On Tue, 11 May 2010 21:33:46 -0400, Scott Bass <sas_l_739@YAHOO.COM.AU>
wrote:
>Hi,
>
>In a previous life, I had access to DIS. Now I don't. I also can't find
>copies of my old DIS jobs.
>
>I'm writing some macros dealing with hash objects, and would like to
derive
>the optimal hashexp value.
>
>If you have DIS, with a job using the lookup transformation, could you
post
>the algorithm DIS uses to derive hashexp? From memory it is something
like
>int(log2(nobs)), where nobs is the number of observations for the lookup
>table. Otherwise 16 if nobs cannot be determined (view). Plus some
>additional error checking.
>
>If you can post (or email) the entire job that could be useful, but if
it's
>proprietary then just the hashexp algorithm would be much appreciated.
>
>Thanks,
>Scott