Date: Thu, 11 Mar 2004 13:37:33 -0600
Reply-To: Dewen Hou <Dewen.Hou@BLOCKBUSTER.COM>
Sender: "SAS(r) Discussion" <SAS-L@LISTSERV.UGA.EDU>
From: Dewen Hou <Dewen.Hou@BLOCKBUSTER.COM>
Subject: Super Size Dataset from Cartesian Join
Content-type: text/plain; charset=US-ASCII
Dear All,
I have to use a Cartesian join on a dataset, which represents a grid area,
to set up a look-up matrix table for each cell in the grid against all
other cells in the same grid file. Then I calculate the distance among all
the cells, just to keep those with distance smaller than 1 mile. Please
look at the codes below.
The problem is if the nop.newgrid dataset has 60,000 cells, the Cartesian
join table nop.dist_matrix_all will have 3.6 billion records (60,000 X
60,000). I saw a temp dataset file as big as 78 GB in my local drive. None
of my machines can reach the final nop.dist_matrix_all dataset.
So, is there a way to work around this?
Thank you so much!
David Hou
/********************************************************************/
/*Create a new distance for all &maxcells against all cells*/
/*******************************************************************/
proc sql;
create table nop.dist_matrix_all as
select a.cellid as cellid_m, a.X as X1, a.Y as Y1,
b.cellid as cellid_all, b.X as X2, b.Y as Y2
from nop.newgrid. a, nop.newgrid. b;
quit;
/********************************************************************/
/*Calculate distance */
/********************************************************************/
data nop.dist_matrix_all;
set nop.dist_matrix_all;
iLon1 = X1*3.14159/180;
iLon2 = X2*3.14159/180;
iLat1 = Y1*3.14159/180;
iLat2 = Y2*3.14159/180;
iDiffLon = iLon2 - iLon1;
iDiffLat = iLat2 - iLat1;
minNum = sqrt(sin(iDifflat/2)*sin(iDifflat/2) + cos(iLat1)*
cos(iLat2)*sin(iDiffLon/2)*sin(iDiffLon/2));
result = 2*ArSin(min(1, minNum))*6370997;
distance = result/1609.344;
if distance < 1 and distance > 0;
Keep cellid_m cellid_all distance;
run;