/*****************************************************************************************************************
    SAS file name: nearest_neighbor_1.sas
    File location:
__________________________________________________________________________________________________________________
    Purpose: To demonstrate how to perform a nearest neighbor match using the SAS hash object.
    Author: Peter Clemmensen
    Creation Date: 22/02/2021

    This program supports the blog post "Nearest Neighbor Match in SAS - Part 1" on SASnrd.com
*****************************************************************************************************************/

/* Example data */

/* Lookup values. INPUT has no trailing @@, so each value sits on its own record. */
data one;
   input n1;
   datalines;
1.4
9.3
5.1
3.8
7
2
;

/* Values to be matched. The trailing @@ lets INPUT read several values from one record. */
data two;
   input n2 @@;
   datalines;
9 1 5 3 4 6 8 2 10 7
;

/* Naive Approach */

/* For every n2, walk through all n1 values in the hash object and keep the one */
/* with the smallest absolute difference.                                       */
data want1(keep = n2 closest);

   /* Load the lookup values once, on the first iteration of the step. */
   if _N_ = 1 then do;
      dcl hash h(dataset : "one");
      h.definekey("n1");
      h.definedone();
      dcl hiter i("h");
   end;

   set two;

   /* Assigning n1 also gives the hash key its host variable in the PDV. */
   n1 = .;

   /* Smallest distance found so far; starts at the largest representable number. */
   c = constant("big");

   /* Each successful NEXT call loads a key value into n1.                          */
   /* NOTE(review): relies on NEXT starting over from the first item once the       */
   /* previous observation's traversal has run past the last item - confirm in docs. */
   do while (i.next() = 0);
      if abs(n1 - n2) < c then do;
         c       = abs(n1 - n2);
         closest = n1;
      end;
   end;

run;

/* Direct Access with the Setcur Method */

/* Keep the lookup values in key order. If n2 is not already a key, insert it       */
/* temporarily, read the entries on either side of it, and pick the nearer of them. */
data want2(keep = n2 closest);

   if _N_ = 1 then do;
      /* Never executes; only makes n1 known to the compiler as the key's host variable. */
      if 0 then set one;
      dcl hash h(dataset : "one", ordered : "Y");
      h.definekey("n1");
      h.definedone();
      dcl hiter i("h");
   end;

   set two;

   /* Exact match: n2 itself is the nearest neighbor. */
   if h.check(key : n2) = 0 then closest = n2;

   else do;

      /* Temporarily insert n2 so the iterator can be positioned on it. */
      h.add(key : n2, data : n2);

      /* Predecessor: the nearest smaller value. pn1 stays missing if there is none. */
      if i.setcur(key : n2) = 0 then
         if i.prev() = 0 then pn1 = n1;

      /* Successor: the nearest larger value. nn1 stays missing if there is none. */
      if i.setcur(key : n2) = 0 then
         if i.next() = 0 then nn1 = n1;

      /* idx = 1 selects the predecessor, idx = 2 the successor.        */
      /* With both neighbors present, a tie goes to the predecessor     */
      /* because the comparison is strict.                              */
      if      nmiss(nn1) then idx = 1;
      else if nmiss(pn1) then idx = 2;
      else                    idx = 1 + (n2 - pn1 > nn1 - n2);

      closest = choosen(idx, pn1, nn1);

      /* Take the temporary key out again so the lookup table is unchanged. */
      h.remove(key : n2);

   end;

run;

/* Larger data */

/* 1,000 random lookup values on a 0.1 grid in the range 0.1 - 1000. */
data one;
   /* Fixed seed so the demonstration data is the same on every run. */
   call streaminit(123);
   do _N_ = 1 to 1000;
      n1 = .1 * ceil(rand('uniform') * 1e4);
      output;
   end;
run;

/* 100,000 integer values to be matched. */
data two;
   do n2 = 1 to 1e5;
      output;
   end;
run;

/* Same SETCUR technique as in the want2 step, applied to the larger data. */
data want(keep = n2 closest);

   if _N_ = 1 then do;
      /* Never executes; only makes n1 known to the compiler as the key's host variable. */
      if 0 then set one;
      dcl hash h(dataset : "one", ordered : "Y");
      h.definekey("n1");
      h.definedone();
      dcl hiter i("h");
   end;

   set two;

   /* Exact match: n2 itself is the nearest neighbor. */
   if h.check(key : n2) = 0 then closest = n2;

   else do;

      /* Temporarily insert n2 so the iterator can be positioned on it. */
      h.add(key : n2, data : n2);

      /* Predecessor: the nearest smaller value. pn1 stays missing if there is none. */
      if i.setcur(key : n2) = 0 then
         if i.prev() = 0 then pn1 = n1;

      /* Successor: the nearest larger value. nn1 stays missing if there is none. */
      if i.setcur(key : n2) = 0 then
         if i.next() = 0 then nn1 = n1;

      /* idx = 1 selects the predecessor, idx = 2 the successor; ties go to the predecessor. */
      if      nmiss(nn1) then idx = 1;
      else if nmiss(pn1) then idx = 2;
      else                    idx = 1 + (n2 - pn1 > nn1 - n2);

      closest = choosen(idx, pn1, nn1);

      /* Take the temporary key out again so the lookup table is unchanged. */
      h.remove(key : n2);

   end;

run;