/*****************************************************************************************************************
    SAS file name: nearest_neighbor.sas
    File location: __________________________________________________________________________________________________________________
    Purpose:       To demonstrate how to do a nearest neighbor match by group in SAS using the Hash Object.
    Author:        Peter Clemmensen
    Creation Date: 08/01/2021

    This program supports the blog post "Closest Value Match in SAS Using the Hash Object – Part 2" on SASnrd.com
*****************************************************************************************************************/

/* Handling By-Groups */

/* Lookup table: candidate values n1 within each id group. */
data one;
   input id n1;
   datalines;
1 3.4
1 7.9
1 6.1
2 4.7
2 9.2
3 6.3
3 1.1
3 8.3
;

/* Query table: PROC PLAN writes 3 levels of id, each with 10 levels of n2, */
/* to data set two (no printed output).                                     */
proc plan;
   factors id = 3 n2 = 10 / noprint;
   output out=two;
run;quit;

/* Do_Over Method                                                            */
/* For every row of two, walk all n1 values stored under the same id and     */
/* keep the one with the smallest absolute distance to n2.                   */
data want(keep = id n2 closest);

   if _N_ = 1 then do;
      /* multidata : "Y" keeps every n1 of an id as a separate item under that key. */
      dcl hash h(dataset : "one", multidata : "Y");
      h.definekey("id");
      h.definedata("n1");
      h.definedone();
   end;

   set two;

   /* Puts n1 in the PDV as a numeric host variable for the hash data item. */
   n1 = .;
   /* Start with the largest representable distance so the first candidate always wins. */
   c  = constant("big");

   /* do_over() iterates over all items stored under the current value of id. */
   do while (h.do_over() = 0);
      d = abs(n2 - n1);
      /* On an exact tie prefer the lower n1. This matches the Setcur method      */
      /* below and makes the result independent of the row order of data set one. */
      if d < c or (d = c and n1 < nn) then do;
         c  = d;
         nn = n1;
      end;
   end;

   /* nn stays missing when the id has no items in the hash. */
   closest = nn;

run;

/* Setcur Method                                                             */
/* Keep (_id, n1) in an ordered hash. For each row of two, temporarily insert */
/* (id, n2), look at the items immediately before and after it, pick the      */
/* closer one, and remove the temporary item again.                           */
data want(keep = id n2 closest);

   if _N_ = 1 then do;
      /* Compile-time only: defines the variables of one (including n1) in the PDV. */
      if 0 then set one;
      /* id is renamed to _id so that reading a hash item does not overwrite the id */
      /* value coming from data set two. The parentheses of the data set option     */
      /* must be balanced: one( rename = ( ... ) ).                                 */
      dcl hash h(dataset : "one(rename = (id = _id))", ordered : "Y");
      /* No definedata call: the key variables _id and n1 also serve as data. */
      h.definekey("_id", "n1");
      h.definedone();
      dcl hiter i("h");
   end;

   set two;
   _id = .;

   /* Exact match: n2 itself exists as an n1 value for this id. */
   if h.check(key : id, key : n2) = 0 then closest = n2;

   else do;
      /* Temporarily insert the lookup value so the iterator can be positioned on it. */
      h.add(key : id, key : n2, data : id, data : n2);

      /* Previous item; only accepted if it belongs to the same id group. */
      if i.setcur(key : id, key : n2) = 0 then do;
         if i.prev() = 0 & id = _id then pn1 = n1;
      end;

      /* prev() moved the iterator, so reposition on the inserted item before next(). */
      if i.setcur(key : id, key : n2) = 0 then do;
         if i.next() = 0 & id = _id then nn1 = n1;
      end;

      /* idx = 1 selects pn1 (lower neighbor), idx = 2 selects nn1 (upper neighbor). */
      /* With both neighbors present, the comparison evaluates to 0 or 1, so a tie   */
      /* resolves to the lower neighbor.                                             */
      if      nmiss(nn1) then idx = 1;
      else if nmiss(pn1) then idx = 2;
      else                    idx = 1 + (n2 - pn1 > nn1 - n2);

      closest = choosen(idx, pn1, nn1);

      /* Take the temporary item out again so the table only holds rows of one. */
      h.remove(key : id, key : n2);
   end;

run;

/* Larger Data */

/* 10 id groups with 10 random n1 values each; every n1 is a multiple of 0.1. */
data one;
   do id = 1 to 10;
      do _N_ = 1 to 10;
         n1 = .1 * ceil(rand('uniform') * 1000);
         output;
      end;
   end;
run;
/* Query table for the larger data: PROC PLAN writes 10 levels of id, each */
/* with 100 levels of n2, to data set two (no printed output).             */
proc plan;
   factors id = 10 n2 = 100 / noprint;
   output out=two;
run;quit;

/* Do_Over Method                                                            */
/* For every row of two, walk all n1 values stored under the same id and     */
/* keep the one with the smallest absolute distance to n2.                   */
data want1(keep = id n2 closest);

   if _N_ = 1 then do;
      /* multidata : "Y" keeps every n1 of an id as a separate item under that key. */
      dcl hash h(dataset : "one", multidata : "Y");
      h.definekey("id");
      h.definedata("n1");
      h.definedone();
   end;

   set two;

   /* Puts n1 in the PDV as a numeric host variable for the hash data item. */
   n1 = .;
   /* Start with the largest representable distance so the first candidate always wins. */
   c  = constant("big");

   /* do_over() iterates over all items stored under the current value of id. */
   do while (h.do_over() = 0);
      d = abs(n2 - n1);
      /* On an exact tie prefer the lower n1. This matches the tie-break of the  */
      /* Setcur step (want2), so the PROC COMPARE below is not affected by the   */
      /* row order of data set one.                                              */
      if d < c or (d = c and n1 < nn) then do;
         c  = d;
         nn = n1;
      end;
   end;

   /* nn stays missing when the id has no items in the hash. */
   closest = nn;

run;

/* Setcur Method                                                             */
/* Keep (_id, n1) in an ordered hash. For each row of two, temporarily insert */
/* (id, n2), look at the items immediately before and after it, pick the      */
/* closer one, and remove the temporary item again.                           */
data want2(keep = id n2 closest);

   if _N_ = 1 then do;
      /* Compile-time only: defines the variables of one (including n1) in the PDV. */
      if 0 then set one;
      /* id is renamed to _id so that reading a hash item does not overwrite the id */
      /* value coming from data set two. The parentheses of the data set option     */
      /* must be balanced: one( rename = ( ... ) ).                                 */
      dcl hash h(dataset : "one(rename = (id = _id))", ordered : "Y");
      /* No definedata call: the key variables _id and n1 also serve as data. */
      h.definekey("_id", "n1");
      h.definedone();
      dcl hiter i("h");
   end;

   set two;
   _id = .;

   /* Exact match: n2 itself exists as an n1 value for this id. */
   if h.check(key : id, key : n2) = 0 then closest = n2;

   else do;
      /* Temporarily insert the lookup value so the iterator can be positioned on it. */
      h.add(key : id, key : n2, data : id, data : n2);

      /* Previous item; only accepted if it belongs to the same id group. */
      if i.setcur(key : id, key : n2) = 0 then do;
         if i.prev() = 0 & id = _id then pn1 = n1;
      end;

      /* prev() moved the iterator, so reposition on the inserted item before next(). */
      if i.setcur(key : id, key : n2) = 0 then do;
         if i.next() = 0 & id = _id then nn1 = n1;
      end;

      /* idx = 1 selects pn1 (lower neighbor), idx = 2 selects nn1 (upper neighbor). */
      /* With both neighbors present, the comparison evaluates to 0 or 1, so a tie   */
      /* resolves to the lower neighbor.                                             */
      if      nmiss(nn1) then idx = 1;
      else if nmiss(pn1) then idx = 2;
      else                    idx = 1 + (n2 - pn1 > nn1 - n2);

      closest = choosen(idx, pn1, nn1);

      /* Take the temporary item out again so the table only holds rows of one. */
      h.remove(key : id, key : n2);
   end;

run;

/* Check that both methods produce the same result. */
proc compare base = want1 comp = want2;run;