/*****************************************************************************************************************
    SAS file name: fuzzy_merge.sas
    File location:
    __________________________________________________________________________________________________________________
    Purpose: To demonstrate how the Hash Object can perform fuzzy merges in SAS
    Author: Peter Clemmensen
    Creation Date: 14/02/2021

    This program supports the blog post "Fuzzy Merge in SAS with the Hash Object" on SASnrd.com
*****************************************************************************************************************/

/* Example Data */
data one;
   input id x;
   datalines;
1 16
2 32
3 65
;

data two;
   input id x y;
   datalines;
1 10 1
1 14 2
1 25 3
1 22 4
2 30 1
2 36 2
2 43 3
3 50 1
3 61 2
3 67 3
;

/* Simple Fuzzy Numeric Merge                                                  */
/* For every obs in one, return y from the obs in two with the same id whose   */
/* x is numerically closest. Ties keep the first candidate encountered         */
/* because the comparison is strict.                                           */
data want(keep = id x y);

   /* Load two into a multidata hash once; x is renamed to xx so that it does  */
   /* not overwrite the x read from one.                                       */
   if _N_ = 1 then do;
      dcl hash h(dataset : "two(rename = (x = xx))", multidata : "Y");
      h.definekey("id");
      h.definedata("xx", "y");
      h.definedone();
   end;

   set one;

   /* Host variables for the hash data items; c holds the smallest distance    */
   /* seen so far for the current obs.                                         */
   y  = .;
   xx = .;
   c  = constant("big");

   /* Traverse every item stored under the current id. A missing distance      */
   /* (x or xx missing) is skipped: without the lower bound a missing value    */
   /* compares below c and would become the permanent "best" match.            */
   do while (h.do_over() = 0);
      if . < abs(x - xx) < c then do;
         c  = abs(x - xx);
         yy = y;
      end;
   end;

   /* yy stays missing when no candidate exists for the id. */
   y = yy;
run;

/* Multiple Obs Per ID in one */
data one;
   input id x;
   datalines;
1 15
1 16
2 32
2 33
3 65
3 66
;

data two;
   input id x y;
   datalines;
1 10 1
1 14 2
1 25 3
1 22 4
2 30 1
2 36 2
2 43 3
3 50 1
3 61 2
3 67 3
;

/* Each obs in one can only match an obs in two once                           */
/* Same nearest-x search as above, but the matched item is removed from the    */
/* hash so later obs in one cannot reuse it.                                   */
data want1(keep = id x y);

   if _N_ = 1 then do;
      dcl hash h(dataset : "two(rename = (x = xx))", multidata : "Y");
      h.definekey("id");
      h.definedata("xx", "y");
      h.definedone();
   end;

   set one;

   y  = .;
   xx = .;
   c  = constant("big");

   /* Pass 1: find the closest candidate and remember its position (ii) in     */
   /* the duplicate list of the current id.                                    */
   do i = 1 by 1 while (h.do_over() = 0);
      if . < abs(x - xx) < c then do;
         c  = abs(x - xx);
         yy = y;
         ii = i;
      end;
   end;

   /* Pass 2: walk the list again up to position ii and delete that item.      */
   /* When no match was found ii is missing and nothing is removed.            */
   do i = 1 by 1 while (h.do_over() = 0);
      if i = ii then do;
         h.removedup();
         leave;
      end;
   end;

   /* The loop above may exit in the middle of the duplicate list; reset the   */
   /* traversal so the next obs starts from the beginning of its own list.     */
   /* The return code is captured so a non-zero result does not raise an       */
   /* error.                                                                   */
   rc = h.reset_dup();

   y = yy;
run;

/* Memory Management                                                           */
/* Same one-to-one matching, but only one BY group of two is held in the hash  */
/* at a time. Both inputs are read with BY id, so they must be sorted by id.   */
/* NOTE(review): the two DoW loops advance in lockstep, which assumes one and  */
/* two contain the same ids in the same order - confirm for real data.         */
data want2(keep = id x y);

   /* Instantiate the hash once and reuse it for every BY group. */
   if _N_ = 1 then do;
      dcl hash h(multidata : "Y");
      h.definekey("id");
      h.definedata("xx", "y");
      h.definedone();
   end;

   /* Load the candidates of the current id from two. */
   do until (last.id);
      set two(rename = (x = xx));
      by id;
      h.add();
   end;

   /* Process every obs of the same BY group in one. */
   do until (last.id);
      set one;
      by id;

      /* All obs of a BY group are handled inside a single step iteration, so  */
      /* the match holders must be reset explicitly; otherwise an obs with no  */
      /* remaining candidates would inherit the previous obs' match.           */
      y  = .;
      xx = .;
      yy = .;
      ii = .;
      c  = constant("big");

      do i = 1 by 1 while (h.do_over() = 0);
         if . < abs(x - xx) < c then do;
            c  = abs(x - xx);
            yy = y;
            ii = i;
         end;
      end;

      y = yy;
      output;

      /* Remove the matched item so it cannot be matched again. */
      do i = 1 by 1 while (h.do_over() = 0);
         if i = ii then do;
            h.removedup();
            leave;
         end;
      end;

      /* Restart duplicate traversal after the early exit above. */
      rc = h.reset_dup();
   end;

   /* Release the items of this BY group before the next one is loaded. */
   h.clear();
run;