/*****************************************************************************************************************
   SAS file name: efficient_random.sas
   File location:
__________________________________________________________________________________________________________________
   Purpose:       To demonstrate approaches to efficient random sampling in the SAS Data Step
                  using temporary arrays and the hash object.
   Author:        Peter Clemmensen
   Creation Date: 20/11/2020

   This program supports the blog post "Efficient Random Sampling in SAS With Arrays and Hashing" on SASnrd.com
*****************************************************************************************************************/

/* Number of observations to draw without replacement. Shared by every method below. */
%let sampsize = 100000;

/* Example data: one million rows with x = 1..1e6 and y = 2 * x */
data have;
   do x = 1 to 1e6;
      y = x * 2;
      output;
   end;
run;

/* Proc Surveyselect: simple random sampling without replacement.                   */
/* SELECTALL returns every row instead of raising an error when the requested size  */
/* exceeds the number of rows, matching the capping done in the hash methods below. */
proc surveyselect data=have out=want noprint method=srs sampsize=&sampsize selectall;
run;

/* Hash Object method 1: rejection sampling.                                        */
/* Draw a random observation number, use the hash object to remember which numbers  */
/* were already taken, and redraw whenever a duplicate comes up.                    */
data want(keep=x y);
   dcl hash h(hashexp : 20);
   h.definekey("r");
   h.definedone();

   /* n is filled in by NOBS= when the step is compiled, so it is usable before SET */
   /* executes. Capping at n prevents an endless loop: only n distinct keys exist,  */
   /* so a larger target could never be reached.                                    */
   target = min(&sampsize, n);

   /* q starts at 0 (sum statement). WHILE is tested before the body runs, so an    */
   /* empty data set or a zero sample size never reaches SET with POINT=0.          */
   do hit = 1 by 1 while (q < target);
      r = ceil(rand('uniform') * n);     /* candidate observation number in 1..n   */
      if h.add() then continue;          /* non-zero rc: r already drawn, redraw   */
      set have point = r nobs = n;
      output;
      q + 1;                             /* count of distinct rows written so far  */
   end;

   stop;                                 /* POINT= never hits end-of-file          */
run;

/* Hash Object method 2: sparse partial Fisher-Yates shuffle.                       */
/* Conceptually positions 1..n hold the values 1..n. The hash object stores only    */
/* the positions whose value has been changed (key p -> value r); a position that   */
/* is absent from the hash still holds its own index.                               */
data want(keep=x y);
   dcl hash h(hashexp : 20);
   h.definekey("p");
   h.definedata("r");
   h.definedone();

   /* Cap at n: beyond that s would drop to zero or below and POINT= would be       */
   /* handed an invalid observation number.                                         */
   target = min(&sampsize, n);

   do k = 1 to target;
      s = n - k + 1;                              /* size of the remaining pool     */
      p = ceil(rand("uniform") * s);              /* random position in 1..s        */

      if h.find(key : p) ne 0 then r = p;         /* untouched position holds p     */
      set have point = r nobs = n;
      output;

      /* Move the value held at the last position s into position p, so that the    */
      /* next iteration can shrink the pool to 1..s-1 without losing it.            */
      if h.find(key : s) ne 0 then r = s;
      h.replace(key : p, data : r);
   end;

   stop;                                          /* POINT= never hits end-of-file  */
run;