/*****************************************************************************************************************
    SAS file name: remove_duplicates_hash
    File location: _________________________________________________________________________________________________________________
    Purpose:       To demonstrate how to remove duplicate observations in SAS with the hash object.
    Author:        Peter Clemmensen
    Creation Date: 29/12/2018

    This program supports the example page "Remove Duplicate Observations In SAS with the Hash Object" on SASnrd.com
*****************************************************************************************************************/

/* Create example data: 10 million observations (10e6) of five variables,
   each drawn uniformly from the integers 1..10. With only 10**5 possible
   key combinations, the data is guaranteed to contain many duplicates. */
data testdata(drop=i j);
   array vars var1-var5;

   /* Fixed seed so the example data (and therefore both results below)
      is reproducible from run to run. */
   call streaminit(20181229);

   do i=1 to 10e6;
      do j=1 to dim(vars);
         vars[j]=rand('integer', 1, 10);
      end;
      output;
   end;
run;

/* The usual PROC SORT NODUPKEY approach: sorts by every variable and keeps
   the first observation of each distinct key combination. */
proc sort data=testdata out=test1 nodupkey;
   by _ALL_;
run;

/* A hash object alternative: keeps the first occurrence of each key
   combination in the original input order, without sorting. */
data test2;
   /* Instantiate the hash object once, on the first iteration only. */
   if _N_ = 1 then do;
      declare hash h(hashexp:20);
      h.defineKey('var1', 'var2', 'var3', 'var4', 'var5');
      h.defineDone();
   end;

   set testdata;

   /* add() returns 0 only when the key was not already in the hash object,
      so a single call both records the key and identifies a first occurrence.
      A non-zero return code means the key is a duplicate and the observation
      is not written. */
   if h.add() = 0 then output;
run;