/*****************************************************************************************************************

SAS file name: remove_duplicates_hash
File location: 
_________________________________________________________________________________________________________________

Purpose: To demonstrate how to remove duplicate observations in SAS with the hash object.
Author: Peter Clemmensen
Creation Date: 29/12/2018

This program supports the example page "Remove Duplicate Observations In SAS with the Hash Object" on SASnrd.com

*****************************************************************************************************************/

/* Build the demo input data set TESTDATA.
   Each of the 10,000,000 rows holds five numeric variables (var1-var5),
   every one a random integer between 1 and 10. Only 10**5 distinct
   combinations are possible, so the data contains many duplicate rows. */
data testdata;
   array cols[5] var1-var5;

   do rownum = 1 to 10000000;
      /* Fill every column of the current row with a fresh random value */
      do k = lbound(cols) to hbound(cols);
         cols[k] = rand('integer', 1, 10);
      end;
      output;
   end;

   /* The loop counters are scaffolding only; keep them out of the output */
   drop rownum k;
run;

/* Baseline approach: de-duplicate with PROC SORT.
   Every variable is named as a BY key (_ALL_), so NODUPKEY keeps a single
   row per distinct combination of values. The result is written to TEST1;
   TESTDATA itself is left untouched because OUT= is given. */
proc sort nodupkey
          data=testdata
          out=test1;
   by _all_;
run;

/* Hash object alternative: de-duplicate TESTDATA in a single pass.
   A hash table keyed on var1-var5 remembers every combination already
   written. A row is output only the first time its key is seen, so TEST2
   holds one row per distinct combination, in the order the rows are read. */
data test2;
   /* Set up the lookup table once, before the first row is read */
   if _N_ = 1 then do;
      declare hash h(hashexp:20);
      h.defineKey('var1', 'var2', 'var3', 'var4', 'var5');
      h.defineDone();
   end;

   set testdata;

   /* check() returns a non-zero code when the current key is NOT in the
      table, i.e. this combination has not been written yet */
   if h.check() ne 0 then do;
      output;
      /* Record the key so later identical rows are skipped */
      h.add();
   end;
run;