/*****************************************************************************************************************
    SAS file name:  Hash_Reduce_Advanced
    File location:
    _________________________________________________________________________________________________________________
    Purpose:        To demonstrate three advanced techniques that reduce the SAS hash object memory footprint.
    Author:         Peter Clemmensen
    Creation Date:  07/12/2018

    This program supports the example page "Three Advanced Techniques to Reduce SAS Hash Object Size" on SASnrd.com
*****************************************************************************************************************/

/* Create example data */
/* Builds WORK.HashData: 10e5 rows, five character id variables (id1-id5) used as a composite key, */
/* plus six descriptive variables drawn at random from the lookup arrays below.                    */
data HashData(drop=i j);
   /* NOTE(review): id1-id5 are $20, so the value returned by uuidgen() is truncated to 20 characters */
   /* on assignment - confirm this is intended.                                                        */
   array id{5} $20;
   length first_name $50 last_name $50 gender $20 state $50 birth_date 8 children 8;

   /* Lookup tables for the random draws; _temporary_ keeps them out of the output data set */
   array first_namesm{20}$20 _temporary_ ("Paul", "Allan", "Bob", "Michael", "Chris", "David", "John", "Jerry", "James", "Robert",
                                          "William", "Richard", "Thomas", "Daniel", "Paul", "George", "Larry", "Eric", "Charles", "Stephen");
   array first_namesf{20}$20 _temporary_ ("Mary", "Linda", "Patricia", "Barbara", "Elizabeth", "Maria", "Susan", "Margaret", "Lisa", "Nancy",
                                          "Karen", "Betty", "Helen", "Sandra", "Sharon", "Laura", "Michelle", "Angela", "Melissa", "Amanda");
   array last_names{20}$20 _temporary_ ("Smith", "Johnson", "Williams", "Jones", "Brown", "Miller", "Wilson", "Moore", "Taylor", "Hall",
                                        "Anderson", "Jackson", "White", "Harris", "Martin", "Thompson", "Robinson", "Lewis", "Walker", "Allen");
   array states{50}$20 _temporary_ ("Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware",
                                    "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
                                    "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
                                    "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
                                    "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
                                    "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah",
                                    "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming");

   do i=1 to 10e5;
      /* Five generated identifiers form the composite key of each row */
      do j=1 to dim(id);
         id[j]=uuidgen();
      end;

      /* Pick gender with equal probability, then a first name from the matching list */
      if rand("Uniform")<0.5 then do;
         gender="M";
         first_name=first_namesm[ceil(rand("Uniform")*20)];
      end;
      else do;
         gender="F";
         first_name=first_namesf[ceil(rand("Uniform")*20)];
      end;

      last_name=last_names[ceil(rand("Uniform")*20)];
      state=states[ceil(rand("Uniform")*50)];
      birth_date=rand("Integer", '01jan1950'd, '01jan1990'd);
      /* The "Table" distribution returns 1..6; subtracting 1 gives 0..5 children */
      children=rand("Table", 0.1, 0.2, 0.3, 0.2, 0.1, 0.1)-1;
      output;
   end;

   format birth_date date9.;
run;

/* Read in the HashData data set as is */
/* Baseline: every variable in HashData is loaded as data. The XMRLMEM option is read before and */
/* after the hash object is instantiated and the difference is reported as the hash object size. */
data _null_;
   /* Never executes; only makes the HashData variables known to the step for the hash definition */
   if 0 then set HashData;

   before=input(getoption('xmrlmem'),20.);

   if _N_ = 1 then do;
      declare hash h(dataset:"HashData");
      h.defineKey('id1', 'id2', 'id3', 'id4', 'id5');
      h.defineData(all:"Y");
      h.defineDone();
   end;

   after=input(getoption('xmrlmem'),20.);

   hashsize=before-after;
   put "Hash Object Takes Up:" hashsize sizekmg10.2;
run;

/* 1: Use metadata to specify data variables */
/* The key variables are stored with the key portion of each hash item, so listing them again as */
/* data only costs memory. Build the data-variable list from the column metadata instead.        */
/* Key names in &key. must be written in lowercase because they are compared to lowcase(name).   */
%let key='id1', 'id2', 'id3', 'id4', 'id5';

/* noprint: the result is only needed in the macro variable (it is echoed by %put below).        */
/* libname/memname are stored in upper case in DICTIONARY.COLUMNS, so they are compared directly. */
/* The key variables are excluded by exact name rather than by a substring search for 'id', which */
/* would also drop any non-key column whose name merely contains "id".                            */
proc sql noprint;
   select compbl(quote(strip(name)))
      into :datavars separated by ','
   from dictionary.columns
   where libname='WORK'
     and memname='HASHDATA'
     and lowcase(name) not in (&key.);
quit;

%put &datavars.;

data _null_;
   if 0 then set HashData;

   before=input(getoption('xmrlmem'),20.);

   if _N_ = 1 then do;
      declare hash h(dataset:"HashData");
      h.defineKey(&key.);
      h.defineData(&datavars.);
      h.defineDone();
   end;

   after=input(getoption('xmrlmem'),20.);

   hashsize=before-after;
   put "Hash Object Takes Up:" hashsize sizekmg10.2;
run;

/* 2: Create MD5 hash representation of key variables */
/* Replaces the five $20 key variables by a single 16-byte MD5 digest of their concatenation.    */
/* NOTE(review): cats() concatenates without a delimiter. That is unambiguous only while every   */
/* key part has a fixed width; for variable-length key parts insert a delimiter before hashing.  */
data HashData;
   set HashData;
   length key $16;
   key=md5(cats(of id1-id5));
run;

/* Same measurement as above with the single digest as key. _null_ is used, as in the other      */
/* measurement steps, because the step exists only to write the size to the log.                 */
data _null_;
   if 0 then set HashData;

   before=input(getoption('xmrlmem'),20.);

   if _N_ = 1 then do;
      declare hash h(dataset:"HashData");
      h.defineKey('key');
      h.defineData(&datavars.);
      h.defineDone();
   end;

   after=input(getoption('xmrlmem'),20.);

   hashsize=before-after;
   put "Hash Object Takes Up:" hashsize sizekmg10.2;
run;