/*****************************************************************************************************************
    SAS file name: Hash_Reduce_Basic
    File location: _________________________________________________________________________________________________________________
    Purpose:       To demonstrate three basic techniques that reduce the SAS hash object memory footprint.
    Author:        Peter Clemmensen
    Creation Date: 01/12/2018

    This program supports the example page "Three Basic Techniques to Reduce SAS Hash Object Size" on SASnrd.com
*****************************************************************************************************************/

/*
   MakeHashData: (re)creates WORK.HashData with n rows of random person data.

   Parameters (all keyword, all optional):
     first_len, last_len, gender_len, state_len - character lengths of first_name, last_name, gender and state
     birth_len, children_len                    - stored numeric lengths of birth_date and children
     n                                          - number of rows to generate

   The defaults reproduce the deliberately oversized layout used by the first part of the demo.
   Variable order in the output is id1-id5, first_name, last_name, gender, state, birth_date, children.
   No random seed is set, so every run produces different data.
*/
%macro MakeHashData(first_len=50, last_len=50, gender_len=20, state_len=50,
                    birth_len=8, children_len=8, n=1e6);

   data HashData(drop=i j);
      /* NOTE(review): uuidgen() returns a 36-character value and only the first 10 characters are kept here, */
      /* so the id values are not guaranteed to be unique - confirm this is acceptable for the hash key id1   */
      array id{5} $10;

      length first_name $&first_len
             last_name  $&last_len
             gender     $&gender_len
             state      $&state_len
             birth_date &birth_len
             children   &children_len;

      /* Lookup tables that the random values are drawn from */
      array first_namesm{20} $20 _temporary_
         ("Paul", "Allan", "Bob", "Michael", "Chris", "David", "John", "Jerry", "James", "Robert",
          "William", "Richard", "Thomas", "Daniel", "Paul", "George", "Larry", "Eric", "Charles", "Stephen");

      array first_namesf{20} $20 _temporary_
         ("Mary", "Linda", "Patricia", "Barbara", "Elizabeth", "Maria", "Susan", "Margaret", "Lisa", "Nancy",
          "Karen", "Betty", "Helen", "Sandra", "Sharon", "Laura", "Michelle", "Angela", "Melissa", "Amanda");

      array last_names{20} $20 _temporary_
         ("Smith", "Johnson", "Williams", "Jones", "Brown", "Miller", "Wilson", "Moore", "Taylor", "Hall",
          "Anderson", "Jackson", "White", "Harris", "Martin", "Thompson", "Robinson", "Lewis", "Walker", "Allen");

      array states{50} $20 _temporary_
         ("Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware",
          "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
          "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
          "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
          "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
          "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
          "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming");

      do i = 1 to &n;

         /* Five id columns per row. Only id1 is used as the hash key further down */
         do j = 1 to dim(id);
            id[j] = uuidgen();
         end;

         /* Pick the gender with equal probability and a first name from the matching list */
         if rand("Uniform") < 0.5 then do;
            gender     = "M";
            first_name = first_namesm[ceil(rand("Uniform")*20)];
         end;
         else do;
            gender     = "F";
            first_name = first_namesf[ceil(rand("Uniform")*20)];
         end;

         last_name  = last_names[ceil(rand("Uniform")*20)];
         state      = states[ceil(rand("Uniform")*50)];
         birth_date = rand("Integer", '01jan1950'd, '01jan1990'd);

         /* The Table distribution returns 1-6, so subtracting 1 gives 0-5 children */
         children   = rand("Table", 0.1, 0.2, 0.3, 0.2, 0.1, 0.1) - 1;

         output;
      end;

      format birth_date date9.;
   run;

%mend MakeHashData;


/* Create example data (oversized variable lengths on purpose) */
%MakeHashData()

proc contents data=HashData;
run;


/*
   Each measurement step below follows the same pattern:
     - "if 0 then set" is never executed. It only defines the data set variables in the step
       so that the hash object has host variables to bind to.
     - The XMRLMEM option value is read before and after the hash object is loaded and the
       difference is written to the log as the size of the hash object.
       NOTE(review): XMRLMEM is presumably the real memory currently available to the session - confirm.
     - An explicit STOP ends the step after its single pass. Because the SET statement never
       executes, the step would otherwise only end through the looping detection of the DATA step.
*/

/* Read in the HashData data set as is */
data _null_;
   if 0 then set HashData;

   before = input(getoption('xmrlmem'), 20.);

   if _N_ = 1 then do;
      declare hash h(dataset:"HashData");
      h.defineKey('id1');
      h.defineData(all:"Y");
      h.defineDone();
   end;

   after    = input(getoption('xmrlmem'), 20.);
   hashsize = before - after;

   put "Hash Object Takes Up:" hashsize sizekmg10.2;
   stop;
run;


/* 1. Read in the necessary hash variables only */
data _null_;
   if 0 then set HashData;

   before = input(getoption('xmrlmem'), 20.);

   if _N_ = 1 then do;
      declare hash h(dataset:'HashData');
      h.defineKey('id1');
      h.defineData('first_name', 'last_name', 'state');
      h.defineDone();
   end;

   after    = input(getoption('xmrlmem'), 20.);
   hashsize = before - after;

   put 'Hash Object Takes Up:' hashsize sizekmg10.2;
   stop;
run;


/* 2. Read in the necessary items only */
data _null_;
   if 0 then set HashData;

   before = input(getoption('xmrlmem'), 20.);

   if _N_ = 1 then do;
      /* The WHERE= data set option filters the rows before they are loaded into the hash object */
      declare hash h(dataset:"HashData(where=(gender='F'))");
      h.defineKey('id1');
      h.defineData('first_name', 'last_name', 'state');
      h.defineDone();
   end;

   after    = input(getoption('xmrlmem'), 20.);
   hashsize = before - after;

   put 'Hash Object Takes Up:' hashsize sizekmg10.2;
   stop;
run;


/* 3. Minimize length of input variables */
%MakeHashData(first_len=20, last_len=20, gender_len=1, state_len=20, birth_len=6, children_len=3)

data _null_;
   if 0 then set HashData;

   before = input(getoption('xmrlmem'), 20.);

   if _N_ = 1 then do;
      declare hash h(dataset:"HashData(where=(gender='F'))");
      h.defineKey('id1');
      h.defineData('first_name', 'last_name', 'state');
      h.defineDone();
   end;

   after    = input(getoption('xmrlmem'), 20.);
   hashsize = before - after;

   put 'Hash Object Takes Up:' hashsize sizekmg10.2;
   stop;
run;