/***************************************************************************************************************** SAS file name: binary_search_lookup.sas File location: __________________________________________________________________________________________________________________ Purpose: To demonstrate how to use a binary search algorith of a SAS array to perform table lookups Author: Peter Clemmensen Creation Date: 12/11/2020 This program supports the blog post "Use Binary Search of Array in SAS Table Lookup" on SASnrd.com *****************************************************************************************************************/ /* Use a binary search in an array lookup */ /* Example data */ data big(keep=key) small(keep=key data); call streaminit(123); array r {5000000} _temporary_ (1 : 5000000); h = 5000000; do _N_ = 1 to 5000000; i = rand ("integer", h); key = r [i]; data = rand('integer', 1, 1e8); output big; if rand('uniform') < .5 then output small; r [i] = r [h]; h = h-1; end; stop; run; /* Find the number of obs */ data _null_; set small nobs=n; call symputx('n', n); stop; run; %put &n.; /* Use the Qsort macro and binary search technique to merge data */ data want1(keep=key data); array k {&n} _temporary_; array d {&n} _temporary_; do x = 1 by 1 until (z1); set small end = z1; k[x] = key; d[x] = data; end; %qsort (Arr=k d, By=k); do until (z2); set big end = z2; l = lbound(k); h = hbound(k); x = .; data = .; do until (l > h); m = floor((l + h) * .5); if key < k[m] then h = m - 1; else if key > k[m] then l = m + 1; else do; data = d[m]; leave; end; end; output; end; run; /* Compare to the hash object */ data want2; if _N_ = 1 then do; dcl hash h(dataset : 'small'); h.definekey('key'); h.definedata('data'); h.definedone(); end; set big; data = .; _N_ = h.find(); run; /* Verify that the two results are identical */ proc compare base=want1 comp=want2;run; /* Qsort Macro Definition */ %Macro Qsort ( Arr = /* Parallel array name list */ ,By = %QScan(&Arr,1,%Str( )) /* Key array name */ ,Seq = A /* Seq=D for descending */ ,LB = Lbound(&By) /* Lower bound to sort */ ,HB = Hbound(&By) /* Upper bound to sort */ ,M = 9 /* Tuning range: (1:15) */ ); %Local _ H I J L N P Q S T W ; %Macro Swap (I,J) ; %Local W ; Do ; %Do W = 1 %To &N ; &T&W = &&A&W(&I) ; &&A&W(&I) = &&A&W(&J) ; &&A&W(&J) = &T&W ; %End ; End ; %Mend Swap ; %If %Upcase(&Seq) = %Upcase(A) %Then %Let Q = G ; %Else %Let Q = L ; %Do %Until (&&A&N EQ ) ; %Let N = %Eval(&N + 1) ; %Local A&N ; %Let A&N = %Scan(&Arr,&N,%Str( )) ; %End ; %Let N = %Eval(&N - 1) ; %Let _ = %Substr(%Sysfunc(Ranuni(0)),3, %Eval(7 - %Length(&N) + 5*(%Substr(&Sysver,1,1) GT 6))) ; %Let H = H&_ ; %Let I = I&_ ; %Let J = J&_ ; %Let L = L&_ ; %Let P = P&_ ; %Let S = S&_ ; %Let T = T&_ ; %Let Z = Z&_ ; Array &Z (0:1, 0:50) _Temporary_ ; &L = &LB ; &H = &HB ; If &H - &L GT &M Then Do &S=1 By 0 While (&S) ; &J = (&H - &L)/3 ; &I = &L + &J ; &J = &I + &J ; If &By(&L) &Q.T &By(&I) Then %Swap(&L,&I) ; If &By(&I) &Q.T &By(&J) Then %Swap(&I,&J) ; If &By(&J) &Q.T &By(&H) Then %Swap(&J,&H) ; If &By(&L) &Q.T &By(&I) Then %Swap(&L,&I) ; If &By(&I) &Q.T &By(&J) Then %Swap(&I,&J) ; If &By(&L) &Q.T &By(&I) Then %Swap(&L,&I) ; %If &M LE 3 %Then %Do ; If &H - &L LE 3 Then Do ; &L = &Z(0,&S) ; &H = &Z(1,&S) ; &S +- 1 ; Continue ; End ; %End ; %Swap(&L,&I) ; &P = &By(&L) ; &I = &L ; Do &J=&H + 1 By 0 ; Do &I=&I + 1 By + 1 Until(&By(&I) &Q.E &P) ; End ; Do &J=&J - 1 By - 1 Until(&P &Q.E &By(&J)) ; End ; If &I GE &J Then Leave ; %Swap(&I,&J) ; End ; %Swap(&L,&J) ; If &H - &J GE &J - &L GT &M Then Do &S = &S + 1 ; &Z(0,&S) = &J + 1 ; &Z(1,&S) = &H ; &H = &J - 1 ; End ; Else If &J - &L GE &H - &J GT &M Then Do &S = &S + 1 ; &Z(0,&S) = &L ; &Z(1,&S) = &J - 1 ; &L = &J + 1 ; End ; Else If &J - &L GT &M GE &H - &J Then &H = &J - 1 ; Else If &H - &J GT &M GE &J - &L Then &L = &J + 1 ; Else Do ; &L = &Z(0,&S) ; &H = &Z(1,&S) ; &S +- 1 ; End ; End ; %If &M = 1 %Then %Goto Exit ; Do &J = &LB + 1 To &HB ; If &By(&J - 1) &Q.T &By(&J) Then Do ; &P = &By(&J) ; %Do W = 1 %To &N ; %If &&A&W Ne &By %Then &T&W = &&A&W(&J) ; ; %End ; Do &I = &J - 1 To &LB By - 1 ; If &P &Q.E &By(&I) Then Leave ; %Do W = 1 %To &N ; &&A&W(&I + 1) = &&A&W(&I) ; %End ; End ; &By(&I + 1) = &P ; %Do W = 1 %To &N ; %If &&A&W Ne &By %Then &&A&W(&I + 1) = &T&W ; ; %End ; End ; End ; %Exit: Drop &H &I &J &L &P &S T&_: ; %Mend Qsort ;