/*****************************************************************************************************************
    SAS file name:  Linear_Regression.sas
    File location:  _________________________________________________________________________________________________________________
    Purpose:        Perform linear regression using PROC REG, PROC GLM and SAS/IML
    Author:         Peter Clemmensen
    Creation Date:  25may2017

    This program supports the blog post "Linear Regression in SAS" on SASnrd.com
*****************************************************************************************************************/

/* Simple linear regression using PROC REG */
proc reg data = sashelp.class;
    model weight = height;
run; quit;

/* Simple linear regression using PROC GLM */
proc glm data = sashelp.class;
    model weight = height;
run; quit;

/* IML program to perform linear regression.
   Fits weight = intercept + height by ordinary least squares via the normal
   equations, then prints a parameter table, R^2 and an ANOVA table.
   NOTE: no missing-value filtering is done here; rows are used exactly as read. */
proc iml;
    use sashelp.class;                               /* Open dataset for reading                          */
    read all var {'weight'} into y;                  /* Read dependent variable into vector y             */
    read all var {'height'} into X[c=names];         /* Read independent variable(s) into matrix X;       */
                                                     /* column names are kept in names for the report     */
    close sashelp.class;                             /* Close dataset for reading                         */

    /* Degrees of freedom. df_model must be taken BEFORE the intercept column is
       prepended, so that it counts regressors only. */
    df_model = ncol(X);                              /* Model degrees of freedom                          */
    X        = j(nrow(X), 1, 1) || X;                /* Prepend a column of ones for the intercept        */
    df_error = nrow(X) - ncol(X);                    /* Error degrees of freedom                          */

    /* (X`X)^-1 is needed both for the estimates and for their standard errors,
       so it is computed once and reused. */
    xpx_inv  = inv(t(X) * X);
    beta_hat = xpx_inv * t(X) * y;                   /* Solve normal equations for parameter estimates    */

    y_hat = X * beta_hat;                            /* Predicted values                                  */
    res   = y - y_hat;                               /* Residuals                                         */

    SSM = sum((y_hat - mean(y))##2);                 /* Model Sum of Squares                              */
    SSE = sum(res##2);                               /* Error Sum of Squares                              */
    MSM = SSM / df_model;                            /* Model Mean Square                                 */
    MSE = SSE / df_error;                            /* Error Mean Square                                 */

    R_square = SSM / (SSM + SSE);                    /* R^2                                               */

    /* Overall F test. SDF (survival function) gives the upper-tail probability
       directly and avoids the cancellation error of 1 - CDF for large F. */
    F   = MSM / MSE;                                 /* F test statistic for overall model                */
    p_F = sdf('F', F, df_model, df_error);           /* p-value of the F test                             */

    /* Per-parameter t tests. The statistic is stored in t_stat rather than t so
       that it does not share a name with the T() transpose function used above
       and in the PRINT statement below. */
    std_err = sqrt(MSE * vecdiag(xpx_inv));          /* Standard errors of estimated parameters           */
    t_stat  = beta_hat / std_err;                    /* t test statistic for estimated parameters         */
    p_t     = 2 * sdf('t', abs(t_stat), df_error);   /* Two-sided p-values for the parameter estimates    */

    /* Print parameter estimates, standard errors, t statistics and p-values */
    print ('Intercept' // t(names))[l='Parameters']
          beta_hat[f=best10.2 l='Estimate']
          std_err[f=best10.2 l='Std. Error']
          t_stat[f=best5. l='t Value']
          p_t[f=pvalue6.4 l='p Value'];

    print R_square[f=best10.2 l='R^2'];

    /* Print ANOVA table: sums of squares, mean squares, F test and p-value.
       Mean Square has no "Corrected Total" entry and F / p Value apply to the
       Model row only, so those columns are intentionally shorter. */
    print ({'Model', 'Error', 'Corrected Total'})[l='Source']
          (df_model // df_error // df_model+df_error)[f=best10. l='DF']
          (SSM // SSE // SSM+SSE)[f=best10. l='Sums of Squares']
          (MSM // MSE)[f=best10. l='Mean Square']
          F[f=best5. l='F Value']
          p_F[f = pvalue6.4 l='p Value'];
quit;