/* (Not quite) Routines for doing pretty things ... */ #include #include #include #include "utils.h" #include "linstats.h" #include "distribs.h" /* Prototypes: */ void GenAwk (double *beta, int K, char **labels, int *maptable, int labelcount); void GenEPP (char **labels, int K, double *beta, double *S, double sigma2); /* ols(...) is the bottom line doer of all work, called by main() */ int ols (char **labels, float **data, int K, int ObsNum, int purebeta, int pscript, int epp, int *maptable, int labelcount) /* calls olsengine for computations and does display. */ { double *beta, *S; float *Yhat; double sigma2, R2, probv, F; int noinference, i, error, dof1, dof2; char *DASHLINE = "-------------+-----------------------------------------------"; if ((beta = (double *) malloc (K * sizeof (double))) == NULL) { fprintf (stderr, "Out of memory when allocating beta.\n"); return 1; } if ((S = (double *) malloc (K * sizeof (double))) == NULL) { fprintf (stderr, "Out of memory when allocating S.\n"); return 1; } if ((Yhat = (float *) malloc (ObsNum * sizeof (float))) == NULL) { fprintf (stderr, "Out of memory when allocating Yhat.\n"); return 1; } /* if pscript is on, you don't need to do inference. */ noinference = FALSE; if (pscript) noinference = TRUE; if (1 == (error = olsengine (noinference, data, K, ObsNum, beta, S, &sigma2, &R2, &F, Yhat))) { fprintf (stderr, "Errors in OLS calculations.\n"); return 1; } if (purebeta) { for (i = 0; i < K; i++) printf (" %f ", beta[i]); printf ("%f\n", sqrt (sigma2)); return 0; } if (pscript) { GenAwk (beta, K, labels, maptable, labelcount); return 0; } if (epp) { GenEPP (labels, K, beta, S, sigma2); return 0; } /* Now start printing results. */ printf ("%d observations, %d rhs regressors.\n", ObsNum, K); printf ("%s\n", DASHLINE); printf ("%12s | %12s %12s %8s %8s\n", "Variable", "Coefficient", "Std.Error", "t ", "Prob>|t|"); printf ("%s\n", DASHLINE); for (i = 0; i < K; i++) { if (S[i] == 0.0) probv = 0.0; else { if (1 == tprob ((double) -fabs (beta[i] / S[i]), ObsNum - K, &probv)) { fprintf (stderr, "Fatal error in computing t probability.\n"); return 1; } } printf ("%12s | %12.6f %12.6f %8.3f %8.3f\n", labels[i], beta[i], S[i], (S[i] == 0.0 ? 0.0 : beta[i] / S[i]), (probv * 2.0) ); } printf ("%s\n", DASHLINE); dof1 = K - 1; dof2 = ObsNum - K; /* F(dof1, dof2) */ printf ("R2 = %5.3f, F(%d, %d) = %3.1f, Prob>F = %5.3f\n", R2, dof1, dof2, F, pF (F, dof1, dof2)); printf ("RMSE = %5.3f\n", sqrt (sigma2)); return 0; } void GenAwk (double *beta, int K, char **labels, int *maptable, int labelcount) /* Recall layout of maptable: entries are defined from 1..K (not 1..labelcount), (maptable[i] == j) ==> i'th field of beta comes from j'th field of input line. */ { int i; char *ylabel; printf ("\n#\n# This awk program was generated by ols.\n"); printf ("# It reads data from StdIn and prints predictions to StdOut.\n"); printf ("#\n"); printf ("# It is built for the data layout used when estimating.\n"); printf ("# However, this should not be hard to modify.\n"); printf ("#\n\n"); printf ("{\n"); for (i = 0; i < K; i++) printf ("\t%s = $%d;\n", labels[i], (1 + maptable[i])); ylabel = labels[K]; printf ("\t%s = $%d;\n", ylabel, (1 + maptable[K])); printf ("\n\tpredicted = "); for (i = 0; i < K; i++) { printf ("(%f*%s)", beta[i], labels[i]); if (i != (K - 1)) printf (" \\\n\t\t+ "); else printf (";\n"); } /* Now to find y. */ printf ("\terror = %s - predicted;\n", ylabel); printf ("\tprint predicted \" \" %s \" \" error;\n", ylabel); printf ("\tSSE += (error*error);\n"); printf ("}\n"); printf ("\nEND {\n"); printf ("\tMSE = SSE/NR;\n"); printf ("\tprint \"MSE = \" MSE \", SSE = \" SSE > \"/dev/tty\";\n"); printf ("}\n\n"); printf ("# End of generated script.\n\n"); } /* Example of generated awk, when labels are unknown: * # * # This awk program was generated by ols. * # It reads data from StdIn and prints predictions to StdOut. * # * # It is built for the data layout used when estimating. * # However, this should not be hard to modify. * # * * { * $1 = $1; * $2 = $2; * $3 = $3; * # In out-of-sample prediction, $3 defaults to 0.0. * * predicted = (1.500000*$1) \ * + (0.700000*$2); * error = $3 - predicted; * print predicted " " $3 " " error; * SSE += (error*error); * } * * END { * MSE = SSE/NR; * print "MSE = " MSE ", SSE = " SSE > "/dev/tty"; * } * * # End of script. */ void GenEPP (char **labels, int K, double *beta, double *S, double sigma2) /* Given estimation results, generate EPP. This can be post-processed into TeX for sure. */ { int i; printf ("caption Results of OLS Regression for %s\n", labels[K]); printf ("comment Regression Coefficients\n"); for (i = 0; i < K; i++) printf ("estimate %s yes %12.6f %8.3f\n", labels[i], beta[i], S[i]); printf ("blank\n"); printf ("comment Regression Standard Error\n"); printf ("estimate $\\sigma$ yes %5.3f not-estimated\n", sqrt (sigma2)); } /* Example: * caption Typical display of results after OLS * comment Regression Coefficients * estimate $\beta$ yes -9.209 0.0286 * estimate $\tau$ yes -6.000 0.234 * estimate $\rho$ yes -1.078 0.0288 * estimate $\delta$ yes -2.303 0.21 * estimate $\theta$ yes -9.000 0.113 * estimate $\xi$ yes -9.000 1.00 * blank * comment Regression Standard Error * estimate $\sigma_v$ no 0.867 */