|
467 | 467 | "metadata": {},
|
468 | 468 | "outputs": [],
|
469 | 469 | "source": [
|
470 |
| - "dfhoggs = (dfhogg[[\"x\", \"y\"]] - dfhogg[[\"x\", \"y\"]].mean(0)) / (\n", |
471 |
| - " 2 * dfhogg[[\"x\", \"y\"]].std(0)\n", |
472 |
| - ")\n", |
| 470 | + "dfhoggs = (dfhogg[[\"x\", \"y\"]] - dfhogg[[\"x\", \"y\"]].mean(0)) / (2 * dfhogg[[\"x\", \"y\"]].std(0))\n", |
473 | 471 | "dfhoggs[\"sigma_x\"] = dfhogg[\"sigma_x\"] / (2 * dfhogg[\"x\"].std())\n",
|
474 | 472 | "dfhoggs[\"sigma_y\"] = dfhogg[\"sigma_y\"] / (2 * dfhogg[\"y\"].std())"
|
475 | 473 | ]
|
|
505 | 503 | " marginal_kws={\"bins\": 12, \"kde\": True, \"kde_kws\": {\"cut\": 1}},\n",
|
506 | 504 | " joint_kws={\"edgecolor\": \"w\", \"linewidth\": 1, \"s\": 80},\n",
|
507 | 505 | ")\n",
|
508 |
| - "_ = gd.ax_joint.errorbar(\n", |
509 |
| - " \"x\", \"y\", \"sigma_y\", \"sigma_x\", fmt=\"none\", ecolor=\"#4878d0\", data=dfhoggs\n", |
510 |
| - ")\n", |
| 506 | + "_ = gd.ax_joint.errorbar(\"x\", \"y\", \"sigma_y\", \"sigma_x\", fmt=\"none\", ecolor=\"#4878d0\", data=dfhoggs)\n", |
511 | 507 | "_ = gd.fig.suptitle(\n",
|
512 | 508 | " (\n",
|
513 | 509 | " \"Quick view to confirm action of\\n\"\n",
|
|
639 | 635 | " y_est = b0 + b1 * dfhoggs[\"x\"]\n",
|
640 | 636 | "\n",
|
641 | 637 | " ## Define Normal likelihood\n",
|
642 |
| - " likelihood = pm.Normal(\n", |
643 |
| - " \"likelihood\", mu=y_est, sigma=dfhoggs[\"sigma_y\"], observed=dfhoggs[\"y\"]\n", |
644 |
| - " )\n", |
| 638 | + " likelihood = pm.Normal(\"likelihood\", mu=y_est, sigma=dfhoggs[\"sigma_y\"], observed=dfhoggs[\"y\"])\n", |
645 | 639 | "\n",
|
646 | 640 | "pm.model_to_graphviz(mdl_ols)"
|
647 | 641 | ]
|
|
1274 | 1268 | "\n",
|
1275 | 1269 | "gd = sns.JointGrid(x=\"b0_intercept\", y=\"b1_slope\", data=df_trc, height=8)\n",
|
1276 | 1270 | "_ = gd.fig.suptitle(\n",
|
1277 |
| - " (\n", |
1278 |
| - " \"Posterior joint distributions\"\n", |
1279 |
| - " + \"\\n(showing general movement from OLS to StudentT)\"\n", |
1280 |
| - " ),\n", |
| 1271 | + " (\"Posterior joint distributions\" + \"\\n(showing general movement from OLS to StudentT)\"),\n", |
1281 | 1272 | " y=1.05,\n",
|
1282 | 1273 | ")\n",
|
1283 | 1274 | "\n",
|
|
1297 | 1288 | " grp[\"b0_intercept\"], grp[\"b1_slope\"], ax=gd.ax_joint, alpha=0.2, label=idx\n",
|
1298 | 1289 | " )\n",
|
1299 | 1290 | " _ = sns.kdeplot(grp[\"b0_intercept\"], grp[\"b1_slope\"], ax=gd.ax_joint, **kde_kws)\n",
|
| 1291 | + " _ = sns.distplot(grp[\"b0_intercept\"], bins=x_bin_edges, ax=gd.ax_marg_x, **dist_kws)\n", |
1300 | 1292 | " _ = sns.distplot(\n",
|
1301 |
| - " grp[\"b0_intercept\"], bins=x_bin_edges, ax=gd.ax_marg_x, **dist_kws\n", |
1302 |
| - " )\n", |
1303 |
| - " _ = sns.distplot(grp[\"b1_slope\"], vertical=True, bins=y_bin_edges,\n", |
1304 |
| - " ax=gd.ax_marg_y, **dist_kws\n", |
| 1293 | + " grp[\"b1_slope\"], vertical=True, bins=y_bin_edges, ax=gd.ax_marg_y, **dist_kws\n", |
1305 | 1294 | " )\n",
|
1306 | 1295 | "_ = gd.ax_joint.legend()"
|
1307 | 1296 | ]
|
|
1419 | 1408 | " y_est_out = pm.Normal(\"y_est_out\", mu=0, sigma=10, testval=pm.floatX(0.0)) # (1, )\n",
|
1420 | 1409 | "\n",
|
1421 | 1410 | " # very weakly informative prior for additional variance for outliers\n",
|
1422 |
| - " sigma_y_out = pm.HalfNormal(\n", |
1423 |
| - " \"sigma_y_out\", sigma=10, testval=pm.floatX(1.0)\n", |
1424 |
| - " ) # (1, )\n", |
| 1411 | + " sigma_y_out = pm.HalfNormal(\"sigma_y_out\", sigma=10, testval=pm.floatX(1.0)) # (1, )\n", |
1425 | 1412 | "\n",
|
1426 | 1413 | " # create in/outlier distributions to get a logp evaluated on the observed y\n",
|
1427 | 1414 | " # this is not strictly a pymc3 likelihood, but behaves like one when we\n",
|
1428 | 1415 | " # evaluate it within a Potential (which is minimised)\n",
|
1429 | 1416 | " inlier_logp = pm.Normal.dist(mu=y_est_in, sigma=tsv_sigma_y).logp(tsv_y)\n",
|
1430 | 1417 | "\n",
|
1431 |
| - " outlier_logp = pm.Normal.dist(mu=y_est_out, sigma=tsv_sigma_y + sigma_y_out).logp(\n", |
1432 |
| - " tsv_y\n", |
1433 |
| - " )\n", |
| 1418 | + " outlier_logp = pm.Normal.dist(mu=y_est_out, sigma=tsv_sigma_y + sigma_y_out).logp(tsv_y)\n", |
1434 | 1419 | "\n",
|
1435 | 1420 | " # frac_outliers only needs to span [0, .5]\n",
|
1436 | 1421 | " # testval for is_outlier initialised in order to create class asymmetry\n",
|
|
1860 | 1845 | "dist_kws = dict(kde_kws=dict(cut=1), axlabel=False)\n",
|
1861 | 1846 | "\n",
|
1862 | 1847 | "for idx, grp in df_trc.groupby(\"model\"):\n",
|
1863 |
| - " _ = sns.scatterplot(\n", |
1864 |
| - " grp[\"b0_intercept\"], grp[\"b1_slope\"], ax=gd.ax_joint, alpha=0.2, label=idx\n", |
1865 |
| - " )\n", |
| 1848 | + " _ = sns.scatterplot(grp[\"b0_intercept\"], grp[\"b1_slope\"], ax=gd.ax_joint, alpha=0.2, label=idx)\n", |
1866 | 1849 | " _ = sns.kdeplot(grp[\"b0_intercept\"], grp[\"b1_slope\"], ax=gd.ax_joint, **kde_kws)\n",
|
1867 | 1850 | " _ = sns.distplot(grp[\"b0_intercept\"], **dist_kws, bins=x_bin_edges, ax=gd.ax_marg_x)\n",
|
1868 |
| - " _ = sns.distplot(\n", |
1869 |
| - " grp[\"b1_slope\"], **dist_kws, vertical=True, bins=y_bin_edges, ax=gd.ax_marg_y\n", |
1870 |
| - " )\n", |
| 1851 | + " _ = sns.distplot(grp[\"b1_slope\"], **dist_kws, vertical=True, bins=y_bin_edges, ax=gd.ax_marg_y)\n", |
1871 | 1852 | "_ = gd.ax_joint.legend()"
|
1872 | 1853 | ]
|
1873 | 1854 | },
|
|
1931 | 1912 | }
|
1932 | 1913 | ],
|
1933 | 1914 | "source": [
|
1934 |
| - "df_outlier_results = pd.DataFrame.from_records(\n", |
1935 |
| - " trc_hogg[\"is_outlier\"], columns=dfhoggs.index\n", |
1936 |
| - ")\n", |
1937 |
| - "dfm_outlier_results = pd.melt(\n", |
1938 |
| - " df_outlier_results, var_name=\"datapoint_id\", value_name=\"is_outlier\"\n", |
1939 |
| - ")\n", |
| 1915 | + "df_outlier_results = pd.DataFrame.from_records(trc_hogg[\"is_outlier\"], columns=dfhoggs.index)\n", |
| 1916 | + "dfm_outlier_results = pd.melt(df_outlier_results, var_name=\"datapoint_id\", value_name=\"is_outlier\")\n", |
1940 | 1917 | "\n",
|
1941 | 1918 | "gd = sns.catplot(\n",
|
1942 | 1919 | " y=\"datapoint_id\",\n",
|
|
2032 | 2009 | }
|
2033 | 2010 | ],
|
2034 | 2011 | "source": [
|
2035 |
| - "dfhoggs[\"annotate_for_investigation\"] = (\n", |
2036 |
| - " np.quantile(trc_hogg[\"is_outlier\"], 0.75, axis=0) == 1\n", |
2037 |
| - ")\n", |
| 2012 | + "dfhoggs[\"annotate_for_investigation\"] = np.quantile(trc_hogg[\"is_outlier\"], 0.75, axis=0) == 1\n", |
2038 | 2013 | "dfhoggs[\"annotate_for_investigation\"].value_counts()"
|
2039 | 2014 | ]
|
2040 | 2015 | },
|
|
0 commit comments