Fix pandas errors in DeepAR notebooks: use freq offsets and pd.date_range (#1490)

vafl · web-flow · commit aa5c9c2ec046 · 2020-10-07T12:35:33.000-07:00
diff --git a/introduction_to_amazon_algorithms/deepar_electricity/DeepAR-Electricity.ipynb b/introduction_to_amazon_algorithms/deepar_electricity/DeepAR-Electricity.ipynb
@@ -548,7 +548,7 @@
     "        \n",
     "        Return value: list of `pandas.DataFrame` objects, each containing the predictions\n",
     "        \"\"\"\n",
-    "        prediction_time = ts.index[-1] + 1\n",
+    "        prediction_time = ts.index[-1] + ts.index.freq\n",
     "        quantiles = [str(q) for q in quantiles]\n",
     "        req = self.__encode_request(ts, cat, dynamic_feat, num_samples, return_samples, quantiles)\n",
     "        res = super(DeepARPredictor, self).predict(req)\n",
@@ -575,7 +575,7 @@
     "        # however, if possible one will pass multiple time series as predictions will then be faster\n",
     "        predictions = json.loads(response.decode('utf-8'))['predictions'][0]\n",
     "        prediction_length = len(next(iter(predictions['quantiles'].values())))\n",
-    "        prediction_index = pd.DatetimeIndex(start=prediction_time, freq=freq, periods=prediction_length)        \n",
+    "        prediction_index = pd.date_range(start=prediction_time, freq=freq, periods=prediction_length)\n",
     "        if return_samples:\n",
     "            dict_of_samples = {'sample_' + str(i): s for i, s in enumerate(predictions['samples'])}\n",
     "        else:\n",
@@ -669,6 +669,7 @@
     "    plot_history=7 * 12,\n",
     "    confidence=80\n",
     "):\n",
+    "    freq = target_ts.index.freq\n",
     "    print(\"calling served model to generate predictions starting from {}\".format(str(forecast_date)))\n",
     "    assert(confidence > 50 and confidence < 100)\n",
     "    low_quantile = 0.5 - confidence * 0.005\n",
@@ -706,7 +707,7 @@
     "                \n",
     "                \n",
     "    # plot the target\n",
-    "    target_section = target_ts[forecast_date-plot_history:forecast_date+prediction_length]\n",
+    "    target_section = target_ts[forecast_date - plot_history * freq:forecast_date + prediction_length * freq]\n",
     "    target_section.plot(color=\"black\", label='target')\n",
     "    \n",
     "    # plot the confidence interval and the median predicted\n",
@@ -726,10 +727,10 @@
     "        for i, f in enumerate(dynamic_feat, start=1):\n",
     "            ax = plt.subplot(len(dynamic_feat) * 2, 1, len(dynamic_feat) + i, sharex=ax)\n",
     "            feat_ts = pd.Series(\n",
-    "                index=pd.DatetimeIndex(start=target_ts.index[0], freq=target_ts.index.freq, periods=len(f)),\n",
+    "                index=pd.date_range(start=target_ts.index[0], freq=target_ts.index.freq, periods=len(f)),\n",
     "                data=f\n",
     "            )\n",
-    "            feat_ts[forecast_date-plot_history:forecast_date+prediction_length].plot(ax=ax, color='g')"
+    "            feat_ts[forecast_date - plot_history * freq:forecast_date + prediction_length * freq].plot(ax=ax, color='g')"
    ]
   },
   {
@@ -908,8 +909,8 @@
     "test_data_new_features = [\n",
     "    {\n",
     "        \"start\": str(start_dataset),\n",
-    "        \"target\": encode_target(ts[start_dataset:end_training + 2*k*prediction_length]),\n",
-    "        \"dynamic_feat\": [special_day_features[i][start_dataset:end_training + 2*k*prediction_length].tolist()]\n",
+    "        \"target\": encode_target(ts[start_dataset:end_training + 2*k*prediction_length * ts.index.freq]),\n",
+    "        \"dynamic_feat\": [special_day_features[i][start_dataset:end_training + 2*k*prediction_length * ts.index.freq].tolist()]\n",
     "    }\n",
     "    for k in range(1, num_test_windows + 1) \n",
     "    for i, ts in enumerate(timeseries_uplift)\n",
@@ -1070,9 +1071,11 @@
     ")\n",
     "def plot_interact(customer_id, forecast_day, confidence, missing_ratio, show_samples): \n",
     "    forecast_date = end_training + datetime.timedelta(days=forecast_day)\n",
-    "    target = time_series_processed[customer_id][start_dataset:forecast_date + prediction_length]\n",
+    "    ts = time_series_processed[customer_id]\n",
+    "    freq = ts.index.freq\n",
+    "    target = ts[start_dataset:forecast_date + prediction_length * freq]\n",
     "    target = drop_at_random(target, missing_ratio)\n",
-    "    dynamic_feat = [special_day_features[customer_id][start_dataset:forecast_date + prediction_length].tolist()]\n",
+    "    dynamic_feat = [special_day_features[customer_id][start_dataset:forecast_date + prediction_length * freq].tolist()]\n",
     "    plot(\n",
     "        predictor_new_features,\n",
     "        target_ts=target, \n",
diff --git a/introduction_to_applying_machine_learning/deepar_chicago_traffic_violations/deepar_chicago_traffic_violations.ipynb b/introduction_to_applying_machine_learning/deepar_chicago_traffic_violations/deepar_chicago_traffic_violations.ipynb
@@ -357,7 +357,7 @@
     "        \n",
     "        Return value: list of `pandas.DataFrame` objects, each containing the predictions\n",
     "        \"\"\"\n",
-    "        prediction_times = [x.index[-1]+1 for x in ts]\n",
+    "        prediction_times = [x.index[-1] + x.index.freq for x in ts]\n",
     "        req = self.__encode_request(ts, cat, encoding, num_samples, quantiles)\n",
     "        res = super(DeepARPredictor, self).predict(req)\n",
     "        return self.__decode_response(res, prediction_times, encoding)\n",
@@ -372,7 +372,7 @@
     "        response_data = json.loads(response.decode(encoding))\n",
     "        list_of_df = []\n",
     "        for k in range(len(prediction_times)):\n",
-    "            prediction_index = pd.DatetimeIndex(start=prediction_times[k], freq=self.freq, periods=self.prediction_length)\n",
+    "            prediction_index = pd.date_range(start=prediction_times[k], freq=self.freq, periods=self.prediction_length)\n",
     "            list_of_df.append(pd.DataFrame(data=response_data['predictions'][k]['quantiles'], index=prediction_index))\n",
     "        return list_of_df\n",
     "\n",