|
110 | 110 | lambda s: s
|
111 | 111 | in (
|
112 | 112 | "columns",
|
| 113 | + "item_records", |
113 | 114 | "timestamp_records",
|
114 | 115 | ),
|
115 | 116 | ),
|
@@ -391,89 +392,74 @@ class DatasetType(Enum):
|
391 | 392 | class TimeSeriesJSONDatasetFormat(Enum):
|
392 | 393 | """Possible dataset formats for JSON time series data files.
|
393 | 394 |
|
394 |
| - Below is an example ``COLUMNS`` dataset for time series explainability: |
| 395 | + Below is an example ``COLUMNS`` dataset for time series explainability:: |
395 | 396 |
|
396 |
| - ``` |
397 |
| - { |
398 |
| - "ids": [1, 2], |
399 |
| - "timestamps": [3, 4], |
400 |
| - "target_ts": [5, 6], |
401 |
| - "rts1": [0.25, 0.5], |
402 |
| - "rts2": [1.25, 1.5], |
403 |
| - "scv1": [10, 20], |
404 |
| - "scv2": [30, 40] |
405 |
| - } |
| 397 | + { |
| 398 | + "ids": [1, 2], |
| 399 | + "timestamps": [3, 4], |
| 400 | + "target_ts": [5, 6], |
| 401 | + "rts1": [0.25, 0.5], |
| 402 | + "rts2": [1.25, 1.5], |
| 403 | + "scv1": [10, 20], |
| 404 | + "scv2": [30, 40] |
| 405 | + } |
406 | 406 |
|
407 |
| - ``` |
| 407 | + For this example, JMESPaths are specified when creating ``TimeSeriesDataConfig`` as follows:: |
408 | 408 |
|
409 |
| - For this example, JMESPaths are specified when creating ``TimeSeriesDataConfig`` as follows: |
| 409 | + item_id="ids" |
| 410 | + timestamp="timestamps" |
| 411 | + target_time_series="target_ts" |
| 412 | + related_time_series=["rts1", "rts2"] |
| 413 | + static_covariates=["scv1", "scv2"] |
410 | 414 |
|
411 |
| - ``` |
412 |
| - item_id="ids" |
413 |
| - timestamp="timestamps" |
414 |
| - target_time_series="target_ts" |
415 |
| - related_time_series=["rts1", "rts2"] |
416 |
| - static_covariates=["scv1", "scv2"] |
417 |
| - ``` |
| 415 | + Below is an example ``ITEM_RECORDS`` dataset for time series explainability:: |
418 | 416 |
|
419 |
| - Below is an example ``ITEM_RECORDS`` dataset for time series explainability: |
| 417 | + [ |
| 418 | + { |
| 419 | + "id": 1, |
| 420 | + "scv1": 10, |
| 421 | + "scv2": "red", |
| 422 | + "timeseries": [ |
| 423 | + {"timestamp": 1, "target_ts": 5, "rts1": 0.25, "rts2": 10}, |
| 424 | + {"timestamp": 2, "target_ts": 6, "rts1": 0.35, "rts2": 20}, |
| 425 | + {"timestamp": 3, "target_ts": 4, "rts1": 0.45, "rts2": 30} |
| 426 | + ] |
| 427 | + }, |
| 428 | + { |
| 429 | + "id": 2, |
| 430 | + "scv1": 20, |
| 431 | + "scv2": "blue", |
| 432 | + "timeseries": [ |
| 433 | + {"timestamp": 1, "target_ts": 4, "rts1": 0.25, "rts2": 40}, |
| 434 | + {"timestamp": 2, "target_ts": 2, "rts1": 0.35, "rts2": 50} |
| 435 | + ] |
| 436 | + } |
| 437 | + ] |
420 | 438 |
|
421 |
| - ``` |
422 |
| - [ |
423 |
| - { |
424 |
| - "id": 1, |
425 |
| - "scv1": 10, |
426 |
| - "scv2": "red", |
427 |
| - "timeseries": [ |
428 |
| - {"timestamp": 1, "target_ts": 5, "rts1": 0.25, "rts2": 10}, |
429 |
| - {"timestamp": 2, "target_ts": 6, "rts1": 0.35, "rts2": 20}, |
430 |
| - {"timestamp": 3, "target_ts": 4, "rts1": 0.45, "rts2": 30} |
431 |
| - ] |
432 |
| - }, |
433 |
| - { |
434 |
| - "id": 2, |
435 |
| - "scv1": 20, |
436 |
| - "scv2": "blue", |
437 |
| - "timeseries": [ |
438 |
| - {"timestamp": 1, "target_ts": 4, "rts1": 0.25, "rts2": 40}, |
439 |
| - {"timestamp": 2, "target_ts": 2, "rts1": 0.35, "rts2": 50} |
440 |
| - ] |
441 |
| - } |
442 |
| - ] |
443 |
| - ``` |
444 |
| -
|
445 |
| - For this example, JMESPaths are specified when creating ``TimeSeriesDataConfig`` as follows: |
446 |
| -
|
447 |
| - ``` |
448 |
| - item_id="[*].id" |
449 |
| - timestamp="[*].timeseries[].timestamp" |
450 |
| - target_time_series="[*].timeseries[].target_ts" |
451 |
| - related_time_series=["[*].timeseries[].rts1", "[*].timeseries[].rts2"] |
452 |
| - static_covariates=["[*].scv1", "[*].scv2"] |
453 |
| - ``` |
454 |
| -
|
455 |
| - Below is an example ``TIMESTAMP_RECORDS`` dataset for time series explainability: |
456 |
| -
|
457 |
| - ``` |
458 |
| - [ |
459 |
| - {"id": 1, "timestamp": 1, "target_ts": 5, "scv1": 10, "rts1": 0.25}, |
460 |
| - {"id": 1, "timestamp": 2, "target_ts": 6, "scv1": 10, "rts1": 0.5}, |
461 |
| - {"id": 1, "timestamp": 3, "target_ts": 3, "scv1": 10, "rts1": 0.75}, |
462 |
| - {"id": 2, "timestamp": 5, "target_ts": 10, "scv1": 20, "rts1": 1} |
463 |
| - ] |
| 439 | + For this example, JMESPaths are specified when creating ``TimeSeriesDataConfig`` as follows:: |
| 440 | +
|
| 441 | + item_id="[*].id" |
| 442 | + timestamp="[*].timeseries[].timestamp" |
| 443 | + target_time_series="[*].timeseries[].target_ts" |
| 444 | + related_time_series=["[*].timeseries[].rts1", "[*].timeseries[].rts2"] |
| 445 | + static_covariates=["[*].scv1", "[*].scv2"] |
464 | 446 |
|
465 |
| - ``` |
| 447 | + Below is an example ``TIMESTAMP_RECORDS`` dataset for time series explainability:: |
466 | 448 |
|
467 |
| - For this example, JMESPaths are specified when creating ``TimeSeriesDataConfig`` as follows: |
| 449 | + [ |
| 450 | + {"id": 1, "timestamp": 1, "target_ts": 5, "scv1": 10, "rts1": 0.25}, |
| 451 | + {"id": 1, "timestamp": 2, "target_ts": 6, "scv1": 10, "rts1": 0.5}, |
| 452 | + {"id": 1, "timestamp": 3, "target_ts": 3, "scv1": 10, "rts1": 0.75}, |
| 453 | + {"id": 2, "timestamp": 5, "target_ts": 10, "scv1": 20, "rts1": 1} |
| 454 | + ] |
468 | 455 |
|
469 |
| - ``` |
470 |
| - item_id="[*].id" |
471 |
| - timestamp="[*].timestamp" |
472 |
| - target_time_series="[*].target_ts" |
473 |
| - related_time_series=["[*].rts1"] |
474 |
| - static_covariates=["[*].scv1"] |
475 |
| - ``` |
| 456 | + For this example, JMESPaths are specified when creating ``TimeSeriesDataConfig`` as follows:: |
476 | 457 |
|
| 458 | + item_id="[*].id" |
| 459 | + timestamp="[*].timestamp" |
| 460 | + target_time_series="[*].target_ts" |
| 461 | + related_time_series=["[*].rts1"] |
| 462 | + static_covariates=["[*].scv1"] |
477 | 463 | """
|
478 | 464 |
|
479 | 465 | COLUMNS = "columns"
|
@@ -693,11 +679,10 @@ def __init__(
|
693 | 679 | Note: For JSON, the JMESPath query must result in a list of labels for each
|
694 | 680 | sample. For JSON Lines, it must result in the label for each line.
|
695 | 681 | Only a single label per sample is supported at this time.
|
696 |
| - headers (str): List of column names in the dataset. If not provided, Clarify will |
| 682 | + headers ([str]): List of column names in the dataset. If not provided, Clarify will |
697 | 683 | generate headers to use internally. For time series explainability cases,
|
698 |
| - please provide headers in the following order: |
699 |
| - item_id, timestamp, target_time_series, all related_time_series columns, |
700 |
| - all static_covariate columns |
| 684 | + please provide headers in the order of item_id, timestamp, target_time_series, |
| 685 | + all related_time_series columns, and then all static_covariate columns. |
701 | 686 | features (str): JMESPath expression to locate the feature values
|
702 | 687 | if the dataset format is JSON/JSON Lines.
|
703 | 688 | Note: For JSON, the JMESPath query must result in a 2-D list (or a matrix) of
|
@@ -959,10 +944,10 @@ def __init__(
|
959 | 944 | forecast (str): JMESPath expression to extract the forecast result.
|
960 | 945 |
|
961 | 946 | Raises:
|
962 |
| - AssertionError: when either ``forecast`` or ``forecast_horizon`` are not provided |
| 947 | + AssertionError: when ``forecast`` is not provided |
963 | 948 | ValueError: when any provided argument is not of the specified type
|
964 | 949 | """
|
965 |
| - # assert forecast and forecast_horizon are provided |
| 950 | + # assert forecast is provided |
966 | 951 | assert (
|
967 | 952 | forecast
|
968 | 953 | ), "Please provide ``forecast``, a JMESPath expression to extract the forecast result."
|
@@ -1775,8 +1760,23 @@ def __init__(
|
1775 | 1760 | num_samples (None or int): Number of samples to be used in the Asymmetric Shapley
|
1776 | 1761 | Value forecasting algorithm. Only applicable when using ``"fine_grained"``
|
1777 | 1762 | explanations.
|
1778 |
| - baseline (str or dict): Link to a baseline configuration or a dictionary for it. |
1779 |
| - # TODO: improve above. |
| 1763 | + baseline (str or dict): Link to a baseline configuration or a dictionary for it. The |
| 1764 | + baseline config is used to replace out-of-coalition values for the corresponding |
| 1765 | + datasets (also known as background data). For temporal data (target time series, |
| 1766 | + related time series), the baseline value types are "zero", where all |
| 1767 | + out-of-coalition values will be replaced with 0.0, or "mean", where all |
| 1768 | + out-of-coalition values will be replaced with the average of a time series. For |
| 1769 | + static data (static covariates), a baseline value for each covariate should be |
| 1770 | + provided for each possible item_id. An example config follows, where ``item1`` |
| 1771 | + and ``item2`` are item ids:: |
| | + |
| 1772 | + { |
| 1773 | + "related_time_series": "zero", |
| 1774 | + "static_covariates": { |
| 1775 | + "item1": [1, 1], |
| 1776 | + "item2": [0, 1] |
| 1777 | + }, |
| 1778 | + "target_time_series": "zero" |
| 1779 | + } |
1780 | 1780 |
|
1781 | 1781 | Raises:
|
1782 | 1782 | AssertionError: when ``direction`` or ``granularity`` are not valid,
|
|
0 commit comments