@@ -48,8 +48,8 @@ def __init__(
48
48
headers (list[str]): A list of column names in the input dataset.
49
49
features (str): JSONPath for locating the feature columns for bias metrics if the
50
50
dataset format is JSONLines.
51
- dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV
52
- and "application/jsonlines" for JSONLines.
51
+ dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
52
+ "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet .
53
53
s3_data_distribution_type (str): Valid options are "FullyReplicated" or
54
54
"ShardedByS3Key".
55
55
s3_compression_type (str): Valid options are "None" or "Gzip".
@@ -61,6 +61,11 @@ def __init__(
61
61
self .label = label
62
62
self .headers = headers
63
63
self .features = features
64
+ if dataset_type not in ["text/csv" , "application/jsonlines" , "application/x-parquet" ]:
65
+ raise ValueError (
66
+ f"Invalid dataset_type { dataset_type } ."
67
+ f" Please choose text/csv or application/jsonlines or application/x-parquet."
68
+ )
64
69
self .analysis_config = {
65
70
"dataset_type" : dataset_type ,
66
71
}
@@ -79,8 +84,9 @@ class BiasConfig:
79
84
def __init__ (
80
85
self ,
81
86
label_values_or_threshold ,
82
- facet_name ,
87
+ facet_name = None ,
83
88
facet_values_or_threshold = None ,
89
+ facet_list = None ,
84
90
group_name = None ,
85
91
):
86
92
"""Initializes a configuration of the sensitive groups in the dataset.
@@ -94,15 +100,55 @@ def __init__(
94
100
threshold for a numeric facet column that defines the lower bound of a sensitive
95
101
group. Defaults to considering each possible value as sensitive group and
96
102
computing metrics vs all the other examples.
103
+ facet_list (list[dict]): Optional list of dictionaries that defines the sensitive
104
+ attribute(s). Each dictionary contains two keys in the form of the following:
105
+ 'name_or_index' (int or str) for facet column name or index,
106
+ optional 'value_or_threshold' (list[int or float or str]) for list of values or
107
+ threshold that the facet column can take which indicates the sensitive group.
108
+ This should be defined only if there is more than one sensitive attribute.
97
109
group_name (str): Optional column name or index to indicate a group column to be used
98
110
for the bias metric 'Conditional Demographic Disparity in Labels - CDDL' or
99
111
'Conditional Demographic Disparity in Predicted Labels - CDDPL'.
100
112
"""
101
- facet = {"name_or_index" : facet_name }
102
- _set (facet_values_or_threshold , "value_or_threshold" , facet )
113
+ if facet_list :
114
+ for facet_object in facet_list :
115
+ if not all (
116
+ field in ["name_or_index" , "value_or_threshold" ] for field in facet_object
117
+ ):
118
+ raise ValueError (
119
+ f"Invalid facet_list { facet_list } ."
120
+ f" Please only include 'name_or_index' or 'value_or_threshold'"
121
+ f" in dictionary keys."
122
+ )
123
+ if "name_or_index" not in facet_object or not isinstance (
124
+ facet_object ["name_or_index" ], (str , int )
125
+ ):
126
+ raise ValueError (
127
+ f"Invalid facet_list { facet_list } ."
128
+ f" Please include valid format of 'name_or_index' in dictionary:"
129
+ f" str, int."
130
+ )
131
+ if "value_or_threshold" in facet_object and not (
132
+ isinstance (facet_object ["value_or_threshold" ], list )
133
+ and all (
134
+ isinstance (v , (str , int , float )) for v in facet_object ["value_or_threshold" ]
135
+ )
136
+ ):
137
+ raise ValueError (
138
+ f"Invalid facet_list { facet_list } ."
139
+ f" Please include valid format of 'value_or_threshold' in dictionary:"
140
+ f" list[int or float or str]."
141
+ )
142
+ elif facet_name is not None :
143
+ facet = {"name_or_index" : facet_name }
144
+ _set (facet_values_or_threshold , "value_or_threshold" , facet )
145
+ facet_list = [facet ]
146
+ else :
147
+ raise ValueError ("Please specify facet_name or facet_list." )
148
+
103
149
self .analysis_config = {
104
150
"label_values_or_threshold" : label_values_or_threshold ,
105
- "facet" : [ facet ] ,
151
+ "facet" : facet_list ,
106
152
}
107
153
_set (group_name , "group_variable" , self .analysis_config )
108
154
0 commit comments