@@ -34,6 +34,7 @@ def __init__(
34
34
label = None ,
35
35
headers = None ,
36
36
features = None ,
37
+ joinsource = None ,
37
38
dataset_type = "text/csv" ,
38
39
s3_data_distribution_type = "FullyReplicated" ,
39
40
s3_compression_type = "None" ,
@@ -48,6 +49,11 @@ def __init__(
48
49
headers (list[str]): A list of column names in the input dataset.
49
50
features (str): JSONPath for locating the feature columns for bias metrics if the
50
51
dataset format is JSONLines.
52
+ joinsource (str): The name or index of the column in the dataset that acts as an
53
+ identifier column (for instance, while performing a join). This column is only
54
+ used as an identifier, and not used for any other computations. This is an
55
+ optional field in all cases except when the dataset contains more than one file,
56
+ and `save_local_shap_values` is set to true in SHAPConfig.
51
57
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
52
58
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
53
59
s3_data_distribution_type (str): Valid options are "FullyReplicated" or
@@ -72,6 +78,7 @@ def __init__(
72
78
_set (features , "features" , self .analysis_config )
73
79
_set (headers , "headers" , self .analysis_config )
74
80
_set (label , "label" , self .analysis_config )
81
+ _set (joinsource , "joinsource_name_or_index" , self .analysis_config )
75
82
76
83
def get_config (self ):
77
84
"""Returns part of an analysis config dictionary."""
0 commit comments