Skip to content

Commit 1f03397

Browse files
authored
Changed random_state default to None in fair_stratified_train_test_split. Renamed preprocessing to preparation in fairness code to avoid collision with AIF360 terminology. (#615)
1 parent ad2793e commit 1f03397

17 files changed

+66
-68
lines changed

docs/img/fairness_patterns.png

3.51 KB
Loading

examples/demo_aif360.ipynb

+6-6
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@
593593
"source": [
594594
"from lale.lib.aif360 import fair_stratified_train_test_split\n",
595595
"train_X, test_X, train_y, test_y = fair_stratified_train_test_split(\n",
596-
" all_X, all_y, **fairness_info, test_size=0.33)"
596+
" all_X, all_y, **fairness_info, test_size=0.33, random_state=42)"
597597
]
598598
},
599599
{
@@ -1093,7 +1093,7 @@
10931093
"In the visualization, light blue indicates trainable operators\n",
10941094
"and dark blue indicates that automation must make a choice before\n",
10951095
"the operators can be trained. Compared to the earlier pipeline,\n",
1096-
"we pass the preprocessing as an argument to `DisparateImpactRemover`,\n",
1096+
"we pass the data preparation sub-pipeline as an argument to `DisparateImpactRemover`,\n",
10971097
"since that fairness mitigator needs numerical data to work on."
10981098
]
10991099
},
@@ -1120,7 +1120,7 @@
11201120
"</a>\n",
11211121
"</g>\n",
11221122
"<g id=\"clust1\" class=\"cluster\"><title>cluster:disparate_impact_remover</title>\n",
1123-
"<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.disparate_impact_remover.html\" xlink:title=\"disparate_impact_remover = DisparateImpactRemover(favorable_labels=[&#39;good&#39;], protected_attributes=[{&#39;feature&#39;: &#39;personal_status&#39;, &#39;privileged_groups&#39;: [&#39;male div/sep&#39;, &#39;male mar/wid&#39;, &#39;male single&#39;]}, {&#39;feature&#39;: &#39;age&#39;, &#39;privileged_groups&#39;: [[26, 1000]]}], preprocessing=pipeline_0)\">\n",
1123+
"<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.disparate_impact_remover.html\" xlink:title=\"disparate_impact_remover = DisparateImpactRemover(favorable_labels=[&#39;good&#39;], protected_attributes=[{&#39;feature&#39;: &#39;personal_status&#39;, &#39;privileged_groups&#39;: [&#39;male div/sep&#39;, &#39;male mar/wid&#39;, &#39;male single&#39;]}, {&#39;feature&#39;: &#39;age&#39;, &#39;privileged_groups&#39;: [[26, 1000]]}], preparation=pipeline_0)\">\n",
11241124
"<polygon fill=\"#b0e2ff\" stroke=\"black\" points=\"8,-59 8,-213 296.108,-213 296.108,-59 8,-59\"/>\n",
11251125
"<text text-anchor=\"middle\" x=\"152.054\" y=\"-197.8\" font-family=\"Times,serif\" font-size=\"14.00\">DisparateImpactRemover</text>\n",
11261126
"</a>\n",
@@ -1231,7 +1231,7 @@
12311231
],
12321232
"source": [
12331233
"di_remover = DisparateImpactRemover(\n",
1234-
" **fairness_info, preprocessing=prep_to_numbers)\n",
1234+
" **fairness_info, preparation=prep_to_numbers)\n",
12351235
"planned_fairer = di_remover >> (LR | Tree | KNN)\n",
12361236
"planned_fairer.visualize()"
12371237
]
@@ -1347,7 +1347,7 @@
13471347
"</a>\n",
13481348
"</g>\n",
13491349
"<g id=\"clust1\" class=\"cluster\"><title>cluster:disparate_impact_remover</title>\n",
1350-
"<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.disparate_impact_remover.html\" xlink:title=\"disparate_impact_remover = DisparateImpactRemover(favorable_labels=[&#39;good&#39;], protected_attributes=[{&#39;feature&#39;: &#39;personal_status&#39;, &#39;privileged_groups&#39;: [&#39;male div/sep&#39;, &#39;male mar/wid&#39;, &#39;male single&#39;]}, {&#39;feature&#39;: &#39;age&#39;, &#39;privileged_groups&#39;: [[26, 1000]]}], preprocessing=pipeline_0, repair_level=0.8641...)\">\n",
1350+
"<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.disparate_impact_remover.html\" xlink:title=\"disparate_impact_remover = DisparateImpactRemover(favorable_labels=[&#39;good&#39;], protected_attributes=[{&#39;feature&#39;: &#39;personal_status&#39;, &#39;privileged_groups&#39;: [&#39;male div/sep&#39;, &#39;male mar/wid&#39;, &#39;male single&#39;]}, {&#39;feature&#39;: &#39;age&#39;, &#39;privileged_groups&#39;: [[26, 1000]]}], preparation=pipeline_0, repair_level=0.8641...)\">\n",
13511351
"<polygon fill=\"white\" stroke=\"black\" points=\"8,-8 8,-162 296.108,-162 296.108,-8 8,-8\"/>\n",
13521352
"<text text-anchor=\"middle\" x=\"152.054\" y=\"-146.8\" font-family=\"Times,serif\" font-size=\"14.00\">DisparateImpactRemover</text>\n",
13531353
"</a>\n",
@@ -1472,7 +1472,7 @@
14721472
" },\n",
14731473
" {\"feature\": \"age\", \"privileged_groups\": [[26, 1000]]},\n",
14741474
" ],\n",
1475-
" preprocessing=((project >> one_hot_encoder) & project_0)\n",
1475+
" preparation=((project >> one_hot_encoder) & project_0)\n",
14761476
" >> ConcatFeatures(),\n",
14771477
" repair_level=0.8641279154649505,\n",
14781478
")\n",

lale/lib/aif360/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -105,21 +105,21 @@
105105
106106
pipeline = LFR(
107107
**fairness_info,
108-
preprocessing=(
108+
preparation=(
109109
(Project(columns={"type": "string"}) >> OneHotEncoder(handle_unknown="ignore"))
110110
& Project(columns={"type": "number"})
111111
)
112112
>> ConcatFeatures
113113
) >> LogisticRegression(max_iter=1000)
114114
115115
In this example, the *mitigator* is LFR (which is pre-estimator), the
116-
*estimator* is LogisticRegression, and the *preprocessing* is a
116+
*estimator* is LogisticRegression, and the *preparation* is a
117117
sub-pipeline that one-hot-encodes strings. If all features of the data
118-
are numerical, then the preprocessing can be omitted. Internally, the
118+
are numerical, then the preparation can be omitted. Internally, the
119119
LFR higher-order operator uses two auxiliary operators, Redacting
120120
and ProtectedAttributesEncoder. Redacting sets protected attributes
121121
to a constant to prevent them from directly influencing
122-
fairness-agnostic preprocessing or estimators. And the
122+
fairness-agnostic data preparation or estimators. And the
123123
ProtectedAttributesEncoder encodes protected attributes and labels as
124124
zero or one to simplify the task for the mitigator.
125125

lale/lib/aif360/adversarial_debiasing.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __init__(
3939
favorable_labels,
4040
protected_attributes,
4141
redact=True,
42-
preprocessing=None,
42+
preparation=None,
4343
scope_name="adversarial_debiasing",
4444
sess=None,
4545
seed=None,
@@ -75,7 +75,7 @@ def __init__(
7575
favorable_labels=favorable_labels,
7676
protected_attributes=protected_attributes,
7777
redact=redact,
78-
preprocessing=preprocessing,
78+
preparation=preparation,
7979
mitigator=mitigator,
8080
)
8181

@@ -94,7 +94,7 @@ def __init__(
9494
"required": [
9595
*_categorical_fairness_properties.keys(),
9696
"redact",
97-
"preprocessing",
97+
"preparation",
9898
"scope_name",
9999
"sess",
100100
"seed",
@@ -113,11 +113,11 @@ def __init__(
113113
"properties": {
114114
**_categorical_fairness_properties,
115115
"redact": {
116-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
116+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
117117
"type": "boolean",
118118
"default": True,
119119
},
120-
"preprocessing": {
120+
"preparation": {
121121
"description": "Transformer, which may be an individual operator or a sub-pipeline.",
122122
"anyOf": [
123123
{"laleType": "operator"},

lale/lib/aif360/calibrated_eq_odds_postprocessing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def __init__(
8181
"laleType": "operator",
8282
},
8383
"redact": {
84-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
84+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
8585
"type": "boolean",
8686
"default": True,
8787
},

lale/lib/aif360/disparate_impact_remover.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,15 @@ def __init__(
3737
favorable_labels,
3838
protected_attributes,
3939
redact=True,
40-
preprocessing=None,
40+
preparation=None,
4141
repair_level=1.0,
4242
):
4343
self.favorable_labels = favorable_labels
4444
self.protected_attributes = protected_attributes
4545
self.redact = redact
46-
if preprocessing is None:
47-
preprocessing = lale.lib.lale.NoOp
48-
self.preprocessing = preprocessing
46+
if preparation is None:
47+
preparation = lale.lib.lale.NoOp
48+
self.preparation = preparation
4949
self.repair_level = repair_level
5050

5151
def _prep_and_encode(self, X, y=None):
@@ -79,8 +79,8 @@ def fit(self, X, y=None):
7979
"protected_attributes": self.protected_attributes,
8080
}
8181
redacting = Redacting(**fairness_info) if self.redact else lale.lib.lale.NoOp
82-
preprocessing = self.preprocessing
83-
trainable_redact_and_prep = redacting >> preprocessing
82+
preparation = self.preparation
83+
trainable_redact_and_prep = redacting >> preparation
8484
assert isinstance(trainable_redact_and_prep, lale.operators.TrainablePipeline)
8585
self.redact_and_prep = trainable_redact_and_prep.fit(X, y)
8686
self.prot_attr_enc = ProtectedAttributesEncoder(
@@ -130,18 +130,18 @@ def transform(self, X):
130130
"required": [
131131
*_categorical_fairness_properties.keys(),
132132
"redact",
133-
"preprocessing",
133+
"preparation",
134134
"repair_level",
135135
],
136136
"relevantToOptimizer": ["repair_level"],
137137
"properties": {
138138
**_categorical_fairness_properties,
139139
"redact": {
140-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
140+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
141141
"type": "boolean",
142142
"default": True,
143143
},
144-
"preprocessing": {
144+
"preparation": {
145145
"description": "Transformer, which may be an individual operator or a sub-pipeline.",
146146
"anyOf": [
147147
{"laleType": "operator"},

lale/lib/aif360/eq_odds_postprocessing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def __init__(
7373
"laleType": "operator",
7474
},
7575
"redact": {
76-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
76+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
7777
"type": "boolean",
7878
"default": True,
7979
},

lale/lib/aif360/gerry_fair_classifier.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def __init__(
3333
favorable_labels,
3434
protected_attributes,
3535
redact=True,
36-
preprocessing=None,
36+
preparation=None,
3737
C=10,
3838
printflag=False,
3939
heatmapflag=False,
@@ -68,7 +68,7 @@ def __init__(
6868
favorable_labels=favorable_labels,
6969
protected_attributes=protected_attributes,
7070
redact=redact,
71-
preprocessing=preprocessing,
71+
preparation=preparation,
7272
mitigator=mitigator,
7373
)
7474

@@ -87,7 +87,7 @@ def __init__(
8787
"required": [
8888
*_categorical_fairness_properties.keys(),
8989
"redact",
90-
"preprocessing",
90+
"preparation",
9191
"C",
9292
"printflag",
9393
"heatmapflag",
@@ -102,11 +102,11 @@ def __init__(
102102
"properties": {
103103
**_categorical_fairness_properties,
104104
"redact": {
105-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
105+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
106106
"type": "boolean",
107107
"default": True,
108108
},
109-
"preprocessing": {
109+
"preparation": {
110110
"description": "Transformer, which may be an individual operator or a sub-pipeline.",
111111
"anyOf": [
112112
{"laleType": "operator"},

lale/lib/aif360/lfr.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def __init__(
3838
favorable_labels,
3939
protected_attributes,
4040
redact=True,
41-
preprocessing=None,
41+
preparation=None,
4242
k=5,
4343
Ax=0.01,
4444
Az=1.0,
@@ -50,9 +50,9 @@ def __init__(
5050
self.favorable_labels = favorable_labels
5151
self.protected_attributes = protected_attributes
5252
self.redact = redact
53-
if preprocessing is None:
54-
preprocessing = lale.lib.lale.NoOp
55-
self.preprocessing = preprocessing
53+
if preparation is None:
54+
preparation = lale.lib.lale.NoOp
55+
self.preparation = preparation
5656
prot_attr_names = [pa["feature"] for pa in protected_attributes]
5757
unprivileged_groups = [{name: 0 for name in prot_attr_names}]
5858
privileged_groups = [{name: 1 for name in prot_attr_names}]
@@ -93,8 +93,8 @@ def fit(self, X, y):
9393
"protected_attributes": self.protected_attributes,
9494
}
9595
redacting = Redacting(**fairness_info) if self.redact else lale.lib.lale.NoOp
96-
preprocessing = self.preprocessing
97-
trainable_redact1_and_prep = redacting >> preprocessing
96+
preparation = self.preparation
97+
trainable_redact1_and_prep = redacting >> preparation
9898
assert isinstance(trainable_redact1_and_prep, lale.operators.TrainablePipeline)
9999
self.redact1_and_prep = trainable_redact1_and_prep.fit(X, y)
100100
self.prot_attr_enc = ProtectedAttributesEncoder(
@@ -132,7 +132,7 @@ def transform(self, X):
132132
"required": [
133133
*_categorical_fairness_properties.keys(),
134134
"redact",
135-
"preprocessing",
135+
"preparation",
136136
"k",
137137
"Ax",
138138
"Az",
@@ -145,11 +145,11 @@ def transform(self, X):
145145
"properties": {
146146
**_categorical_fairness_properties,
147147
"redact": {
148-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
148+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
149149
"type": "boolean",
150150
"default": True,
151151
},
152-
"preprocessing": {
152+
"preparation": {
153153
"description": "Transformer, which may be an individual operator or a sub-pipeline.",
154154
"anyOf": [
155155
{"laleType": "operator"},

lale/lib/aif360/meta_fair_classifier.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def __init__(
2626
favorable_labels,
2727
protected_attributes,
2828
redact=True,
29-
preprocessing=None,
29+
preparation=None,
3030
tau=0.8,
3131
type="fdr",
3232
):
@@ -38,7 +38,7 @@ def __init__(
3838
favorable_labels=favorable_labels,
3939
protected_attributes=protected_attributes,
4040
redact=redact,
41-
preprocessing=preprocessing,
41+
preparation=preparation,
4242
mitigator=mitigator,
4343
)
4444

@@ -100,19 +100,19 @@ def __init__(
100100
"required": [
101101
*_categorical_fairness_properties.keys(),
102102
"redact",
103-
"preprocessing",
103+
"preparation",
104104
"tau",
105105
"type",
106106
],
107107
"relevantToOptimizer": ["tau", "type"],
108108
"properties": {
109109
**_categorical_fairness_properties,
110110
"redact": {
111-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
111+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
112112
"type": "boolean",
113113
"default": True,
114114
},
115-
"preprocessing": {
115+
"preparation": {
116116
"description": "Transformer, which may be an individual operator or a sub-pipeline.",
117117
"anyOf": [
118118
{"laleType": "operator"},

lale/lib/aif360/prejudice_remover.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@ def __init__(
3232
favorable_labels,
3333
protected_attributes,
3434
redact=True,
35-
preprocessing=None,
35+
preparation=None,
3636
eta=1.0,
3737
):
3838
mitigator = aif360.algorithms.inprocessing.PrejudiceRemover(eta=eta)
3939
super(PrejudiceRemoverImpl, self).__init__(
4040
favorable_labels=favorable_labels,
4141
protected_attributes=protected_attributes,
4242
redact=redact,
43-
preprocessing=preprocessing,
43+
preparation=preparation,
4444
mitigator=mitigator,
4545
)
4646

@@ -59,18 +59,18 @@ def __init__(
5959
"required": [
6060
*_categorical_fairness_properties.keys(),
6161
"redact",
62-
"preprocessing",
62+
"preparation",
6363
"eta",
6464
],
6565
"relevantToOptimizer": ["eta"],
6666
"properties": {
6767
**_categorical_fairness_properties,
6868
"redact": {
69-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
69+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
7070
"type": "boolean",
7171
"default": True,
7272
},
73-
"preprocessing": {
73+
"preparation": {
7474
"description": "Transformer, which may be an individual operator or a sub-pipeline.",
7575
"anyOf": [
7676
{"laleType": "operator"},

lale/lib/aif360/protected_attributes_encoder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@
167167
protected attributes suitable as input for downstream fairness
168168
mitigation operators. This operator does not encode the remaining
169169
(non-protected) attributes. A common usage is to encode non-protected
170-
attributes with a separate preprocessing pipeline and to perform a
170+
attributes with a separate data preparation pipeline and to perform a
171171
feature union before piping the transformed data to downstream
172172
operators that require numeric data.
173173
""",

lale/lib/aif360/reject_option_classification.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __init__(
9696
"laleType": "operator",
9797
},
9898
"redact": {
99-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
99+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
100100
"type": "boolean",
101101
"default": True,
102102
},

lale/lib/aif360/reweighing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def predict(self, X):
110110
"laleType": "operator",
111111
},
112112
"redact": {
113-
"description": "Whether to redact protected attributes before preprocessing (recommended) or not.",
113+
"description": "Whether to redact protected attributes before data preparation (recommended) or not.",
114114
"type": "boolean",
115115
"default": True,
116116
},

0 commit comments

Comments
 (0)