Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Bias Metrics #5139

Merged
merged 10 commits into from
May 13, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
finished WEAT
Arjun Subramonian committed Apr 21, 2021
commit 85f963969b14417c95dbab193561500c95db6e5a
5 changes: 5 additions & 0 deletions allennlp/fairness/__init__.py
Original file line number Diff line number Diff line change
@@ -12,3 +12,8 @@
Sufficiency,
DemographicParityWithoutGroundTruth,
)
from allennlp.fairness.bias_metrics import (
WordEmbeddingAssociationTest,
EmbeddingCoherenceTest,
NaturalLanguageInference,
)
136 changes: 136 additions & 0 deletions allennlp/fairness/bias_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""
A suite of metrics to quantify how much bias is
encoded by word embeddings and determine the effectiveness
of bias mitigation.
Bias metrics are based on:
1. Caliskan, A., Bryson, J., & Narayanan, A. (2017). [Semantics derived automatically
from language corpora contain human-like biases](https://api.semanticscholar.org/CorpusID:23163324).
Science, 356, 183 - 186.
2. Dev, S., & Phillips, J.M. (2019). [Attenuating Bias in Word Vectors]
(https://api.semanticscholar.org/CorpusID:59158788). AISTATS.
3. Dev, S., Li, T., Phillips, J.M., & Srikumar, V. (2020). [On Measuring and Mitigating
Biased Inferences of Word Embeddings](https://api.semanticscholar.org/CorpusID:201670701).
ArXiv, abs/1908.09369.
"""

import torch
from allennlp.common.checks import ConfigurationError


class WordEmbeddingAssociationTest:
"""
Word Embedding Association Test (WEAT) score measures the unlikelihood there is no
difference between two sets of target words in terms of their relative similarity
to two sets of attribute words by computing the probability that a random
permutation of attribute words would produce the observed (or greater) difference
in sample means. Analog of Implicit Association Test from psychology for word embeddings.
Based on: Caliskan, A., Bryson, J., & Narayanan, A. (2017). [Semantics derived automatically
from language corpora contain human-like biases](https://api.semanticscholar.org/CorpusID:23163324).
Science, 356, 183 - 186.
"""

def __call__(
self,
target_embeddings1: torch.Tensor,
target_embeddings2: torch.Tensor,
attribute_embeddings1: torch.Tensor,
attribute_embeddings2: torch.Tensor,
) -> torch.FloatTensor:
"""
# Parameters
!!! Note
In the examples below, we treat gender identity as binary, which does not accurately
characterize gender in real life.
target_embeddings1 : `torch.Tensor`, required.
A tensor of size (target_embeddings_batch_size, ..., dim) containing target word
embeddings related to a concept group. For example, if the concept is gender,
target_embeddings1 could contain embeddings for linguistically masculine words, e.g.
"man", "king", "brother", etc. Represented as X.
target_embeddings2 : `torch.Tensor`, required.
A tensor of the same size as target_embeddings1 containing target word
embeddings related to a different group for the same concept. For example,
target_embeddings2 could contain embeddings for linguistically feminine words, e.g.
"woman", "queen", "sister", etc. Represented as Y.
attribute_embeddings1 : `torch.Tensor`, required.
A tensor of size (attribute_embeddings1_batch_size, ..., dim) containing attribute word
embeddings related to a concept group. For example, if the concept is professions,
attribute_embeddings1 could contain embeddings for stereotypically male professions, e.g.
"doctor", "banker", "engineer", etc. Represented as A.
attribute_embeddings2 : `torch.Tensor`, required.
A tensor of size (attribute_embeddings2_batch_size, ..., dim) containing attribute word
embeddings related to a different group for the same concept. For example, if the concept is
professions, attribute_embeddings2 could contain embeddings for stereotypically female
professions, e.g. "nurse", "receptionist", "homemaker", etc. Represented as B.
# Returns
weat_score : `torch.FloatTensor`
The unlikelihood there is no difference between target_embeddings1 and target_embeddings2 in
terms of their relative similarity to attribute_embeddings1 and attribute_embeddings2.
Typical values are around [-1, 1], with values closer to 0 indicating less biased associations.
"""

# Some sanity checks
if target_embeddings1.ndim < 2 or target_embeddings2.ndim < 2:
raise ConfigurationError(
"target_embeddings1 and target_embeddings2 must have at least two dimensions."
)
if attribute_embeddings1.ndim < 2 or attribute_embeddings2.ndim < 2:
raise ConfigurationError(
"attribute_embeddings1 and attribute_embeddings2 must have at least two dimensions."
)
if target_embeddings1.size() != target_embeddings2.size():
raise ConfigurationError(
"target_embeddings1 and target_embeddings2 must be of the same size."
)
if attribute_embeddings1.size(dim=-1) != attribute_embeddings2.size(
dim=-1
) or attribute_embeddings1.size(dim=-1) != target_embeddings1.size(dim=-1):
raise ConfigurationError("All embeddings must have the same dimensionality.")

target_embeddings1 = target_embeddings1.flatten(end_dim=-2)
target_embeddings2 = target_embeddings2.flatten(end_dim=-2)
attribute_embeddings1 = attribute_embeddings1.flatten(end_dim=-2)
attribute_embeddings2 = attribute_embeddings2.flatten(end_dim=-2)

# Normalize
target_embeddings1 = torch.nn.functional.normalize(target_embeddings1, p=2, dim=-1)
target_embeddings2 = torch.nn.functional.normalize(target_embeddings2, p=2, dim=-1)
attribute_embeddings1 = torch.nn.functional.normalize(attribute_embeddings1, p=2, dim=-1)
attribute_embeddings2 = torch.nn.functional.normalize(attribute_embeddings2, p=2, dim=-1)

# Compute cosine similarities
X_sim_A = torch.mm(target_embeddings1, attribute_embeddings1.t())
X_sim_B = torch.mm(target_embeddings1, attribute_embeddings2.t())
Y_sim_A = torch.mm(target_embeddings2, attribute_embeddings1.t())
Y_sim_B = torch.mm(target_embeddings2, attribute_embeddings2.t())
X_union_Y_sim_A = torch.cat([X_sim_A, Y_sim_A])
X_union_Y_sim_B = torch.cat([X_sim_B, Y_sim_B])

s_X_A_B = torch.mean(X_sim_A, dim=-1) - torch.mean(X_sim_B, dim=-1)
s_Y_A_B = torch.mean(Y_sim_A, dim=-1) - torch.mean(Y_sim_B, dim=-1)
s_X_Y_A_B = torch.mean(s_X_A_B) - torch.mean(s_Y_A_B)
S_X_union_Y_A_B = torch.mean(X_union_Y_sim_A, dim=-1) - torch.mean(X_union_Y_sim_B, dim=-1)
return s_X_Y_A_B / torch.std(S_X_union_Y_A_B, unbiased=False)


class EmbeddingCoherenceTest:
pass


class NaturalLanguageInference:
pass
2,602 changes: 2,602 additions & 0 deletions test_fixtures/fairness/bias_embeddings.json

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions tests/fairness/bias_metrics_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import pytest
import torch
import json

from allennlp.common.checks import ConfigurationError
from allennlp.common.testing import AllenNlpTestCase, multi_device
from allennlp.fairness.bias_metrics import (
WordEmbeddingAssociationTest,
EmbeddingCoherenceTest,
NaturalLanguageInference,
)


class WordEmbeddingAssociationTestTest(AllenNlpTestCase):
def setup_method(self):
# embedding data from VERB demo
emb_filename = str(self.FIXTURES_ROOT / "fairness" / "bias_embeddings.json")
with open(emb_filename) as emb_file:
emb_data = json.load(emb_file)

self.X = torch.cat(
[
torch.Tensor(emb_data["he"]).reshape(1, -1),
torch.Tensor(emb_data["him"]).reshape(1, -1),
]
)
self.Y = torch.cat(
[
torch.Tensor(emb_data["she"]).reshape(1, -1),
torch.Tensor(emb_data["her"]).reshape(1, -1),
]
)
self.A = torch.cat(
[
torch.Tensor(emb_data["engineer"]).reshape(1, -1),
torch.Tensor(emb_data["banker"]).reshape(1, -1),
]
)
self.B = torch.cat(
[
torch.Tensor(emb_data["nurse"]).reshape(1, -1),
torch.Tensor(emb_data["receptionist"]).reshape(1, -1),
]
)

def teardown_method(self):
pass

def test_invalid_dims(self):
weat = WordEmbeddingAssociationTest()
with pytest.raises(ConfigurationError):
weat(torch.zeros(2), torch.zeros(2), torch.zeros(2), torch.zeros(2))
with pytest.raises(ConfigurationError):
weat(torch.zeros((2, 2)), torch.zeros((2, 2)), torch.zeros(2), torch.zeros(2))
with pytest.raises(ConfigurationError):
weat(torch.zeros((2, 2)), torch.zeros((2, 3)), torch.zeros((2, 2)), torch.zeros((2, 2)))
with pytest.raises(ConfigurationError):
weat(torch.zeros((2, 2)), torch.zeros((2, 2)), torch.zeros((2, 3)), torch.zeros((2, 2)))

@multi_device
def test_weat(self, device: str):
self.X = self.X.to(device)
self.Y = self.Y.to(device)
self.A = self.A.to(device)
self.B = self.B.to(device)

weat = WordEmbeddingAssociationTest()
test_weat_score = weat(self.X, self.Y, self.A, self.B)
assert test_weat_score.item() == pytest.approx(1.872, rel=1e-4)


class EmbeddingCoherenceTestTest(AllenNlpTestCase):
def test_invalid_dims(self):
EmbeddingCoherenceTest()


class NaturalLanguageInferenceTest(AllenNlpTestCase):
def test_invalid_dims(self):
NaturalLanguageInference()