Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SMACT Metallicity Handling Enhancements #367

Merged
merged 26 commits into from
Jan 30, 2025
Merged
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
17d4ae6
new_intermetallics_screening_features
ryannduma Jan 16, 2025
d0e999d
run pre-commit
ryannduma Jan 16, 2025
5dbe9d7
changes to intermetallics_readme
ryannduma Jan 16, 2025
e4f4d84
intermetallics_readme_update
ryannduma Jan 16, 2025
1ff91b8
coder rabbit nitpick changes
ryannduma Jan 17, 2025
56246d3
backward compatibility, & composition handling
ryannduma Jan 17, 2025
f5ec89a
added usage examples to docs,
ryannduma Jan 17, 2025
92aeb6c
small nitpick changes to resolve coderrabbitai rev
ryannduma Jan 17, 2025
db78427
nitpick changes to references in readme
ryannduma Jan 17, 2025
4727c9a
these fixes, correct the three failing tests:
ryannduma Jan 17, 2025
96b4f30
potential fix for Regex pattern did not match
ryannduma Jan 17, 2025
b6dd89a
final changes
ryannduma Jan 17, 2025
82b8f6d
move intermetallics classification example to docs
ryannduma Jan 20, 2025
9b90f92
resetting latest commit
ryannduma Jan 23, 2025
30b7acb
trying to test something
ryannduma Jan 23, 2025
ad8fe19
move intermetallics_classification.ipynb to
ryannduma Jan 23, 2025
7dcf738
major changes to PR from intermetallics to
ryannduma Jan 29, 2025
484df32
Add colab installation to notebook
AntObi Jan 29, 2025
cfe9e56
Update pyproject.toml with dependencies for notebook
AntObi Jan 29, 2025
7669102
Update requirements.txt for binder compataibility
AntObi Jan 29, 2025
272fd4a
Convert metallicity readme to an example notebook
AntObi Jan 30, 2025
a4980b4
Add metallicity to examples
AntObi Jan 30, 2025
94aec86
Rename header in examples/metallicity
AntObi Jan 30, 2025
fdb2167
Fix order of execution
AntObi Jan 30, 2025
7b278f3
Try to fix test
AntObi Jan 30, 2025
ca030ef
changes from get_d_electron_fraction to
ryannduma Jan 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/examples.rst
Original file line number Diff line number Diff line change
@@ -20,3 +20,4 @@ For workflows that have been used in real examples and in published work, visit
examples/valence_electron_count
examples/doper
examples/distorter
examples/metallicity
496 changes: 496 additions & 0 deletions docs/examples/metallicity.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions docs/smact.metallicity.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
smact.metallicity module
========================

.. automodule:: smact.metallicity
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/smact.rst
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@ Submodules
smact.properties
smact.screening
smact.oxidation_states
smact.metallicity
smact.builder
smact.distorter
smact.lattice
1 change: 1 addition & 0 deletions docs/tutorials.rst
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@ Tutorials are intended as a more complete example of `smact` being applied in re

tutorials/smact_generation_of_solar_oxides
tutorials/oxidation_states
tutorials/metallicity_classification
tutorials/smact_validity_of_GNoMe
tutorials/structure_prediction
tutorials/crystal_space
485 changes: 485 additions & 0 deletions docs/tutorials/metallicity_classification.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -91,6 +91,7 @@ visualisation = [
"pymatviz",
"dash",
]
ml = ["xgboost","shap","smact[featurisers]"]

optional = [
"pydantic>=2.9.2",
@@ -104,6 +105,7 @@ optional = [
"llvmlite>=0.40",
"ElementEmbeddings>=0.4",
"dash>=2.18.2",
"smact[ml]"

]
strict = [
24 changes: 21 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -34,6 +34,8 @@ charset-normalizer==3.3.2
# via requests
click==8.1.7
# via flask
cloudpickle==3.1.1
# via shap
colorama==0.4.6 ; platform_system == 'Windows'
# via
# click
@@ -101,6 +103,7 @@ latexcodec==2.0.1
# via pybtex
llvmlite==0.43.0
# via
# smact (pyproject.toml)
# numba
# pynndescent
maggma==0.70.0
@@ -145,6 +148,7 @@ networkx==3.2.1
numba==0.60.0
# via
# pynndescent
# shap
# umap-learn
numpy==1.26.2
# via
@@ -166,8 +170,12 @@ numpy==1.26.2
# scikit-learn
# scipy
# seaborn
# shap
# spglib
# umap-learn
# xgboost
nvidia-nccl-cu12==2.25.1 ; platform_machine != 'aarch64' and platform_system == 'Linux'
# via xgboost
opentsne==1.0.2
# via elementembeddings
orjson==3.10.12
@@ -177,6 +185,7 @@ packaging==24.0
# matplotlib
# mongomock
# plotly
# shap
palettable==3.3.3
# via pymatgen
pandas==2.2.3
@@ -187,6 +196,7 @@ pandas==2.2.3
# pymatgen
# pymatviz
# seaborn
# shap
paramiko==3.5.0
# via sshtunnel
pathos==0.3.2
@@ -290,6 +300,7 @@ scikit-learn==1.5.2
# opentsne
# pymatviz
# pynndescent
# shap
# umap-learn
scipy==1.14.1
# via
@@ -302,7 +313,9 @@ scipy==1.14.1
# pymatviz
# pynndescent
# scikit-learn
# shap
# umap-learn
# xgboost
seaborn==0.13.2
# via
# smact (pyproject.toml)
@@ -314,12 +327,16 @@ setuptools==75.6.0
# dash
# maggma
# mp-api
shap==0.46.0
# via smact (pyproject.toml)
six==1.16.0
# via
# latexcodec
# pybtex
# python-dateutil
# retrying
slicer==0.0.8
# via shap
smart-open==7.0.5
# via mp-api
spglib==2.5.0
@@ -345,6 +362,7 @@ tqdm==4.66.4
# maggma
# matminer
# pymatgen
# shap
# umap-learn
typing-extensions==4.12.2
# via
@@ -359,9 +377,7 @@ typing-extensions==4.12.2
tzdata==2024.1
# via pandas
umap-learn==0.5.3
# via
# smact (pyproject.toml)
# elementembeddings
# via elementembeddings
uncertainties==3.2.2
# via pymatgen
urllib3==2.2.2
@@ -374,5 +390,7 @@ werkzeug==3.0.6
# flask
wrapt==1.17.0
# via smart-open
xgboost==2.1.3
# via smact (pyproject.toml)
zipp==3.21.0
# via importlib-metadata
157 changes: 157 additions & 0 deletions smact/metallicity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""Utility functions for handling intermetallic compounds in SMACT."""

from __future__ import annotations

import numpy as np
from pymatgen.core import Composition

import smact
from smact import Element
from smact.properties import valence_electron_count


def _ensure_composition(composition: str | Composition) -> Composition:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if this function is needed. Could you explain if it is?
I believe the Composition class is already robust enough with its error handling.

In each function call in the rest of module, you could probably have comp=Composition(composition) even if composition is an instance of the Composition class.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think this might be a redundancy on my end, but I originally wrote this to ensure the composition was a pymatgen composition object even in contexts where they may enter a composition that's otherwise - ill do some testing on my end and possibly get rid of the function should this be the case and incorporate your suggested changes.

"""Convert input to a pymatgen Composition if it isn't already.
Args:
composition: Chemical formula as string or pymatgen Composition
Returns:
Composition: A pymatgen Composition object
Raises:
ValueError: If the composition string is empty
ValueError: If the formula is invalid and can't be parsed.
"""
if isinstance(composition, str):
if not composition.strip():
raise ValueError("Empty composition")
# Try to parse with pymatgen
try:
return Composition(composition)
except ValueError as exc:
# If pymatgen can't parse, re-raise with a message the test expects
raise ValueError("Invalid formula") from exc
return composition


def get_element_fraction(composition: str | Composition, element_set: set[str]) -> float:
    """Calculate the fraction of elements from a given set in a composition.

    This helper function is used to avoid code duplication in functions that
    calculate fractions of specific element types (e.g., metals, d-block elements).

    Args:
        composition: Chemical formula as string or pymatgen Composition
        element_set: Set of element symbols to check for

    Returns:
        float: Fraction of the composition that consists of elements from the
        set (0-1). Returns 0.0 when the composition has zero total amount
        (e.g. an empty Composition object).

    Raises:
        ValueError: Propagated from ``_ensure_composition`` for an empty or
            unparseable formula string.
    """
    comp = _ensure_composition(composition)
    total_amt = sum(comp.values())

    # An empty Composition sums to zero; report "none of the target elements"
    # instead of failing with ZeroDivisionError.
    if not total_amt:
        return 0.0

    target_amt = sum(amt for el, amt in comp.items() if el.symbol in element_set)
    return target_amt / total_amt


def get_metal_fraction(composition: str | Composition) -> float:
    """Return the fraction of a composition made up of metallic elements.

    Thin wrapper: delegates to get_element_fraction with the smact.metals set.

    Args:
        composition: Chemical formula as string or pymatgen Composition
    """
    metal_symbols = smact.metals
    return get_element_fraction(composition, metal_symbols)


def get_d_block_element_fraction(composition: str | Composition) -> float:
    """Return the fraction of a composition made up of d-block elements.

    Thin wrapper: delegates to get_element_fraction with the smact.d_block set.

    Args:
        composition: Chemical formula as string or pymatgen Composition
    """
    d_block_symbols = smact.d_block
    return get_element_fraction(composition, d_block_symbols)


def get_distinct_metal_count(composition: str | Composition) -> int:
    """Count how many different elements in a composition are in smact.metals.

    Args:
        composition: Chemical formula as string or pymatgen Composition

    Returns:
        int: Number of elements of the composition whose symbol is in smact.metals.
    """
    comp = _ensure_composition(composition)
    metals_present = [el for el in comp.elements if el.symbol in smact.metals]
    return len(metals_present)


def get_pauling_test_mismatch(composition: str | Composition) -> float:
    """Return the mean absolute Pauling electronegativity difference over element pairs.

    Higher mismatch => more difference (ionic, e.g. NaCl).
    Lower mismatch => metal-metal bonds (e.g. Fe-Al).

    Args:
        composition: Chemical formula as string or pymatgen Composition

    Returns:
        float: Mean of ``abs(eneg_i - eneg_j)`` over every unique pair of
        elements in the composition; 0.0 when there is no pair to compare
        (fewer than two elements); NaN when any element has no Pauling
        electronegativity (``pauling_eneg`` is None).
    """
    comp = _ensure_composition(composition)
    electronegativities = [Element(el.symbol).pauling_eneg for el in comp.elements]

    # If any element lacks a known electronegativity, return NaN
    if None in electronegativities:
        return float("nan")

    # Visit each unordered pair once: element i against every later element.
    mismatches = [
        abs(eneg1 - eneg2)
        for i, eneg1 in enumerate(electronegativities)
        for eneg2 in electronegativities[i + 1 :]
    ]

    # Cast so every branch returns a builtin float rather than np.float64.
    return float(np.mean(mismatches)) if mismatches else 0.0


def metallicity_score(composition: str | Composition) -> float:
    """Score (0-1) the degree of a compound's metallic/alloy nature.

    The score is a weighted sum of five terms:
        1. Fraction of metallic elements (weight 0.3)
        2. d-block element fraction (weight 0.2)
        3. Number of distinct metals, saturating at three (weight 0.2)
        4. Valence electron count proximity to 8 (weight 0.15)
        5. Electronegativity difference (Pauling mismatch), penalised when large (weight 0.15)

    Args:
        composition: Chemical formula or pymatgen Composition

    Returns:
        float: The weighted sum, clamped to the range [0, 1].
    """
    comp = _ensure_composition(composition)

    # Composition-based metrics
    frac_metal = get_metal_fraction(comp)
    frac_d_block = get_d_block_element_fraction(comp)
    metal_count = get_distinct_metal_count(comp)

    # Valence electron count term: 1.0 at exactly 8, falling off linearly.
    # A ValueError from the lookup falls back to the neutral value 0.5.
    try:
        vec = valence_electron_count(comp.reduced_formula)
    except ValueError:
        vec_term = 0.5
    else:
        vec_term = 1.0 - abs(vec - 8.0) / 8.0

    # Pauling term: NaN (missing data) maps to the neutral value 0.5;
    # otherwise the mismatch is scaled by 3.0 and capped so the term stays in [0, 1].
    mismatch = get_pauling_test_mismatch(comp)
    pauling_term = 0.5 if np.isnan(mismatch) else 1.0 - min(mismatch / 3.0, 1.0)

    # Weighted sum, with terms added in a fixed left-to-right order.
    w_metal, w_d_block, w_count, w_vec, w_pauling = 0.3, 0.2, 0.2, 0.15, 0.15
    score = (
        w_metal * frac_metal
        + w_d_block * frac_d_block
        + w_count * min(metal_count / 3.0, 1.0)
        + w_vec * vec_term
        + w_pauling * pauling_term
    )
    return max(0.0, min(1.0, score))
17 changes: 16 additions & 1 deletion smact/screening.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
from smact.data_loader import (
lookup_element_oxidation_states_custom as oxi_custom,
)
from smact.metallicity import metallicity_score

if TYPE_CHECKING:
import pymatgen
@@ -437,11 +438,14 @@
use_pauling_test: bool = True,
include_alloys: bool = True,
oxidation_states_set: str = "icsd24",
check_metallicity: bool = False,
metallicity_threshold: float = 0.7,
) -> bool:
"""
Check if a composition is valid according to the SMACT rules.
Composition is considered valid if it passes the charge neutrality test and the Pauling electronegativity test.
Can also validate metal alloys by using a metallicity scoring system.
.. warning::
For backwards compatibility in SMACT >=2.7, explicitly set oxidation_states_set to 'smact14' if you wish to use the 2014 SMACT default oxidation states.
@@ -457,6 +461,9 @@
'pymatgen_sp' and 'wiki' for the 2014 SMACT default, 2016 ICSD, 2024 ICSD, pymatgen structure predictor and Wikipedia
(https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) oxidation states respectively.
A filepath to an oxidation states text file can also be supplied.
check_metallicity (bool): If True, uses the metallicity scoring system to validate potential metallic/alloy compounds.
metallicity_threshold (float): Score threshold (0-1) above which a compound is considered a valid metallic/alloy.
Only used if check_metallicity is True.
Returns:
-------
@@ -467,13 +474,22 @@
composition = Composition(composition)
elem_symbols = tuple(composition.as_dict().keys())

# Single element case
if len(set(elem_symbols)) == 1:
return True

Check warning on line 480 in smact/screening.py

Codecov / codecov/patch

smact/screening.py#L480

Added line #L480 was not covered by tests
# Check for simple metal alloys if enabled
if include_alloys:
is_metal_list = [elem_s in smact.metals for elem_s in elem_symbols]
if all(is_metal_list):
return True

# Check for metallic-like compounds if enabled
if check_metallicity:
score = metallicity_score(composition)
if score >= metallicity_threshold:
return True

count = tuple(composition.as_dict().values())
count = [int(c) for c in count]
# Reduce stoichiometry to gcd
@@ -500,7 +516,6 @@
stacklevel=2,
)
ox_combos = [e.oxidation_states_wiki for e in smact_elems]

else:
raise (
Exception(
172 changes: 172 additions & 0 deletions smact/tests/test_metallicity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""Tests for the metallicity module."""

from __future__ import annotations

import unittest

import pytest
from pymatgen.core import Composition

import smact
from smact.metallicity import (
get_d_block_element_fraction,
get_distinct_metal_count,
get_element_fraction,
get_metal_fraction,
get_pauling_test_mismatch,
metallicity_score,
)


class TestMetallicity(unittest.TestCase):
    """Test the metallicity module functionality."""

    def setUp(self):
        """Set up reference formulas used by the scoring tests."""
        # Known metallic/alloy compounds
        self.metallic_compounds = [
            "Fe3Al",  # Classic intermetallic
            "Ni3Ti",  # Superalloy component
            "Cu3Au",  # Ordered alloy
            "Fe2Nb",  # Laves phase
        ]

        # Known non-metallic compounds
        self.non_metallic_compounds = [
            "NaCl",  # Ionic
            "SiO2",  # Covalent
            "Fe2O3",  # Metal oxide
            "CuSO4",  # Complex ionic
        ]

    def test_get_element_fraction(self):
        """Test the helper function for element fraction calculations."""
        # Test with metals set
        self.assertAlmostEqual(
            get_element_fraction(Composition("Fe3Al"), smact.metals),
            1.0,
            places=6,
            msg="Expected all elements in Fe3Al to be metals",
        )

        # Test with d-block set
        self.assertAlmostEqual(
            get_element_fraction(Composition("Fe2Nb"), smact.d_block),
            1.0,
            places=6,
            msg="Expected all elements in Fe2Nb to be d-block",
        )

        # Test with empty set
        self.assertAlmostEqual(
            get_element_fraction(Composition("Fe3Al"), set()),
            0.0,
            places=6,
            msg="Expected zero fraction for empty element set",
        )

    def test_get_metal_fraction(self):
        """Test metal fraction calculation."""
        # Should be 1.0 for pure metallic compounds
        self.assertAlmostEqual(
            get_metal_fraction(Composition("Fe3Al")),
            1.0,
            places=6,
            msg="Expected pure metallic composition for Fe3Al",
        )

        # Should be 0.0 for compounds with no metals
        self.assertAlmostEqual(
            get_metal_fraction(Composition("SiO2")),
            0.0,
            places=6,
            msg="Expected no metallic elements in SiO2",
        )

        # Should be fractional for mixed compounds; dedicated comparison
        # assertions report the offending value on failure.
        fe2o3 = get_metal_fraction(Composition("Fe2O3"))
        fractional_msg = f"Expected fractional metal content for Fe2O3, got {fe2o3:.2f}"
        self.assertGreater(fe2o3, 0, msg=fractional_msg)
        self.assertLess(fe2o3, 1, msg=fractional_msg)

    def test_get_d_block_element_fraction(self):
        """Test d-block element fraction calculation."""
        # Should be 1.0 for pure transition metal compounds
        self.assertAlmostEqual(
            get_d_block_element_fraction(Composition("Fe2Nb")),
            1.0,
            places=6,
            msg="Expected all d-block elements in Fe2Nb",
        )

        # Should be 0.0 for compounds with no d-block elements
        self.assertAlmostEqual(
            get_d_block_element_fraction(Composition("NaCl")),
            0.0,
            places=6,
            msg="Expected no d-block elements in NaCl",
        )

    def test_get_distinct_metal_count(self):
        """Test counting of distinct metals."""
        self.assertEqual(
            get_distinct_metal_count(Composition("Fe3Al")),
            2,
            msg="Expected 2 distinct metals in Fe3Al",
        )
        self.assertEqual(
            get_distinct_metal_count(Composition("NaCl")),
            1,
            msg="Expected 1 distinct metal in NaCl",
        )
        self.assertEqual(
            get_distinct_metal_count(Composition("SiO2")),
            0,
            msg="Expected no metals in SiO2",
        )

    def test_get_pauling_test_mismatch(self):
        """Test Pauling electronegativity mismatch calculation."""
        # Ionic compounds should have high mismatch
        nacl_mismatch = get_pauling_test_mismatch(Composition("NaCl"))

        # Metallic compounds should have lower mismatch
        fe3al_mismatch = get_pauling_test_mismatch(Composition("Fe3Al"))

        self.assertLess(
            fe3al_mismatch,
            nacl_mismatch,
            msg=f"Expected lower Pauling mismatch for Fe3Al ({fe3al_mismatch:.2f}) than NaCl ({nacl_mismatch:.2f})",
        )

    def test_metallicity_score(self):
        """Test the overall metallicity scoring function."""
        # Known metallic compounds should score high. subTest reports every
        # failing formula instead of stopping at the first one.
        for formula in self.metallic_compounds:
            with self.subTest(formula=formula):
                score = metallicity_score(formula)
                self.assertGreater(
                    score,
                    0.7,
                    msg=f"Expected high metallicity score (>0.7) for {formula}, but got {score:.2f}",
                )

        # Non-metallic compounds should score low
        for formula in self.non_metallic_compounds:
            with self.subTest(formula=formula):
                score = metallicity_score(formula)
                self.assertLess(
                    score,
                    0.5,
                    msg=f"Expected low metallicity score (<0.5) for {formula}, but got {score:.2f}",
                )

    def test_edge_cases(self):
        """Test edge cases and error handling."""
        # Single element
        score = metallicity_score("Fe")
        range_msg = f"Expected score between 0 and 1 for Fe, got {score:.2f}"
        self.assertGreaterEqual(score, 0.0, msg=range_msg)
        self.assertLessEqual(score, 1.0, msg=range_msg)

        # Empty composition -> expect ValueError("Empty composition")
        with pytest.raises(ValueError, match="Empty composition"):
            metallicity_score("")

        # Invalid formula -> e.g. "NotAnElement"
        with pytest.raises(ValueError, match="Invalid formula"):
            metallicity_score("NotAnElement")
6 changes: 5 additions & 1 deletion smact/tests/test_structure.py
Original file line number Diff line number Diff line change
@@ -229,7 +229,11 @@ def test_ele_stoics(self):
self.assertEqual(SmactStructure._get_ele_stoics(test.species), expected)

@pytest.mark.skipif(
(skip_mprester_tests or not MP_API_AVAILABLE),
(
skip_mprester_tests
or not MP_API_AVAILABLE
or not (os.environ.get("MP_API_KEY") or SETTINGS.get("PMG_MAPI_KEY"))
),
reason="Materials Project API not available or not configured.",
)
def test_from_mp(self):