High-throughput Calculations¶
Performing many PyCalphad equilibrium calculations is a common requirement for high-throughput workflows such as training a surrogate model, building an active learning dataset, or screening a large composition-temperature space.
A typical pattern for high-throughput workflows is to perform calculations over a sequence of compositions. PyCalphad Workspace objects are optimized for this pattern: a single Workspace can be re-used across calculations and called efficiently inside a tight loop.
Here we give two examples of performing high-throughput calculations that produce pandas DataFrame objects: one using a lattice grid of compositions and one using low-discrepancy (quasi-random) sampling of compositions.
[1]:
from itertools import product
from time import perf_counter
from importlib.resources import files
import numpy as np
import pandas as pd
from pycalphad import Database, Workspace, variables as v
import pycalphad.tests.databases
def composition_grid(component_count: int, density: int, eps: float = 1e-6) -> np.ndarray:
    """Create an evenly spaced lattice of compositions.

    Each row is built from a tuple of non-negative integer counts (one per
    component) that sums to ``density``. The counts are divided by
    ``density``, clipped away from the edges of composition space and then
    renormalized so that the row sums to one.

    Parameters
    ----------
    component_count : int
        Number of components in the system.
    density : int
        Number of lattice divisions along each dimension (each edge of the
        composition space carries ``density + 1`` points). Must be >= 1.
    eps : float
        Minimum amount of each component to avoid edges of composition space.

    Returns
    -------
    numpy.ndarray
        One row per lattice point and one column per component, ordered
        lexicographically by the underlying integer counts.

    Raises
    ------
    ValueError
        If ``density`` is smaller than 1. A density of zero would otherwise
        divide by zero and silently produce NaN compositions.
    """
    if density < 1:
        raise ValueError(f"density must be a positive integer, got {density!r}")

    # Largest fraction one component may hold while every other component
    # still keeps at least ``eps``; constant for the whole grid.
    upper = 1.0 - eps * (component_count - 1)

    rows = []
    for counts in product(range(density + 1), repeat=component_count):
        if sum(counts) != density:
            continue  # not a point on the composition simplex
        row = np.clip(np.asarray(counts, dtype=float) / density, eps, upper)
        # Clipping can push the sum slightly off one, so renormalize.
        rows.append(row / row.sum())
    return np.asarray(rows)
def drop_empty_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new DataFrame without the columns of ``df`` that are entirely missing.

    A column is removed only when every one of its values is NaN/NA; columns
    with at least one real value are kept unchanged.
    """
    return df.dropna(axis=1, how="all")
Lattice grid¶
[2]:
# Problem definition: system components, temperatures, pressure and the
# number of lattice divisions used for the composition grid.
components = ["AL", "MG", "SI", "CU", "VA"]
temperatures = [1000.0, 1250.0, 1500.0]
pressure = 101325.0
density = 4

# "VA" is excluded from the composition variables; the first remaining
# element (AL) gets no explicit condition, only the other elements do.
elements = [name for name in components if name != "VA"]
solutes = elements[1:]

# One grid row per lattice point; column 0 belongs to the first element and is
# skipped when building the per-point X(...) conditions.
composition_rows = composition_grid(len(elements), density)
composition_conditions = [
    dict(zip(map(v.X, solutes), row[1:]))
    for row in composition_rows
]
n_points = len(temperatures) * len(composition_rows)
[3]:
# Load the COST507 test database and build a single Workspace over all of its
# phases; the same Workspace is re-used for every calculation below.
test_database_directory = files(pycalphad.tests.databases)
dbf = Database(test_database_directory / "COST507.tdb")
phases = list(dbf.phases.keys())
wks = Workspace(dbf, components, phases)

print(f"Evaluating {len(composition_rows)} simplex points × {len(temperatures)} temperatures = {n_points} total")

data_rows = []
# Temperatures form the outer loop and compositions the inner loop.
for temperature, comp_conds in product(temperatures, composition_conditions):
    # Only the conditions change between points.
    wks.conditions = {v.T: temperature, v.P: pressure, v.N: 1, **comp_conds}
    results = wks.get_dict("T", "X(*)", "NP(*)", "X(*,*)")
    # value[()] presumably unwraps a 0-d array into a scalar - TODO confirm.
    data_rows.append({str(key): value[()] for key, value in results.items()})
/Users/bocklund1/src/calphad-workspace/packages/pycalphad/pycalphad/io/tdb.py:293: UserWarning: The type definition character `C` in `TYPE_DEFINITION C GES AMEND_PHASE_DESCRIPTION CBCC_A12 MAGNETIC -3 0.280, ` is not used by any phase.
warnings.warn(f"The type definition character `{typechar}` in `TYPE_DEFINITION {typechar} {line}` is not used by any phase.")
/Users/bocklund1/src/calphad-workspace/packages/pycalphad/pycalphad/io/tdb.py:1017: UserWarning: The type definition character `R` was defined in the following phases: ['LIQUID'], but no corresponding TYPE_DEFINITION line was found in the TDB.
warnings.warn(f"The type definition character `{typechar}` was defined in the following phases: "
Evaluating 35 simplex points × 3 temperatures = 105 total
[4]:
# One DataFrame row per calculation; columns that are NaN for every row are
# removed. The bare name on the last line displays the frame in the notebook.
workspace_df = pd.DataFrame(data_rows).pipe(drop_empty_columns)
workspace_df
[4]:
| T | X_AL | X_CU | X_MG | X_SI | NP_ALCU_EPSILON | NP_BCC_B2 | NP_CU19SI6_ETA | NP_CUMGSI_SIGMA | NP_CUMGSI_TAU | ... | X_LAVES_C15_MG | X_LAVES_C15_SI | X_LIQUID_AL | X_LIQUID_CU | X_LIQUID_MG | X_LIQUID_SI | X_MG2SI_AL | X_MG2SI_CU | X_MG2SI_MG | X_MG2SI_SI | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1000.0 | 1.000000e-06 | 9.999970e-01 | 1.000000e-06 | 1.000000e-06 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 1000.0 | 9.999985e-07 | 7.499985e-01 | 9.999985e-07 | 2.499995e-01 | NaN | NaN | 0.986806 | NaN | NaN | ... | NaN | NaN | 0.026538 | 6.892373e-01 | 2.653855e-02 | 2.576861e-01 | NaN | NaN | NaN | NaN |
| 2 | 1000.0 | 9.999977e-07 | 4.999990e-01 | 9.999976e-07 | 4.999990e-01 | NaN | NaN | 0.657859 | NaN | NaN | ... | NaN | NaN | 0.026533 | 6.892331e-01 | 2.654473e-02 | 2.576892e-01 | NaN | NaN | NaN | NaN |
| 3 | 1000.0 | 9.999962e-07 | 2.499995e-01 | 9.999979e-07 | 7.499985e-01 | NaN | NaN | 0.328913 | NaN | NaN | ... | NaN | NaN | 0.026528 | 6.892290e-01 | 2.655093e-02 | 2.576923e-01 | NaN | NaN | NaN | NaN |
| 4 | 1000.0 | 1.000000e-06 | 1.000000e-06 | 9.999999e-07 | 9.999970e-01 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.140855 | 2.934158e-01 | 2.843417e-01 | 2.813874e-01 | 0.0 | 0.0 | 0.666667 | 0.333333 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 100 | 1500.0 | 4.999990e-01 | 9.999980e-07 | 4.999990e-01 | 9.999980e-07 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.499999 | 9.999980e-07 | 4.999990e-01 | 9.999980e-07 | NaN | NaN | NaN | NaN |
| 101 | 1500.0 | 7.499985e-01 | 2.499995e-01 | 9.999980e-07 | 9.999980e-07 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.749999 | 2.499995e-01 | 9.999980e-07 | 9.999980e-07 | NaN | NaN | NaN | NaN |
| 102 | 1500.0 | 7.499985e-01 | 9.999980e-07 | 9.999980e-07 | 2.499995e-01 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.749999 | 9.999980e-07 | 9.999980e-07 | 2.499995e-01 | NaN | NaN | NaN | NaN |
| 103 | 1500.0 | 7.499985e-01 | 9.999980e-07 | 2.499995e-01 | 9.999980e-07 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.749999 | 9.999980e-07 | 2.499995e-01 | 9.999980e-07 | NaN | NaN | NaN | NaN |
| 104 | 1500.0 | 9.999970e-01 | 1.000000e-06 | 1.000000e-06 | 1.000000e-06 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.999997 | 1.000000e-06 | 1.000000e-06 | 1.000000e-06 | NaN | NaN | NaN | NaN |
105 rows × 60 columns
Random sampling¶
In high-dimensional composition spaces, lattice grids can produce intractably many compositions. Random sampling from the composition space is attractive to generate training data for these cases, but care should be taken because naïve random sampling approaches in composition simplices can produce highly correlated points in high dimensions. Otis, Emelianenko, and Liu [1] demonstrated the use of scrambled Halton sequences to produce low discrepancy points on an n-simplex and this method is used internally in PyCalphad’s starting point generation.
Here we import PyCalphad’s scrambled Halton sequence sampler to generate low-discrepancy samples from our composition space for high-throughput calculations.
[1] R. Otis, M. Emelianenko, Z.-K. Liu, Computational Materials Science 130 (2017) 282–291
[5]:
from pycalphad.core.utils import point_sample
# Problem definition: same system, temperatures and pressure as the lattice
# example, but compositions come from PyCalphad's scrambled Halton sampler.
components = ["AL", "MG", "SI", "CU", "VA"]
temperatures = [1000.0, 1250.0, 1500.0]
pressure = 101325.0
density = 10

# "VA" is excluded from the composition variables; the first remaining
# element (AL) gets no explicit condition, only the other elements do.
elements = [name for name in components if name != "VA"]
solutes = elements[1:]

# NOTE(review): the single-entry list presumably describes one sublattice
# holding all elements, with `density` controlling the number of samples -
# confirm against point_sample's documentation.
composition_rows = point_sample([len(elements)], density)
composition_conditions = [
    dict(zip(map(v.X, solutes), row[1:]))
    for row in composition_rows
]
n_points = len(temperatures) * len(composition_rows)
[6]:
# Load the COST507 test database and build a single Workspace over all of its
# phases; the same Workspace is re-used for every sampled point below.
test_database_directory = files(pycalphad.tests.databases)
dbf = Database(test_database_directory / "COST507.tdb")
phases = list(dbf.phases.keys())
wks = Workspace(dbf, components, phases)

print(f"Evaluating {len(composition_rows)} simplex points × {len(temperatures)} temperatures = {n_points} total")

data_rows = []
# Temperatures form the outer loop and sampled compositions the inner loop.
for temperature, comp_conds in product(temperatures, composition_conditions):
    # Only the conditions change between points.
    wks.conditions = {v.T: temperature, v.P: pressure, v.N: 1, **comp_conds}
    results = wks.get_dict("T", "X(*)", "NP(*)", "X(*,*)")
    # value[()] presumably unwraps a 0-d array into a scalar - TODO confirm.
    data_rows.append({str(key): value[()] for key, value in results.items()})
/Users/bocklund1/src/calphad-workspace/packages/pycalphad/pycalphad/io/tdb.py:293: UserWarning: The type definition character `C` in `TYPE_DEFINITION C GES AMEND_PHASE_DESCRIPTION CBCC_A12 MAGNETIC -3 0.280, ` is not used by any phase.
warnings.warn(f"The type definition character `{typechar}` in `TYPE_DEFINITION {typechar} {line}` is not used by any phase.")
/Users/bocklund1/src/calphad-workspace/packages/pycalphad/pycalphad/io/tdb.py:1017: UserWarning: The type definition character `R` was defined in the following phases: ['LIQUID'], but no corresponding TYPE_DEFINITION line was found in the TDB.
warnings.warn(f"The type definition character `{typechar}` was defined in the following phases: "
Evaluating 30 simplex points × 3 temperatures = 90 total
[7]:
# One DataFrame row per calculation; columns that are NaN for every row are
# removed. The bare name on the last line displays the frame in the notebook.
workspace_df = pd.DataFrame(data_rows).pipe(drop_empty_columns)
workspace_df
[7]:
| T | X_AL | X_CU | X_MG | X_SI | NP_BCC_B2 | NP_CUMGSI_TAU | NP_DIAMOND_A4 | NP_FCC_A1 | NP_GAMMA_D83 | ... | X_LAVES_C15_MG | X_LAVES_C15_SI | X_LIQUID_AL | X_LIQUID_CU | X_LIQUID_MG | X_LIQUID_SI | X_MG2SI_AL | X_MG2SI_CU | X_MG2SI_MG | X_MG2SI_SI | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1000.0 | 0.294784 | 0.143096 | 0.172437 | 0.389683 | NaN | NaN | 0.187663 | NaN | NaN | ... | NaN | NaN | 0.372407 | 0.180777 | 0.200347 | 0.246469 | 0.0 | 0.0 | 0.666667 | 0.333333 |
| 1 | 1000.0 | 0.389918 | 0.238316 | 0.309003 | 0.062763 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.403179 | 0.246421 | 0.296839 | 0.053561 | 0.0 | 0.0 | 0.666667 | 0.333333 |
| 2 | 1000.0 | 0.053801 | 0.363918 | 0.281288 | 0.300992 | NaN | NaN | 0.090452 | NaN | NaN | ... | 0.332141 | 0.235545 | 0.232509 | 0.417196 | 0.147563 | 0.202732 | NaN | NaN | NaN | NaN |
| 3 | 1000.0 | 0.726518 | 0.053857 | 0.041151 | 0.178473 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.726518 | 0.053857 | 0.041151 | 0.178473 | NaN | NaN | NaN | NaN |
| 4 | 1000.0 | 0.113442 | 0.135071 | 0.141870 | 0.609618 | NaN | NaN | 0.474927 | NaN | NaN | ... | NaN | NaN | 0.236623 | 0.281740 | 0.232434 | 0.249203 | 0.0 | 0.0 | 0.666667 | 0.333333 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 85 | 1500.0 | 0.333667 | 0.163897 | 0.228379 | 0.274057 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.333667 | 0.163897 | 0.228379 | 0.274057 | NaN | NaN | NaN | NaN |
| 86 | 1500.0 | 0.032311 | 0.225124 | 0.703895 | 0.038670 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.032311 | 0.225124 | 0.703895 | 0.038670 | NaN | NaN | NaN | NaN |
| 87 | 1500.0 | 0.275258 | 0.380345 | 0.066848 | 0.277549 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.275258 | 0.380345 | 0.066848 | 0.277549 | NaN | NaN | NaN | NaN |
| 88 | 1500.0 | 0.163474 | 0.088235 | 0.508454 | 0.239838 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.163474 | 0.088235 | 0.508454 | 0.239838 | NaN | NaN | NaN | NaN |
| 89 | 1500.0 | 0.148688 | 0.116956 | 0.274485 | 0.459871 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.148688 | 0.116956 | 0.274485 | 0.459871 | NaN | NaN | NaN | NaN |
90 rows × 45 columns