from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers import composition as cf
from matminer.featurizers.conversions import StrToComposition
import pandas as pd
用 pandas 的 dataframe 储存材料数据,其中至少要有一列材料的化学式,用于计算材料的 Magpie 特征。
这里提供一个函数,把化学式中省略的计量数 1 补全,得到材料的 composition 字符串(例如 NbPtSi → Nb1Pt1Si1):
def get_magpie_composition(formula):
    """Return *formula* with every implicit element count written as ``1``.

    The formula is scanned once from left to right. Whenever an element
    symbol ends (the current character is an ASCII letter and the next
    character is not a lowercase letter) and no amount follows it (the next
    character is neither a digit nor a decimal point), a ``'1'`` is appended
    right after the symbol.

    Examples: ``'NbPtSi' -> 'Nb1Pt1Si1'``, ``'H2O' -> 'H2O1'``,
    ``'Ca(OH)2' -> 'Ca1(O1H1)2'``.

    Args:
        formula: Chemical formula string such as ``'NbPtSi'`` or ``'Fe2O3'``.

    Returns:
        The formula string with explicit counts. An empty string is returned
        unchanged. Characters that are not ASCII letters (digits, brackets,
        separators) are copied through untouched.
    """
    amount_start = "0123456789."
    last_index = len(formula) - 1
    pieces = []
    for index, char in enumerate(formula):
        pieces.append(char)
        # Only a letter can be the final character of an element symbol.
        if not ("A" <= char <= "Z" or "a" <= char <= "z"):
            continue
        following = formula[index + 1] if index < last_index else ""
        # A lowercase letter means the symbol continues (e.g. the "b" in "Nb").
        if "a" <= following <= "z":
            continue
        # An explicit amount already follows the symbol; nothing to add.
        if following != "" and following in amount_start:
            continue
        # The "1" goes after the symbol, so a bracket or separator that comes
        # next stays attached to the group it opens or closes.
        pieces.append("1")
    return "".join(pieces)
对 dataframe 中的每个材料化学式执行 get_magpie_composition() 函数,并将返回的字符串储存到 dataframe 的 composition 列中,得到如下 dataframe。
def magpie(df: pd.DataFrame, composition: str) -> pd.DataFrame:
    """Append Magpie-style composition features to a dataframe.

    The formula strings (for example ``Nb1Pt1Si1``) in the column named by
    *composition* are first converted into composition objects, stored in a
    new ``composition_obj`` column. Four matminer featurizers are then run on
    that column: stoichiometry, the "magpie" element-property preset, average
    valence-orbital occupation and ion properties. According to the original
    author's note this gives a 145-dimensional vector per material.

    Args:
        df: Dataframe holding the materials, one row per material.
        composition: Name of the column in *df* that contains the chemical
            formula strings.

    Returns:
        The dataframe returned by the featurizer, containing the
        ``composition_obj`` column and the generated feature columns.

    Side effects:
        Prints the literal line ``feature_labels`` followed by the list of
        generated feature labels.
    """
    to_composition = StrToComposition(target_col_id='composition_obj')
    parsed = to_composition.featurize_dataframe(df, composition)

    featurizer = MultipleFeaturizer([
        cf.Stoichiometry(),
        cf.ElementProperty.from_preset("magpie"),
        cf.ValenceOrbital(props=['avg']),
        cf.IonProperty(fast=True),
    ])

    # Same two output lines as separate print calls: heading, then labels.
    print('feature_labels', featurizer.feature_labels(), sep='\n')

    return featurizer.featurize_dataframe(parsed, col_id='composition_obj')
对 dataframe 调用上述 magpie() 函数,即可得到每个材料的 145 维特征,这些特征以新增列的形式储存在原 dataframe 中。