RDKit | 基于随机森林(RF)预测SARS-CoV 3CL蛋白酶抑制剂的pIC50

 

导入库

import sklearn
from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from sklearn.model_selection import train_test_split
from rdkit.ML.Descriptors import MoleculeDescriptors
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from sklearn.metrics import r2_score,mean_absolute_error, mean_squared_error
import seaborn as sn
from sklearn.preprocessing import StandardScaler
from rdkit.Avalon import pyAvalonTools

# Print the RDKit version string exposed by rdBase, to record which
# RDKit release this walkthrough was run against.
from rdkit import rdBase
print(rdBase.rdkitVersion)
输出:2020.09.1

载入数据

# Read the inhibitor CSV into a pandas DataFrame. The path is relative,
# so the file must be in the current working directory.
dataset = pd.read_csv('3CLprotease_inhibitors_133.csv')
# Preview the first rows. As a bare expression this is only displayed in a
# notebook/REPL; in a plain script the result is discarded.
dataset.head()

你可能感兴趣的:(Life,Sciences.AI,python,机器学习,化学信息学,药物设计,RDKit)