COGMEN: COntextualized GNN based Multimodal Emotion recognitioN
COGMEN: 基于GNN的多模态情感识别技术
Paper: https://arxiv.org/abs/2205.02455
源代码GitHub - Exploration-Lab/COGMEN
论文翻译及总结可参考我另外一篇博文:多模态 |COGMEN: COntextualized GNN based Multimodal Emotion recognitioN论文详解_夏天|여름이다的博客-CSDN博客
如果安装GPU版本,cuda版本需要11.7及以上
Installation — pytorch_geometric documentation (pytorch-geometric.readthedocs.io)
comet.ml
Install SBERT
对于已经有pytorch的情况,我只安装了以下命令
pip install comet_ml --upgrade #使用默认的 Python,comet_ml升级到最新版本
pip install torch_geometric
pip install -U sentence-transformers
数据集:iemocap_4
对数据集进行处理,运行
python preprocess.py --dataset="iemocap_4"
运行后,结果如图
python train.py --dataset="iemocap_4" --modalities="atv" --from_begin --epochs=55
训练效果如图
训练后,生成model_checkpoints目录,如下
python eval.py --dataset="iemocap_4" --modalities="atv"
结果如图
COGMEN/cogmen/model/COGMEN.py
import torch
import torch.nn as nn
from .SeqContext import SeqContext
from .GNN import GNN
from .Classifier import Classifier
from .functions import batch_graphify
import cogmen
log = cogmen.utils.get_logger()
#定义网络模型
class COGMEN(nn.Module):
    """Contextualized GNN-based multimodal emotion recognition network.

    Pipeline: sequence-context encoder (SeqContext) -> conversation-graph
    construction (batch_graphify) -> GNN -> Classifier.
    """

    def __init__(self, args):
        super().__init__()
        utterance_dim = 100
        # Context-encoder output size; the transformer variant keeps hidden_size.
        g_dim = args.hidden_size if args.rnn == "transformer" else 200
        h1_dim = args.hidden_size
        h2_dim = args.hidden_size
        hc_dim = args.hidden_size

        # Label map for each supported dataset.
        dataset_label_dict = {
            "iemocap": {"hap": 0, "sad": 1, "neu": 2, "ang": 3, "exc": 4, "fru": 5},
            "iemocap_4": {"hap": 0, "sad": 1, "neu": 2, "ang": 3},
            "mosei": {"Negative": 0, "Positive": 1},
        }
        # Number of distinct speakers per dataset.
        dataset_speaker_dict = {"iemocap": 2, "iemocap_4": 2, "mosei": 1}

        # Multilabel emotion training replaces the binary mosei label set.
        if args.dataset and args.emotion == "multilabel":
            dataset_label_dict["mosei"] = {
                "happiness": 0,
                "sadness": 1,
                "anger": 2,
                "surprise": 3,
                "disgust": 4,
                "fear": 5,
            }

        tag_size = len(dataset_label_dict[args.dataset])
        args.n_speakers = dataset_speaker_dict[args.dataset]

        self.concat_gin_gout = args.concat_gin_gout
        self.wp = args.wp  # past context window size
        self.wf = args.wf  # future context window size
        self.device = args.device

        self.rnn = SeqContext(utterance_dim, g_dim, args)
        self.gcn = GNN(g_dim, h1_dim, h2_dim, args)
        gnn_out_dim = h2_dim * args.gnn_nheads
        # Optionally feed the classifier the GNN input concatenated with its output.
        clf_in_dim = g_dim + gnn_out_dim if args.concat_gin_gout else gnn_out_dim
        self.clf = Classifier(clf_in_dim, hc_dim, tag_size, args)

        # Edge type key = source speaker id + target speaker id + direction flag.
        edge_types = [
            f"{src}{dst}{direction}"
            for src in range(args.n_speakers)
            for dst in range(args.n_speakers)
            for direction in "01"
        ]
        self.edge_type_to_idx = {etype: i for i, etype in enumerate(edge_types)}
        log.debug(self.edge_type_to_idx)

    def get_rep(self, data):
        """Encode the batch, build the conversation graph, run the GNN.

        Returns (graph_out, features), where features are the pre-GNN
        node features.
        """
        # [batch_size, mx_len, D_g]
        node_features = self.rnn(data["text_len_tensor"], data["input_tensor"])
        features, edge_index, edge_type, edge_index_lengths = batch_graphify(
            node_features,
            data["text_len_tensor"],
            data["speaker_tensor"],
            self.wp,
            self.wf,
            self.edge_type_to_idx,
            self.device,
        )
        graph_out = self.gcn(features, edge_index, edge_type)
        return graph_out, features

    def forward(self, data):
        """Return classifier predictions for a batch of conversations."""
        graph_out, features = self.get_rep(data)
        clf_in = (
            torch.cat([features, graph_out], dim=-1)
            if self.concat_gin_gout
            else graph_out
        )
        return self.clf(clf_in, data["text_len_tensor"])

    def get_loss(self, data):
        """Return the training loss for a batch of conversations."""
        graph_out, features = self.get_rep(data)
        clf_in = (
            torch.cat([features, graph_out], dim=-1)
            if self.concat_gin_gout
            else graph_out
        )
        return self.clf.get_loss(
            clf_in, data["label_tensor"], data["text_len_tensor"]
        )
COGMEN/cogmen/Optim.py
import torch.optim as optim
from torch.nn.utils import clip_grad_value_
from torch.optim import lr_scheduler
#定义优化器类
class Optim:
    """Thin wrapper around a torch optimizer.

    Adds optional gradient value clipping (enabled when max_grad_value != -1)
    and construction of a learning-rate scheduler for the wrapped optimizer.
    """

    # Supported optimizer names mapped to their torch constructors.
    _OPTIMIZERS = {
        "sgd": optim.SGD,
        "rmsprop": optim.RMSprop,
        "adam": optim.Adam,
        "adamw": optim.AdamW,
    }

    def __init__(self, lr, max_grad_value, weight_decay):
        # max_grad_value == -1 disables gradient clipping in step().
        self.lr = lr
        self.max_grad_value = max_grad_value
        self.weight_decay = weight_decay
        self.params = None
        self.optimizer = None

    def set_parameters(self, params, name):
        """Bind model parameters and instantiate the named optimizer.

        Raises:
            ValueError: if ``name`` is not a supported optimizer name
                (previously an unknown name silently left ``optimizer`` as
                None, crashing later in step()).
        """
        self.params = list(params)
        try:
            opt_cls = self._OPTIMIZERS[name]
        except KeyError:
            raise ValueError(f"unsupported optimizer: {name!r}") from None
        self.optimizer = opt_cls(
            self.params, lr=self.lr, weight_decay=self.weight_decay
        )

    def get_scheduler(self, sch):
        """Build an LR scheduler ("reduceLR" or "expLR") for the optimizer.

        Raises:
            ValueError: if ``sch`` is not a supported scheduler name
                (previously an unknown name raised UnboundLocalError).
        """
        print("Using Scheduler")
        if sch == "reduceLR":
            return lr_scheduler.ReduceLROnPlateau(self.optimizer, "min")
        if sch == "expLR":
            # BUG FIX: the original referenced a bare `ExponentialLR`, which
            # was never imported and raised NameError at runtime.
            return lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)
        raise ValueError(f"unsupported scheduler: {sch!r}")

    def step(self):
        """Clip gradients (if enabled) and apply one optimizer step."""
        if self.max_grad_value != -1:
            clip_grad_value_(self.params, self.max_grad_value)
        self.optimizer.step()

    def load_state_dict(self, state_dict):
        """Restore optimizer state from a checkpoint."""
        self.optimizer.load_state_dict(state_dict)
COGMEN/cogmen/Dataset.py
#导入SBERT
from sentence_transformers import SentenceTransformer
#使用预训练模型 paraphrase-distilroberta-base-v1:
sbert_model = SentenceTransformer("paraphrase-distilroberta-base-v1")
#定义样本类:声音编号,人员编号,标签,文本,语音,视觉,通过sbert编码后的向量值
class Sample:
    """One utterance: ids, label, raw modality features, and SBERT embedding.

    The sentence embedding is computed eagerly at construction time via the
    module-level ``sbert_model``.
    """

    def __init__(self, vid, speaker, label, text, audio, visual, sentence):
        # Identifiers and supervision target.
        self.vid, self.speaker, self.label = vid, speaker, label
        # Raw per-modality features (text / audio / visual).
        self.text, self.audio, self.visual = text, audio, visual
        # Raw sentence plus its pre-computed SBERT embedding.
        self.sentence = sentence
        self.sbert_sentence_embeddings = sbert_model.encode(sentence)