更新公式如下:
p ( α i ∣ y i , π , Ω ) = ∏ c = 1 C π ~ i , c I ( α i T ⋅ q j = q j T ⋅ q j ) , π ~ i , c = p ( α i = α c ∣ y i , π , Ω ) (10) p(\boldsymbol{\alpha_i|y_i,\pi,\Omega}) = \prod_{c=1}^C{\widetilde{\pi}_{i,c}^{\mathcal{I} (\boldsymbol{\alpha_i^T \cdot q_j = q_j^T \cdot q_j} ) }}, \widetilde{\pi}_{i,c}=p(\boldsymbol{\alpha_i=\alpha_c|y_i,\pi,\Omega}) \tag{10} p(αi∣yi,π,Ω)=c=1∏Cπ i,cI(αiT⋅qj=qjT⋅qj),π i,c=p(αi=αc∣yi,π,Ω)(10) π ∣ α i , … , α N ∝ D i r i c h l e t ( N ~ + δ 0 ) N ~ = ( N ~ 1 , … , N ~ C ) = ( ∑ i = 1 N I ( α i = α 1 ) , … , ∑ i = 1 N I ( α i = α C ) ) (11) \boldsymbol{\pi|\alpha_i,…,\alpha_N} \propto Dirichlet(\boldsymbol{\widetilde{N} + \delta_0}) \\ \boldsymbol{\widetilde{N}} = (\widetilde{N}_1,…,\widetilde{N}_C) = (\sum_{i=1}^{N}{\boldsymbol{\mathcal{I}(\alpha_i = \alpha_1)} },…,\sum_{i=1}^{N}{\boldsymbol{\mathcal{I}(\alpha_i = \alpha_C)} }) \tag{11} π∣αi,…,αN∝Dirichlet(N +δ0)N =(N 1,…,N C)=(i=1∑NI(αi=α1),…,i=1∑NI(αi=αC))(11)
逻辑如下:
被试属于每种认知状态的得分计算方法如下:
Rcpp::List update_alpha(const arma::mat &Amat, const arma::mat &Q,
const arma::vec &ss, const arma::vec &gs,
const arma::mat &Y, const arma::vec &PIs,
arma::mat &ALPHAS, const arma::vec &delta0)
{
unsigned int N = Y.n_rows;
unsigned int J = Q.n_rows;
unsigned int C = Amat.n_rows;
unsigned int K = Q.n_cols;
double ci;
arma::mat alphas_new = arma::zeros<arma::mat>(N, K);
arma::vec PYCS(C);
arma::vec CLASSES(N);
arma::vec PS;
arma::vec Ncs = arma::zeros<arma::vec>(C);
double etaij;
// 三重循环,被试->知识状态->试题,估计出每个人归属那种知识状态
for (unsigned int i = 0; i < N; i++) {
PYCS = arma::ones<arma::vec>(C);
for (unsigned int c = 0; c < C; c++) {
for (unsigned int j = 0; j < J; j++) {
etaij = 1.0;
if (arma::dot(Amat.row(c), Q.row(j)) <
arma::dot(Q.row(j), Q.row(j))) {
etaij = 0.0;
}
// 如果掌握了试题所需知识点且答对了试题
if (etaij == 1.0 and Y(i, j) == 1.0) {
PYCS(c) = (1.0 - ss(j)) * PYCS(c);
}
// 如果没有掌握试题所需知识点但猜对了试题
if (etaij == 0.0 and Y(i, j) == 1.0) {
PYCS(c) = gs(j) * PYCS(c);
}
// 如果掌握了试题所需知识点但答错了试题
if (etaij == 1.0 and Y(i, j) == 0.0) {
PYCS(c) = ss(j) * PYCS(c);
} else {
// 如果没有掌握试题所需知识点且答错了试题
PYCS(c) = (1.0 - gs(j)) * PYCS(c);
}
}
}
// 计算被试属于各种知识状态的概率(进行归一化)
PS = PYCS % PIs / (arma::conv_to<double>::from(PYCS.t() * PIs));
// 使用多项式采样,获取被试归属的知识状态类别
ci = rgen::rmultinomial(PS);
// 依据知识状态类别,获取对应的知识状态向量
ALPHAS.row(i) = Amat.row(ci);
// 对应知识状态类别学生数+1
Ncs(ci) = 1.0 + Ncs(ci);
// 记录被试所属的知识状态类别
CLASSES(i) = ci;
}
// PIs_new 为新的各种知识状态类别出行的概率,使用Dirichlet分布进行采样
return Rcpp::List::create(
Rcpp::Named("PYCS") = PYCS, Rcpp::Named("PS") = PS,
Rcpp::Named("PIs_new") = rgen::rdirichlet(Ncs + delta0),
Rcpp::Named("CLASSES") = CLASSES);
}
更新公式如下:
p ( Ω j ∣ y j , η 1 , j , … , η N , j ) ∝ s j ( α ~ s − 1 ) ( 1 − s j ) ( β ~ s − 1 ) g j ( α ~ g − 1 ) ( 1 − g j ) ( β ~ g − 1 ) I ( ( s j , g j ) ∈ P ) α ~ s = S j + α s , β ~ s = T j − S j + β s α ~ g = G j + α g , β ~ g = N − T j − G j + β g T j = ∑ i = 1 N η i , j , S j = ∑ i ∣ y i , j = 0 N η i , j , G j = ∑ i ∣ y i , j = 1 N ( 1 − η i , j ) (12) p(\boldsymbol{\Omega_j|y_j},\eta_{1,j},…,\eta_{N,j}) \propto s_j^{(\widetilde{\alpha}_s - 1)}(1 - s_j)^{(\widetilde{\beta}_s - 1)}g_j^{(\widetilde{\alpha}_g - 1)}(1 - g_j)^{(\widetilde{\beta}_g - 1)}\mathcal{I} \left((s_j,g_j) \in \mathcal{P} \right) \\ \widetilde{\alpha}_s = \mathcal{S}_j + \alpha_s, \widetilde{\beta}_s = \mathcal{T}_j - \mathcal{S}_j + \beta_s \\ \widetilde{\alpha}_g = \mathcal{G}_j + \alpha_g, \widetilde{\beta}_g = N - \mathcal{T}_j - \mathcal{G}_j + \beta_g \\ \mathcal{T}_j = \sum_{i=1}^N{\eta_{i,j}}, \mathcal{S}_j = \sum_{i|y_{i,j} = 0}^N{\eta_{i,j}}, \mathcal{G}_j = \sum_{i|y_{i,j}=1}^N(1 - \eta_{i,j}) \tag{12} p(Ωj∣yj,η1,j,…,ηN,j)∝sj(α s−1)(1−sj)(β s−1)gj(α g−1)(1−gj)(β g−1)I((sj,gj)∈P)α s=Sj+αs,β s=Tj−Sj+βsα g=Gj+αg,β g=N−Tj−Gj+βgTj=i=1∑Nηi,j,Sj=i∣yi,j=0∑Nηi,j,Gj=i∣yi,j=1∑N(1−ηi,j)(12)
逻辑如下:
注意在第七步使用Beta分布进行采样时,需要限定 0 ≤ s j + g j ≤ 1 0 \le s_j + g_j \le 1 0≤sj+gj≤1。因此采用1减去旧值再乘以一个小于1的随机数。
Rcpp::List update_sg(const arma::mat &Y, const arma::mat &Q,
const arma::mat &ALPHAS, const arma::vec &ss_old,
double as0, double bs0, double ag0, double bg0)
{
unsigned int N = Y.n_rows;
unsigned int J = Y.n_cols;
arma::vec ETA;
arma::vec ss_new(J);
arma::vec gs_new(J);
arma::mat AQ = ALPHAS * Q.t();
double T, S, G, y_dot_eta, qq, ps, pg;
double ug, us;
// 针对每道试题更新对应的试题猜对率和失误率
for (unsigned int j = 0; j < J; j++) {
// 采用平均分布产生两个随机数
us = R::runif(0, 1);
ug = R::runif(0, 1);
// 计算理想情况下的答题结果eta
ETA = arma::zeros<arma::vec>(N);
qq = arma::conv_to<double>::from(Q.row(j) * (Q.row(j)).t());
ETA.elem(arma::find(AQ.col(j) == qq)).fill(1.0);
// 掌握该题且答对的人数
y_dot_eta = arma::conv_to<double>::from((Y.col(j)).t() * ETA);
// 掌握该题的人数
T = sum(ETA);
// 失误的人数=掌握的人数-掌握答对的人数
S = T - y_dot_eta;
// 猜对的人数 = 该题答对的总人数 - 掌握答对的人数
G = sum(Y.col(j)) - y_dot_eta;
// sample s and g as linearly truncated bivariate beta
// draw g conditoned upon s_t-1
// 使用Beta分布对g和s进行采样,同时限定0 < g + s < 1
pg = R::pbeta(1.0 - ss_old(j), G + ag0, N - T - G + bg0, 1, 0);
gs_new(j) = R::qbeta(ug * pg, G + ag0, N - T - G + bg0, 1, 0);
// draw s conditoned upon g
ps = R::pbeta(1.0 - gs_new(j), S + as0, T - S + bs0, 1, 0);
ss_new(j) = R::qbeta(us * ps, S + as0, T - S + bs0, 1, 0);
}
return Rcpp::List::create(Rcpp::Named("ss_new") = ss_new,
Rcpp::Named("gs_new") = gs_new);
}
逻辑如下:
Rcpp::List DINA_Gibbs_cpp(const arma::mat &Y, const arma::mat &Q,
unsigned int chain_length = 10000)
{
// Number of Observations
unsigned int N = Y.n_rows;
// Number of Items
unsigned int J = Y.n_cols;
// Number of Attributes
unsigned int K = Q.n_cols;
// Number of Latent Classes (2^k)
unsigned int C = static_cast<unsigned int>(pow(2.0, static_cast<double>(K)));
// Generate the latent class alpha matrix
arma::mat Amat = simcdm::attribute_classes(K);
// Prior values for betas and Dirichlet distribution
arma::vec delta0 = arma::ones<arma::vec>(C);
double as0 = 1.0;
double bs0 = 1.0;
double ag0 = 1.0;
double bg0 = 1.0;
arma::vec pil0 = arma::ones<arma::vec>(C) / double(C); // prior probability
// Saving output
arma::mat SigS(J, chain_length);
arma::mat GamS(J, chain_length);
arma::mat US(J, chain_length);
arma::mat PIs(C, chain_length);
arma::mat CLASSES(N, chain_length);
arma::cube QS(J, K, chain_length);
// Need to initialize, alphas, ss, gs, and pis
// arma::mat alphas = arma::zeros(N,K); //K>1 is assumed
// 被试的知识点掌握矩阵(知识状态)
arma::mat alphas = arma::randu<arma::mat>(N, K); // K>1 is assumed
alphas.elem(find(alphas > 0.5)).ones();
alphas.elem(find(alphas <= 0.5)).zeros();
// 随机生成失误率、猜对率、各个潜在知识状态类别发生的概率
arma::vec ss = arma::randu<arma::vec>(J);
arma::vec gs = arma::randu<arma::vec>(J);
arma::vec pis = arma::randu<arma::vec>(C);
// Start Markov chain
for (unsigned int t = 0; t < chain_length; t++) {
// updata alpha and pi
// 更新被试的知识掌握矩阵和各种知识状态的分布概率
Rcpp::List step1a =
update_alpha(Amat, Q, ss, gs, Y, pis, alphas, delta0);
// update value for pis. alphas are updated via pointer. save classes
// and PIs
// 保存本步迭代被试所属的知识状态类别和知识状态分布概率
pis = Rcpp::as<arma::vec>(step1a[2]);
CLASSES.col(t) = Rcpp::as<arma::vec>(step1a[3]);
PIs.col(t) = pis;
// update s and g
// 对试题的猜对率和失误率进行重新采样
Rcpp::List step1b = update_sg(Y, Q, alphas, ss, as0, bs0, ag0, bg0);
// update value for ss and gs.
// 更新并保存最新的试题失误率和猜对率
ss = Rcpp::as<arma::vec>(step1b[0]);
gs = Rcpp::as<arma::vec>(step1b[1]);
SigS.col(t) = ss;
GamS.col(t) = gs;
}
return Rcpp::List::create(
Rcpp::Named("CLASSES", CLASSES), Rcpp::Named("PIs", PIs),
Rcpp::Named("SigS", SigS), Rcpp::Named("GamS", GamS));
}