Balance diagnostics after propensity score matching(PSM)

Standardized mean difference (SMD) is the most commonly used statistic to examine the balance of covariate distribution between treatment groups. Because SMD is independent of the unit of measurement, it allows comparison between variables with different unit of measurement. (Zhang Z et al. 2019)

  1. to create a simulated dataset
psSim<-function(CatVarN=2,ContVarN=2,
                seed=123,n=1000){
  set.seed(seed);
  Xcont <- replicate(ContVarN, rnorm(n))
  Xcat <- replicate(CatVarN, rbinom(n,size = 1,prob = 0.3))
  linpredT<-cbind(1, Xcont,Xcat) %*%
    sample(c(-5:-1,1:5), 1+CatVarN+ContVarN) +
    rnorm(n,-0.8,1)
  probTreatment <- exp(linpredT) / (1 + exp(linpredT))
  Treat <- rbinom(n, 1, probTreatment);
  linpredY <- 1 + cbind(Xcont,Xcat) %*%
    rep(1, CatVarN+ContVarN) +
    Treat + rnorm(n, -2, 2);
  prY = 1/(1+exp(-linpredY));
  mort <- rbinom(n,1,prY);
  dt <- data.frame(Xcont=Xcont,Xcat=Xcat,Treat, mort)
  return(dt)
}
dt<-psSim()

str(dt)
## 'data.frame':    1000 obs. of  6 variables:
##  $ Xcont.1: num  -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
##  $ Xcont.2: num  -0.996 -1.04 -0.018 -0.132 -2.549 ...
##  $ Xcat.1 : int  0 1 0 1 0 0 1 0 0 1 ...
##  $ Xcat.2 : int  0 1 1 0 0 0 0 0 1 0 ...
##  $ Treat  : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ mort   : int  0 1 1 0 1 0 1 0 1 1 ...
  1. to examine the balance of covariates between treated and untreated groups
library(tableone)
myVars <- names(dt)[1:4]
tabbefore <- CreateTableOne(vars = myVars,
                              data = dt,
                              strata = 'Treat',
                              factorVars = c('Xcat.1','Xcat.2'),
                              smd = T)
tabbefore <- print(tabbefore,
                     printToggle = FALSE,
                     noSpaces = TRUE, smd=TRUE,
                     quote=T)

tabbefore
##                        "Stratified by Treat"
##  ""                     "0"            "1"            "p"      "test" "SMD"  
##   "n"                   "766"          "234"          ""       ""     ""     
##   "Xcont.1 (mean (SD))" "0.12 (0.99)"  "-0.32 (0.94)" "<0.001" ""     "0.455"
##   "Xcont.2 (mean (SD))" "-0.25 (0.89)" "1.00 (0.77)"  "<0.001" ""     "1.499"
##   "Xcat.1 = 1 (%)"      "263 (34.3)"   "19 (8.1)"     "<0.001" ""     "0.677"
##   "Xcat.2 = 1 (%)"      "279 (36.4)"   "23 (9.8)"     "<0.001" ""     "0.665"
  1. PSM
#install.packages("MatchIt")
library(MatchIt);
m.out<-matchit(Treat~ Xcont.1+Xcont.2+Xcat.1+Xcat.2,
                 dt, method = "nearest", caliper=0.1)
  1. SMD
#install.packages("cobalt")
library(cobalt)

bal.tab(m.out,m.threshold=0.1)
## Call
##  matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2, 
##     data = dt, method = "nearest", caliper = 0.1)
## 
## Balance Measures
##              Type Diff.Adj    M.Threshold
## distance Distance   0.0455 Balanced, <0.1
## Xcont.1   Contin.   0.0271 Balanced, <0.1
## Xcont.2   Contin.   0.0506 Balanced, <0.1
## Xcat.1     Binary  -0.0114 Balanced, <0.1
## Xcat.2     Binary   0.0000 Balanced, <0.1
## 
## Balance tally for mean differences
##                    count
## Balanced, <0.1         5
## Not Balanced, >0.1     0
## 
## Variable with the greatest mean difference
##  Variable Diff.Adj    M.Threshold
##   Xcont.2   0.0506 Balanced, <0.1
## 
## Sample sizes
##           Control Treated
## All           766     234
## Matched        88      88
## Unmatched     678     146
  1. Visualization the distribution changes before and after matching
bal.plot(m.out,var.name = 'Xcont.2',which = 'both')
image.png
bal.plot(m.out,var.name = 'Xcat.1',which = 'both')
image.png
  1. Publication quality plot (change the temporary variable names)
v <- data.frame(old = c("Xcont.1","Xcont.2","Xcat.1","Xcat.2"),
                new = c("Age", "WBC", "Gender",'Surgery'))
love.plot(bal.tab(m.out, m.threshold=0.1),
            stat = "mean.diffs", var.names = v, abs = F)
## Warning: Standardized mean differences and raw mean differences are present in the same plot. 
## Use the 'stars' argument to distinguish between them and appropriately label the x-axis.
image.png
  1. to compare difference after PSM (refer to tabbefore)
df.match <- match.data(m.out)[1:ncol(dt)]
tabafter <- CreateTableOne(vars = myVars,
                             data = df.match,
                             strata = 'Treat',
                             factorVars = c('Xcat.1','Xcat.2'),
                             smd = T)
tabafter <- print(tabafter,
                    printToggle = FALSE,
                    noSpaces = TRUE,smd=TRUE)

tabafter;
##                      Stratified by Treat
##                       0              1              p       test SMD     
##   n                   "88"           "88"           ""      ""   ""      
##   Xcont.1 (mean (SD)) "-0.18 (0.93)" "-0.15 (1.04)" "0.864" ""   "0.026" 
##   Xcont.2 (mean (SD)) "0.50 (0.67)"  "0.54 (0.76)"  "0.719" ""   "0.054" 
##   Xcat.1 = 1 (%)      "9 (10.2)"     "8 (9.1)"      "1.000" ""   "0.038" 
##   Xcat.2 = 1 (%)      "12 (13.6)"    "12 (13.6)"    "1.000" ""   "<0.001"

8.1 SMD calculation in CreateTableOne()

abs((mean(df.match[df.match$Treat==1,'Xcont.1'])-
       mean(df.match[df.match$Treat==0,'Xcont.1'])))/
  sqrt((var(df.match[df.match$Treat==1,'Xcont.1'])+
          var(df.match[df.match$Treat==0,'Xcont.1']))/2)
## [1] 0.02581165

8.2 SMD calculation in tab():

(mean(df.match[df.match$Treat==1,'Xcont.1'])-
    mean(df.match[df.match$Treat==0,'Xcont.1']))/
  sqrt((var(dt[dt$Treat==1,'Xcont.1'])))
## [1] 0.02705606
  1. Variance ratio

Note: A variance ratio of 1 in matched sample indicates a good matching, and a variance ratio below 2 is generally acceptable.

bal.tab(m.out,v.threshold=2)
## Call
##  matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2, 
##     data = dt, method = "nearest", caliper = 0.1)
## 
## Balance Measures
##              Type Diff.Adj V.Ratio.Adj  V.Threshold
## distance Distance   0.0455      1.0950 Balanced, <2
## Xcont.1   Contin.   0.0271      1.2512 Balanced, <2
## Xcont.2   Contin.   0.0506      1.2954 Balanced, <2
## Xcat.1     Binary  -0.0114           .             
## Xcat.2     Binary   0.0000           .             
## 
## Balance tally for variance ratios
##                  count
## Balanced, <2         3
## Not Balanced, >2     0
## 
## Variable with the greatest variance ratio
##  Variable V.Ratio.Adj  V.Threshold
##   Xcont.2      1.2954 Balanced, <2
## 
## Sample sizes
##           Control Treated
## All           766     234
## Matched        88      88
## Unmatched     678     146
  1. Prognostic score for assessing balance

The prognostic score is defined as the predicted probability of outcome under the control condition. It can be estimated by regressing the outcome on covariates in the control group. Then that fitted model is used to predict outcome for all subjects.

ctrl.data <- dt[dt$Treat == 0,]
ctrl.fit <- glm(mort ~ Xcont.1+Xcont.2+Xcat.1+Xcat.2,
                  data = ctrl.data)
dt$prog.score <- predict(ctrl.fit, dt)

bal.tab(m.out, distance = dt["prog.score"])
## Call
##  matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2, 
##     data = dt, method = "nearest", caliper = 0.1)
## 
## Balance Measures
##                Type Diff.Adj
## prog.score Distance   0.0360
## distance   Distance   0.0455
## Xcont.1     Contin.   0.0271
## Xcont.2     Contin.   0.0506
## Xcat.1       Binary  -0.0114
## Xcat.2       Binary   0.0000
## 
## Sample sizes
##           Control Treated
##  All           766     234
## Matched        88      88
## Unmatched     678     146
  1. to identify evidence of model misspecification
library(car)
mod1<-glm(Treat~Xcont.1+Xcont.2+Xcat.1+Xcat.2,
            dt,family = binomial)
residualPlots(mod1,terms=~Xcont.1+Xcont.2,fitted=T)

##         Test stat Pr(>|Test stat|)
## Xcont.1    0.5216           0.4701
## Xcont.2    0.6596           0.4167
image.png

Reference:
Zhang Z, Kim HJ, Lonjon G, Zhu Y; written on behalf of AME Big-Data Clinical Trial Collaborative Group. Balance diagnostics after propensity score matching. Ann Transl Med 2019;7(1):16. doi: 10.21037/atm.2018.12.10

你可能感兴趣的:(Balance diagnostics after propensity score matching(PSM))