SAS编程-宏:Source侧与QC侧程序运行时间检查

目前SAS编程工作一般都要求双侧独立编程,这就像双盲试验一样,减少偏倚,提升输出结果的准确性。双侧编程中,QC侧程序运行时间需在Source侧之后。这不仅是一个很好的编程习惯,不少公司也将这一要求添加到工作手册中,程序提交之前会有专门的系统来检测。

这篇文章运用SAS编程来完成这样的程序运行时间检查,为方便程序调用,将代码封装为宏,宏程序的完整代码在第4节汇总

1. 宏程序的整体思路

宏程序主要分为两部分,第一部分,获取SAS程序以及SAS日志的末次修改时间;第二部分,比较各个末次修改时间,得出对应的结论

第一部分内容,参考文章SAS编程:如何获取某路径下所有文件的修改时间?。

第二部分,我把各个末次修改时间的比较划分为4类:

  1. 程序修改时间缺失,需创建缺失程序;
  2. 日志修改时间缺失,程序需要Batch Run;
  3. SAS程序修改时间,晚于日志修改时间,程序需要Batch Run;
  4. Source侧日志修改时间,晚于QC侧日志修改时间,QC侧程序需要Batch Run。

演示文件夹如下,两个文件夹,一个是Source侧,一个是QC侧,SAS程序对应的日志文件在同一文件夹中

Source侧
QC侧

2. 获取SAS程序以及日志的末次修改时间

这一部分分为两个步骤,先使用Dopen系列函数获取所有文件的名称(文件路径),再使用Fopen系列函数获取所有文件的修改时间

直接参照之前的文章代码,整合成一个宏程序,宏程序中做2点额外的考量。

第一点,考虑Windows和UNIX系统中,文件地址的斜杠不同,以及在输入文件夹地址时,地址末尾可能添加斜杠,也可能不添加。根据输入的地址判断斜杠的类型,同时,统一将输入地址末尾的斜杠移除,后续使用时再手动添加。

第二点,因为函数FINFO返回的信息,受SAS系统语言设置的影响,且返回的中文时间不方便读入。所以,在调用FINFO时将SAS系统语言设置为英文(options locale = EN_US),调用结束后还原之前的选项值。这个操作是不是跟之前提到的ods listing close;ods listing;options mprint;options nomprint有异曲同工之妙?

第一部分,该程序如下:

%macro  get_last_mod_date( dirpath =, suffix =, outdt =  );
/*------------------------------------------------------------------------------
  Purpose : Scan one folder for .sas / .log files, read each file's
            "Last Modified" timestamp, and build one row per SAS program
            holding the program's and its log's modification datetime.

  Params  : dirpath = folder to scan; Windows or UNIX style, a trailing
                      separator is optional.
            suffix  = tag appended to the output column names.
            outdt   = name of the output dataset.

  Output  : &outdt. with columns
              domain_&suffix.     file name without its extension
              lmdtm_sas_&suffix.  last-modified datetime of the .sas file
              lmdtm_log_&suffix.  last-modified datetime of the matching .log
                                  (missing when no log with the same stem exists)

  Side effects: creates datasets _tmp1 and _tmp2, assigns filerefs DIRPATH and
                FILEPATH, and temporarily sets the LOCALE option to EN_US.
------------------------------------------------------------------------------*/

%if %length(%superq(dirpath)) = 0 %then %do;
  %put Dirpath is missing ! ;
  %return;
%end;

%local dirpath_tmp slash locale_sys;

/* Separator: a path containing a backslash is treated as Windows style,    */
/* anything else as UNIX style. This also covers relative paths and paths   */
/* starting with "." or "~".                                                */
%if %index(%superq(dirpath), \) %then %let slash = \;
%else %let slash = /;

/* Remove one trailing separator; a one-character path (root) is left alone. */
%let dirpath_tmp = &dirpath.;
%if %length(%superq(dirpath)) > 1 %then %do;
  %if %qsubstr(%superq(dirpath), %length(%superq(dirpath)), 1) = %superq(slash) %then
    %let dirpath_tmp = %substr(&dirpath., 1, %length(&dirpath.) - 1);
%end;

/* Step 1 (DOPEN family): list the .sas and .log members of the folder. */
data _tmp1;
  length direct filename filepath stem $200 ext $32;

  fileres = filename("dirpath", "&dirpath_tmp.");
  dirid = dopen("dirpath");

  if dirid > 0 then do;
    direct = "&dirpath_tmp.";
    num = dnum(dirid);

    do i = 1 to num;
      filename = dread(dirid, i);
      filepath = catx("&slash.", direct, filename);

      /* Split at the LAST dot so names such as "ae.v2.sas" are handled, */
      /* and compare the extension case-insensitively.                   */
      dotpos = find(filename, ".", -length(filename));
      if 1 < dotpos < length(filename) then do;
        stem = substr(filename, 1, dotpos - 1);
        ext = lowcase(substr(filename, dotpos + 1));
        if ext in ("sas", "log") then output;
      end;
    end;

    /* Release the directory handle opened by DOPEN. */
    dirid_c = dclose(dirid);
  end;
  else put "WARNING: [get_last_mod_date] cannot open directory: &dirpath_tmp.";

  keep filename filepath stem ext;
run;

proc sort data = _tmp1;
  by filename;
run;

/* FINFO returns the timestamp as locale-dependent text; switch to EN_US so */
/* DATETIME19. can read it, and restore the session value afterwards.       */
%let locale_sys = %sysfunc(getoption(locale));
options locale = EN_US;

/* Step 2 (FOPEN family): read the last-modified timestamp of every file. */
data _tmp2;
  set _tmp1;

  length lmdtc $200;

  fileres = filename("filepath", filepath);
  fileid = fopen("filepath");

  if fileid > 0 then do;
    lmdtc = finfo(fileid, "Last Modified");
    if lmdtc ne "" then lmdtm = input(lmdtc, datetime19.);

    /* Close only a handle that was actually opened. */
    fileid_c = fclose(fileid);
  end;

  format lmdtm e8601dt.;

  keep filename filepath stem ext lmdtc lmdtm;
run;

options locale = &locale_sys.;

/* Step 3: one row per .sas file, with the timestamp of the same-stem .log. */
proc sql noprint;
  create table &outdt. as
    select a.stem  as domain_&suffix.,
           a.lmdtm as lmdtm_sas_&suffix.,
           b.lmdtm as lmdtm_log_&suffix.
    from _tmp2 as a
      left join
      _tmp2 as b
    on a.stem = b.stem and b.ext = "log"
    where a.ext = "sas"
  ;
quit;

%mend get_last_mod_date;

将上面两个文件夹地址代入宏程序:

/* Source side: folder holding the production programs and their logs. */
%get_last_mod_date(dirpath = E:\99_Test\Test\test1\, suffix = S, outdt = Source_lmdtm);

/* QC side: folder holding the validation programs and their logs. */
%get_last_mod_date(dirpath = E:\99_Test\Test\test1\validation, suffix = QC, outdt = QC_lmdtm);

结果如下:

Source_lmdtm
QC_lmdtm

3. 比较各文件的末次修改时间

获取Source侧和QC侧程序和日志的末次修改时间后,将两侧获取到的时间数据集拼接到一起(Full join),比较输出第1节提到的4类输出结果。

%macro check_date(resdt=, SourcePath=, QCPath= );
/*------------------------------------------------------------------------------
  Purpose : Compare last-modified timestamps of Source-side and QC-side SAS
            programs and logs, and list every finding in &resdt.

  Params  : resdt      = name of the findings dataset to create.
            SourcePath = folder with the Source programs and logs.
            QCPath     = folder with the QC programs and logs.

  Findings (resultsn):
            11 / 12  program missing on the Source / QC side
            21 / 22  program has no log on the Source / QC side
            31 / 32  program modified after its log on the Source / QC side
            41       Source log is newer than the QC log

  Requires: macro get_last_mod_date must already be compiled.
  Side effects: creates datasets Source_lmdtm, QC_lmdtm and _tmp3, and writes
                each finding to the log with PUT.
------------------------------------------------------------------------------*/

/* Stop early on a blank argument. Otherwise get_last_mod_date only prints a */
/* message and the join below would fail or silently reuse a Source_lmdtm /  */
/* QC_lmdtm dataset left over from an earlier call.                          */
%if %length(%superq(resdt)) = 0 or %length(%superq(SourcePath)) = 0 or %length(%superq(QCPath)) = 0 %then %do;
  %put ERROR: [check_date] resdt, SourcePath and QCPath must all be supplied.;
  %return;
%end;

/* Collect the timestamps of each side. */
%get_last_mod_date(
  dirpath = &SourcePath.
  ,suffix = S
  ,outdt = Source_lmdtm
);

%get_last_mod_date(
  dirpath = &QCPath.
  ,suffix = QC
  ,outdt = QC_lmdtm
);

/* Pair Source and QC programs. A QC program matches when its name equals the */
/* Source name, or equals it after dropping the first two characters.         */
proc sql noprint;
  create table _tmp3 as
    select a.*, b.*
    from source_lmdtm as a
      full join
      QC_lmdtm as b
    on a.domain_S = substr(b.domain_QC, 3) or a.domain_S = b.domain_QC
  ;
quit;

/* Emit one row per finding; a single program pair can raise several. */
data &resdt.;
  /* RETAIN is listed first only to put these columns at the front. */
  retain domain side resultsn results;

  length domain $64 side $10 results $200;

  set _tmp3;

  /* 1. Program missing on one side. */
  if missing(lmdtm_sas_s) then do;
    domain = strip(domain_QC);
    side = "Source";
    resultsn = 11;
    results = "Source program for **"||strip(domain)||"** is not created!";

    put "results = " results;
    output;
  end;

  if missing(lmdtm_sas_QC) then do;
    domain = strip(domain_S);
    side = "QC";
    resultsn = 12;
    results = "QC program for **"||strip(domain)||"** is not created!";

    put "results = " results;
    output;
  end;

  /* 2. Program exists but has no log. */
  if not missing(lmdtm_sas_s) and missing(lmdtm_log_s) then do;
    domain = strip(domain_S);
    side = "Source";
    resultsn = 21;
    results = "Source program for **"||strip(domain)||"** does not putty run!";

    put "results = " results;
    output;
  end;

  if not missing(lmdtm_sas_QC) and missing(lmdtm_log_QC) then do;
    domain = strip(domain_QC);
    side = "QC";
    resultsn = 22;
    results = "QC program for **"||strip(domain)||"** does not putty run!";

    put "results = " results;
    output;
  end;

  /* 3. Program modified after its log; "> ." requires the log time to exist. */
  if lmdtm_sas_s > lmdtm_log_s > . then do;
    domain = strip(domain_S);
    side = "Source";
    resultsn = 31;
    results = "Source program for **"||strip(domain)||"** does not putty run after code update!";

    put "results = " results;
    output;
  end;

  if lmdtm_sas_QC > lmdtm_log_QC > . then do;
    domain = strip(domain_QC);
    side = "QC";
    resultsn = 32;
    results = "QC program for **"||strip(domain)||"** does not putty run after code update!";

    put "results = " results;
    output;
  end;

  /* 4. Source log newer than QC log. */
  if lmdtm_log_S > lmdtm_log_QC > . then do;
    domain = strip(domain_QC);
    side = "QC";
    resultsn = 41;
    results = "QC program for **"||strip(domain)||"** does not putty run after Source putty run!";

    put "results = " results;
    output;
  end;
run;

%mend check_date;

宏程序的参数为,输出的结果数据集,Source文件夹地址,QC文件夹地址:

/* Run the check for the demo folders; findings are written to check_date_SDTM. */
%check_date(resdt = check_date_SDTM, SourcePath = E:\99_Test\Test\test1\, QCPath = E:\99_Test\Test\test1\validation);

输出结果为:

Check_date_SDTM

4. 完整宏程序汇总

/*------------------------------------------------------------------------------
  Author: Jihai
  Date  : 2022-05-22

  Two macros are defined here:
    get_last_mod_date  helper; last-modified timestamps of .sas/.log in a folder
    check_date         entry point; compares Source-side and QC-side timestamps

  The helper is defined at the top level rather than inside check_date, so it
  is compiled once instead of on every call of check_date.
------------------------------------------------------------------------------*/

%macro  get_last_mod_date( dirpath =, suffix =, outdt =  );
/*------------------------------------------------------------------------------
  Purpose : Scan one folder for .sas / .log files, read each file's
            "Last Modified" timestamp, and build one row per SAS program
            holding the program's and its log's modification datetime.

  Params  : dirpath = folder to scan; Windows or UNIX style, a trailing
                      separator is optional.
            suffix  = tag appended to the output column names.
            outdt   = name of the output dataset.

  Output  : &outdt. with columns domain_&suffix., lmdtm_sas_&suffix. and
            lmdtm_log_&suffix. (missing when no log with the same stem exists).

  Side effects: creates datasets _tmp1 and _tmp2, assigns filerefs DIRPATH and
                FILEPATH, and temporarily sets the LOCALE option to EN_US.
------------------------------------------------------------------------------*/

%if %length(%superq(dirpath)) = 0 %then %do;
  %put Dirpath is missing ! ;
  %return;
%end;

%local dirpath_tmp slash locale_sys;

/* Separator: a path containing a backslash is treated as Windows style,    */
/* anything else as UNIX style. This also covers relative paths and paths   */
/* starting with "." or "~".                                                */
%if %index(%superq(dirpath), \) %then %let slash = \;
%else %let slash = /;

/* Remove one trailing separator; a one-character path (root) is left alone. */
%let dirpath_tmp = &dirpath.;
%if %length(%superq(dirpath)) > 1 %then %do;
  %if %qsubstr(%superq(dirpath), %length(%superq(dirpath)), 1) = %superq(slash) %then
    %let dirpath_tmp = %substr(&dirpath., 1, %length(&dirpath.) - 1);
%end;

/* 1.1 DOPEN family: list the .sas and .log members of the folder. */
data _tmp1;
  length direct filename filepath stem $200 ext $32;

  fileres = filename("dirpath", "&dirpath_tmp.");
  dirid = dopen("dirpath");

  if dirid > 0 then do;
    direct = "&dirpath_tmp.";
    num = dnum(dirid);

    do i = 1 to num;
      filename = dread(dirid, i);
      filepath = catx("&slash.", direct, filename);

      /* Split at the LAST dot so names such as "ae.v2.sas" are handled, */
      /* and compare the extension case-insensitively.                   */
      dotpos = find(filename, ".", -length(filename));
      if 1 < dotpos < length(filename) then do;
        stem = substr(filename, 1, dotpos - 1);
        ext = lowcase(substr(filename, dotpos + 1));
        if ext in ("sas", "log") then output;
      end;
    end;

    /* Release the directory handle opened by DOPEN. */
    dirid_c = dclose(dirid);
  end;
  else put "WARNING: [get_last_mod_date] cannot open directory: &dirpath_tmp.";

  keep filename filepath stem ext;
run;

proc sort data = _tmp1;
  by filename;
run;

/* FINFO returns the timestamp as locale-dependent text; switch to EN_US so */
/* DATETIME19. can read it, and restore the session value afterwards.       */
%let locale_sys = %sysfunc(getoption(locale));
options locale = EN_US;

/* 1.2 FOPEN family: read the last-modified timestamp of every file. */
data _tmp2;
  set _tmp1;

  length lmdtc $200;

  fileres = filename("filepath", filepath);
  fileid = fopen("filepath");

  if fileid > 0 then do;
    lmdtc = finfo(fileid, "Last Modified");
    if lmdtc ne "" then lmdtm = input(lmdtc, datetime19.);

    /* Close only a handle that was actually opened. */
    fileid_c = fclose(fileid);
  end;

  format lmdtm e8601dt.;

  keep filename filepath stem ext lmdtc lmdtm;
run;

options locale = &locale_sys.;

/* 1.3 One row per .sas file, with the timestamp of the same-stem .log. */
proc sql noprint;
  create table &outdt. as
    select a.stem  as domain_&suffix.,
           a.lmdtm as lmdtm_sas_&suffix.,
           b.lmdtm as lmdtm_log_&suffix.
    from _tmp2 as a
      left join
      _tmp2 as b
    on a.stem = b.stem and b.ext = "log"
    where a.ext = "sas"
  ;
quit;

%mend get_last_mod_date;


%macro check_date(resdt=, SourcePath=, QCPath= );
/*------------------------------------------------------------------------------
  Purpose : Compare last-modified timestamps of Source-side and QC-side SAS
            programs and logs, and list every finding in &resdt.

  Params  : resdt      = name of the findings dataset to create.
            SourcePath = folder with the Source programs and logs.
            QCPath     = folder with the QC programs and logs.

  Findings (resultsn):
            11 / 12  program missing on the Source / QC side
            21 / 22  program has no log on the Source / QC side
            31 / 32  program modified after its log on the Source / QC side
            41       Source log is newer than the QC log

  Side effects: creates datasets Source_lmdtm, QC_lmdtm and _tmp3, and writes
                each finding to the log with PUT.
------------------------------------------------------------------------------*/

/* Stop early on a blank argument. Otherwise the helper only prints a message */
/* and the join below would fail or silently reuse a Source_lmdtm / QC_lmdtm  */
/* dataset left over from an earlier call.                                    */
%if %length(%superq(resdt)) = 0 or %length(%superq(SourcePath)) = 0 or %length(%superq(QCPath)) = 0 %then %do;
  %put ERROR: [check_date] resdt, SourcePath and QCPath must all be supplied.;
  %return;
%end;

/* 1. Collect the timestamps of each side. */
%get_last_mod_date(
  dirpath = &SourcePath.
  ,suffix = S
  ,outdt = Source_lmdtm
);

%get_last_mod_date(
  dirpath = &QCPath.
  ,suffix = QC
  ,outdt = QC_lmdtm
);

/* 2. Pair Source and QC programs. A QC program matches when its name equals */
/*    the Source name, or equals it after dropping the first two characters. */
proc sql noprint;
  create table _tmp3 as
    select a.*, b.*
    from source_lmdtm as a
      full join
      QC_lmdtm as b
    on a.domain_S = substr(b.domain_QC, 3) or a.domain_S = b.domain_QC
  ;
quit;

/* 3. Emit one row per finding; a single program pair can raise several. */
data &resdt.;
  /* RETAIN is listed first only to put these columns at the front. */
  retain domain side resultsn results;

  length domain $64 side $10 results $200;

  set _tmp3;

  /* 3.1 Program missing on one side. */
  if missing(lmdtm_sas_s) then do;
    domain = strip(domain_QC);
    side = "Source";
    resultsn = 11;
    results = "Source program for **"||strip(domain)||"** is not created!";

    put "results = " results;
    output;
  end;

  if missing(lmdtm_sas_QC) then do;
    domain = strip(domain_S);
    side = "QC";
    resultsn = 12;
    results = "QC program for  **"||strip(domain)||"** is not created!";

    put "results = " results;
    output;
  end;

  /* 3.2 Program exists but has no log. */
  if not missing(lmdtm_sas_s) and missing(lmdtm_log_s) then do;
    domain = strip(domain_S);
    side = "Source";
    resultsn = 21;
    results = "Source program for **"||strip(domain)||"** does not putty run!";

    put "results = " results;
    output;
  end;

  if not missing(lmdtm_sas_QC) and missing(lmdtm_log_QC) then do;
    domain = strip(domain_QC);
    side = "QC";
    resultsn = 22;
    results = "QC program for **"||strip(domain)||"** does not putty run!";

    put "results = " results;
    output;
  end;

  /* 3.3 Program modified after its log; "> ." requires the log time to exist. */
  if lmdtm_sas_s > lmdtm_log_s > . then do;
    domain = strip(domain_S);
    side = "Source";
    resultsn = 31;
    results = "Source program for **"||strip(domain)||"** does not putty run after code update!";

    put "results = " results;
    output;
  end;

  if lmdtm_sas_QC > lmdtm_log_QC > . then do;
    domain = strip(domain_QC);
    side = "QC";
    resultsn = 32;
    results = "QC program for **"||strip(domain)||"** does not putty run after code update!";

    put "results = " results;
    output;
  end;

  /* 3.4 Source log newer than QC log. */
  if lmdtm_log_S > lmdtm_log_QC > . then do;
    domain = strip(domain_QC);
    side = "QC";
    resultsn = 41;
    results = "QC program for **"||strip(domain)||"** does not putty run after Source putty run!";

    put "results = " results;
    output;
  end;
run;

%mend check_date;

/* Invoke the macro: findings for the demo Source and QC folders go to check_date_SDTM. */
%check_date(resdt = check_date_SDTM, SourcePath = E:\99_Test\Test\test1\, QCPath = E:\99_Test\Test\test1\validation);

总结

这个宏的关键点在于获取特定文件夹下的所有文件末次修改时间,涉及Dopen、Fopen系列函数的使用。

相关阅读:
SAS编程:Dopen系列函数介绍
SAS编程:Fopen系列函数介绍

感谢阅读, 欢迎关注!
若有疑问,欢迎评论交流!

你可能感兴趣的:(SAS编程-宏:Source侧与QC侧程序运行时间检查)