目前SAS编程工作一般都要求双侧独立编程,这就像双盲试验一样,减少偏倚,提升输出结果的准确性。双侧编程中,QC侧程序运行时间需在Source侧之后。这不仅是一个很好的编程习惯,不少公司也将这一要求添加到工作手册中,程序提交之前会有专门的系统来检测。
这篇文章运用SAS编程来完成这样的程序运行时间检查,为方便程序调用,将代码封装为宏,宏程序的完整代码在第4节汇总。
1. 宏程序的整体思路
宏程序主要分为两部分,第一部分,获取SAS程序以及SAS日志的末次修改时间;第二部分,比较各个末次修改时间,得出对应的结论。
第一部分内容,参考文章《SAS编程:如何获取某路径下所有文件的修改时间?》。
第二部分,我把各个末次修改时间的比较划分为4类:
- 程序修改时间缺失,需创建缺失程序;
- 日志修改时间缺失,程序需要Batch Run;
- SAS程序修改时间,晚于日志修改时间,程序需要Batch Run;
- Source侧日志修改时间,晚于QC侧日志修改时间,QC侧程序需要Batch Run。
演示文件夹如下,两个文件夹,一个是Source侧,一个是QC侧,SAS程序对应的日志文件在同一文件夹中。
2. 获取SAS程序以及日志的末次修改时间
这一部分分为两个步骤,先使用Dopen系列函数获取所有文件的名称(文件路径),再使用Fopen系列函数获取所有文件的修改时间。
直接参照之前的文章代码,整合成一个宏程序,宏程序中做2点额外的考量。
第一点,考虑Windows和UNIX系统中,文件地址的斜杠不同,以及在输入文件夹地址时,地址末尾可能添加斜杠,也可能不添加。根据输入的地址判断斜杠的类型,同时,统一将输入地址末尾的斜杠移除,后续使用时再手动添加。
第二点,因为函数FINFO返回的信息,受SAS语言的影响,且返回的中文时间不方便读入。所以,在调用FINFO
时将SAS系统语言设置为英文(options locale = EN_US),调用结束后还原之前的选项值。这个操作是不是跟之前提到的 ods listing close; 与 ods listing;,以及 options mprint; 与 options nomprint; 有异曲同工之妙?
第一部分,该程序如下:
/*----------------------------------------------------------------------
  Macro   : get_last_mod_date
  Purpose : List the .sas and .log files of one folder and return, per
            SAS program, the last-modified datetime of the program and
            of the log that has the same file stem.

  Parameters
    dirpath = Folder to scan. Windows (\) or UNIX (/) style; a trailing
              separator is optional.
    suffix  = Text appended to the output variable names.
    outdt   = Name of the output data set.

  Output (&outdt., one row per .sas file)
    domain_&suffix.     file name without its extension
    lmdtm_sas_&suffix.  last-modified datetime of the .sas file
    lmdtm_log_&suffix.  last-modified datetime of the matching .log file
                        (missing when there is no such log)

  Side effects
    Creates WORK._tmp1 and WORK._tmp2, uses the filerefs DIRPATH and
    FILEPATH (deassigned before the macro ends), and switches
    LOCALE to EN_US while FINFO is called, restoring it afterwards.
----------------------------------------------------------------------*/
%macro get_last_mod_date( dirpath =, suffix =, outdt = );

  /* Nothing to scan: report it and stop. */
  %if %length(%superq(dirpath)) = 0 %then %do;
    %put Dirpath is missing ! ;
    %return;
  %end;

  %local dirpath_tmp slash locale_sys;

  /* Separator: a backslash anywhere in the path means Windows style,
     otherwise use a forward slash. This also covers paths that start
     with "." or "~" and paths that contain no separator at all. */
  %if %index(%superq(dirpath), \) > 0 %then %let slash = \;
  %else %let slash = /;

  /* Drop one trailing separator; it is added back by CATX below.
     A one-character path is left alone so %QSUBSTR never gets length 0. */
  %let dirpath_tmp = %superq(dirpath);
  %if %length(&dirpath_tmp.) > 1 %then %do;
    %if "%qsubstr(&dirpath_tmp., %length(&dirpath_tmp.), 1)" = "&slash." %then
      %let dirpath_tmp = %qsubstr(&dirpath_tmp., 1, %eval(%length(&dirpath_tmp.) - 1));
  %end;

  /* 1. DOPEN family: collect the .sas and .log members of the folder. */
  data _tmp1;
    length direct filename filepath stem ext $200;
    fileres = filename("dirpath", "&dirpath_tmp.");
    dirid   = dopen("dirpath");

    if dirid > 0 then do;
      direct = "&dirpath_tmp.";
      /* DNUM is only called on a directory that really opened. */
      do i = 1 to dnum(dirid);
        filename = dread(dirid, i);
        filepath = catx("&slash.", direct, filename);
        if index(filename, ".") > 0 then do;
          /* Extension = text after the LAST dot, compared without case,
             so ae.v2.sas and AE.SAS count while ae.sas.bak does not. */
          ext = lowcase(scan(filename, -1, "."));
          if ext in ("sas", "log") and length(filename) > length(ext) + 1 then do;
            stem = substr(filename, 1, length(filename) - length(ext) - 1);
            output;
          end;
        end;
      end;
      dirid_c = dclose(dirid);
    end;
    else put "WARNING: Unable to open directory &dirpath_tmp.";

    /* Deassign the directory fileref. */
    fileres = filename("dirpath");
    keep filename filepath stem ext;
  run;

  proc sort data = _tmp1;
    by filename;
  run;

  /* FINFO item names and datetime text depend on the session language,
     so force English while reading them and restore the option later. */
  %let locale_sys = %sysfunc(getoption(locale));
  options locale = EN_US;

  /* 2. FOPEN family: read the last-modified datetime of every file. */
  data _tmp2;
    set _tmp1 end = last_row;
    length lmdtc $200;
    format lmdtm e8601dt.;

    fileres = filename("filepath", filepath);
    fileid  = fopen("filepath");
    if fileid > 0 then do;
      lmdtc = finfo(fileid, "Last Modified");
      if lmdtc ne "" then lmdtm = input(lmdtc, datetime19.);
      /* Only close a handle that was actually opened. */
      fileid_c = fclose(fileid);
    end;

    /* Deassign the per-file fileref once the last file is processed. */
    if last_row then fileres = filename("filepath");
    keep filename filepath stem ext lmdtc lmdtm;
  run;

  options locale = &locale_sys.;

  /* 3. One row per program, with the log of the same stem beside it. */
  proc sql noprint;
    create table &outdt. as
      select a.stem  as domain_&suffix.
           , a.lmdtm as lmdtm_sas_&suffix.
           , b.lmdtm as lmdtm_log_&suffix.
      from _tmp2(where = (ext = "sas")) as a
      left join
           _tmp2(where = (ext = "log")) as b
      on a.stem = b.stem
    ;
  quit;

%mend get_last_mod_date;
将上面两个文件夹地址代入宏程序:
/* Source side: program and log timestamps of the Source folder. */
%get_last_mod_date(dirpath = E:\99_Test\Test\test1\, suffix = S, outdt = Source_lmdtm);

/* QC side: program and log timestamps of the QC folder. */
%get_last_mod_date(dirpath = E:\99_Test\Test\test1\validation, suffix = QC, outdt = QC_lmdtm);
结果如下:
3. 比较各文件的末次修改时间
获取Source侧和QC侧程序和日志的末次修改时间后,将两侧获取到的时间数据集拼接到一起(Full join),比较输出第1节提到的4类输出结果。
/*----------------------------------------------------------------------
  Macro   : check_date
  Purpose : Compare the last-modified datetimes of SAS programs and logs
            on the Source side and the QC side and list every finding.
            Requires the macro get_last_mod_date to be available.

  Parameters
    resdt       = Name of the output (findings) data set. Required.
    SourcePath  = Folder holding the Source programs and logs. Required.
    QCPath      = Folder holding the QC programs and logs. Required.
    QCPrefixLen = Number of leading characters of a QC program name that
                  are skipped when it is matched to a Source program.
                  Default 2, which reproduces the former fixed
                  substr(domain_QC, 3). A QC name identical to the Source
                  name is always matched as well.

  Output (&resdt., one row per finding; all columns of WORK._tmp3 plus)
    domain    program the finding refers to
    side      "Source" or "QC"
    resultsn  11/12 program missing (Source/QC)
              21/22 log missing (Source/QC)
              31/32 program modified after its log (Source/QC)
              41    Source log modified after QC log
    results   message text, also written to the SAS log

  Side effects
    Creates WORK.Source_lmdtm, WORK.QC_lmdtm and WORK._tmp3 (plus the
    work data sets of get_last_mod_date).
----------------------------------------------------------------------*/
%macro check_date(resdt=, SourcePath=, QCPath=, QCPrefixLen=2 );

  /* Stop early on incomplete input. Without this, a blank path makes
     get_last_mod_date skip its work and the join below would silently
     use whatever Source_lmdtm / QC_lmdtm were left by an earlier run. */
  %if %length(%superq(resdt)) = 0
      or %length(%superq(SourcePath)) = 0
      or %length(%superq(QCPath)) = 0 %then %do;
    %put ERROR: check_date needs resdt, SourcePath and QCPath to be specified.;
    %return;
  %end;

  /* The prefix length is used in arithmetic, so it must be a whole number. */
  %if %length(&QCPrefixLen.) = 0 %then %let QCPrefixLen = 2;
  %else %if %sysfunc(notdigit(&QCPrefixLen.)) > 0 %then %do;
    %put ERROR: check_date needs QCPrefixLen to be a non-negative integer.;
    %return;
  %end;

  /* Last-modified datetimes of the files in each folder. */
  %get_last_mod_date(
     dirpath = &SourcePath.
    ,suffix  = S
    ,outdt   = Source_lmdtm
  );

  %get_last_mod_date(
     dirpath = &QCPath.
    ,suffix  = QC
    ,outdt   = QC_lmdtm
  );

  /* Pair Source and QC programs: the QC name either equals the Source
     name or equals it after the first QCPrefixLen characters. */
  proc sql noprint;
    create table _tmp3 as
      select a.*, b.*
      from source_lmdtm as a
      full join
           QC_lmdtm as b
      on a.domain_S = substr(b.domain_QC, %eval(&QCPrefixLen. + 1))
         or a.domain_S = b.domain_QC
    ;
  quit;

  /* One output row per finding; a pair can produce several findings. */
  data &resdt.;
    /* RETAIN is placed first so these four variables lead the data set. */
    retain domain side resultsn results;
    length domain $64 side $10 results $200;
    set _tmp3;

    /* 1. Program missing on one side. */
    if missing(lmdtm_sas_s) then do;
      domain = strip(domain_QC);
      side = "Source";
      resultsn = 11;
      results = "Source program for **"||strip(domain)||"** is not created!";
      put "results = " results;
      output;
    end;
    if missing(lmdtm_sas_QC) then do;
      domain = strip(domain_S);
      side = "QC";
      resultsn = 12;
      results = "QC program for **"||strip(domain)||"** is not created!";
      put "results = " results;
      output;
    end;

    /* 2. Program exists but has no log. */
    if not missing(lmdtm_sas_s) and missing(lmdtm_log_s) then do;
      domain = strip(domain_S);
      side = "Source";
      resultsn = 21;
      results = "Source program for **"||strip(domain)||"** does not putty run!";
      put "results = " results;
      output;
    end;
    if not missing(lmdtm_sas_QC) and missing(lmdtm_log_QC) then do;
      domain = strip(domain_QC);
      side = "QC";
      resultsn = 22;
      results = "QC program for **"||strip(domain)||"** does not putty run!";
      put "results = " results;
      output;
    end;

    /* 3. Program modified after its own log (log must be non-missing). */
    if lmdtm_sas_s > lmdtm_log_s > . then do;
      domain = strip(domain_S);
      side = "Source";
      resultsn = 31;
      results = "Source program for **"||strip(domain)||"** does not putty run after code update!";
      put "results = " results;
      output;
    end;
    if lmdtm_sas_QC > lmdtm_log_QC > . then do;
      domain = strip(domain_QC);
      side = "QC";
      resultsn = 32;
      results = "QC program for **"||strip(domain)||"** does not putty run after code update!";
      put "results = " results;
      output;
    end;

    /* 4. Source log is newer than the (non-missing) QC log. */
    if lmdtm_log_S > lmdtm_log_QC > . then do;
      domain = strip(domain_QC);
      side = "QC";
      resultsn = 41;
      results = "QC program for **"||strip(domain)||"** does not putty run after Source putty run!";
      put "results = " results;
      output;
    end;
  run;

%mend check_date;
宏程序的参数为,输出的结果数据集,Source文件夹地址,QC文件夹地址:
/* Compare the Source folder with the QC folder; findings go to check_date_SDTM. */
%check_date(resdt = check_date_SDTM, SourcePath = E:\99_Test\Test\test1\, QCPath = E:\99_Test\Test\test1\validation);
输出结果为:
4. 完整宏程序汇总
/*----------------------------------------------------------------------
  Author : Jihai
  Date   : 2022-05-22

  This listing defines two macros:
    get_last_mod_date  helper that reads file timestamps of one folder
    check_date         entry point that compares Source and QC folders

  The helper is defined at the top level instead of inside check_date,
  so it is compiled once rather than on every call of check_date.
----------------------------------------------------------------------*/

/*----------------------------------------------------------------------
  Macro   : get_last_mod_date
  Purpose : List the .sas and .log files of one folder and return, per
            SAS program, the last-modified datetime of the program and
            of the log that has the same file stem.

  Parameters
    dirpath = Folder to scan. Windows (\) or UNIX (/) style; a trailing
              separator is optional.
    suffix  = Text appended to the output variable names.
    outdt   = Name of the output data set.

  Output (&outdt., one row per .sas file)
    domain_&suffix.     file name without its extension
    lmdtm_sas_&suffix.  last-modified datetime of the .sas file
    lmdtm_log_&suffix.  last-modified datetime of the matching .log file
                        (missing when there is no such log)

  Side effects
    Creates WORK._tmp1 and WORK._tmp2, uses the filerefs DIRPATH and
    FILEPATH (deassigned before the macro ends), and switches
    LOCALE to EN_US while FINFO is called, restoring it afterwards.
----------------------------------------------------------------------*/
%macro get_last_mod_date( dirpath =, suffix =, outdt = );

  /* Nothing to scan: report it and stop. */
  %if %length(%superq(dirpath)) = 0 %then %do;
    %put Dirpath is missing ! ;
    %return;
  %end;

  %local dirpath_tmp slash locale_sys;

  /* Separator: a backslash anywhere in the path means Windows style,
     otherwise use a forward slash. This also covers paths that start
     with "." or "~" and paths that contain no separator at all. */
  %if %index(%superq(dirpath), \) > 0 %then %let slash = \;
  %else %let slash = /;

  /* Drop one trailing separator; it is added back by CATX below.
     A one-character path is left alone so %QSUBSTR never gets length 0. */
  %let dirpath_tmp = %superq(dirpath);
  %if %length(&dirpath_tmp.) > 1 %then %do;
    %if "%qsubstr(&dirpath_tmp., %length(&dirpath_tmp.), 1)" = "&slash." %then
      %let dirpath_tmp = %qsubstr(&dirpath_tmp., 1, %eval(%length(&dirpath_tmp.) - 1));
  %end;

  /* 1.1 DOPEN family: collect the .sas and .log members of the folder. */
  data _tmp1;
    length direct filename filepath stem ext $200;
    fileres = filename("dirpath", "&dirpath_tmp.");
    dirid   = dopen("dirpath");

    if dirid > 0 then do;
      direct = "&dirpath_tmp.";
      /* DNUM is only called on a directory that really opened. */
      do i = 1 to dnum(dirid);
        filename = dread(dirid, i);
        filepath = catx("&slash.", direct, filename);
        if index(filename, ".") > 0 then do;
          /* Extension = text after the LAST dot, compared without case,
             so ae.v2.sas and AE.SAS count while ae.sas.bak does not. */
          ext = lowcase(scan(filename, -1, "."));
          if ext in ("sas", "log") and length(filename) > length(ext) + 1 then do;
            stem = substr(filename, 1, length(filename) - length(ext) - 1);
            output;
          end;
        end;
      end;
      dirid_c = dclose(dirid);
    end;
    else put "WARNING: Unable to open directory &dirpath_tmp.";

    /* Deassign the directory fileref. */
    fileres = filename("dirpath");
    keep filename filepath stem ext;
  run;

  proc sort data = _tmp1;
    by filename;
  run;

  /* FINFO item names and datetime text depend on the session language,
     so force English while reading them and restore the option later. */
  %let locale_sys = %sysfunc(getoption(locale));
  options locale = EN_US;

  /* 1.2 FOPEN family: read the last-modified datetime of every file. */
  data _tmp2;
    set _tmp1 end = last_row;
    length lmdtc $200;
    format lmdtm e8601dt.;

    fileres = filename("filepath", filepath);
    fileid  = fopen("filepath");
    if fileid > 0 then do;
      lmdtc = finfo(fileid, "Last Modified");
      if lmdtc ne "" then lmdtm = input(lmdtc, datetime19.);
      /* Only close a handle that was actually opened. */
      fileid_c = fclose(fileid);
    end;

    /* Deassign the per-file fileref once the last file is processed. */
    if last_row then fileres = filename("filepath");
    keep filename filepath stem ext lmdtc lmdtm;
  run;

  options locale = &locale_sys.;

  /* 1.3 One row per program, with the log of the same stem beside it. */
  proc sql noprint;
    create table &outdt. as
      select a.stem  as domain_&suffix.
           , a.lmdtm as lmdtm_sas_&suffix.
           , b.lmdtm as lmdtm_log_&suffix.
      from _tmp2(where = (ext = "sas")) as a
      left join
           _tmp2(where = (ext = "log")) as b
      on a.stem = b.stem
    ;
  quit;

%mend get_last_mod_date;

/*----------------------------------------------------------------------
  Macro   : check_date
  Purpose : Compare the last-modified datetimes of SAS programs and logs
            on the Source side and the QC side and list every finding.

  Parameters
    resdt       = Name of the output (findings) data set. Required.
    SourcePath  = Folder holding the Source programs and logs. Required.
    QCPath      = Folder holding the QC programs and logs. Required.
    QCPrefixLen = Number of leading characters of a QC program name that
                  are skipped when it is matched to a Source program.
                  Default 2, which reproduces the former fixed
                  substr(domain_QC, 3). A QC name identical to the Source
                  name is always matched as well.

  Output (&resdt., one row per finding; all columns of WORK._tmp3 plus)
    domain    program the finding refers to
    side      "Source" or "QC"
    resultsn  11/12 program missing (Source/QC)
              21/22 log missing (Source/QC)
              31/32 program modified after its log (Source/QC)
              41    Source log modified after QC log
    results   message text, also written to the SAS log

  Side effects
    Creates WORK.Source_lmdtm, WORK.QC_lmdtm and WORK._tmp3 (plus the
    work data sets of get_last_mod_date).
----------------------------------------------------------------------*/
%macro check_date(resdt=, SourcePath=, QCPath=, QCPrefixLen=2 );

  /* Stop early on incomplete input. Without this, a blank path makes
     get_last_mod_date skip its work and the join below would silently
     use whatever Source_lmdtm / QC_lmdtm were left by an earlier run. */
  %if %length(%superq(resdt)) = 0
      or %length(%superq(SourcePath)) = 0
      or %length(%superq(QCPath)) = 0 %then %do;
    %put ERROR: check_date needs resdt, SourcePath and QCPath to be specified.;
    %return;
  %end;

  /* The prefix length is used in arithmetic, so it must be a whole number. */
  %if %length(&QCPrefixLen.) = 0 %then %let QCPrefixLen = 2;
  %else %if %sysfunc(notdigit(&QCPrefixLen.)) > 0 %then %do;
    %put ERROR: check_date needs QCPrefixLen to be a non-negative integer.;
    %return;
  %end;

  /* 1. Last-modified datetimes of the files in each folder. */
  %get_last_mod_date(
     dirpath = &SourcePath.
    ,suffix  = S
    ,outdt   = Source_lmdtm
  );

  %get_last_mod_date(
     dirpath = &QCPath.
    ,suffix  = QC
    ,outdt   = QC_lmdtm
  );

  /* 2. Pair Source and QC programs: the QC name either equals the Source
        name or equals it after the first QCPrefixLen characters. */
  proc sql noprint;
    create table _tmp3 as
      select a.*, b.*
      from source_lmdtm as a
      full join
           QC_lmdtm as b
      on a.domain_S = substr(b.domain_QC, %eval(&QCPrefixLen. + 1))
         or a.domain_S = b.domain_QC
    ;
  quit;

  /* 3. One output row per finding; a pair can produce several findings. */
  data &resdt.;
    /* RETAIN is placed first so these four variables lead the data set. */
    retain domain side resultsn results;
    length domain $64 side $10 results $200;
    set _tmp3;

    /* 3.1 Program missing on one side. */
    if missing(lmdtm_sas_s) then do;
      domain = strip(domain_QC);
      side = "Source";
      resultsn = 11;
      results = "Source program for **"||strip(domain)||"** is not created!";
      put "results = " results;
      output;
    end;
    if missing(lmdtm_sas_QC) then do;
      domain = strip(domain_S);
      side = "QC";
      resultsn = 12;
      results = "QC program for **"||strip(domain)||"** is not created!";
      put "results = " results;
      output;
    end;

    /* 3.2 Program exists but has no log. */
    if not missing(lmdtm_sas_s) and missing(lmdtm_log_s) then do;
      domain = strip(domain_S);
      side = "Source";
      resultsn = 21;
      results = "Source program for **"||strip(domain)||"** does not putty run!";
      put "results = " results;
      output;
    end;
    if not missing(lmdtm_sas_QC) and missing(lmdtm_log_QC) then do;
      domain = strip(domain_QC);
      side = "QC";
      resultsn = 22;
      results = "QC program for **"||strip(domain)||"** does not putty run!";
      put "results = " results;
      output;
    end;

    /* 3.3 Program modified after its own log (log must be non-missing). */
    if lmdtm_sas_s > lmdtm_log_s > . then do;
      domain = strip(domain_S);
      side = "Source";
      resultsn = 31;
      results = "Source program for **"||strip(domain)||"** does not putty run after code update!";
      put "results = " results;
      output;
    end;
    if lmdtm_sas_QC > lmdtm_log_QC > . then do;
      domain = strip(domain_QC);
      side = "QC";
      resultsn = 32;
      results = "QC program for **"||strip(domain)||"** does not putty run after code update!";
      put "results = " results;
      output;
    end;

    /* 3.4 Source log is newer than the (non-missing) QC log. */
    if lmdtm_log_S > lmdtm_log_QC > . then do;
      domain = strip(domain_QC);
      side = "QC";
      resultsn = 41;
      results = "QC program for **"||strip(domain)||"** does not putty run after Source putty run!";
      put "results = " results;
      output;
    end;
  run;

%mend check_date;
/* Run the check: Source folder against QC folder, findings in check_date_SDTM. */
%check_date(resdt = check_date_SDTM, SourcePath = E:\99_Test\Test\test1\, QCPath = E:\99_Test\Test\test1\validation);
总结
这个宏的关键点在于获取特定文件夹下的所有文件末次修改时间,涉及Dopen、Fopen系列函数的使用。
相关阅读:
SAS编程:Dopen系列函数介绍
SAS编程:Fopen系列函数介绍
感谢阅读, 欢迎关注!
若有疑问,欢迎评论交流!