/* Calculating p values with SAS

 http://www.listserv.uga.edu/cgi-bin/wa?A2=ind0101e&L=sas-l&P=9455

*/
/* ---- Example 1: normal distribution -------------------------------- */
%let Distribution='NORMAL';
%let location=0;    /* location parameter, a.k.a. mean               */
%let scale=1.0;     /* scale parameter, a.k.a. standard deviation    */
%let z=1.625;       /* example z value whose tail probability we want */

/* Tabulate the density and the cumulative probability on a grid of z
   values.  Only grid points are written, so the plotted data set holds
   nothing but the two curves. */
data junk;
    do z=-5 to 5 by 0.01;
        dens=pdf(&Distribution,z,&location,&scale);
        cum=cdf(&Distribution,z,&location,&scale);
        output;
    end;
run;

/* Publish CDF(&z) and the one-sided tail probability as the macro
   variables cdf_value and p_value.  This is done in a separate
   DATA _NULL_ step: doing it inside the step above together with an
   OUTPUT statement appended a stray row (z just past the grid, with
   dens/cum left over from the last grid point) to the plotted data. */
data _null_;
    cdf_at_z=cdf(&Distribution,&z,&location,&scale);
    call symput('cdf_value',put(cdf_at_z,best6.5));
    if &z ge 0 then p_tail=1-cdf_at_z;   /* upper tail: Prob(Z > z) */
    else p_tail=cdf_at_z;                /* lower tail: Prob(Z < z) */
    call symput('p_value',put(p_tail,best6.5));
run;

/* Annotate data set holding the single text label for the density curve. */
data labelDensity;
    length text $ 32 color $ 8;
    xsys  = '2';        /* x is given in data (axis) units       */
    x     = &z;         /* put the label at the example z value  */
    ysys  = '3';        /* y is given as a percentage            */
    y     = 25;         /* 25% of the way up                     */
    color = 'Blue';
    /* The wording follows the tail that was used for p_value. */
    select;
        when (&z ge 0) text = "Prob z > &z is &p_value";
        otherwise      text = "Prob z < &z is &p_value";
    end;
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;
/* Annotate data set holding the single text label for the cumulative
   probability curve. */
data labelCum;
    length text $ 32 color $ 8;
    xsys  = '3';                    /* x is given as a percentage        */
    x     = 68;                     /* 68% of the way across             */
    ysys  = '2';                    /* y is given in data (axis) units   */
    y     = &cdf_value - 0.0075;    /* slightly below the CDF value      */
    color = 'Green';
    text  = "Cum. Prob at z=&z is &cdf_value";
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;

/* Graphics setup and plot: density against the left axis, cumulative
   probability against the right axis, both over z. */
goptions vsize=5.25 hsize=5.75;
goptions ftitle=swiss ftext=swiss htitle=4 pct;
axis1 label=(angle=90);     /* vertical axis of the PLOT statement  */
axis3 label=(angle=-90);    /* vertical axis of the PLOT2 statement */
symbol1 c=blue i=spline v=none l=1;
symbol2 c=green i=spline v=none l=1;
/* The title literal is kept on one line; a quoted string broken across
   source lines carries the line break into the title text. */
title1 "&Distribution Distribution with Location=&location and Scale=&scale";
proc gplot data=junk;
    /* dashed reference line at the example z value, with its label */
    plot dens*z / href=&z chref=blue lhref=2
                 vaxis=axis1 annotate=labelDensity;
    /* dashed reference line at CDF(z), with its label */
    plot2 cum*z / vref=&cdf_value cvref=green lvref=2
                  vaxis=axis3 annotate=labelCum;
    label dens="Probability Density Function (Blue Trace)"
          cum="Cumulative Probability (Green Trace)";
run;
quit;


/* Now we will play with another distribution. Let's try Weibull. */

/* ---- Example 2: Weibull distribution -------------------------------- */
%let Distribution='WEIBULL';
/* NOTE: for 'WEIBULL' the first parameter passed to PDF/CDF is the SHAPE
   parameter, not a location.  The macro variable keeps the name
   "location" only so that the code parallels the other examples. */
%let location=1.0;  /* first distribution parameter (Weibull shape) */
%let scale=2.0;     /* scale parameter                              */
%let z=5.0;         /* example z value                              */

/* Tabulate the density and the cumulative probability on a grid of z
   values.  Only grid points are written, so the plotted data set holds
   nothing but the two curves. */
data junk;
    do z=-1 to 10 by 0.01;
        dens=pdf(&Distribution,z,&location,&scale);
        cum=cdf(&Distribution,z,&location,&scale);
        output;
    end;
run;

/* Publish CDF(&z) and the one-sided tail probability as the macro
   variables cdf_value and p_value.  This is done in a separate
   DATA _NULL_ step: doing it inside the step above together with an
   OUTPUT statement appended a stray row (z just past the grid, with
   dens/cum left over from the last grid point) to the plotted data. */
data _null_;
    cdf_at_z=cdf(&Distribution,&z,&location,&scale);
    call symput('cdf_value',put(cdf_at_z,best6.5));
    if &z ge 0 then p_tail=1-cdf_at_z;   /* upper tail: Prob(Z > z) */
    else p_tail=cdf_at_z;                /* lower tail: Prob(Z < z) */
    call symput('p_value',put(p_tail,best6.5));
run;

/* Annotate data set holding the single text label for the density curve. */
data labelDensity;
    length text $ 32 color $ 8;
    xsys  = '2';        /* x is given in data (axis) units       */
    x     = &z;         /* put the label at the example z value  */
    ysys  = '3';        /* y is given as a percentage            */
    y     = 25;         /* 25% of the way up                     */
    color = 'Blue';
    /* The wording follows the tail that was used for p_value. */
    select;
        when (&z ge 0) text = "Prob z > &z is &p_value";
        otherwise      text = "Prob z < &z is &p_value";
    end;
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;
/* Annotate data set holding the single text label for the cumulative
   probability curve. */
data labelCum;
    length text $ 32 color $ 8;
    xsys  = '3';                    /* x is given as a percentage        */
    x     = 68;                     /* 68% of the way across             */
    ysys  = '2';                    /* y is given in data (axis) units   */
    y     = &cdf_value - 0.0075;    /* slightly below the CDF value      */
    color = 'Green';
    text  = "Cum. Prob at z=&z is &cdf_value";
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;

/* Graphics setup and plot: density against the left axis, cumulative
   probability against the right axis, both over z. */
goptions vsize=5.25 hsize=5.75;
goptions ftitle=swiss ftext=swiss htitle=4 pct;
axis1 label=(angle=90);         /* vertical axis of the PLOT statement  */
axis2 order=(-1 to 10 by 1);    /* horizontal axis; must be changed to
                                   match the range of the data          */
axis3 label=(angle=-90);        /* vertical axis of the PLOT2 statement */
symbol1 c=blue i=spline v=none l=1;
symbol2 c=green i=spline v=none l=1;
/* For the Weibull distribution the first parameter given to PDF/CDF is
   the shape, so the title labels it "Shape" rather than "Location".
   The literal is kept on one line; a quoted string broken across source
   lines carries the line break into the title text. */
title1 "&Distribution Distribution with Shape=&location and Scale=&scale";
proc gplot data=junk;
    /* dashed reference line at the example z value, with its label */
    plot dens*z / href=&z chref=blue lhref=2
                 vaxis=axis1 haxis=axis2 annotate=labelDensity;
    /* dashed reference line at CDF(z), with its label */
    plot2 cum*z / vref=&cdf_value cvref=green lvref=2
                  vaxis=axis3 haxis=axis2 annotate=labelCum;
    label dens="Probability Density Function (Blue Trace)"
          cum="Cumulative Probability (Green Trace)";
run;
quit;


/* Now we will play with another distribution.
   Let's try Laplace. */

/* ---- Example 3: Laplace distribution -------------------------------- */
%let Distribution='LAPLACE';
%let location=1.0;  /* location parameter */
%let scale=2.0;     /* scale parameter    */
%let z=-5.0;        /* example z value    */

/* Tabulate the density and the cumulative probability on a grid of z
   values.  Only grid points are written, so the plotted data set holds
   nothing but the two curves. */
data junk;
    do z=-10 to 10 by 0.01;
        dens=pdf(&Distribution,z,&location,&scale);
        cum=cdf(&Distribution,z,&location,&scale);
        output;
    end;
run;

/* Publish CDF(&z) and the one-sided tail probability as the macro
   variables cdf_value and p_value.  This is done in a separate
   DATA _NULL_ step: doing it inside the step above together with an
   OUTPUT statement appended a stray row (z just past the grid, with
   dens/cum left over from the last grid point) to the plotted data. */
data _null_;
    cdf_at_z=cdf(&Distribution,&z,&location,&scale);
    call symput('cdf_value',put(cdf_at_z,best6.5));
    if &z ge 0 then p_tail=1-cdf_at_z;   /* upper tail: Prob(Z > z) */
    else p_tail=cdf_at_z;                /* lower tail: Prob(Z < z) */
    call symput('p_value',put(p_tail,best6.5));
run;

/* Annotate data set holding the single text label for the density curve. */
data labelDensity;
    length text $ 32 color $ 8;
    xsys  = '2';        /* x is given in data (axis) units       */
    x     = &z;         /* put the label at the example z value  */
    ysys  = '3';        /* y is given as a percentage            */
    y     = 25;         /* 25% of the way up                     */
    color = 'Blue';
    /* The wording follows the tail that was used for p_value. */
    select;
        when (&z ge 0) text = "Prob z > &z is &p_value";
        otherwise      text = "Prob z < &z is &p_value";
    end;
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;
/* Annotate data set holding the single text label for the cumulative
   probability curve. */
data labelCum;
    length text $ 32 color $ 8;
    xsys  = '3';                    /* x is given as a percentage        */
    x     = 68;                     /* 68% of the way across             */
    ysys  = '2';                    /* y is given in data (axis) units   */
    y     = &cdf_value - 0.0075;    /* slightly below the CDF value      */
    color = 'Green';
    text  = "Cum. Prob at z=&z is &cdf_value";
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;

/* Graphics setup and plot: density against the left axis, cumulative
   probability against the right axis, both over z. */
goptions vsize=5.25 hsize=5.75;
goptions ftitle=swiss ftext=swiss htitle=4 pct;
axis1 label=(angle=90);         /* vertical axis of the PLOT statement  */
axis2 order=(-10 to 10 by 2);   /* horizontal axis; must be changed to
                                   match the range of the data          */
axis3 label=(angle=-90);        /* vertical axis of the PLOT2 statement */
symbol1 c=blue i=spline v=none l=1;
symbol2 c=green i=spline v=none l=1;
/* The title literal is kept on one line; a quoted string broken across
   source lines carries the line break into the title text. */
title1 "&Distribution Distribution with Location=&location and Scale=&scale";
proc gplot data=junk;
    /* dashed reference line at the example z value, with its label */
    plot dens*z / href=&z chref=blue lhref=2
                 vaxis=axis1 haxis=axis2 annotate=labelDensity;
    /* dashed reference line at CDF(z), with its label */
    plot2 cum*z / vref=&cdf_value cvref=green lvref=2
                  vaxis=axis3 haxis=axis2 annotate=labelCum;
    label dens="Probability Density Function (Blue Trace)"
          cum="Cumulative Probability (Green Trace)";
run;
quit;

* This time we play with a discrete distribution;


/* ---- Example 4: binomial (discrete) distribution -------------------- */
%let Distribution='BINOMIAL';
%let p=0.01;        /* probability of success                          */
%let n=100;         /* number of trials                                */
%let m=3;           /* number of successes to evaluate                 */
%let tail='upper';  /* 'lower' gives Prob(X <= m); any other value
                       gives the upper tail Prob(X > m)                */
/* %let plotstop=5; */  /* disabled: plotstop is derived from m below.
                           A block comment is used because a macro
                           statement inside an asterisk-style comment is
                           not reliably ignored. */

/* Tabulate the probability function and the cumulative probability for
   every possible number of successes 0..n.  Only these points are
   written, so the plotted data set holds nothing but the two curves. */
data junk;
    do m=0 to &n;
        dens=pdf(&Distribution,m,&p,&n);
        cum=cdf(&Distribution,m,&p,&n);
        output;
    end;
run;

/* Publish the macro variables used by the label data sets and the plot:
     cdf_value - Prob(X <= m)
     plotstop  - last tick of the horizontal axis (m + 3)
     refline   - position of the vertical reference line (m + 1)
     p_value   - requested tail probability
   This is done in a separate DATA _NULL_ step: doing it inside the step
   above together with an OUTPUT statement appended a stray row (m = n+1,
   with dens/cum left over from m = n) to the plotted data. */
data _null_;
    cdf_at_m=cdf(&Distribution,&m,&p,&n);
    call symput('cdf_value',put(cdf_at_m,best6.5));
    call symput('plotstop',put(&m+3,best6.0));
    call symput('refline',put(&m+1,best6.0));
    if &tail='lower' then p_tail=cdf_at_m;   /* lower tail: Prob(X <= m) */
    else p_tail=1-cdf_at_m;                  /* upper tail: Prob(X > m)  */
    call symput('p_value',put(p_tail,best6.5));
run;

/* Annotate data set holding the single text label for the probability
   curve. */
data labelDensity;
    length text $ 32 color $ 8;
    xsys  = '2';        /* x is given in data (axis) units              */
    x     = &refline;   /* put the label at the value of macro variable
                           refline                                      */
    ysys  = '3';        /* y is given as a percentage                   */
    y     = 25;         /* 25% of the way up                            */
    color = 'Blue';
    /* The wording depends on the requested tail; any other value of
       tail leaves the label text blank. */
    select (&tail);
        when ('upper') text = "Prob > &m successes=&p_value";
        when ('lower') text = "Prob <= &m successes=&p_value";
        otherwise;
    end;
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;
/* Annotate data set holding the single text label for the cumulative
   probability curve. */
data labelCum;
    length text $ 32 color $ 8;
    xsys  = '3';                    /* x is given as a percentage        */
    x     = 68;                     /* 68% of the way across             */
    ysys  = '2';                    /* y is given in data (axis) units   */
    y     = &cdf_value - 0.0075;    /* slightly below the CDF value      */
    color = 'Green';
    text  = "Cum. Prob for m<=&m is &cdf_value";
    /* No explicit OUTPUT: the single observation is written when the
       step ends. */
run;

/* Graphics setup and plot: probability function against the left axis,
   cumulative probability against the right axis, both over the number
   of successes m. */
goptions vsize=5.25 hsize=5.75;
goptions ftitle=swiss ftext=swiss htitle=4 pct;
axis1 label=(angle=90);         /* vertical axis of the PLOT statement  */
axis2 order=(0 to &plotstop);   /* horizontal axis, from 0 up to the
                                   value of macro variable plotstop     */
axis3 label=(angle=-90);        /* vertical axis of the PLOT2 statement */
symbol1 c=blue i=stepj v=none l=1;      /* step interpolation */
symbol2 c=green i=spline v=none l=1;
title1 "&Distribution Distribution";
/* spelling of "successes" corrected in the displayed title */
title2 "Probability of success=&p, Number of Trials=&n, m=&m successes";
proc gplot data=junk;
    /* dashed reference line at the value of refline, with its label */
    plot dens*m / href=&refline chref=blue lhref=2
                 vaxis=axis1 haxis=axis2 annotate=labelDensity;
    /* dashed reference line at the cumulative probability, with its label */
    plot2 cum*m / vref=&cdf_value cvref=green lvref=2
                  vaxis=axis3 haxis=axis2 annotate=labelCum;
    label dens="Probability Density Function (Blue Trace)"
          cum="Cumulative Probability (Green Trace)";
run;
quit;


 

/* You may also be interested in: Calculating p values with SAS */