/********************************************** Authors: Steve Perez, sjperez@csus.edu, California State University, Sacramento Kevin Hoover, Duke University Piyachart Phiromswad, Sasin Graduate Institute of Business Administration of Chulalongkorn University, Bangkok, Thailand Date: 1-7-08 Provided free, without guarantees, for public non-commercial use. */ /****************************************************************************************** ******************************************************************************************* Edit the input variables below ******************************************************************************************* ******************************************************************************************* *******************************************************************************************/ prt = 0; /* set prt = 1 for detailed output, set prt = 0 for result only output */ algo = 1; /* set algo = 1 if pc algorithm, and = 0 if sgs algorithm */ /* set output file for results to be appended to output file = (filename.out)*/ output file = m2data121807.out; /*Set up a title for the program run*/ output on; "M2 Dataset from 1990.02 to 2005.03"; /*edit this line*/ output off; /*Alpha level for One-shot run through algorithm (0.10 in the paper)*/ mcalpha = 0.10; /*Alpha level for Bootstraps (0.025 in the paper)*/ bsalpha = 0.025; /*Number of lags for the VAR and the bootstrapped data (4 in the paper)*/ lags = 4; /*Number of bootstraps number of boots to run for each monte carlo (10000 in the paper)*/ straps = 100; /*Type in list of variable names. Use all caps, separate by a space, less than 8 letters each*/ let vars = CORE FF LIP LDEP REFI SPVOL SP500 M2OWN MORG3 SPPE TBILL; /*let vars = COREINF FF LIP LLIQDEP LREFI LSP100VL LSP500 M2OWN MORG30 SPPE TBILL3;*/ /* data may be loaded in different formats */ /* data in example runs from 1990.02 to 2005.03 */ /* this load statement expects data in Gauss .fmt file*/ /*load data = msdata1;*/ /* this load statement expects data in text file with variables arranged by columns data[# of observations, # of variables] */ load data[181,11] = m2data121807.txt; /****************************************************************************************** ******************************************************************************************* ******************************************************************************************* Do not edit below here ******************************************************************************************* ******************************************************************************************* *******************************************************************************************/ let arrows = "----" "<--" "noedge" "--->" "<-->"; /* possible arrows to be found */ tstart = timestr(0); outwidth 256; format /m1 /lds 16,6; tstart = timestr(0); output on; if algo == 1; "Algorithm = PC Algorithm"; else; "Algorithm = SGS Algorithm"; endif; "Significance for algorithm = " mcalpha; "Significance for bootstraps = " bsalpha; "Lags in VAR and for Bootstrapped data = " lags; "Number of Boostraps = " straps; "Observations = " rows(data)-lags; output off; /************************************************************ *******set up data********************** ************************************************************/ T = rows(data); /* number of observations in dataset */ k = cols(data); /* set the number of variables */ edges = nCr(k,2); structure_bs = zeros(rows(edges),straps); /* catalogs the structures found as the mode of struc_bs */ edge_list = vars[edges[1,1]]~vars[edges[1,2]]; /* creates list of edges with variable names */ l1 = 2; do while l1 <= rows(edges); edge_list = edge_list|vars[edges[l1,1]]~vars[edges[l1,2]]; l1 = l1 + 1; endo; /************************************************************ ************************************************************* ************************************************************/ links_bs = nCr(k,2); /* this catalogs the links and directions that are found for each edge for each run of the bootstrap */ /* catalog the possible unshielded colliders */ poss_usc = create_usc(k); poss_usc = poss_usc~zeros(rows(poss_usc),2); /* this will be used to count the number of times a usc is found */ ucslist = vars[poss_usc[1,1]]~vars[poss_usc[1,2]]~vars[poss_usc[1,3]]; /* creates list of unshielded colliders with variable names */ l1 = 2; do while l1 <= rows(poss_usc); ucslist = ucslist|vars[poss_usc[l1,1]]~vars[poss_usc[l1,2]]~vars[poss_usc[l1,3]]; l1 = l1 + 1; endo; /************************************************************ ************************************************************* ************************************************************* run VAR to get set of residuals ************************************************************* ************************************************************* ************************************************************/ Bv = var(data,lags,1,"mc"); /* run var to get residuals */ loadm Uv = mcu; /* load var residuals */ RU = corrx(Uv); /* correlation matrix of VAR residuals */ "******************************************"; "Correlation matrix of VAR residuals"; ""; RU; if algo == 1; {sim_link_os, usc_os} = pc_search(RU,mcalpha,T-lags,k); /* run through entire algorithm for one-shot structure */ else; {sim_link_os, usc_os} = sgs_search(RU,mcalpha,T-lags,k); endif; /* catalog which unshielded colliders were found in the one shot of the pc-algorithm */ b6 = 1; do while b6 <= rows(usc_os); b5 = 1; do while b5 <= rows(poss_usc); if usc_os[b6,1] == poss_usc[b5,1] and usc_os[b6,2] == poss_usc[b5,2] and usc_os[b6,3] == poss_usc[b5,3]; poss_usc[b5,4] = poss_usc[b5,4] + 1; b5 = rows(poss_usc) + 2; else; b5 = b5 + 1; endif; endo; b6 = b6 + 1; endo; /****************************************************************************************/ /* get information ready for the bootstraps */ Us = Uv*sqrt((T-lags)/(T-lags-k)); /* Scale bootstrap errors to keep their variance compatible with that of */ /* the monte carlo */ Cv = Bv[rows(Bv),.]; /* Coefficients for the constant */ Bv = Bv[1:rows(Bv)-1,.]; /* Coefficients for the lags */ b3 = 1; /* for each bootstrap catalog the structure found and determine which is the modal structure*/ struc_bs = zeros(rows(edges),1); /* set up to catalog the structures found for each boot strap */ do while b3 <= straps; ybs = zeros(3*T,K); b2 = lags+1; do while b2 <= 3*T; ybslag = ybs[b2-1,.]; b4 = 2; do while b4 <= lags; ybslag = ybslag~ybs[b2-b4,.]; b4 = b4 + 1; endo; ybs[b2,.] = ybslag*Bv + Cv + Us[ceil(rows(Us)*rndu(1,1)),.]; b2 = b2 + 1; endo; xbs = ybs[rows(Ybs)-T:rows(Ybs),.]; boot_ar = var(xbs,lags,1,"boot"); loadm Ubs = bootu; Rbs = corrx(Ubs); /* run the full algorithm */ if algo == 1; {sim_link_bs, usc_bs} = pc_search(Rbs,bsalpha,T-lags,k); else; {sim_link_bs, usc_bs} = sgs_search(Rbs,bsalpha,T-lags,k); endif; /* catalog all of the structures found */ struc_bs = struc_bs~sim_link_bs; /******************************************************************************************/ /* determine which unshielded colliders were found */ /* to determine how many times an unshileded collider was found */ b6 = 1; do while b6 <= rows(usc_bs); b5 = 1; do while b5 <= rows(poss_usc); if usc_bs[b6,1] == poss_usc[b5,1] and usc_bs[b6,2] == poss_usc[b5,2] and usc_bs[b6,3] == poss_usc[b5,3]; poss_usc[b5,5] = poss_usc[b5,5] + 1; b5 = rows(poss_usc) + 2; else; b5 = b5 + 1; endif; endo; b6 = b6 + 1; endo; b3 = b3 + 1; endo; /* tabulate the results */ struc_bs = struc_bs[.,2:cols(struc_bs)]; pre_bs = zeros(rows(struc_bs),1); bins = {-1.5, -0.5, 0.5, 1.5, 2.5}; possedge = {-2, -1, 0, 1, 2}; bsout = counts(struc_bs[1,.]',bins)'; i1 = 2; do while i1 <= rows(struc_bs); bsout = bsout|counts(struc_bs[i1,.]',bins)'; i1 = i1 + 1; endo; bsout = bsout/(straps); labs = possedge'; output on; sim_link_arr = zeros(rows(sim_link_os),1); a1 = 1; do while a1 <= rows(sim_link_os); if sim_link_os[a1] == -2; sim_link_arr[a1] = arrows[1]; elseif sim_link_os[a1] == -1; sim_link_arr[a1] = arrows[2]; elseif sim_link_os[a1] == 0; sim_link_arr[a1] = arrows[3]; elseif sim_link_os[a1] == 1; sim_link_arr[a1] = arrows[4]; else; sim_link_arr[a1] = arrows[5]; endif; a1 = a1 + 1; endo; cause_out = edge_list[.,1]~sim_link_arr~edge_list[.,2]; ""; "************************************************************************************"; "Distribution of structures found in bootstrap"; "for each Bootstrap using the entire algorithm"; "Second column is found using the pc-algorithm"; bs_rep = sortc(cause_out~bsout, 6); let mask[1,8] = 0 0 0 1 1 1 1 1; let fmt[8,3] = "-*.*s" 8 8 /* first column format */ "-*.*s" 8 8 /* second column format */ "-*.*s" 8 8 /* third column format */ "*.*lf" 10 5 /* fourth column format */ "*.*lf" 10 5 /* fifth column format */ "*.*lf" 10 5 /* sixth column format */ "*.*lf" 10 5 /* seventh column format */ "*.*lf" 10 5; /* eighth column format */ ""; " -- <-- No Edge --> <--> "; outp = printfm(bs_rep , mask , fmt ); "************************************************************************************"; "Distribution of structures found in bootstrap"; "Summary Stastics for each possible edge"; "Second column is found using the pc-algorithm"; bsout_s = (1-bsout[1,3])*100~((bsout[1,2]+bsout[1,4]+bsout[1,5])/(1-bsout[1,3]))*100~ ((bsout[1,4]-bsout[2,2])/(1-bsout[3,3]))*100; r1 = 2; do while r1 <= rows(bsout); bsout_st = (1-bsout[r1,3])*100~((bsout[r1,2]+bsout[r1,4]+bsout[r1,5])/(1-bsout[r1,3]))*100~ ((bsout[r1,4]-bsout[r1,2])/(1-bsout[r1,3]))*100; bsout_s = bsout_s|bsout_st; r1 = r1 + 1; endo; let mask[1,6] = 0 0 0 1 1 1; let fmt[6,3] = "-*.*s" 8 8 /* first column format */ "-*.*s" 8 8 /* second column format */ "-*.*s" 8 8 /* third column format */ "*.*lf" 10 5 /* fourth column format */ "*.*lf" 10 5 /* fifth column format */ "*.*lf" 10 5; /* sixth column format */ ""; " Exists Directed Net Direction "; out_mat = rev(sortc(cause_out~bsout_s,4)); outp = printfm(out_mat , mask , fmt ); "************************************************************************************"; "************************************************************************************"; "Identification of unshielded colliders"; "Fourth column are the colliders found with the one-shot of the pc-algorithm"; let mask[1,5] = 0 0 0 1 1 ; let fmt[5,3] = "-*.*s" 8 8 /* first column format */ "-*.*s" 8 8 /* second column format */ "-*.*s" 8 8 /* third column format */ "*.*lf" 10 5 /* fourth column format */ "*.*lf" 10 5; /* fifth column format */ ""; " OutsideVar. Middle Var. Outside Var. PCalg found(=1) BS found (%) "; out_mat1 = rev(sortc(ucslist~poss_usc[.,4]~(poss_usc[.,5]/straps)*100, 5)); outp = printfm(out_mat1 , mask , fmt ); "************************************************************************************"; output off; /***************************************************** ****************************************************** ****************procs********************************* ****************************************************** ****************************************************** *****************************************************/ /******************* VAR code *************************** Author: Paul Fackler Date: 7 Mar 1996 Provided free, without guarantees, for public non-commercial use. There have been several postings lately about VAR methods. It seems that there are several out there but some of these are not provided as source code and some are not free. Here some fairly plain code to compute VARS, IRFs, and FEVDs that I wrote a number of years ago. It gets the job done and is open to examination. It is modular and allows for alternative identification schemes besides the usual Cholesky factorization. I have other code to compute maximum likelihood estimates for non-triangular identifications and will be glad to send or post it if there is interest. (I published two papers on this topic in the 80s and would be happy to rid myself of some old reprints on a first come first served basis.) Description: Procedure for doing vector autoregressions (VARs). This procedure calculates an AR representation of a vector time series, i.e. a regression of a set of variables on a specified number of lags of all the variables. INPUTS: X : an NxK data matrix. This includes just the endogenous variables of the model. LAGS : the number of lags desired in the AR representation. The first LAGS observations are used only for the regressors, hence there will effectively be N-LAGS observations in this problem. Z : an NxP or (N-LAGS)xP matrix of deterministic (or exogenous) variables (these might include a constant, time trends, or dummy variables as well as stochastic but exogenous variables). if there are N rows in the matrix, the first LAGS rows will be ignored. Other legal values for Z are 0 and 1. if Z=0 then no deternimistic variables will be used. if Z=1 then a constant term will be included. NAMEOUT : a 6 letter or less prefix used for names of GAUSS matrix files saved by the program. OUTPUT AR: the regression coefficients, stored as a (K*LAGS+P)xK matrix. This may be thought of as LAGS KxK matrices, the ith of which is associated with the ith lag, with a PxK matrix concatenated at its bottom. Note that these matrices postmultiply the variables so columns are associated with equations, rows with regressors. SAVED TO DISK: XX: the inverse moment matrix (inv(moment([X(t-1)...X(t-LAGS) Z]))). This is useful in calculating the sampling variances of the coefficients when multipied by the diagonal elements of CV. if the problem is large (# of regressors>90) then only the diagonal elements of XX are saved. CV: the sample covariance matrix of the errors, a KxK matrix. Note that this is the ML estimator, calculated as U'U/(N-LAGS). U : the regression residuals, an (N-LAGS)xK matrix. Each of these matrices will be saved to disk with the prefix sent followed by the names given above. Thus if the prefix sent is "R1" then the following matrices will be stored on disk: R1XX, R1CV, R1U. These may be accessed by using the load command. *************************************************************************/ proc var(x,lags,z,nameout); local k,kl,i,j,ik,jk,n,numdet,numreg,xx,xy,ar,u,vcv,name, l1,l2,kl1,kl2,xx1,xx2,xx3,xy1,xy2,temp,d1,d2,dyy,dzz; /* cls; */ @ Calculates problem dimensions and checks for errors in problem size, data, or in room to write output disk files @ k=cols(x); n=rows(x); if rows(packr(x))/=n; "Error: Missing values encountered in X data"; stop;endif; if rows(z)>1; if rows(z)/=n-lags; if rows(z)==n;z=z[lags+1:n,.]; else; "Error: Rows(Z) must equal rows(X)-lags"; stop;endif;endif; if rows(z)/=rows(packr(z)); "Error: Missing values encountered in deterministic data (Z)"; stop;endif; numdet=cols(z); elseif z==0; numdet=0; else; z=ones(n-lags,1); numdet=1; endif; lags=trunc(lags[1,1]); if lags<1;"Error: Improper lag specification";stop;endif; kl=k*lags; numreg=kl+numdet; @ if numreg>180;"Error: Problem too big";stop;endif;@ nameout=upper(nameout); i=vals(nameout); if rows(i)>2; if i[2,1]==58;i=i[1,1]-64;else;i=0;endif; if i;i=2;endif; if strlen(nameout)>6+i; nameout=strsect(nameout,1,6+i); "Note: Output file name prefix shortened to " $ nameout; endif; endif; dyy=sqrt(diag(moment(trimr(x,lags,0),0))'); x=x./dyy; if numdet; dzz=sqrt(diag(moment(z,0))'); z=z./dzz; endif; @ Calculates the coefficient matrix, AR, a (numreg x k) matrix. This matrix is written to disk. Inv(x'x), a (numreg x numreg) matrix, stored as xx, is written to disk. XX and XY are cleared to open work space for further calculations. Covariance matrix of the ith column of AR calculated by multipling xx by the variance of the residuals of the ith equation. @ xx=zeros(numreg,numreg);xy=zeros(numreg,k); i=0;do until i==lags;i=i+1; ik=i*k; xy[ik-k+1:ik,.]=trimr(x,lags-i,i)'trimr(x,lags,0); if numdet; xx[ik-k+1:ik,kl+1:numreg]=trimr(x,lags-i,i)'z; xx[kl+1:numreg,i*k-k+1:i*k]=xx[ik-k+1:ik,kl+1:numreg]'; endif; j=i-1;do until j==lags;j=j+1; jk=j*k; xx[ik-k+1:ik,jk-k+1:jk]=trimr(x,lags-i,i)'trimr(x,lags-j,j); if j/=i; xx[jk-k+1:jk,ik-k+1:ik]=xx[ik-k+1:ik,jk-k+1:jk]'; endif; endo; endo; if numdet; xx[kl+1:numreg,kl+1:numreg]=moment(z,0); xy[kl+1:numreg,.]=z'trimr(x,lags,0); endif; name=nameout$+"XX";save ^name=xx; name=nameout$+"XY";save ^name=xy; xx=invpd(xx); ar=xx*xy; xy=0; @ Calculates the residuals, stored in u. These are written to nameu and cleared. Residual covariance matrix stored as VCV. These are written to namecv. @ u=trimr(x,lags,0); if numdet; u=u-z*ar[kl+1:numreg,.]; endif; i=0;do until i==lags;i=i+1; u=u-trimr(x,lags-i,i)*ar[(i-1)*k+1:i*k,.]; endo; clear x,z; u=u.*dyy; vcv=moment(u,0)/(n-lags); name=nameout$+"U";save ^name=u; name=nameout$+"CV";save ^name=vcv; clear u,vcv; ar=ar.*dyy; ar[1:kl,.]=ar[1:kl,.]./(ones(lags,1).*.dyy'); if numdet; ar[kl+1:numreg,.]=ar[kl+1:numreg,.]./dzz'; endif; retp(ar); endp; @ A procedure for calculating a vector moving average representation from a vector autoregressive representation. INPUTS AR: the regression coefficients, stored as a (K*LAGS+P)xK matrix. These may be calculated by proc VAR. LAGS : the number of lags desired in the AR representation. The first LAGS observations are used only for the regressors, hence there will effectively be N-LAGS observations in this problem. MALAGS : the number of lags desired in the MA representation and hence in the impulse response function. OUTPUT MA: the MA representation associated with the innovations, i.e. the step ahead forecast errors. Note this is stored as an (MALAGS*KxK) matrix. The ith block of k rows is the matrix of coefficients associated with the ith lagged innovation. Also note the convention followed here is that u (the innovation vector) is a row vector and the coefficients therefore postmultipy u. @ proc vma(ar,lags,malags); local i,j,k,ik,ma; k=cols(ar); ma=zeros(malags*k,k); i=k*minc(lags|malags); ma[1:i,.]=ar[1:i,.]; i=0;do until i==malags-1;i=i+1; j=k*minc((i+lags)|malags); ik=i*k; ma[ik+1:j,.]=ma[ik+1:j,.] + ar[1:(j-ik),.]*ma[ik-k+1:ik,.]; endo; retp(ma); endp; @ Computes the A0 matrix for a recursive identification. This may be passed to IMPULSE to obtain the IRF associated with such a model, etc. The Cholesky decompostion of the innovation VCV matrix is used. INPUTS: VCV: the innovation covariance matrix (called SIGMA in VAR2) IND: a K-vector listing the ordering of the recursive model OUTPUTS: A0: the contemporaneous coefficients matrix (inv(A0*A0')=VCV) @ proc reca0(vcv,ind); local a0; vcv=vcv[ind,ind]; a0=vcv; a0[ind,ind]=inv(chol(vcv)); retp(a0); endp; @ Computes the Impulse Response Function (IRF) of a K variable VAR with an orthogonalization defined by A0, the matrix of contemporaneous coefficients. Input is the Moving Average (MA) representation of the system and the A0 matrix. The former can be obtained from the VAR procedure, while the latter is a transformation of the VAR residual covariance matrix (VCV), which is placed in memory by the VAR procedure. A general set of routines for calculating A is available in the file VAR2, which also uses proc MINMUM and associated procedures. (Note VCV is called SIGMA in those routines). RECA0 can be used to obtain a triangular A matrix. This proc returns the IRF as an (MALAGS+1)x(K*K) matrix, made up of K (L+1)xK blocks, the ith of which gives the response of the the K variables to a unit shock in the ith impulse. Note that row i of the IRF correspond to the responses to shocks at a lag of i-1. Hence the 1st row is the contemporaneous response. A standard output is printed if OUTCODE is non-zero. @ proc impulse(ma,a0,outcode); local i,j,k,l,m,s; cls; k=rows(a0); if cols(a0)/=k;"Error: A0 is not square";stop;endif; l=rows(ma)/k; @ This is MALAGS of proc VAR @ if l/=trunc(l) or cols(ma)/=k; "Error: MA and A0 are not conformable";stop;endif; a0=inv(a0); i=0;do until i==l;i=i+1; ma[(i-1)*k+1:i*k,.]=a0*ma[(i-1)*k+1:i*k,.]; endo; ma=reshape((a0|ma),l+1,k*k); if outcode; s=seqa(1,1,k); format 8,0; "\f"; " IMPULSE RESPONSE FUNCTIONS"; " response of variable:"; " to at"; "shock lag" s'; "-------------";; j=0;do until j==k;j=j+1;"---------";;endo;""; j=0; do until j==k;j=j+1; i=0;do until i==l+1; format 4,0; if i==0;j;;" " i;; else;" " i;;endif; format 8,4;" ";;ma[i+1,s]; i=i+1; endo; "\f"; s=s+k; endo; endif; retp(ma); endp; @ Computes the Forecast Error Decomposition (FED) for orthogonal shocks. Input is the Impulse Response function for the system. This can be obtained from the VAR procedure or from the IMPULSE procedure. This proc returns the FED as an (MALAGS+1)x(K*K) matrix. This is K (MALAGS+1)xK blocks, the ith of which gives the decomposition for the ith variable. The ith row of each block is associated with the i- step ahead forecast, the jth column is associated with the jth impulse. A standard output is printed if OUTCODE is non-zero. @ proc fedecomp(irf,outcode); local i,j,d,k,kk,l,t,q,v,s,var; cls; l=rows(irf); @ Equals MALAGS+1 @ k=round(sqrt(cols(irf))); kk=k*k; irf=reshape(irf.*irf,k*l,k); d=zeros(k,k);v=zeros(l,kk); var=zeros(l,k); t=0;do until t==l;t=t+1; d=d+irf[(t-1)*k+1:t*k,.]; var[t,.]=sumc(d)'; v[t,.]=reshape((100*d./var[t,.])',1,k*k); endo; if outcode; s=seqa(1,1,k); format 5,0; " FORECAST ERROR DECOMPOSITION"; " % of forecast error variance due to shock:"; " for steps std."; "variable ahead err. " s'; "------------------------";; j=0;do until j==k+1;j=j+1;"------";;endo;""; j=0; do until j==k;j=j+1; i=0;do until i==l;i=i+1; format 4,0; if i==1;j;;" " i;; else;" " i;;endif; format 8,4;" ";;sqrt(var[i,j]);; format 5,1; v[i,s]; endo; ""; s=s+k; endo; endif; retp(v); endp; @ This procedure calculates 1 through m step ahead projections starting at period t (i,e, the tth observation of X, the data matrix). AR are the autoregressive coefficients calculated by the VAR procedure. Z is a MxP matrix of deterministic variables corresponding to the projection period. if Z=0 then no deterministic variables are used. if Z=1 then an m vector of ones is used. if Z and X have the same number of rows then rows (t+1) through (t+m) of Z will be used. Other than these cases Z must be MxP or an error message will be printed. Output is an MxK matrix with the i,jth element representing the expectation at time t of the jth variable at time t+i. @ proc proj(x,ar,t,m,z); local i,ey,k,l,kl,temp,r; if z==1;z=ones(m,1);endif; if rows(z)==rows(x);z=z[t+1:t+m,.];endif; if t>rows(x);"Error: Starting observation,t, greater than rows(X)"; stop; endif; if rows(z)/=m and z/=0; "Error: Deterministic variables should contain m observations";stop;endif; k=cols(ar); if z==0;l=rows(ar)/k;else;l=(rows(ar)-cols(z))/k;endif; kl=k*l; if l/=trunc(l);"Error: cols(Z) doesn't match size of AR";stop;endif; ey=rev(x[t-l+1:t,.]); i=0;do until i==m;i=i+1; if z==0;ey=(reshape(ey[1:l,.],1,kl)*ar)|ey; else;ey=((reshape(ey[1:l,.],1,kl)~z[i,.])*ar)|ey; endif; endo; retp(rev(ey[1:m,.])); endp; @ Calculates the roots of an AR model and calculates how many, if any, lie outside the unit circle (indicating non-stationarity). INPUTS: AR: the AR coefficients matrix obtained from proc VAR LAGS: the number of lags in the VAR OUTPUTS: R: a K*LAGS x 3 matrix containing the real and imaginary parts of the roots of the model and their modulus Note: the external function EIG must be previously loaded. @ proc roottest(ar,lags); local r,rv,iv,k,kl; k=cols(ar); kl=k*lags; ar=ar[1:kl,.]~(eye(kl-k)|zeros(k,kl-k)); {rv,iv}=eigrg(ar); r=sqrt(rv.*rv+iv.*iv); k=sumc(r.>=1); k "root(s) outside the unit circle"; retp(rv~iv~r); endp; /***************************************************** ****************************************************** ****************************************************** ****************************************************** ****************************************************** *****************************************************/ /*** This set of procs perform calculations necessary to run the graphical search causality. ***/ /* Computes the partial correlation coefficients between A and B, conditional on C C may have several variables in it. Let con = number of conditioning variables The partial correlation coefficient is computed as described in Johnston (2nd edition, page 135) the p-value is calculated assuming z = 0.5*ln((1+pc)/(1-pc)) is distributed N(0.5*ln((1+pc(true))/(1-pc(true))), 1/(n-3)). */ proc(2) = pcorr(R,A,B,C,obs); local Rab, Raa, Rbb, zstat, pc, pval, Rs; Rab = (-1^(A*B))*det(R[B~C,A~C]); Raa = det(R[B~C,B~C]); Rbb = det(R[A~C,A~C]); pc = -1*(Rab)/sqrt(Raa*Rbb); zstat = (0.5*ln((1+pc)/(1-pc)))/(sqrt(1/(obs-3-cols(C)))); pval = 2*cdfnc(abs(zstat)); retp(pc,pval); endp; /****************************************** ** nCr(n,r) ** ** Written by Tsz-Cheong Lai, July 2002 ** ** This code is provided gratis without any guarantees or ** warrantees. Proprietary modifications of this code are ** not permitted. ** ** Function: compute all nCr combinations ** Inputs: n -- the total number of items ** r -- the number of items to choose from the n ** ** Output: nCr x r matrix ** where each row is a combination ** ** e.g. nCr(3,2) = 1 2 ** 1 3 ** 2 3 ** ** Auxillary function: _serial_sum ******************************************/ proc nCr(n,r); local errout, errmsg, start_n, mtrx, idx, rows_to_add, idx_n; if n <= 0; GOTO errout("* ERROR: n = " $+ FTOS(n,"*.*lf",CEIL(LOG(n))+1,2) $+ " is negative."); elseif r <= 0; GOTO errout("* ERROR: r = " $+ FTOS(r,"*.*lf",CEIL(LOG(r))+1,2) $+ " is negative."); elseif n /= TRUNC(n); GOTO errout("* ERROR: n = " $+ FTOS(n,"*.*lf",CEIL(LOG(n))+1,2) $+ " is not an integer."); elseif r /= TRUNC(r); GOTO errout("* ERROR: r = " $+ FTOS(r,"*.*lf",CEIL(LOG(r))+1,2) $+ " is not an integer."); elseif r > n; GOTO errout("* ERROR: r = " $+ FTOS(r,"*.*lf",CEIL(LOG(r))+1,2) $+ " is larger than n = " $+ FTOS(n,"*.*lf",CEIL(LOG(n))+1,2) $+ "."); elseif r == 1; retp(SEQA(1,1,n)); elseif r == n; retp(SEQA(1,1,n)'); endif; /* Create aCb where a = n - r + 2 and b = 2 */ start_n = n - r + 2; mtrx = ONES(start_n - 1,1) ~ SEQA(2,1,start_n - 1); idx = 1; do until idx == start_n - 1; mtrx = mtrx | (mtrx[1:start_n-1-idx,.] + idx); idx = idx + 1; endo; /* The computation is already done if r = 2 */ if r == 2; retp(mtrx); endif; /* Move from aCb to (a+1)C(b+1) until we reach nCr */ rows_to_add = SEQA(1,1,start_n - 2); idx_n = start_n; do until idx_n == n; /* The matrix aCb is flipped over to form the upper-right part of ** the new matrix (a+1)C(b+1) ** which contains all the combinations with the first item chosen */ mtrx = (idx_n + 1 - mtrx); mtrx = rev(rev(mtrx')'); mtrx = ONES(ROWS(mtrx), 1) ~ (1 + mtrx); /* The remaining combinations with the second item chosen are added, ** then the third, so on and so forth */ rows_to_add = _serial_sum(rows_to_add); idx = 0; do until idx == ROWS(rows_to_add); mtrx = mtrx | (mtrx[1:rows_to_add[ROWS(rows_to_add)-idx],.] + idx + 1); idx = idx + 1; endo; idx_n = idx_n + 1; endo; retp(mtrx); errout: pop errmsg; errorlog "* Procedure \34nCr\34 is in use."; errorlog errmsg; STOP; endp; proc _serial_sum(vctr); local idx; idx = 2; do until idx > ROWS(vctr); vctr[idx] = vctr[idx-1] + vctr[idx]; idx = idx + 1; endo; retp(vctr); endp; /* This proc counts how many elements of a vector are equal to a given number n = count(X,C); n is the number of times the scalar C appears in the column vector X. */ proc(1) = count(X,C); local i1, click; click = 0; i1 = 1; do while i1 <= rows(X); if X[i1] == C; click = click + 1; endif; i1 = i1 + 1; endo; retp(click); endp; /* This proc checks if all of the links in a chain are the same link n = samelink(X); n = 1 if all of the links in X are the same n = 0 if all of the links in X are not the same X must be a column vector */ proc(1) = samelink(X); local n, i1, l; n = 1; i1 = 2; do while i1 <= rows(X); if X[i1] /= X[1]; n = 0; i1 = rows(X)+1; else; i1 = i1 + 1; endif; endo; retp(n); endp; /***************************************************************** ****************************************************************** ****************************************************************** ****************************************************************** This proc implements the SGS Algorithm found in Spirtes et al. (2000, pp. 64-85) ****************************************************************** ****************************************************************** ****************************************************************** ****************************************************************** *****************************************************************/ proc(2) = sgs_search(R,alpha,T,k); local zalpha, obs, Adj, links_num, Adj_Link_Sep, i, i1, i2, i3, adj_test, v1, v2, pc, zstat, pval, n, Cond_var, triples, Link_new, link1, link2, link3, chain, ae, v3, ol, v1_v2, v1_v3, v2_v3, loop_stop, ue, poss_cycles, link_check, subchain1, chain_break, acyclic, C, crow, ccol, pt, nt, loopcheck, p1, n1, loopstop, n1t, link_choose, usc; zalpha = alpha; /*** Set significance level for Fisher's Z-stat **/ acyclic = 1; /* set acyclic = 1 for acyclic graph, 0 otherwise */ k = cols(R); /* number of variables */ obs = T; /* number of observations */ Adj = nCr(k,2); /* matrix of possible adjacencies */ usc = zeros(1,3); /* matrix of unshielded colliders */ links_num = rows(Adj); Adj_Link_Sep = zeros(rows(Adj),k+2); Adj_Link_Sep[.,1:2] = Adj; Adj_Link_Sep[.,3] = seqa(1,1,rows(Adj_Link_Sep)); Adj_Link_Sep[.,4] = -2*ones(rows(Adj_Link_Sep),1); /* matrix with adjacencies, links, and Sepsets first two columns are the variable numbers, next column indexes the link, remaining columns are the Sepset for an adjacency */ /**** Set n = 0. Test for nth-order conditional correlation between every pair of variables conditioning on every subset of variables size n. (for n = 0, the conditioning set is the null set, so that conditional correlation is equivalent to unconditional correlation). if a pair of variables is conditionally uncorrelated, eliminate the edge between them. *****/ i = 1; do while i <= rows(Adj); adj_test = Adj[i,.]; /* lists the adjacency to test */ v1 = adj_test[1,1]; v2 = adj_test[1,2]; pc = R[v1,v2]; zstat = (0.5*ln((1+pc)/(1-pc)))/(sqrt(1/(obs-3))); pval = 2*cdfnc(abs(zstat)); if pval >= zalpha; /* If there is no correlation */ Adj_link_Sep[i,4] = 0; else; n = 1; do while n <= k-2; Cond_var = nCr(k,n); /* this is the set of all possible variables to condition v1 and v2 on */ Cond_var = selif(Cond_var, (Cond_var[.,.] ./= v1) .and (Cond_var[.,.] ./= v2)); /* extracts rows which do not include v1 and v2 */ i1 = 1; do while i1 <= rows(Cond_var); {pc, pval} = pcorr(R, v1, v2, Cond_var[i1,.], obs); /* test for partial correlation */ if pval >= zalpha; /* If there is no correlation */ Adj_link_Sep[i,4] = 0; /* delete link as possible */ Adj_link_Sep[i,5:5+n-1] = Cond_var[i1,.]; /* put the SepSet in the end of matrix */ i1 = rows(Cond_var) + 1; /* end loop */ n = k; /* end search for sepset for this adjacency */ else; i1 = i1 + 1; /* if correlation exists, check next set */ endif; endo; n = n + 1; endo; endif; i = i + 1; endo; /* now we have all of the direct links and need to give them direction. */ /*Consider each pair of variables (X and Y) that are unconnected by a direct edge but are connected through an undirected path through a third variable (Z). Orient X Z Y as X -> Z <- Y if, and only if, Z is not in SepSet for X and Y */ /* look at all triples and see if there are any unshielded colliders */ triples = nCr(k,3); Link_new = Adj_Link_Sep[.,4]; i1 = 1; do while i1 <= rows(triples); link1 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,2])); link2 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,2]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); link3 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); chain = link1|link2|link3; if count(chain[.,4], 0) == 1; ae = indnv(0,chain[.,4]); /* identify index of abscent edge */ v1 = chain[ae,1]; /* smaller variable for the abscent edge */ v2 = chain[ae,2]; /* larger variable for the abscent edge */ v3 = delif(triples[i1,.]', (triples[i1,.]' .== v1) .or (triples[i1,.]' .== v2)); /* third variable */ ol = intrsect(chain[ae,5:cols(chain)]', v3, 1); /* find if v3 is in the SepSet */ if ismiss(ol) == 1; /* v3 is not in SepSet for missing link */ /* so v1 -> v3 and v2 -> v3 */ v1_v2 = chain[ae,3]; v1_v3 = selif(chain, (chain[.,1] .== v1) .or (chain[.,2] .== v1)); v1_v3 = selif(v1_v3, (v1_v3[.,1] .== v3) .or (v1_v3[.,2] .== v3)); v1_v3 = v1_v3[.,3]; v2_v3 = selif(chain, (chain[.,1] .== v2) .or (chain[.,2] .== v2)); v2_v3 = selif(v2_v3, (v2_v3[.,1] .== v3) .or (v2_v3[.,2] .== v3)); v2_v3 = v2_v3[.,3]; if v1 <= v3 and Link_new[v1_v3] == -1; Link_new[v1_v3] = 2; elseif v1 <= v3; Link_new[v1_v3] = 1; elseif v1 >= v3 and Link_new[v1_v3] == 1; Link_new[v1_v3] = 2; else; Link_new[v1_v3] = -1; endif; if v2 <= v3 and Link_new[v2_v3] == -1; Link_new[v2_v3] = 2; elseif v2 <= v3; Link_new[v2_v3] = 1; elseif v2 >= v3 and Link_new[v2_v3] == 1; Link_new[v2_v3] = 2; else; Link_new[v2_v3] = -1; endif; if v1 < v2; usc = usc|v1~v3~v2; /* this is the unshielded collider */ else; usc = usc|v2~v3~v1; endif; endif; i1 = i1 + 1; else; i1 = i1 + 1; endif; endo; if rows(usc) >= 2; usc = usc[2:rows(usc),.]; endif; Adj_Link_Sep[.,4] = Link_new; /* Now look at the remaining undirected links. If X -> Z and Z -- Y and X and Y are not directly connected, then orient Z -> Y */ loop_stop = 1; /* loop_stop is an index that will force the loop to stop only when no changes to the causal structure were made in the last pass through */ if loop_stop == 1; loop_stop = 0; /* reset loop_stop */ i1 = 1; do while i1 <= rows(triples); link1 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,2])); link2 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,2]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); link3 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); chain = link1|link2|link3; /* chain must look like this v1 -> or <> v2 -- v3 */ if count(chain[.,4], 0) == 1 and count(chain[.,4], -2) == 1; ae = indnv(0,chain[.,4]); /* identify index of abscent edge */ ue = indnv(-2,chain[.,4]); /* identify index of undirected edge */ v3 = intrsect(chain[ae,1 2]', chain[ue,1 2]', 1); /* v3 must be in both of these edges */ v1 = delif(chain[ae,1 2]', chain[ae,1 2]' .== v3); /* v1 is the other variable in ae */ v2 = delif(chain[ue,1 2]', chain[ue,1 2]' .== v3); /* v2 is the other variable in ue */ v1_v3 = chain[ae,3]; v2_v3 = chain[ue,3]; v1_v2 = delif(chain[.,3], (chain[.,3] .== v1_v3) .or (chain[.,3] .== v2_v3)); if v1 <= v2 and (Adj_Link_Sep[v1_v2,4] == 1 or Adj_Link_Sep[v1_v2,4] == 2); if v2 <= v3; Link_new[v2_v3] = 1; else; Link_new[v2_v3] = -1; endif; elseif v1 >= v2 and (Adj_Link_Sep[v1_v2,4] == -1 or Adj_Link_Sep[v1_v2,4] == 2); if v2 <= v3; Link_new[v2_v3] = 1; else; Link_new[v2_v3] = -1; endif; Loop_stop = 1; endif; endif; i1 = i1 + 1; endo; Adj_Link_Sep[.,4] = Link_new; /* make the changes*/ endif; /* create a matrix with the lower triangle representing the links */ C = 8*eye(k); i1 = 1; do while i1 <= rows(Adj_Link_Sep); crow = Adj_Link_Sep[i1,2]; ccol = Adj_Link_Sep[i1,1]; C[crow,ccol] = Adj_Link_Sep[i1,4]; i1 = i1 + 1; endo; /* Now look for possible circles. If there is a directed path FROM x to y (through other variables) and there is an edge between x and y, x -> y. */ if acyclic == 1; C = C-C'; C = C + 8*eye(k); /* check for undirected edges in each column*/ pt = 0; nt = 0; loopcheck = 1; /* this is the indicator that allows for iterating */ /* when a change is made set loop = 1. If there are no changes in the last round stop looping (loop = 0) */ do while loopcheck == 1; loopcheck = 0; ue = indexmat(C, -2); if ue /= 0; i1 = 1; do while i1 <= rows(ue); i2 = 1; v1 = ue[i1,2]; v2 = ue[i1,1]; /* v1 < v2 */ p1 = indexcat(C[v1,.]',1); n1 = indexcat(C[v1,.]',-1); loopstop = 0; do while loopstop == 0 and i2 <= k; /* loopstop triggers the end of check for a given link. */ /* this can happen if we get to the end of a path (i2 > k-1), or */ /* there are no additional links */ /* or we get back to v2 */ if ismiss(p1) == 1 and ismiss(n1) == 1; /* if there are no chains to follow, stop */ loopstop = 1; else; if ismiss(p1) == 0; /* elseif there are positive chains to follow */ if ismiss(intrsect(p1,v2,1)) == 0; /* cycle has returned to the other variable */ loopstop = 1; loopcheck = 1; C[ue[i1,1], ue[i1,2]] = -1; /* direct the undirected link as a -1, and stop*/ else; /* else find the list of rows to continue checking */ pt = 0; i3 = 1; do while i3 <= rows(p1); pt = union(pt,indexcat(C[p1[i3],.]',1),1); i3 = i3 + 1; endo; p1 = selif(pt, pt .gt 0); endif; endif; if ismiss(n1) == 0; /* elseif there are positive chains to follow */ if ismiss(intrsect(n1,v2,1)) == 0; /* cycle has returned to the other variable */ loopstop = 1; loopcheck = 1; C[ue[i1,1], ue[i1,2]] = 1; /* direct the undirected link as a 1, and stop*/ else; /* else find the list of rows to continue checking */ n1t = 0; i3 = 1; do while i3 <= rows(n1); nt = union(nt,indexcat(C[n1[i3],.]',-1),1); i3 = i3 + 1; endo; n1 = selif(nt, nt .gt 0); endif; endif; endif; i2 = i2 + 1; endo; i1 = i1 + 1; endo; endif; endo; endif; link_choose = 0; i2 = 1; /* extract lower triangle of C as the chosen set of links */ do while i2 <= rows(C) - 1; i1 = i2 + 1; do while i1 <= rows(C); link_choose = link_choose|C[i1,i2]; i1 = i1 + 1; endo; i2 = i2 + 1; endo; retp(link_choose[2:rows(link_choose)], usc); endp; /* this procedure returns the indices of the elements of a matrix that equal a specified value y = indexmat(x,v) x = n x k matrix v = scalar y = L x 2 matrix of indices of the elements of x that are equal to v */ proc(1) = indexmat(x,v); local y, i1, i2, y1, y2; y = zeros(1,2); i1 = 1; do while i1 <= cols(x); y1 = indexcat(x[.,i1],v); if ismiss(y1) == 0; i2 = 1; do while i2 <= rows(y1); y2 = y1[i2,1]~i1; y = y|y2; i2 = i2 + 1; endo; endif; i1 = i1 + 1; endo; if rows(y) == 1; retp(0); else; retp(y[2:rows(y),.]); endif; endp; /***************************************************************** ****************************************************************** ****************************************************************** ****************************************************************** This proc implements the PC Algorithm found in Spirtes et al. (2000, pp. 84-85), this is edited by Tom's PC alg. ****************************************************************** ****************************************************************** ****************************************************************** ****************************************************************** *****************************************************************/ proc(2) = pc_search(R,alpha,T,k); /* R is the correlation matrix for the variables alpha is the significance level for Fisher's Z-stat T is the number of observations k is the number of variables. */ local zalpha, obs, Adj, links_num, Adj_Link_Sep, i, i1, i2, i3, adj_test, v1, v2, pc, zstat, pval, n, Cond_var, triples, Link_new, link1, link2, link3, chain, ae, v3, ol, v1_v2, v1_v3, v2_v3, loop_stop, ue, poss_cycles, link_check, subchain1, chain_break, acyclic, C, crow, ccol, pt, nt, loopcheck, p1, n1, loopstop, n1t, link_choose, Linkmat, dirlinks, vlist, d1, d2, Cond_ind, pc_count, seq_index, ii, j, row_index, adj_index, old_index, od, vlist1, Cond_var1, num_adj1, cond_ind1, Cond_var_r1, cc, fixc1, fixc2, final, usc, poss_usc, b4, b5; zalpha = alpha; /*** Set significance level for Fisher's Z-stat **/ acyclic = 1; /* set acyclic = 1 for acyclic graph, 0 otherwise */ obs = T; /* number of observations */ Adj = nCr(k,2); /* matrix of possible adjacencies */ usc = zeros(1,3); /* matrix of unshielded colliders */ links_num = rows(Adj); Adj_Link_Sep = zeros(links_num,k+2); Adj_Link_Sep[.,1:2] = Adj; Adj_Link_Sep[.,3] = seqa(1,1,links_num); Adj_Link_Sep[.,4] = -2*ones(links_num,1); /* matrix with adjacencies, links, and Sepsets first two columns are the variable numbers, next column indexes the link, remaining columns are the Sepset for an adjacency */ Linkmat = ones(k,k); /* Matrix of link existence. =1 if link exists, = 0 otherwise */ Linkmat = diagrv(Linkmat, zeros(k,1)); /**** Set n = 0. Test for nth-order conditional correlation between every pair of variables conditioning on every subset of variables size n. (for n = 0, the conditioning set is the null set, so that conditional correlation is equivalent to unconditional correlation). if a pair of variables is conditionally uncorrelated, eliminate the edge between them. *****/ n = 0; /* check unconditional correlations first */ pc_count = 0; i = 1; do while i <= links_num; adj_test = Adj[i,.]; /* lists the adjacency to test */ v1 = adj_test[1,1]; /* variable 1 */ v2 = adj_test[1,2]; /* variable 2 */ pc = R[v1,v2]; /* correlation between variable 1 and variable 2 */ zstat = (0.5*ln((1+pc)/(1-pc)))/(sqrt(1/(obs-3))); /* Calculate zstat for corr(v1,v2) */ pval = 2*cdfnc(abs(zstat)); /* Find p-value for zstat */ if pval >= zalpha; /* If pval > alpha, there is no significant correlation */ Adj_link_Sep[i,4] = 0; /* remove the edge */ Linkmat[v1,v2] = 0; Linkmat[v2,v1] = 0; if prt == 1; print "elimination 0th order remove edge of," v1 "," v2 "," pval; endif; endif; i = i + 1; endo; seq_index = seqa(1,1,k); ii = 1; adj = zeros(1,2); do while ii <= k; j = seq_index[ii]; row_index = reshape(j,1,k); adj_index = row_index'~seq_index; adj = adj|adj_index; ii = ii+1; endo; adj = adj[2:rows(adj),.]; adj = selif(adj, (adj[.,1] ./= adj[.,2])); links_num = rows(Adj); n = 1; /* check conditional correlations of variable direcly adjacent */ do while n <= k-2; i = 1; do while i <= links_num; adj_test = Adj[i,.]; /* lists the adjacency to test */ v1 = adj_test[1,1]; /* variable 1 */ v2 = adj_test[1,2]; /* variable 2 */ old_index = selif(adj_link_sep,(adj_link_sep[.,1] .== v1) .and (adj_link_sep[.,2] .== v2)); if ismiss(old_index) == 1; old_index = selif(adj_link_sep,(adj_link_sep[.,2] .== v1) .and (adj_link_sep[.,1] .== v2)); endif; od = old_index[1,3]; if Linkmat[v1,v2] /= 0; dirlinks = Linkmat[v1 v2,.]; /* columns of adjacency matrix for the two variables */ vlist1 = 0; /* create a list of variable that are directly adjacent to the first (v1) variable */ Cond_var1 = zeros(1,2); d1 = 1; do while d1 <= cols(dirlinks); if dirlinks[1,d1] == 1; vlist1 = vlist1|d1; endif; d1 = d1 + 1; endo; num_adj1 = rows(vlist1) - 2; /* number conditioning variable excludes 0,and v1 since not relevent*/ vlist1 = vlist1[2:rows(vlist1),1]; vlist1 = selif(vlist1, (vlist1[.,.] ./= v1) .and (vlist1[.,.] ./= v2)); /* extracts rows which do not include v1 and v2 */ if num_adj1 /= 0; /* no more coditioning set to test*/ if n <= rows(vlist1); Cond_ind1 = nCr(rows(vlist1),n); /* this is the set possible variables to choose n of */ Cond_var1 = zeros(1,cols(Cond_ind1)); d2 = 1; /* now choose the variables from the list of direct adjacencies */ do while d2 <= rows(Cond_ind1); Cond_var_r1 = vlist1[Cond_ind1[d2,.]]'; Cond_var1 = Cond_var1|Cond_var_r1; d2 = d2 + 1; endo; Cond_var1 = Cond_var1[2:rows(Cond_var1),.]; i1 = 1; /* check variables directly adjacent to link */ do while i1 <= rows(Cond_var1); {pc, pval} = pcorr(R, v1, v2, Cond_var1[i1,.], obs); /* test for partial correlation */ if pval >= zalpha; /* If there is no correlation */ Adj_link_Sep[od,4] = 0; /* delete link as possible */ Linkmat[v1,v2] = 0; Linkmat[v2,v1] = 0; if prt == 1; print "elimination" n "th order remove edge of," v1 "," v2 "," "given," Cond_var1[i1,.] "," pval; endif; Adj_link_Sep[od,5:5+n-1] = Cond_var1[i1,.]; /* put the SepSet in the end of matrix */ i1 = rows(Cond_var1) + 1; /* end search for sepset for this adjacency */ /*i = links_num + 1; */ else; i1 = i1 + 1; /* if correlation exists, check next set */ endif; endo; endif; endif; endif; i = i + 1; endo; n = n + 1; endo; Adj = nCr(k,2); links_num = rows(Adj); /***************************************************************** ****************************************************************** ****************************************************************** ****************************************************************** Start PC algorithm orientation stage ****************************************************************** ****************************************************************** ****************************************************************** ****************************************************************** *****************************************************************/ /* now we have all of the direct links and need to give them direction. */ /*Consider each pair of variables (X and Y) that are unconnected by a direct edge but are connected through an undirected path through a third variable (Z). Orient X Z Y as X -> Z <- Y if, and only if, Z is not in SepSet for X and Y */ /* look at all triples and see if there are any unshielded colliders */ triples = nCr(k,3); Link_new = Adj_Link_Sep[.,4]; i1 = 1; do while i1 <= rows(triples); link1 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,2])); link2 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,2]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); link3 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); chain = link1|link2|link3; if count(chain[.,4], 0) == 1; ae = indnv(0,chain[.,4]); /* identify index of abscent edge */ v1 = chain[ae,1]; /* smaller variable for the abscent edge */ v2 = chain[ae,2]; /* larger variable for the abscent edge */ v3 = delif(triples[i1,.]', (triples[i1,.]' .== v1) .or (triples[i1,.]' .== v2)); /* third variable */ ol = intrsect(chain[ae,5:cols(chain)]', v3, 1); /* find if v3 is in the SepSet */ if ismiss(ol) == 1; /* v3 is not in SepSet for missing link */ /* so v1 -> v3 and v2 -> v3 */ if prt == 1; print "orientation (toward colider)" v1 "->" v3 "and" v2 "->" v3; endif; v1_v2 = chain[ae,3]; v1_v3 = selif(chain, (chain[.,1] .== v1) .or (chain[.,2] .== v1)); v1_v3 = selif(v1_v3, (v1_v3[.,1] .== v3) .or (v1_v3[.,2] .== v3)); v1_v3 = v1_v3[.,3]; v2_v3 = selif(chain, (chain[.,1] .== v2) .or (chain[.,2] .== v2)); v2_v3 = selif(v2_v3, (v2_v3[.,1] .== v3) .or (v2_v3[.,2] .== v3)); v2_v3 = v2_v3[.,3]; if v1 <= v3 and (Link_new[v1_v3] == -1 or Link_new[v1_v3] == 2); Link_new[v1_v3] = 2; elseif v1 <= v3; Link_new[v1_v3] = 1; elseif v1 >= v3 and (Link_new[v1_v3] == 1 or Link_new[v1_v3] == 2); Link_new[v1_v3] = 2; else; Link_new[v1_v3] = -1; endif; if v2 <= v3 and (Link_new[v2_v3] == -1 or Link_new[v2_v3] == 2); Link_new[v2_v3] = 2; elseif v2 <= v3; Link_new[v2_v3] = 1; elseif v2 >= v3 and (Link_new[v2_v3] == 1 or Link_new[v2_v3] == 2); Link_new[v2_v3] = 2; else; Link_new[v2_v3] = -1; endif; if v1 < v2; usc = usc|v1~v3~v2; /* this is the unshielded collider */ else; usc = usc|v2~v3~v1; endif; endif; i1 = i1 + 1; else; i1 = i1 + 1; endif; endo; if rows(usc) >= 2; usc = usc[2:rows(usc),.]; endif; Adj_Link_Sep[.,4] = Link_new; /* Now look at the remaining undirected links. If X -> Z and Z -- Y and X and Y are not directly connected, then orient Z -> Y */ loop_stop = 1; /* loop_stop is an index that will force the loop to stop only when no changes to the causal structure were made in the last pass through */ do while loop_stop == 1; loop_stop = 0; /* reset loop_stop */ i1 = 1; do while i1 <= rows(triples); link1 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,2])); link2 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,2]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); link3 = selif(Adj_Link_Sep, (Adj_Link_Sep[.,1] .== triples[i1,1]) .and (Adj_Link_Sep[.,2] .== triples[i1,3])); chain = link1|link2|link3; /* chain must look like this v1 -> or <> v2 -- v3 */ if count(chain[.,4], 0) == 1 and count(chain[.,4], -2) == 1; ae = indnv(0,chain[.,4]); /* identify index of abscent edge */ ue = indnv(-2,chain[.,4]); /* identify index of undirected edge */ v3 = intrsect(chain[ae,1 2]', chain[ue,1 2]', 1); /* v3 must be in both of these edges */ v1 = delif(chain[ae,1 2]', chain[ae,1 2]' .== v3); /* v1 is the other variable in ae */ v2 = delif(chain[ue,1 2]', chain[ue,1 2]' .== v3); /* v2 is the other variable in ue */ v1_v3 = chain[ae,3]; v2_v3 = chain[ue,3]; v1_v2 = delif(chain[.,3], (chain[.,3] .== v1_v3) .or (chain[.,3] .== v2_v3)); if v1 <= v2 and (Adj_Link_Sep[v1_v2,4] == 1 or Adj_Link_Sep[v1_v2,4] == 2); Loop_stop = 1; if v2 <= v3; if link_new[v2_v3] == -1 or link_new[v2_v3] == 2; link_new[v2_v3] = 2; if prt == 1; print "orientation (step 2)" v2 "<->" v3; endif; else; Link_new[v2_v3] = 1; if prt == 1; print "orientation (step 2)" v2 "->" v3; endif; endif; else; if link_new[v2_v3] == 1 or link_new[v2_v3] == 2; link_new[v2_v3] = 2; if prt == 1; print "orientation (step 2)" v2 "<->" v3; endif; else; Link_new[v2_v3] = -1; if prt == 1; print "orientation (step 2)" v2 "->" v3; endif; endif; endif; elseif v1 >= v2 and (Adj_Link_Sep[v1_v2,4] == -1 or Adj_Link_Sep[v1_v2,4] == 2); Loop_stop = 1; if v2 <= v3; if link_new[v2_v3] == -1 or link_new[v2_v3] == 2; link_new[v2_v3] = 2; if prt == 1; print "orientation (step 2)" v2 "<->" v3; endif; else; Link_new[v2_v3] = 1; if prt == 1; print "orientation (step 2)" v2 "->" v3; endif; endif; else; if link_new[v2_v3] == 1 or link_new[v2_v3] == 2; link_new[v2_v3] = 2; if prt == 1; print "orientation (step 2)" v2 "<->" v3; endif; else; Link_new[v2_v3] = -1; if prt == 1; print "orientation (step 2)" v2 "->" v3; endif; endif; endif; endif; endif; i1 = i1 + 1; endo; Adj_Link_Sep[.,4] = Link_new; /* make the changes*/ endo; /* create a matrix with the lower triangle representing the links */ C = 8*eye(k); i1 = 1; do while i1 <= rows(Adj_Link_Sep); crow = Adj_Link_Sep[i1,2]; ccol = Adj_Link_Sep[i1,1]; C[crow,ccol] = Adj_Link_Sep[i1,4]; i1 = i1 + 1; endo; /* this section is to fix the fact that -2 and 2 cannot be reversed */ cc = c; /* Now look for possible circles. If there is a directed path FROM x to y (through other variables) and there is an edge between x and y, x -> y. */ if acyclic == 1; C = C-C'; C = C + 8*eye(k); fixc1 = indexmat(cc,-2); if fixc1 /= 0; i1=1; do while i1 <= rows(fixc1); c[fixc1[i1,2],fixc1[i1,1]] = cc[fixc1[i1,1],fixc1[i1,2]]; i1 = i1+1; endo; endif; fixc2 = indexmat(cc,2); if fixc2 /= 0; i1=1; do while i1 <= rows(fixc2); c[fixc2[i1,2],fixc2[i1,1]] = cc[fixc2[i1,1],fixc2[i1,2]]; i1 = i1+1; endo; endif; /* check for undirected edges in each column*/ pt = 0; nt = 0; loopcheck = 1; /* this is the indicator that allows for iterating */ /* when a change is made set loop = 1. If there are no changes in the last round stop looping (loop = 0) */ do while loopcheck == 1; loopcheck = 0; ue = indexmat(C, -2); if ue /= 0; i1 = 1; do while i1 <= rows(ue); i2 = 1; v1 = ue[i1,2]; v2 = ue[i1,1]; /* v1 < v2 */ p1 = indexcat(C[v1,.]',1); n1 = indexcat(C[v1,.]',-1); loopstop = 0; do while loopstop == 0 and i2 <= k; /* loopstop triggers the end of check for a given link. */ /* this can happen if we get to the end of a path (i2 > k-1), or */ /* there are no additional links */ /* or we get back to v2 */ if ismiss(p1) == 1 and ismiss(n1) == 1; /* if there are no chains to follow, stop */ loopstop = 1; else; if ismiss(p1) == 0; /* elseif there are positive chains to follow */ if ismiss(intrsect(p1,v2,1)) == 0; /* cycle has returned to the other variable */ loopstop = 1; loopcheck = 1; C[ue[i1,1], ue[i1,2]] = -1; /* direct the undirected link as a -1, and stop*/ if prt == 1; print "orientation (step 3)" ue[i1,1] "->" ue[i1,2]; endif; else; /* else find the list of rows to continue checking */ pt = 0; i3 = 1; do while i3 <= rows(p1); pt = union(pt,indexcat(C[p1[i3],.]',1),1); i3 = i3 + 1; endo; p1 = selif(pt, pt .gt 0); endif; endif; if ismiss(n1) == 0; /* elseif there are positive chains to follow */ if ismiss(intrsect(n1,v2,1)) == 0; /* cycle has returned to the other variable */ loopstop = 1; loopcheck = 1; C[ue[i1,1], ue[i1,2]] = 1; /* direct the undirected link as a 1, and stop*/ if prt == 1; print "orientation (step 3)" ue[i1,1] "<-" ue[i1,2]; endif; else; /* else find the list of rows to continue checking */ n1t = 0; i3 = 1; do while i3 <= rows(n1); nt = union(nt,indexcat(C[n1[i3],.]',-1),1); i3 = i3 + 1; endo; n1 = selif(nt, nt .gt 0); endif; endif; endif; i2 = i2 + 1; endo; i1 = i1 + 1; endo; endif; endo; endif; link_choose = 0; i2 = 1; /* extract lower triangle of C as the chosen set of links */ do while i2 <= rows(C) - 1; i1 = i2 + 1; do while i1 <= rows(C); link_choose = link_choose|C[i1,i2]; i1 = i1 + 1; endo; i2 = i2 + 1; endo; final = link_choose[2:rows(link_choose)]; retp(final, usc); endp; /* this procedure returns the indices of the elements of a matrix that equal a specified value y = indexmat(x,v) x = n x k matrix v = scalar y = L x 2 matrix of indices of the elements of x that are equal to v */ proc(1) = indexmat(x,v); local y, i1, i2, y1, y2; y = zeros(1,2); i1 = 1; do while i1 <= cols(x); y1 = indexcat(x[.,i1],v); if ismiss(y1) == 0; i2 = 1; do while i2 <= rows(y1); y2 = y1[i2,1]~i1; y = y|y2; i2 = i2 + 1; endo; endif; i1 = i1 + 1; endo; if rows(y) == 1; retp(0); else; retp(y[2:rows(y),.]); endif; endp; /************************** *************************** *************************** *************************** *************************** Procedure creates the possible unshielded colliders for k variables. *************************** *************************** *************************** ***************************/ proc(1) = create_usc(k); local vs, vm, vl, vr, uscs; vs = seqa(1,1,k); uscs = {1 1 1}; vm = 1; do while vm <= k; vl = 1; do while vl <= k; if vl == vm; vl = vl + 1; endif; vr = vl + 1; do while vr <= k; if vr == vm and vm < 10; vr = vr + 1; endif; if vl <= k and vm <= k; uscs = uscs|vl~vm~vr; endif; vr = vr + 1; endo; vl = vl + 1; endo; vm = vm + 1; endo; uscs = uscs[2:rows(uscs),.]; retp(uscs); endp;