//ADJUST JOB DU.D04.ZU1660,TAUCHEN,TIME=(5,00),PRTY=0
/*JOBPARM COPIES=3
//*
//* DD names used by the in-stream SAS program:
//*   WORK      - library named by the PROC DATASETS DDNAME=WORK steps
//*   IN20-IN80 - raw input files, one per INFILE INxx statement
//*   OUT       - new cataloged data set, fixed-block 80-byte records,
//*               written by the final DATA _NULL_ step (FILE OUT)
//*
// EXEC SAS
//WORK DD UNIT=SYSDA,SPACE=(CYL,(40,10),,,ROUND)
//IN20 DD DSN=DGETAU.SP.DATA.RAW.SPWORK20,DISP=SHR
//IN30 DD DSN=DGETAU.SP.DATA.RAW.SPWORK30,DISP=SHR
//IN40 DD DSN=DGETAU.SP.DATA.RAW.SPWORK40,DISP=SHR
//IN50 DD DSN=DGETAU.SP.DATA.RAW.SPWORK50,DISP=SHR
//IN60 DD DSN=DGETAU.SP.DATA.RAW.SPWORK60,DISP=SHR
//IN70 DD DSN=DGETAU.SP.DATA.RAW.SPWORK70,DISP=SHR
//IN80 DD DSN=DGETAU.SP.DATA.RAW.SPWORK80,DISP=SHR
//OUT DD DSN=DGETAU.SP.DATA.SPADJUST.YR2887,DISP=(NEW,CATLG),
// DCB=(RECFM=FB,LRECL=80,BLKSIZE=3600),
// SPACE=(TRK,(25,5),RLSE),
// UNIT=DISK,VOL=REF=DU.DISK
//SYSIN DD *
*-------------------------------------------------------;
*  11/14/88          ADJUST.SAS                         ;
*                                                       ;
*  Purpose:  SAS program to remove systematic calendar  ;
*            effects from the location and scale of     ;
*            the S&P composite price index and the      ;
*            total volume of trading on the NYSE,       ;
*            daily, 1928-87.                            ;
*                                                       ;
*  Collaborators:  A. Ronald Gallant, NC State          ;
*                  Peter Rossi, U. of Chicago           ;
*                  George Tauchen, Duke Univ.           ;
*                                                       ;
*  Programmer:  George Tauchen                          ;
*               Department of Economics                 ;
*               Duke University                         ;
*               Durham, NC 27706                        ;
*                                                       ;
*  Note:  The main steps of the program are to:         ;
*                                                       ;
*         (1) Regress P and V on calendar variables,    ;
*             where P is 100 times the log first        ;
*             difference of the price index and         ;
*             V is the log volume.                      ;
*                                                       ;
*         (2) Regress the log squared residuals from    ;
*             step (1) on calendar variables.           ;
*                                                       ;
*         (3) Adjust the residuals from (1)             ;
*             by dividing by the estimate of            ;
*             conditional scale obtained from           ;
*             step (2).                                 ;
*                                                       ;
*         (4) Linearly transform the rescaled           ;
*             residuals from (3) so that the sample     ;
*             means equal those of the raw P, V data    ;
*             and the sample standard deviations        ;
*             equal the residual standard deviations    ;
*             from step (1).  This step puts            ;
*             the adjusted series back into the         ;
*             units of the original P, V series.        ;
*                                                       ;
*         (5) Output the results as the adjusted        ;
*             volume, price series.
;
*                                                       ;
*-------------------------------------------------------;
*                                                       ;
*                    Start Program                      ;
*                                                       ;
*-------------------------------------------------------;
* Read the raw data into data sets W20-W80              ;
*-------------------------------------------------------;

OPTIONS LINESIZE=78;

*-------------------------------------------------------;
* READRAW reads one raw file into a work data set and   ;
* prints its summary statistics.  DEC is the two-digit  ;
* suffix shared by the DD name (INxx) and the data set  ;
* name (Wxx).  Each record supplies four numeric        ;
* fields in list format: YYMMDD DAY VOL PRICE           ;
*-------------------------------------------------------;
%MACRO READRAW(DEC);
   DATA W&DEC;
      INFILE IN&DEC;
      INPUT YYMMDD DAY VOL PRICE;
   RUN;

   PROC MEANS DATA = W&DEC;
   RUN;
%MEND READRAW;

%READRAW(20)
%READRAW(30)
%READRAW(40)
%READRAW(50)
%READRAW(60)
%READRAW(70)
%READRAW(80)

*-------------------------------------------------------;
* Combine W20-W80 into a single dataset                 ;
*-------------------------------------------------------;
DATA ZERO;
   SET W20 W30 W40 W50 W60 W70 W80;
RUN;

* The per-file data sets are no longer needed once      ;
* they have been stacked into ZERO                      ;
PROC DATASETS DDNAME=WORK NOLIST;
   DELETE W20 W30 W40 W50 W60 W70 W80 ;

*-------------------------------------------------------;
* Create the relevant variables                         ;
*-------------------------------------------------------;
*                                                                ;
*  V       - LOG(NYSE VOLUME)                                    ;
*  P       - 100*(LOG(SP INDEX) - LOG(LAG(SP INDEX)))            ;
*  T       - TREND  [_N_/16127]                                  ;
*  T2      - T*T                                                 ;
*  YY      - YEAR  (28, 29, ..., 87)                             ;
*  MM      - MONTH (1, 2, ..., 12)                               ;
*  DD      - DAY   (1, 2, ..., 31)                               ;
*  YEAR    - 1900 + YY                                           ;
*  DAY     - DAY OF WEEK (1=MONDAY, ..., 6=SATURDAY)             ;
*  DAYx    - DUMMIES FOR (DAY = x), x=1, 2, ..., 6               ;
*  YRxx    - WWII DUMMIES FOR (YY = xx), xx = 41, 42, 43, 44, 45 ;
*  HOL     - HOLIDAY DUMMY                                       ;
*  WKEND   - WEEKEND DUMMY                                       ;
*  GAP     - NUMBER OF CALENDAR DAYS FROM PRECEDING TRADING DAY  ;
*  GAPx    - DUMMIES FOR (GAP = x), x = 1,2,3,4,5                ;
*  LEAPYR  - DUMMY, 1 = LEAP YEAR, 0 OTHERWISE                   ;
*  BASE    - NUMBER OF CALENDAR DAYS IN MONTHS OF CURRENT        ;
*            QUARTER THAT PRECEDE CURRENT MONTH                  ;
*                                                                ;
*  DAYOFQ  - DAY OF QUARTER (1, 2, ..., 92)                      ;
*                                                                ;
*  MONxx   - MONTH OF YEAR DUMMIES (xx = 2, 3,
..., 11)                                                         ;
*  MON01_x - WEEK WITHIN JANUARY  (x = 1, 2, 3, 4)               ;
*  MON12_x - WEEK WITHIN DECEMBER (x = 1, 2, 3, 4)               ;
*            (x = 4 covers day 22 through day 31)                ;
*                                                                ;
*                                                                ;
DATA ONE;
   SET ZERO;

   * Log volume and percentage log price change.  P uses the   ;
   * lagged PRICE, so it has no value on the first observation ;
   V = LOG(VOL);
   P = 100*( LOG(PRICE) - LOG(LAG(PRICE)) );

   * Trend terms.  The divisor 16127 is hard-coded, presumably ;
   * the number of observations in ZERO - confirm whenever the ;
   * input files change                                        ;
   T  = _N_/16127;
   T2 = T*T;

   * Split the numeric YYMMDD stamp into year, month and day   ;
   YY   = FLOOR(YYMMDD/10000);
   DD   = MOD(YYMMDD,100);
   MM   = (YYMMDD - 10000*YY - DD)/100;
   YEAR = 1900+YY;

   * Day-of-week dummies                                       ;
   DAY1 = (DAY EQ 1);
   DAY2 = (DAY EQ 2);
   DAY3 = (DAY EQ 3);
   DAY4 = (DAY EQ 4);
   DAY5 = (DAY EQ 5);
   DAY6 = (DAY EQ 6);

   * HOL is 1 when DAY does not follow the DAY of the previous ;
   * record in the weekly cycle.  The cycle has six days up to ;
   * 520526 and five days from 520527 onward                   ;
   HOL = ( YYMMDD LE 520526 )*( MOD(LAG(DAY)+1,6) NE MOD(DAY,6) )
       + ( YYMMDD GE 520527 )*( MOD(LAG(DAY)+1,5) NE MOD(DAY,5) );

   * WKEND is 1 when the previous record has a larger DAY      ;
   WKEND = ( LAG(DAY) > DAY);

   * Year dummies for 1941 through 1945                        ;
   YR41 = (YEAR EQ 1941);
   YR42 = (YEAR EQ 1942);
   YR43 = (YEAR EQ 1943);
   YR44 = (YEAR EQ 1944);
   YR45 = (YEAR EQ 1945);

   * SAS date of the record.  The four-digit YEAR is passed so ;
   * that the result does not depend on how the session maps   ;
   * two-digit years to a century (YEARCUTOFF option)          ;
   DAYNUM = MDY(MM,DD,YEAR);

   * Calendar days since the previous record, and its dummies  ;
   GAP  = DAYNUM - LAG(DAYNUM);
   GAP1 = (GAP EQ 1);
   GAP2 = (GAP EQ 2);
   GAP3 = (GAP EQ 3);
   GAP4 = (GAP EQ 4);
   GAP5 = (GAP EQ 5);

   * Divisible-by-four rule only.  Adequate for YY = 28 to 87  ;
   * because no century year falls in that range               ;
   LEAPYR = (MOD(YY,4) EQ 0);

   * Days in the months of the current quarter that precede    ;
   * the current month                                         ;
   BASE = (MM EQ 1)* 0
        + (MM EQ 2)* (0+31)
        + (MM EQ 3)*( (0+31+28)*(1-LEAPYR)+(0+31+29)*LEAPYR )
        + (MM EQ 4)* 0
        + (MM EQ 5)* (0+30)
        + (MM EQ 6)* (0+31+30)
        + (MM EQ 7)* 0
        + (MM EQ 8)* (0+31)
        + (MM EQ 9)* (0+31+31)
        + (MM EQ 10)* 0
        + (MM EQ 11)* (0+31)
        + (MM EQ 12)* (0+30+31);

   * January is split into four within-month periods           ;
   MON01_1 = (MM EQ 1 AND (1 LE DD AND DD LE 7) );
   MON01_2 = (MM EQ 1 AND (8 LE DD AND DD LE 14) );
   MON01_3 = (MM EQ 1 AND (15 LE DD AND DD LE 21) );
   MON01_4 = (MM EQ 1 AND (22 LE DD AND DD LE 31) );

   * One dummy per month for February through November         ;
   MON02 = (MM EQ 2);
   MON03 = (MM EQ 3);
   MON04 = (MM EQ 4);
   MON05 = (MM EQ 5);
   MON06 = (MM EQ 6);
   MON07 = (MM EQ 7);
   MON08 = (MM EQ 8);
   MON09 = (MM EQ 9);
   MON10 = (MM EQ 10);
   MON11 = (MM EQ 11);

   * December is split the same way as January                 ;
   MON12_1 = (MM EQ 12 AND (1 LE DD AND DD LE 7) );
   MON12_2 = (MM EQ 12 AND (8 LE DD AND DD LE 14) );
   MON12_3 = (MM EQ 12 AND (15 LE DD AND DD LE 21) );
   MON12_4 = (MM EQ 12 AND (22 LE DD AND DD LE 31) );

   DAYOFQ = BASE + DD;

   LABEL P = '100*LOG(SP/SP_1)';
   LABEL V = 'LOG(NYSE Volume)';

   * The next statement is a disabled subsetting IF, kept from ;
   * the original program                                      ;
   * IF (_N_ LE 2000);

* ZERO is no longer needed once ONE has been built             ;
PROC DATASETS DDNAME=WORK NOLIST;
   DELETE ZERO;

*-------------------------------------------------------;
* Keep a copy of the raw data around for later use;
*-------------------------------------------------------;
* RAW holds the date, the calendar flags and the        ;
* unadjusted series.  It is merged with the adjusted    ;
* series in DATA SIX further down                       ;
DATA RAW;
   SET ONE;
   KEEP YYMMDD DAY HOL WKEND GAP V P VOL PRICE;

*-------------------------------------------------------;
* Check the values of GAP;
*-------------------------------------------------------;
* Summary statistics for all of ONE, then for P, V and  ;
* YYMMDD within each distinct value of GAP              ;
PROC MEANS DATA=ONE;

DATA CHECKGAP;
   SET ONE;
   KEEP GAP P V YYMMDD;

PROC SORT DATA=CHECKGAP;
   BY GAP;

PROC MEANS DATA=CHECKGAP;
   BY GAP;

*-------------------------------------------------------;
* Regress on dummies and/or trend, keep residuals in RES01;
*-------------------------------------------------------;
PROC DATASETS DDNAME=WORK NOLIST;
   DELETE CHECKGAP;

* A sequence of progressively larger models is fitted   ;
* for P and then for V.  DAY1, GAP1 and MON02 never     ;
* appear as regressors, so they act as the reference    ;
* categories.  TEST, RESTRICT and OUTPUT belong to the  ;
* MODEL statement that precedes them, so the statement  ;
* order below must be preserved                         ;
*                                                       ;
* P: T and T2 are tested jointly in the full model, and ;
*    the residuals PR and fitted values PH are saved    ;
*    from the final model, which omits T and T2         ;
* V: residuals VR and fitted values VH are saved from   ;
*    the full model with T and T2.  The RESTRICT makes  ;
*    the GAP2 to GAP5 coefficients change by a constant ;
*    step from one gap length to the next               ;
PROC REG DATA=ONE ;
   MODEL P=T T2;
   MODEL P= YR41-YR45;
   MODEL P= YR41-YR45 DAY2-DAY6;
   MODEL P= YR41-YR45 DAY2-DAY6 GAP2-GAP5;
   MODEL P=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5
           MON01_1-MON01_4 MON03-MON11 MON12_1-MON12_4;
      TEST T=0, T2=0;
   MODEL P= YR41-YR45 DAY2-DAY6 GAP2-GAP5
           MON01_1-MON01_4 MON03-MON11 MON12_1-MON12_4;
      OUTPUT OUT=RES011 R=PR P=PH;
   MODEL V=T T2;
   MODEL V=T T2 YR41-YR45;
   MODEL V=T T2 YR41-YR45 DAY2-DAY6;
   MODEL V=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5;
   MODEL V=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5
           MON01_1-MON01_4 MON03-MON11 MON12_1-MON12_4;
      RESTRICT GAP4=2*GAP3-GAP2, GAP5=3*GAP3-2*GAP2;
      OUTPUT OUT=RES012 R=VR P=VH;

* There is no BY statement, so the two output data sets ;
* are paired observation by observation in stored order ;
DATA RES01;
   MERGE RES011 RES012;

*-------------------------------------------------------;
* Keep a copy of the residual second moments in STATS;
*-------------------------------------------------------;
* STATS receives the means (xxBAR) and the standard     ;
* deviations (xxSE) of the fitted values and residuals. ;
* DATA FIVE reads PHBAR, PRSE, VHBAR and VRSE from it   ;
PROC MEANS DATA=RES01;
   VAR PH PR VH VR;
   OUTPUT MEAN = PHBAR PRBAR VHBAR VRBAR
          STD = PHSE PRSE VHSE VRSE
          OUT = STATS;

PROC PRINT DATA=STATS;

*-------------------------------------------------------;
* Form log squared residuals;
*-------------------------------------------------------;
* ONE and RES01 are paired by position (no BY).  LPP    ;
* and LVV are the dependent variables of the second     ;
* set of regressions                                    ;
DATA TWO;
   MERGE ONE RES01;
   LPP = LOG(PR*PR);
   LVV = LOG(VR*VR);

PROC DATASETS DDNAME=WORK NOLIST;
   DELETE ONE;

*-------------------------------------------------------;
* Regress log squared residuals on dummies and trend    ;
* and retain predicted values;
*-------------------------------------------------------;
* Same layout as the first set of regressions: TEST,    ;
* RESTRICT and OUTPUT belong to the MODEL statement     ;
* that precedes them.  Fitted values are saved as LPPH  ;
* from the full LPP model and as LVVH from the full LVV ;
* model, the latter under the same RESTRICT on the GAP  ;
* coefficients that is used for V.  Both saved models   ;
* include T and T2                                      ;
PROC REG DATA=TWO ;
   MODEL LPP=T T2;
   MODEL LPP=T T2 YR41-YR45;
   MODEL LPP=T T2 YR41-YR45 DAY2-DAY6;
   MODEL LPP=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5;
   MODEL LPP=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5
             MON01_1-MON01_4 MON03-MON11 MON12_1-MON12_4;
      OUTPUT OUT=RES021 P=LPPH;
   MODEL LVV=T T2;
   MODEL LVV=T T2 YR41-YR45;
   MODEL LVV=T T2 YR41-YR45 DAY2-DAY6;
   MODEL LVV=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5;
   MODEL LVV= YR41-YR45 DAY2-DAY6 GAP2-GAP5
             MON01_1-MON01_4 MON03-MON11 MON12_1-MON12_4;
   MODEL LVV=T T2 YR41-YR45 DAY2-DAY6 GAP2-GAP5
             MON01_1-MON01_4 MON03-MON11 MON12_1-MON12_4;
      RESTRICT GAP4=2*GAP3-GAP2, GAP5=3*GAP3-2*GAP2;
      OUTPUT OUT=RES022 P=LVVH;

* Positional merge (no BY).  Only the two fitted series ;
* are kept                                              ;
DATA RES02;
   MERGE RES021 RES022;
   KEEP LPPH LVVH;

PROC DATASETS DDNAME=WORK NOLIST;
   DELETE RES011 RES012 RES021 RES022;

*-------------------------------------------------------;
* Normalize original residuals by the predicted         ;
* standard deviation;
*-------------------------------------------------------;
* Each residual is divided by the square root of the    ;
* exponential of its fitted log squared residual.  The  ;
* subsetting IF drops observations with missing P       ;
DATA THREE;
   MERGE TWO RES02;
   ZP = PR/SQRT(EXP(LPPH));
   ZV = VR/SQRT(EXP(LVVH));
   KEEP YYMMDD ZP ZV VR;
   IF (P NE .);

PROC DATASETS DDNAME=WORK NOLIST;
   DELETE TWO;

*-------------------------------------------------------;
* Standardize normalized residuals and then rescale to  ;
* units of original residuals;
*-------------------------------------------------------;
* ZP and ZV are standardized to mean 0 and standard     ;
* deviation 1 in FOUR                                   ;
PROC STANDARD DATA=THREE MEAN=0 STD=1 OUT=FOUR;
   VAR ZP ZV;

PROC DATASETS DDNAME=WORK NOLIST;
   DELETE THREE;

* STATS is read once, on the first iteration, and its   ;
* values are then used on every observation.  The       ;
* adjusted series is the mean of the first-stage fitted ;
* values plus the first-stage residual standard         ;
* deviation times the standardized residual             ;
DATA FIVE;
   SET FOUR;
   IF _N_=1 THEN SET STATS;
   PADJ = PHBAR + PRSE*ZP;
   VADJ = VHBAR + VRSE*ZV;
   KEEP YYMMDD VR VADJ PADJ;

PROC DATASETS DDNAME=WORK NOLIST;
   DELETE FOUR;

PROC MEANS DATA=FIVE;

*-------------------------------------------------------;
* Bring in a copy of the original raw data;
*-------------------------------------------------------;
* Match-merge on YYMMDD, which requires both inputs to  ;
* be in ascending YYMMDD order.  The first merged       ;
* observation is deleted, presumably because P is       ;
* missing there and THREE dropped it, so FIVE has no    ;
* adjusted values for that date                         ;
DATA SIX;
   MERGE RAW FIVE;
   BY YYMMDD;
   IF (_N_ EQ 1) THEN DELETE;

PROC MEANS DATA=SIX;

* Distribution summaries, with normality tests, for the ;
* volume residual and for the raw and adjusted series   ;
PROC UNIVARIATE DATA=SIX NORMAL;
   VAR VR VADJ P PADJ;
*-------------------------------------------------------;
* Write out adjusted data;
*-------------------------------------------------------;
* One record per observation of SIX, written to the OUT ;
* file without title lines.  Record layout by column:   ;
*    1- 6  YYMMDD                                       ;
*    7- 8  DAY                                          ;
*    9-10  HOL                                          ;
*   11-12  WKEND                                        ;
*   13-15  GAP                                          ;
*   17-29  VADJ  (13.10)                                ;
*   31-47  PADJ  (E17.)                                 ;
*   49-61  V     (13.10)                                ;
*   63-79  P     (E17.)                                 ;
* Columns 16, 30, 48 and 62 are left blank              ;
DATA _NULL_;
   SET SIX;
   FILE OUT NOTITLES;
   PUT YYMMDD  1-6
       DAY     7-8
       HOL     9-10
       WKEND  11-12
       GAP    13-15
       @17 VADJ 13.10
       @31 PADJ E17.
       @49 V    13.10
       @63 P    E17.;
RUN;