C------------------------------------------------------------------------------ C ISUSSE By Matt Seidl C June 6, 1996 C This program takes measured data from a t-file and predicted data from C a *.out file and produces a new file with statistical analysis and C data arranged in columns for easy import and graphing in Excel C C Last update June 11, 1996 Lines 336 - 339, and 381 C----------------------------------------------------------------------------- IMPLICIT NONE CHARACTER*13 FILET,FILEM CHARACTER*2 TRTNO,CROPID,TT CHARACTER*8 FILE,FILE1,COMP CHARACTER*10 TYPE CHARACTER*250 RowG,ROWN,ROWW,ROW250,G250,P250 CHARACTER*4 GCOLP,GCOLM CHARACTER*6 USE,TRT,HEAD,HEADER,T1,T2,T3 CHARACTER*1 CONT,TYP CHARACTER*40 EXPER,RN DIMENSION VAR(150,42),GCOLP(42),GCOLM(42),PRED(150,42) DIMENSION GMATCH(42),HEADER(42),FL(42) INTEGER I,LENGTH,VARN,J,K,YEAR,YR,BDAT,DATE,TR,TOP INTEGER GMATCH,FL1,FL2,FL3,FL4,FL5,FL,L,RUNS,M,N,O,P REAL VAR,PRED,SUMX,SUMY,SUMX2,SUMY2,SUMXY,SUMDIF2,SUMDIF,D REAL STDERR,LASTJ,RMSE,PCTERR,PERERR,C,RSQ,CORR,MEANDIF,AVGERR C ----DEFINITIONS---------------------------------------------------- C FILET -- The T-file to be read in C FILEM -- The statistical output file -- Uses IBSNAT filename with extension .[type][treatment#] C TRTNO -- Treatment number (character) C CROPID -- The IBSNAT crop C TT -- String used to annotate treatment number to filem C FILE -- The original IBSNAT file name without extension C FILE1 -- Used to output type of data in filem C TYPE -- Used to read for the special header line required by this program (! GROWTH, ! NITROGEN, ! WATER) C ROWG -- Used to read in the header row for the growth section C ROWN -- Used to read in the header row for the nitrogen section C ROWW -- Used to read in the header row for the water section C ROW250 -- Used to read in the numerical data C G250 -- Used to read in a line from the *.out file C P250 -- Used in matching the headers from the *.out file with the T-file C GCOLP -- An array containing the headers from the *.out file C GCOLM -- An array containing the headers from the T-file C USE -- Used to find the beginning of a section C TRT -- Used to find the right treatment in a section C HEAD -- Used to read in the headers from the T-file C HEADER -- An array used to write headers to the output file C T1,T2,T3 -- Used as flags for comparison with USE C CONT -- Read in from the user to determine if the t-file is in the proper format C TYP -- Composes the [type] part of filem C EXPER -- The treatment name C C I,J,K,L -- Loop counters C LENGTH -- The length of the header line from the T-file without spaces after it C VARN -- The number of variables in the T-file C YR -- Year multiplied by 1000 C YEAR -- Year of the simulation C BDAT -- The first date of predicted data C DATE -- The first day of simulation is read into date and yr is subtracted to get BDAT C TR -- The treatment as a number instead of a string C TOP -- Defines how many lines there are before the actual data in a *.out file C GMATCH -- Array used to match up headers from the T-file and the *.out file C FL1,FL2,FL3,FL4,FL5 -- Flags that tell the program which types of data have and have not been run through C FL -- An array used to flag if a measured data set does not have a corresponding predicted data set C C VAR -- The array which contains the measured data from the IBSNAT T-file C - Data in the form VAR(row,column) C - The number of rows is LASTJ and the number of columns is VARN C - In this program, VAR is often used where the program is looking C for VAR(J,I+2). This is used because the first two columns C should always be TRTNO and DATE and these do not correspond to C any measured data. C PRED -- The array which contains the predicted data C - Data should be in the same rows and columns as VAR C SUMX -- The sum of the measured values C SUMY -- The sum of the predicted values C SUMX2 -- The sum of the squares of the measured values C SUMY2 -- The sum of the squares of the predicted values C SUMXY -- The sum of the x y products C SUMDIF2 -- The sum of difference between x and y squared C SUMDIF -- The sum of the x and y differences C D -- A counter used to determine how many variables are available C C -- A counter used in the R-squared calculation C STDERR -- Standard error (from Excel) C LASTJ -- The number of records of measured data C RMSE -- Root Mean Squared Error (SSE averaged and rooted) C PCTERR -- Percent error C PERERR -- Used to calculate percent error C RSQ -- R-squared of the equation y = x (determines how well the data fits the 1:1 line) C CORR -- Correlation coefficient (determines how well the data fit any line) C MEANDIF -- The sum of the differences between a measured value and the average of the measured values squared C AVGERR -- The average error (the average residual) C ----INSTRUCTIONS--------------------------------------------------- DO 1002 I = 1,80 WRITE(*,*) 1002 CONTINUE WRITE(*,11) 11 FORMAT(' --------------------------------------------------------- $---------------------') WRITE(*,12) 12 FORMAT(' ISUSSE By Matt Seidl',/,' June 6, 1996') WRITE(*,13) 13 FORMAT(' This program takes measured data from a t-file and predic $ted data from',/,' a *.out file and produces new files with statis $tical analysis and',/,' data arranged in columns for easy import a $nd graphing in Excel.') WRITE(*,11) write(*,14) 14 FORMAT(1X,/) WRITE(*,1) 1 FORMAT(' Before running this program, you must have your T-File in $ the proper format with these additions:') WRITE(*,2) 2 FORMAT(' 1. Each section of measured data must have a heading.' $,/,' This program will read for the headings ! GROWTH,! NITROG $EN, and ! WATER.',/,' These headings must appear directly ahea $d of the line of headers that',' begins with @TRNO.') WRITE(*,7) 7 FORMAT(' 2. All columns and headers must be LEFT justified. ') WRITE(*,15) 15 FORMAT(X,/,' BE SURE TO SET THE X-FILE TO OUTPUT GROWTH, WATER, AN $D NITROGEN ON A DAILY BASIS',/) WRITE(*,3) 3 FORMAT(' IF YOU ARE READY TO CONTINUE, ENTER C. ',/) write(*,14) READ(*,4)CONT 4 FORMAT(A1) IF(CONT.EQ.'c'.OR.CONT.EQ.'C') GOTO 5 STOP C ----This section added June 12, 1996 -- allows reading of numerous runs 5 RUNS = 0 OPEN(13,FILE='GROWTH.OUT') OPEN(14,FILE='NITROGEN.OUT') OPEN(15,FILE='WATER.OUT') OPEN(16,FILE='GTEMP.OUT') OPEN(17,FILE='NTEMP.OUT') OPEN(18,FILE='WTEMP.OUT') DO 501 M = 1, 3 READ(13,510)ROWG READ(14,510)ROWN READ(15,510)ROWW 510 FORMAT(A250) WRITE(16,510)ROWG WRITE(17,510)ROWN WRITE(18,510)ROWW 501 CONTINUE DO 502 M = 1, 100000 READ(13,4,END=503)CONT IF(CONT.EQ.'*')RUNS = RUNS +1 502 CONTINUE 503 REWIND(13) DO 550 M = 1,RUNS DO 540 P = 16,18 IF(M.LT.10)COMP = '*RUN '//CHAR(M+48) IF(M.GE.10)THEN COMP = '*RUN '//CHAR(M/10+48)//CHAR(M-(M/10)*10+48) ENDIF DO 505 N = 1, 100000 READ(P-3,510,END=560)ROW250 IF(ROW250(1:8).EQ.COMP)GOTO 506 505 CONTINUE 506 WRITE(P,510)ROW250 DO 507 O = 1,450 READ(P-3,510,END=508)ROW250 IF(ROW250(1:1).EQ.'*')GOTO 508 WRITE(P,32)ROW250 507 CONTINUE 508 REWIND(P) DO 509 I = 1,3 READ(P,*) 509 CONTINUE REWIND(P-3) 540 CONTINUE C ------------------------------------------------------------------- C ----INITIALIZATION------------------------------------------------- T1 = ' ' T2 = ' ' T3 = ' ' FILE1 = 'GROWTH' TYP = 'G' FL1=0 FL2=0 FL3=0 FL4=0 FL5=0 DO 1001 J = 1, 150 DO 1000 I = 1, 42 FL(I) = 0 GMATCH(I) = 0 GCOLP(I) = ' ' GCOLM(I) = ' ' HEADER(I) = ' ' PRED(J,I) = 0 VAR(J,I) = 0 1000 CONTINUE 1001 CONTINUE C ------------------------------------------------------------------- C -----THIS PROGRAM ALWAYS STARTS WITH THE GROWTH FILE TO DETERMINE SOME C OF THE PARAMETERS. IT WILL CLOSE THIS FILE WHEN THERE ARE NO C MEASURED DATA C ---------------------------------------------------------------------- OPEN(10,FILE='GTEMP.OUT',STATUS='OLD') REWIND(10) DO 6 I = 1,3 READ(10,*) 6 CONTINUE READ(10,22) RN READ(10,*) 22 FORMAT(18X,A40) READ(10,20) FILE,CROPID 20 FORMAT(18X,A8,1X,A2) FILET = FILE //'.'//CROPID//'T' READ(10,30) TRTNO,EXPER 30 FORMAT(11X,A2,5X,A40) DO 29 I = 1,3 READ(10,*) 29 CONTINUE READ(10,31)YEAR 31 FORMAT(27X,I2) YR = YEAR * 1000 DO 28 I = 1,10000 READ(10,32)ROW250 IF(ROW250(1:1).EQ.'@')THEN TOP=I GOTO 21 ENDIF 28 CONTINUE 32 FORMAT(A250) 21 READ(10,33)DATE 33 FORMAT(1X,I5) BDAT=DATE-YR OPEN(11,FILE=FILET,STATUS='OLD') REWIND(11) DO 54 I = 1,10000 READ(11,51,END=55)TYPE 51 FORMAT(A10) IF(TYPE.EQ.'! GROWTH')THEN FL1 =1 READ(11,32)ROWG READ(TYPE,52)T1 52 FORMAT(A6) ENDIF IF(TYPE.EQ.'! NITROGEN')THEN READ(11,32)ROWN FL2 = 1 READ(TYPE,52)T2 ENDIF IF(TYPE.EQ.'! WATER')THEN READ(11,32)ROWW FL3 = 1 READ(TYPE,52)T3 ENDIF 54 CONTINUE 55 CONTINUE IF(FL1.EQ.0.AND.FL2.EQ.0.AND.FL3.EQ.0) GOTO 322 C ------AT THIS POINT, THE PROGRAM HAS DETERMINED WHICH TYPES OF DATA C (GROWTH, NITROGEN, OR WATER) IT WILL HAVE TO DEAL WITH. FLAGS C ARE SET AND THE HEADER ROW IS READ FOR EACH TYPE. NOW, IT IS C NECESSARY TO READ THE MEASURED DATA INTO AN ARRAY. THE PROGRAM C WILL READ THROUGH SEVERAL TIMES IF THERE ARE DATA OF EACH TYPE. C THE RETURN POINT IS LINE 5000. FROM HERE IT DETERMINES USING C THE FLAGS WHICH TYPES HAVE BEEN READ AND WHICH HAVE NOT. SOME C OF THE VARIABLES WILL ALSO BE REINITIALIZED. C --------------------------------------------------------------------- 5000 REWIND(10) REWIND(11) IF(FL1.EQ.0)FL4 = 1 IF(FL1.EQ.0.AND.FL2.EQ.0)FL5 = 1 IF(FL1.EQ.0.AND.FL2.EQ.0.AND.FL3.EQ.0) GOTO 239 IF(FL2.EQ.1.AND.FL4.EQ.1)THEN CLOSE(10) OPEN(10,FILE='NTEMP.OUT',STATUS = 'OLD') FILE1 = 'NITROGEN' TYP = 'N' DO 56 I = 1,TOP+10 READ(10,*) 56 CONTINUE READ(10,32)ROW250 READ(10,33)DATE BDAT=DATE-YR REWIND(10) ROWG = ROWN DO 2001 J = 1, 150 DO 2000 I = 1, 42 FL(I) = 0 GMATCH(I) = 0 GCOLP(I) = ' ' GCOLM(I) = ' ' HEADER(I) = ' ' PRED(J,I) = 0 VAR(J,I) = 0 2000 CONTINUE 2001 CONTINUE ENDIF IF(FL3.EQ.1.AND.FL5.EQ.1)THEN CLOSE(10) OPEN(10,FILE='WTEMP.OUT',STATUS = 'OLD') FILE1 = 'WATER' TYP = 'W' DO 57 I = 1,TOP+10 READ(10,*) 57 CONTINUE READ(10,32)ROW250 READ(10,33)DATE BDAT=DATE-YR REWIND(10) ROWG = ROWW DO 3001 J = 1, 150 DO 3000 I = 1, 42 FL(I) = 0 GMATCH(I) = 0 GCOLP(I) = ' ' GCOLM(I) = ' ' HEADER(I) = ' ' PRED(J,I) = 0 VAR(J,I) = 0 3000 CONTINUE 3001 CONTINUE ENDIF READ(ROW250,70)(GCOLP(I),I = 1,42) 70 FORMAT(6X,42(2X,A4)) READ(ROWG,71)(GCOLM(I),I=1,42) 71 FORMAT(12X,42(2X,A4)) DO 75 J = 1,42 DO 74 K = 1,42 IF(GCOLP(J).EQ.GCOLM(K)) GMATCH(K)=J 74 CONTINUE 75 CONTINUE 219 FORMAT(A6) 220 DO 218 I = 1,42 READ(ROWG(I*6-5:250),219)HEAD IF(HEAD.EQ.' ') GOTO 217 HEADER(I) = HEAD 218 CONTINUE 217 LENGTH = (I-1)*6 VARN = LENGTH / 6 TRT = ' '//TRTNO DO 222 K = 1,10000 READ(11,32,END=250)G250 READ(G250,221)USE 221 FORMAT(A6) IF(FL1.EQ.1.AND.USE.EQ.T1)GOTO 223 IF(FL2.EQ.1.AND.FL4.EQ.1.AND.USE.EQ.T2)GOTO 223 IF(FL3.EQ.1.AND.FL5.EQ.1.AND.USE.EQ.T3)GOTO 223 222 CONTINUE STOP 223 DO 227 K = 1,10000 READ(11,32,END=250)G250 READ(G250,221)USE IF(USE.EQ.TRT) GOTO 224 227 CONTINUE 224 READ(G250,*)(VAR(1,I),I=1,VARN) 225 J=2 226 READ(11,*,END=230,ERR=226)(VAR(J,I),I=1,VARN) IF(VAR(J,1).EQ.0.OR.VAR(J,1).NE.VAR(J-1,1)) GOTO 230 IF(VAR(J,2).EQ.VAR(J-1,2))THEN DO 228 L = 3, VARN IF(VAR(J-1,L).NE.-99.AND.VAR(J,L).NE.-99)THEN VAR(J-1,L) = (VAR(J,L)+VAR(J-1,L))/2 ENDIF IF(VAR(J-1,L).EQ.-99.AND.VAR(J,L).NE.-99)VAR(J-1,L) = VAR (J,L) 228 CONTINUE J = J-1 ENDIF J = J +1 GOTO 226 230 CONTINUE LASTJ = J - 1 C ---AT THIS POINT, ALL MEASURED DATA IS IN AN ARRAY VAR(ROWS,COLS) WHICH C HAS LASTJ ROWS AND VARN COLUMNS C ---NOW THE DATA IN THE *****.OUT FILE WILL BE PICKED KNOWING THE LASTJ C NUMBER OF MEASURED RECORDS, THE FIRST PREDICTED DATE, BDAT, AND THE C MATCH BETWEEN COLUMNS IN T-FILE AND ******.OUT, GMATCH( ) C ---------------------------------------------------------------------- DO 300 I = 1, (TOP+11+ VAR(1,2)-YR-BDAT) READ(10,*,end=344,err=344) 300 CONTINUE 310 DO 350 J = 1,LASTJ IF(J.NE.1.AND.(VAR(J,2)-VAR(J-1,2)).NE.1)THEN DO 340 K = 1,(VAR(J,2)-VAR(J-1,2)-1) READ(10,*,ERR=344,END=344) 340 CONTINUE ENDIF READ(10,32)P250 DO 345 I = 1,VARN-2 READ(P250(GMATCH(I)*6+1:GMATCH(I)*6+7),*,END=344)PRED(J,i+2) IF(GMATCH(I).EQ.0)THEN PRED(j,I+2) = -99 FL(I+2) = 1 ENDIF READ(P250(1:6),*)PRED(J,2) 345 CONTINUE 350 CONTINUE READ(TRTNO,352)TR 352 FORMAT(I2) IF(TR.LT.10)TT = '0'//CHAR(M+48) IF(TR.GE.10)TT = CHAR(M/10+48)//CHAR(M-(M/10)*10+48) FILEM = FILE//'.'//TYP//TT OPEN(12,FILE=FILEM) WRITE(*,351)FILE1,FILEM 351 FORMAT(1X,A8,' OUTPUT FILE = ',A13) WRITE(12,399)FILE1,FILET,TRTNO,EXPER,RN 399 FORMAT(A8,' STATISTICAL OUTPUT FOR COMPARISON BETWEEN MEASURED AND $ PREDICTED VALUES',/,'FILE = ',A13,' TREATMENT = ',A2,5X,A40,/, $' RUN = ',A40,/) DO 400 I = 1, VARN-2 WRITE(12,401)HEADER(I+2) 401 FORMAT(A12) WRITE(12,402) 402 FORMAT(' MEASURED PREDICTED') SUMX=0 SUMY=0 SUMXY=0 SUMX2=0 SUMY2=0 SUMDIF=0 SUMDIF2=0 PERERR=0 STDERR=0 RMSE=0 AVGERR=0 CORR=0 PCTERR=0 RSQ=0 C = 0 D = 0 DO 405 J = 1,LASTJ IF(VAR(J,I+2).NE.-99)THEN WRITE(12,403)VAR(J,i+2),PRED(J,I+2) 403 FORMAT(2F12.3) SUMX = SUMX + VAR(j,I+2) SUMY = SUMY + PRED(J,I+2) SUMXY = SUMXY + VAR(J,I+2) * PRED(J,i+2) SUMX2 = SUMX2 + VAR(J,i+2)**2 SUMY2 = SUMY2 + PRED(J,i+2)**2 SUMDIF = SUMDIF + ABS(VAR(J,I+2) - PRED(J,I+2)) SUMDIF2 = SUMDIF2 + (VAR(J,I+2) - PRED(J,I+2))**2 IF(VAR(J,I+2).NE.0) THEN PERERR = PERERR + (ABS(VAR(J,I+2) - PRED(J,I+2)) / VAR(J,I+2)*100) C = C+1 ENDIF D = D+1 ENDIF 405 CONTINUE IF(FL(i+2).NE.1) THEN IF(D.GT.2.AND.(D*SUMX2 - SUMX**2).NE.0)THEN IF((D*SUMY2-SUMY**2-((D*SUMXY-SUMX*SUMY)**2)/(D*SUMX2-SUMX**2)) $.GT.0) STDERR = ((1/(D*(D-2)))*(D*SUMY2 - SUMY**2 $ - (((D*SUMXY - SUMX*SUMY)**2)/(D*SUMX2 - SUMX**2))))**.5 ENDIF IF(C.NE.0)PCTERR = PERERR/C IF(D.NE.0)THEN RMSE = (SUMDIF2/D)**.5 MEANDIF = 0 DO 407 J = 1,LASTJ IF(VAR(J,I+2).NE.-99)MEANDIF = MEANDIF + (VAR(J,i+2)-(SUMX/D))**2 407 CONTINUE IF(MEANDIF.GT.0.AND.D.GT.2)RSQ = (MEANDIF - SUMDIF2)/MEANDIF IF((SUMX2-(SUMX**2)/D).GT.0.AND.(SUMY2-(SUMY**2)/D).GT.0)THEN CORR = (SUMXY - SUMX*SUMY/D) / (((SUMX2 - (SUMX**2)/D)* $ (SUMY2 - (SUMY**2)/D))**.5) ENDIF AVGERR = SUMDIF/D ENDIF WRITE(12,406)STDERR,RMSE,PCTERR,AVGERR,CORR,RSQ 406 FORMAT(/,'STD ERROR = ' F12.3,/,'ROOT MSE = ',F12.3,/,'PCT ERROR $= ',F12.3,/,'AVG ERROR = ',F12.3,/, $'CORR (r) = ',F12.3,/,'R-SQUARED = ',F12.3) ENDIF WRITE(12,404) 404 FORMAT(/) 400 CONTINUE IF(FL1.EQ.1)THEN FL1=0 ENDIF IF(FL4.EQ.1)THEN FL2=0 ENDIF IF(FL5.EQ.1)GOTO 239 CLOSE(12) GOTO 5000 239 CLOSE(10) CLOSE(11) 550 CONTINUE WRITE(*,240) 240 FORMAT(' JOB DONE.') STOP 250 CONTINUE WRITE(*,251) 251 FORMAT(' OUT OF DATA -- TREATMENT NOT FOUND' ) STOP 344 write(*,333) 333 format(' ERROR -- RAN OUT OF PREDICTED DATA',/,' THIS IS A RESULT $OF ONE OF THE FOLLOWING:',/,' 1. The simulation started after $the measured data.',/,' 2. The simulation ended too soon -- in $crease the harvest date.',/,' 3. The model is not writing dail $y output to the .out files.') STOP 322 WRITE(*,321) 321 FORMAT(' ERROR -- You either do not have a t-file for this experim $ent',/,' or You did not set up your t-file with the required $ headers.') STOP 560 WRITE(*,561)m 561 FORMAT(' ERROR -- RUN ',i2,' NOT FOUND') STOP END