(******************************************************************************) (* plx scanner *) (* (C) 2010 Steffen Solyga *) (******************************************************************************) IMPLEMENTATION MODULE Scnr; IMPORT SysLib; IMPORT FIO; CONST bufLen = 1024; maxKwCnt = 20; err = FIO.ErrorOutput; cEOF = 32C; (* = 1AH = 26 = ASCII.sub (TS) *) cLF = 12C; TYPE TKwDesc = RECORD sym: TSymbol; id : CARDINAL; END; VAR chr: CHAR; chp: TPosDesc; buf: ARRAY [ 0 .. bufLen - 1 ] OF CHAR; len: CARDINAL; kwt: ARRAY [ 0 .. maxKwCnt - 1 ] OF TKwDesc; kwc: CARDINAL; id0: CARDINAL; (* first non-kw *) PROCEDURE Err( s: ARRAY OF CHAR ); BEGIN FIO.WrStr( err, 'Scnr (' ); FIO.WrCard( err, pos.l, 0 ); FIO.WrChar( err, ',' ); FIO.WrCard( err, pos.c, 0 ); FIO.WrStr( err, '): ' ); FIO.WrStr( err, s ); FIO.WrStr( err, '.' ); FIO.WrLn( err ); SysLib.exit( -1 ); END Err; PROCEDURE ShowKw( i: CARDINAL ); VAR l: CARDINAL; BEGIN IF i >= len THEN Err( 'ShowKw: Invalid index' ) END; FIO.WrCard( err, i, 2 ); FIO.WrStr( err, ': ' ); l := ORD( buf[i] ); INC( i ); IF l <= 0 THEN Err( 'ShowKw: Invalid length' ) END; FIO.WrCard( err, l, 2 ); FIO.WrStr( err, ' >' ); DEC( l ); WHILE l > 0 DO FIO.WrChar( err, buf[i] ); INC( i ); DEC( l ); END; FIO.WrStr( err, '<' ); FIO.WrLn( err ); END ShowKw; PROCEDURE Show(); VAR i: CARDINAL; BEGIN FIO.WrStr( err, 'len = ' ); FIO.WrCard( err, len, 0 ); FIO.WrStr( err, ' id = ' ); FIO.WrCard( err, id, 0 ); FIO.WrStr( err, ' id0 = ' ); FIO.WrCard( err, id0, 0 ); FIO.WrLn( err ); i := 0; WHILE i < len DO ShowKw( i ); INC( i, ORD(buf[i]) ); END; FIO.WrLn( err ); END Show; PROCEDURE ShowSym(); VAR c: CHAR; BEGIN c := FIO.PrefixChar; FIO.PrefixChar := '0'; FIO.WrStr( err, 'sym = ' ); FIO.WrHex( err, ORD(sym), 2 ); FIO.WrLn( err ); FIO.PrefixChar := c; END ShowSym; PROCEDURE Get(); BEGIN IF chr = cEOF THEN RETURN END; IF chr = cLF THEN (* UNIX and DOS text ok *) INC( chp.l ); chp.c := 1; ELSE INC( chp.c ); END; chr := FIO.RdChar( src ); END Get; PROCEDURE Alpha(): BOOLEAN; BEGIN RETURN ( '0' <= chr ) & ( chr <= '9' ) OR ( 'A' <= CAP(chr) ) & ( CAP(chr) <= 'Z' ); END Alpha; PROCEDURE Number(); VAR d: CARDINAL; BEGIN sym := sNum; num := 0; REPEAT d := ORD( chr ) - ORD( '0' ); IF ( MAX(LONGCARD) - LONGCARD(d) ) DIV 10 >= num THEN num := 10 * num + LONGCARD( d ); ELSE Err( 'Number too large' ); END; Get(); UNTIL ~Alpha(); END Number; PROCEDURE Diff( i, j: CARDINAL ): INTEGER; VAR l: CARDINAL; BEGIN l := ORD( buf[i] ); (* always >0 *) WHILE l > 0 DO IF buf[i] # buf[j] THEN RETURN INTEGER( ORD(buf[i]) ) - INTEGER( ORD(buf[j]) ); END; INC( i ); INC( j ); DEC( l ); END; RETURN 0; END Diff; PROCEDURE Id(); VAR a, b, m: CARDINAL; BEGIN sym := sId; len := id + 1; REPEAT IF len >= bufLen THEN Err( 'Buffer full' ) END; buf[ len ] := chr; INC( len ); Get(); UNTIL ~Alpha(); buf[ id ] := CHR( len - id ); a := 0; b := kwc; (* kwc = 0 okay *) WHILE a < b DO m := ( a + b ) DIV 2; IF Diff(id,kwt[m].id) <= 0 THEN b := m ELSE a := m + 1 END; END; (* a = b *) IF ( a < kwc ) & ( Diff(id,kwt[a].id) = 0 ) THEN sym := kwt[ a ].sym; END; END Id; PROCEDURE Put( s: ARRAY OF CHAR ); VAR i: CARDINAL; BEGIN i := 0; len := id + 1; REPEAT IF len >= bufLen THEN Err( 'Buffer full' ) END; buf[ len ] := s[ i ]; INC( len ); INC( i ); UNTIL i >= HIGH( s ); buf[ id ] := CHR( len - id ); END Put; PROCEDURE KeepId(); BEGIN id := len; END KeepId; PROCEDURE Read(); PROCEDURE Comment(); VAR lvl: CARDINAL; BEGIN Get(); lvl := 1; WHILE lvl > 0 DO CASE chr OF | '(' : Get(); IF chr = '*' THEN INC( lvl ); Get() END; | '*' : Get(); IF chr = ')' THEN DEC( lvl ); Get() END; | cEOF: Err( 'Unterminated comment (start pos)' ); ELSE Get(); END; END; END Comment; BEGIN LOOP (* ignore control characters except EOF *) IF chr <= ' ' THEN IF chr = cEOF THEN EXIT END; Get(); ELSIF chr >= 177C THEN Get(); ELSE EXIT; END; END; pos := chp; CASE chr OF | cEOF : sym := sEOF; (*| '!' : sym := sExcl; Get(); *) | '#' : sym := sNeq; Get(); | '&' : sym := sAnd; Get(); | '(' : Get(); IF chr = '*' THEN Comment(); Read() ELSE sym := sLPar END; | ')' : sym := sRPar; Get(); | '*' : sym := sMul; Get(); | '+' : sym := sPlus; Get(); | '-' : sym := sMinus; Get(); | '.' : sym := sDot; Get(); | '/' : sym := sDiv; Get(); | '0'..'9': Number(); | ':' : Get(); IF chr = '=' THEN sym := sBec; Get() ELSE sym := sCol END; | ';' : sym := sSemi; Get(); | '<' : Get(); CASE chr OF | '=': sym := sLeq; Get(); | '>': sym := sNeq; Get(); ELSE sym := sLss; END; | '=' : sym := sEql; Get(); | '>' : Get(); IF chr = '=' THEN sym := sGeq; Get() ELSE sym := sGtr END; | 'A'..'Z': Id(); | 'a'..'z': Id(); | '|' : sym := sOr; Get(); | '~' : sym := sNot; Get(); ELSE Err( 'Invalid character' ); END; END Read; PROCEDURE PutKw( sym: TSymbol; str: ARRAY OF CHAR ); BEGIN IF kwc >= maxKwCnt THEN Err( 'Bug: Too many keywords' ) END; kwt[ kwc ].sym := sym; kwt[ kwc ].id := id; INC( kwc ); Put( str ); KeepId(); END PutKw; BEGIN (* exported *) src := FIO.StandardInput; sym := sNull; num := 0; id := 0; pos := chp; (* local *) chr := cLF; chp.l := 0; chp.c := 0; buf[ 0 ] := 0C; len := 0; kwc := 0; PutKw( sDo, 'DO' ); PutKw( sIf, 'IF' ); PutKw( sEnd, 'END' ); PutKw( sVar, 'VAR' ); PutKw( sElse, 'ELSE' ); PutKw( sThen, 'THEN' ); PutKw( sBeg, 'BEGIN' ); PutKw( sConst, 'CONST' ); PutKw( sElsif, 'ELSIF' ); PutKw( sUntil, 'UNTIL' ); PutKw( sWhile, 'WHILE' ); PutKw( sWrt, 'WRITE' ); PutKw( sRepeat, 'REPEAT' ); id0 := id; END Scnr.