'-- file: scanner.adb -- ($Id: scanner.adb,v 1.2 2005/09/06 11:20:18 baker Exp baker $) -- Ada hoc hand-coded scanner implementation. -- Needs improvement. -- Error recovery is very primitive. with Ada.Text_IO; use Ada.Text_IO; package body Scanner is C : Character := ' '; Is_Whitespace : constant array (Character) of Boolean := (' ' | ASCII.Nul | ASCII.HT => True, others => False); Is_Operator : constant array (Character) of Boolean := ('+' | '-' | '*' | '/' | ')' | '(' => True, others => False); Is_Letter : constant array (Character) of Boolean := ('a' .. 'z' | 'A' .. 'Z' => True, others => False); Is_Digit : constant array (Character) of Boolean := ('0' .. '9' => True, others => False); -- Note that Ada's standard text input package is based on -- that of Pascal, which goes to a lot of trouble to not treat -- ends of lines as characters. That traces back to the -- original Pascal implementation, which was on an -- architecture that had 6-bit characters and where the -- encoding of line-ends was context-sensitive, depending on -- where in a 60-bit word the line end came. That is, in CDC -- "display code" the zero-byte (6-bits of zero) was used for -- the character ":", and there is no end-of-line character. -- An end of line was normally indicated by two bytes of zero -- in the 9th and 10th positions of a word, in which case all -- immediately preceding zero bytes were also included in the -- end-of-line mark. This had the side-effect that if one -- printed "12345678::" it would be read back as "12345678", -- and "123:::::::" would be read back as "123". However, in -- this case we are trying to limit input to one line at a -- time, so we add the routine Next_C to make the end of line -- visible as a token. At_End : Boolean := False; procedure Next_C is pragma Inline (Next_C); begin if At_End then Skip_Line; At_End := False; end if; if not End_Of_Line then Get (C); else C := ASCII.LF; At_End := True; end if; end Next_C; procedure Get_Next_Token is T : Token renames Current_Token; begin while Is_Whitespace (C) loop Next_C; end loop; if Is_Operator (C) then T.Id := Token_Id (C); Next_C; elsif Is_Digit (C) then T.ID := 'N'; T.Intval := 0; loop T.Intval := T.Intval * 10 + (Character'Pos (C) - Character'Pos ('0')); Next_C; exit when not Is_Digit (C); end loop; elsif At_End then T.ID := Eol_Token; C := ' '; else T.Id := Err_Token; Next_C; end if; exception when Constraint_Error => T.Id := Err_Token; Next_C; end Get_Next_Token; function Image (T : in Token) return String is begin if T.ID = 'N' then return Integer'Image (T.Intval); end if; if T.ID = Start_Token then return "*START*"; end if; if T.ID = Err_Token then return "*ERROR*"; end if; if T.ID = Eol_Token then return "*EOL*"; end if; return (1 => Character (T.ID)); end Image; end Scanner;