/* Must expose reset() advance() Both of these must maintain a "current_token". Since I apparently will be using the same token_type() bit, I will also need "try_match()". What would be nice, though, would be to think ahead to using this instead of current lexer for the recursive descent grammar. */ #include #include #include #include #include #include #include #include "lexer.h" #include "parser.h" int linecount = 1; int charcount = 0; char *current_token = NULL; char *next_token = NULL; static char *first_char = NULL; static char *current_char = NULL; static char *last_char = NULL; // declarations char *consume(void); // UTILITY /* * * memorymap_file() * * * Since re2c really likes to see a file as a sequence of bytes, that's what we will do... */ void memorymap_file(char *file, char **start, char **lastchar) { struct stat stat_buff; int ret = stat(file,&stat_buff); if(ret == -1) { fail("Error: stat(2) failed for file '%s'\n",file); } size_t file_size = (size_t) stat_buff.st_size; // mmap(2) the file into memory int fd = open(file,O_RDONLY); if(fd == -1) { fail("Error: open(2) failed for file %s\n",file); } *start = mmap(NULL, // Any location in memory is fine file_size, // Length of the file PROT_READ, // Let us read the file MAP_PRIVATE, // We don't need to share this fd, // Our file descriptor 0); // Start at the beginning *lastchar = *start + file_size; close(fd); // Apparently this is fine; look at man mmap(2) return; } void lexer_reset(char *file) { linecount = 1; charcount = 0; if(first_char != NULL) { munmap(first_char,last_char-first_char); } char *mmap_first_char; char *mmap_last_char; memorymap_file(file,&mmap_first_char,&mmap_last_char); first_char = mmap_first_char; current_char = mmap_first_char; last_char = mmap_last_char; current_token = consume(); next_token = consume(); } int try_match(char *wanted) { regex_t regex_wanted; if(current_token == NULL) { printf("Out of data\n"); return(0); } // add boundaries to the "wanted" regular express to make sure // that we are not doing a substring match int length = strlen(wanted); char *bounded = (char *)malloc(length+3); strcpy(bounded+1,wanted); bounded[0] = '^'; bounded[length+1] = '$'; bounded[length+2] = '\0'; int ret = regcomp(®ex_wanted,bounded,REG_EXTENDED|REG_NOSUB|REG_NEWLINE); if(ret != 0) { fail("lexer try_match(): Did not compile regex '%s'!\n",wanted); } if(regexec(®ex_wanted,current_token,0,NULL,0) == 0) { return(1); } regfree(®ex_wanted); free(bounded); return(0); } void advance() { current_token = next_token; next_token = consume(); } char *consume(void) { char *start = current_char; while(current_char < last_char) { /*!re2c re2c:define:YYCTYPE = "char"; re2c:define:YYLIMIT = last_char; re2c:define:YYCURSOR = current_char; re2c:yyfill:enable = 0; re2c:indent:top = 1; "," { // *token_type = COMMA; return(strdup(",")); } ";" { // *token_type = SEMICOLON; return(strdup(";")); } "{" { // *token_type = LBRACE; return(strdup("{")); } "}" { // *token_type = RBRACE; return(strdup("}")); } "(" { // *token_type = LPAREN; return(strdup("(")); } ")" { // *token_type = RPAREN; return(strdup(")")); } "=" { // *token_type = EQUALS; return(strdup("=")); } "if" { // *token_type = RBRACE; return(strdup("if")); } "else" { // *token_type = RBRACE; return(strdup("else")); } "while" { // *token_type = RBRACE; return(strdup("while")); } "imports" { // *token_type = RBRACE; return(strdup("imports")); } "exports" { // *token_type = RBRACE; return(strdup("exports")); } "module" { // *token_type = MODULE; return(strdup("module")); } "declarations" { // *token_type = RBRACE; return(strdup("declarations")); } "main" { // *token_type = RBRACE; return(strdup("main")); } "functions" { // *token_type = RBRACE; return(strdup("functions")); } "program" { // *token_type = RBRACE; return(strdup("program")); } "var" { // *token_type = RBRACE; return(strdup("var")); } [0-9]+ { // *token_type = NUMBER; char *x = strndup(start,(size_t)(current_char-start)); return(x); } [a-zA-Z][a-zA-Z0-9_.]* { // *token_type = IDENTIFIER; char *x = strndup(start,(size_t)(current_char-start)); return(x); } [\042][^\042]*[\042] { // *token_type = STRING; char *x = strndup(start,(size_t)(current_char-start-1)); return(x); } [ \t] { ++start; continue; } [^] { linecount++; ++start; continue; } */ } // end of input return(NULL); } void fail(char *string, ...) { va_list args; va_start(args,string); fprintf(stderr,"at line %d, char position %d: ",linecount,charcount); vfprintf(stderr,string,args); fprintf(stderr,"\n"); va_end(args); exit(1); }