/************************************************************************* parser.h Version 6.50 This module contains the public declarations. Copyright (c) 2002-2014 SLK Systems, all rights reserved. *************************************************************************/ #ifndef _PARSER_H #define _PARSER_H #include "SlkParse.h" #include "SlkString.h" // ----------------------------------------------------------------------- // general constants // ----------------------------------------------------------------------- typedef unsigned int bool; typedef unsigned int uint; typedef unsigned long ulong; typedef unsigned char uchar; typedef unsigned short ushort; #define loop for(;;) #define TRUE 1 #define FALSE 0 #ifndef NULL #define NULL ( (void *) 0 ) #endif #define PUBLIC #define PRIVATE static // ----------------------------------------------------------------------- // namespace for the public constructors // // The extra Parser prefix should avoid any name conflicts with external // code. In the very unlikely event that an external say MakeMemorySet() // needs to be used in the program it can be invoked as (MakeMemorySet)(), // preventing the preprocessor from changing it to ParserMakeMemorySet(). // ----------------------------------------------------------------------- /******** do not need to use this here ****** #define MakeMemorySet ParserMakeMemorySet #define MakeSymbol ParserMakeSymbol #define MakeTree ParserMakeTree #define MakeAction ParserMakeAction #define MakeError ParserMakeError #define MakeScanner ParserMakeScanner #define MakeToken ParserMakeToken #define MakeOptions ParserMakeOptions #define MakeParser ParserMakeParser ************************************/ // ------------------------------------------------------------------------ // memory_set_t // ------------------------------------------------------------------------ typedef struct _memory_set { void * (*allocate) ( void *memory_set_handle, uint length ); char * (*save_string) ( void *memory_set_handle, char *string, uint length ); char * (*resave_string) ( void *memory_set_handle, char *old_string, char *new_string ); uint (*get_size) ( void *memory_set_handle ); void (*release) ( void *memory_set_handle ); void *handle; } memory_set_t; PUBLIC memory_set_t MakeMemorySet ( int chunk_size ); // ----------------------------------------------------------------------- // symbol_t // // symbol table entry node // ----------------------------------------------------------------------- typedef struct _symbol { struct _symbol *next; char *name; slk_size_t token; uchar scope; uchar deleted; } symbol_t; // ------------------------------------------------------------------------ // Symbols_t // ------------------------------------------------------------------------ typedef struct _Symbols { symbol_t* (*lookup) ( char *name ); symbol_t* (*insert) ( slk_size_t token, char *name ); void (*release_table_scope) ( void ); void (*set_new_table_scope) ( void ); uchar (*get_table_scope) ( void ); void (*print_table) ( void ); void (*release) ( void ); } Symbols_t; PUBLIC Symbols_t MakeSymbol ( uint table_size ); // ------------------------------------------------------------------------ // Tree_t // ------------------------------------------------------------------------ typedef struct _Tree { void (*predict) ( slk_size_t production_number ); void (*reduce) ( slk_size_t production_number ); void (*show_source) ( void ); void (*show_tree) ( void ); void (*show_parse_derivation) ( void ); void (*finish_tree) ( void ); void (*release) ( void ); } Tree_t; PUBLIC Tree_t MakeTree ( void ); // ------------------------------------------------------------------ // Action_t // ------------------------------------------------------------------ typedef struct _Action { void (*show_source) ( void ); void (*show_tree) ( void ); void (*show_parse_derivation) ( void ); void (*release) ( void ); SlkAction slk; } Action_t; PUBLIC Action_t MakeAction ( void ); // ----------------------------------------------------------------------- // Error_t // ----------------------------------------------------------------------- typedef struct _Error { uint (*get_total_errors) ( void ); uint (*get_total_warnings) ( void ); void (*set_max_errors) ( int number ); void (*set_max_warnings) ( int number ); SlkError slk; } Error_t; PUBLIC Error_t MakeError ( void ); // ------------------------------------------------------------------------ // Scanner_t // ------------------------------------------------------------------------ typedef struct _Scanner { int (*get_line_number) ( void ); symbol_t* (*get) ( void ); } Scanner_t; PUBLIC Scanner_t MakeScanner ( char *input ); // ------------------------------------------------------------------------ // Tokens_t // ------------------------------------------------------------------------ typedef struct _Tokens { int (*get_line_number) ( void ); void (*repeat_last) ( void ); void (*release) ( void ); struct { symbol_t* (*get_current) ( void ); symbol_t* (*get_last_identifier) ( void ); void (*set_last_identifier) ( symbol_t *symbol ); struct { symbol_t* (*get_next) ( void ); void (*show_all) ( void ); void (*reset) ( void ); } list; } symbols; SlkToken slk; } Tokens_t; PUBLIC Tokens_t MakeToken ( char *input ); // ------------------------------------------------------------------------ // Options_t // A struct containing the command line arg results // These are used globally here, so must be at least defined, if not set // Override this struct by just redefining it somewhere else // ------------------------------------------------------------------------ typedef struct _Options { char *file_name; uint max_errors, max_warnings; bool DO_NOT_PRUNE_TREE : 1; bool RECREATE_SOURCE : 1; bool DISPLAY : 1; bool JUMP_EDITOR : 1; bool NAMETABLE : 1; bool SHOW_DERIVATION : 1; bool SHOW_PARSE_TREE : 1; bool SHOW_SYMBOLS_LIST : 1; bool TRACE_ERRORS : 1; bool TRACE_ACTIONS : 1; bool TRACE_NAMES : 1; bool TRACE_PEEKER : 1; bool TRACE_SYMBOLS : 1; bool TRACE_STATES : 1; bool TRACE_TYPEDEF : 1; } Options_t; PUBLIC Options_t MakeOptions ( int argc, char *argv [] ); // ------------------------------------------------------------------------ // Parser_t // Constructs a global Parser for the program // ------------------------------------------------------------------------ typedef struct _Parser { void (*parse) ( void ); void (*release) ( void ); } Parser_t; PUBLIC Parser_t MakeParser ( char *input ); // ------------------------------------------------------------------------ // Global Parser for the program namespaced with prefix "Parser" // ------------------------------------------------------------------------ /******** do not need to use this here ****** #define Options ParserOptions #define Symbols ParserSymbols #define Tokens ParserTokens #define Error ParserError #define Action ParserAction #define Parser ParserParser ************************************/ PUBLIC Options_t Options; PUBLIC Symbols_t Symbols; PUBLIC Tokens_t Tokens; PUBLIC Error_t Error; PUBLIC Action_t Action; PUBLIC Parser_t Parser; #endif /****************************************************************************** action.c Version 6.43 Semantic actions that are invoked by their index through the call table. Changes the symbol table entry to TYPEDEF_NAME_ when a typedef declaration is recognized. Scopes variables, handling TYPEDEF_NAME_ redefined as an identifier in an inner scope. Copyright (c) 2002-2014 SLK Systems, all rights reserved. ******************************************************************************/ #include #include #include "SlkConstants.h" #include "parser.h" PRIVATE uint Trace, Trace_typedef, Trace_states; PRIVATE char *File_name; PRIVATE Tree_t Tree; // ------------------------------------------------------------------ // actions // ------------------------------------------------------------------ void FinishParse ( void ) { Tree.finish_tree (); } void SetTypedefName ( void ) { symbol_t *last_identifier = Tokens.symbols.get_last_identifier(); last_identifier->token = TYPEDEF_NAME_; if ( Trace_typedef ) { printf ( " '%s' token is TYPEDEF_NAME \n", last_identifier->name ); } } void NewScope ( void ) { Symbols.set_new_table_scope (); } void ReleaseScope ( void ) { Symbols.release_table_scope (); } void ShowName ( void ) // debugging only { symbol_t *current, *last_identifier; if ( Trace ) { current = Tokens.symbols.get_current(); printf ( " current->name is %s \n", current->name ); last_identifier = Tokens.symbols.get_last_identifier(); printf ( " last_identifier->name is %s \t %s\n", last_identifier->name, SlkGetSymbolName ( last_identifier->token ) ); } } PRIVATE void state ( slk_size_t number ) { register slk_size_t *rhs; register slk_size_t *lhs; slk_size_t *state; int rhs_length, state_length, index, dot_index, dot_printed, production_number, rhs_index; if ( ! Trace_states ) { return; } printf ( "State %u\n", number ); state = SlkGetState ( number ); state_length = *state++; for ( index = 1; index < state_length; index += 2 ) { production_number = *state++; dot_index = *state++; lhs = SlkGetProductionArray ( production_number ); rhs_length = *lhs - 1; ++lhs; printf ( " %s -->", SlkGetSymbolName ( *lhs ) ); rhs = lhs + 1; dot_printed = FALSE; for ( rhs_index = 1; rhs_index <= rhs_length; ++rhs_index, ++rhs ) { if ( dot_index == rhs_index ) { printf ( " " ); dot_printed = TRUE; } printf ( " %s", SlkGetSymbolName ( *rhs ) ); } if ( ! dot_printed ) { printf ( " " ); } putchar ( '\n' ); } } #include "SlkExecute.txt" // ------------------------------------------------------------------ // SlkAction constructor // ------------------------------------------------------------------ PUBLIC Action_t MakeAction ( void ) { Action_t action; File_name = Options.file_name; Trace = Options.TRACE_ACTIONS; Trace_states = Options.TRACE_STATES; Trace_typedef = Options.TRACE_TYPEDEF; Tree = MakeTree(); action.slk.execute = execute; action.slk.predict = Tree.predict; action.slk.reduce = Tree.reduce; action.slk.state = state; action.show_source = Tree.show_source; action.show_tree = Tree.show_tree; action.show_parse_derivation = Tree.show_parse_derivation; action.release = Tree.release; return action; } /***************************************************************************** error.c Version 6.40 Parse error recovery routines. Copyright (c) 2002-2014 SLK Systems, all rights reserved. 09-19-14 Fixed TYPEDEF check in no_entry 09-26-14 Only print 1 error per line, delay no_entry printing 09-27-14 Added state cases, stuck logic to no_entry_lr ******************************************************************************/ #include #include #include "error.h" #include "SlkConstants.h" #include "parser.h" PRIVATE uint mismatch_line_number, // avoid duplicate messages no_entry_line_number, mismatch_stuck_count, no_entry_stuck_count, jump_editor, trace; PRIVATE uint total_warnings, total_errors, max_total_warnings, max_total_errors; PRIVATE char *file_name, pending_error [ 128 ] = ""; PRIVATE uint get_total_errors ( void ) { return total_errors; } PRIVATE uint get_total_warnings ( void ) { return total_warnings; } PRIVATE void set_max_errors ( int number ) { max_total_errors = number; } PRIVATE void set_max_warnings ( int number ) { max_total_warnings = number; } // ----------------------------------------------------------------------- // mismatch // // Assumes the token is missing, and inserts it. // In special cases like : for ;, replace : with ; instead of inserting. // Scan only if the parser gets stuck in a loop. // ----------------------------------------------------------------------- PRIVATE slk_size_t mismatch ( slk_size_t terminal, slk_size_t token ) { symbol_t *last_identifier; char *name; *pending_error = '\0'; // erase it if ( token == IDENTIFIER_ ) { last_identifier = Tokens.symbols.get_last_identifier(); name = last_identifier->name; } else { name = SlkGetSymbolName(token); } if ( mismatch_line_number == Tokens.get_line_number () ) { ++mismatch_stuck_count; if ( trace ) { printf ( "%s:%u: expecting '%s' but found '%s'\n", file_name, Tokens.get_line_number(), SlkGetSymbolName(terminal), name ); } token = terminal; // insert the token if ( mismatch_stuck_count > 99 ) { token = Tokens.slk.get(); // advance the input mismatch_stuck_count = 0; } } else { if ( ! mismatch_stuck_count ) { ++total_errors; printf ( "%s:%u: expecting '%s' but found '%s'\n", file_name, Tokens.get_line_number(), SlkGetSymbolName(terminal), name ); switch ( terminal ) { case SEMI_ : if ( token != COLON_ ) { Tokens.repeat_last(); } break; default : Tokens.repeat_last(); break; } token = terminal; // insert the missing token } mismatch_stuck_count = 0; } mismatch_line_number = Tokens.get_line_number (); if ( total_errors > max_total_errors ) { printf ( " max errors exceeded\n" ); exit(0); } return ( token ); } // ----------------------------------------------------------------------- // no_entry // // The symbol is a nonterminal. No parse table entry was found. // // The strategy is to try to continue without scanning. If the parser // gets into a loop, then scan. Many of these errors are followed // by a mismatch error that corrects well by insertion or replacement // of the token in the mismatch routine above. // // If the error token is TYPEDEF_NAME_, tell the parser to try again // using IDENTIFIER_. This is an easy way to correct many problems. // See also the action code for more on typedef. // ----------------------------------------------------------------------- PRIVATE slk_size_t no_entry ( slk_size_t nonterminal, slk_size_t token, int level ) { symbol_t *last_identifier, *new; char *name = ""; if ( token == TYPEDEF_NAME_ ) { last_identifier = Tokens.symbols.get_last_identifier(); new = Symbols.insert ( IDENTIFIER_, last_identifier->name ); Tokens.symbols.set_last_identifier ( new ); if ( trace ) { printf ( "-- no_entry: changing TYPEDEF_NAME_ to IDENTIFIER_ : " "'%s' --\n", last_identifier->name ); } return IDENTIFIER_ ; } if ( no_entry_line_number == Tokens.get_line_number () ) { ++no_entry_stuck_count; if ( no_entry_stuck_count > 99 ) { ++total_errors; token = Tokens.slk.get(); // advance the input no_entry_stuck_count = 0; } } else { no_entry_stuck_count = 0; } no_entry_line_number = Tokens.get_line_number (); if ( trace ) { printf ( "%s:%u: nonterminal: '%s' token: '%s'\n", file_name, Tokens.get_line_number(), SlkGetSymbolName(nonterminal), SlkGetSymbolName(token) ); } if ( no_entry_stuck_count == 0 ) { if ( *pending_error ) { // only print if no mismatch printf ( "%s", pending_error ); *pending_error = '\0'; // erase it ++total_errors; } if ( token == IDENTIFIER_ ) { last_identifier = Tokens.symbols.get_last_identifier(); name = last_identifier->name; } else { name = SlkGetSymbolName(token); } sprintf ( pending_error, "%s:%u: syntax error: '%s' not expected\n", file_name, Tokens.get_line_number(), name ); } if ( total_errors == 0 ) { ++total_errors; // in case just 1 error } if ( total_errors > max_total_errors ) { exit(0); } return token; } // ----------------------------------------------------------------------- // no_entry_lr // // The symbol is a state. No parse table entry was found. // If the error token is TYPEDEF_NAME_, tell the parser to try again // using IDENTIFIER_. This is an easy way to correct many problems. // See also the action code for more on typedef. // ----------------------------------------------------------------------- PRIVATE slk_size_t no_entry_lr ( slk_size_t state, slk_size_t token, int level ) { symbol_t *last_identifier, *new; char *name = ""; static int previous_state = 0; static char cmd [128]; if ( token == TYPEDEF_NAME_ ) { last_identifier = Tokens.symbols.get_last_identifier(); new = Symbols.insert ( IDENTIFIER_, last_identifier->name ); Tokens.symbols.set_last_identifier ( new ); if ( trace ) { printf ( "-- no_entry: changing TYPEDEF_NAME_ to IDENTIFIER_ : " "'%s' --\n", last_identifier->name ); } return IDENTIFIER_ ; } if ( jump_editor ) { sprintf ( cmd, "ed.exe %s -l%u", file_name, Tokens.get_line_number()); system ( cmd ); exit(0); } if ( trace ) { printf ( "%s:%u: state: %d token: '%s'\n", file_name, Tokens.get_line_number(), state, SlkGetSymbolName (token) ); } if ( no_entry_line_number != Tokens.get_line_number () ) { if ( token == IDENTIFIER_ ) { last_identifier = Tokens.symbols.get_last_identifier(); name = last_identifier->name; } else { name = SlkGetSymbolName(token); } printf ( "%s:%u: syntax error: '%s' not expected\n", file_name, Tokens.get_line_number(), name ); if ( ++total_errors > max_total_errors ) { printf ( " max errors exceeded\n" ); exit(0); } no_entry_line_number = Tokens.get_line_number (); } switch ( state ) { case 77 : token = SEMI_ ; Tokens.repeat_last(); break; case 259 : case 265 : token = RPAREN_ ; Tokens.repeat_last(); break; case 313 : token = SEMI_ ; break; default : if ( state == previous_state ) { token = Tokens.slk.get(); // advance the input } } if ( state == previous_state ) { ++no_entry_stuck_count; } else { no_entry_stuck_count = 0; } if ( no_entry_stuck_count > 9 ) { token = Tokens.slk.get(); // advance the input no_entry_stuck_count = 0; } previous_state = state; return token; } // ----------------------------------------------------------------------- // input_left // // The parse stack is empty, but input is still remaining. // Usually happens after an improperly recovered error. // ----------------------------------------------------------------------- PRIVATE void input_left ( void ) { if ( *pending_error ) { // did not get printed printf ( "%s", pending_error ); *pending_error = '\0'; // erase it } if ( total_errors == 0 ) { ++total_errors; printf ( "%s:%u: incomplete parse\n", file_name, Tokens.get_line_number()); } else if ( trace ) { printf ( "%s:%u: incomplete parse\n", file_name, Tokens.get_line_number()); } } PRIVATE void message ( char *message ) { puts ( message ); } // ----------------------------------------------------------------------- // SlkError // ----------------------------------------------------------------------- PUBLIC Error_t MakeError ( void ) { Error_t error = { get_total_errors, get_total_warnings, set_max_errors, set_max_warnings, { mismatch, no_entry, input_left, message } }; trace = Options.TRACE_ERRORS; jump_editor = Options.JUMP_EDITOR; file_name = Options.file_name; #ifdef LR error.slk.no_entry = no_entry_lr; // LR grammar #endif mismatch_line_number = 0; mismatch_stuck_count = 0; no_entry_line_number = 0; no_entry_stuck_count = 0; total_warnings = 0; total_errors = 0; max_total_warnings = 30; max_total_errors = 30; return error; } /************************************************************************* memory.c Version 6.30 This module contains the memory allocation code. Copyright (c) 2002-2014 SLK Systems, all rights reserved. 09-08-14 Moved dump_buffer_set *************************************************************************/ #include #include #include #include "parser.h" // --------------------------------------------------------------------------- // private variables // --------------------------------------------------------------------------- typedef struct _buffer { char *buffer; int size_remaining; void *malloc_pointer; struct _buffer *next; } buffer_t; typedef struct _buffer_set { buffer_t *current, *first; uint size, chunk_size; void *handle; } buffer_set_t; #define DEFAULT_MEMORY_CHUNK_SIZE ( 1024 * 64 ) #define MINIMUM_MEMORY_CHUNK_SIZE ( 1024 * 1 ) // --------------------------------------------------------------------------- // allocate // // Return zeroed memory of length, aligned on double word boundary. // Return NULL on failure. // // --------------------------------------------------------------------------- PRIVATE void * allocate ( void *memory_set_handle, uint length ) { register buffer_set_t *buffer_set = (buffer_set_t *) memory_set_handle; register buffer_t *current; uint size = length, chunk_size, align_size = sizeof ( double ), available, extra, odd, max_size; void *malloc_pointer, *memory = NULL; if ( ! buffer_set ) { printf ( "allocate: null memory set \n" ); return memory; } if ( buffer_set->handle != memory_set_handle ) { printf ( "allocate: invalid handle: 0x%x \n", memory_set_handle ); return memory; } if ( ! length ) { printf ( "allocate: zero length memory request \n" ); return memory; } chunk_size = buffer_set->chunk_size; max_size = chunk_size - sizeof ( buffer_t ) - align_size * 2 ; current = buffer_set->current; if ( size > current->size_remaining ) { if ( size < max_size ) { malloc_pointer = malloc ( chunk_size ); if ( malloc_pointer ) { buffer_set->size += chunk_size; buffer_set->current = (buffer_t *) malloc_pointer; current->next = (buffer_t *) malloc_pointer; current = (buffer_t *) malloc_pointer; current->malloc_pointer = malloc_pointer; current->size_remaining = chunk_size - sizeof ( buffer_t ) - align_size; current->buffer = (char *) current + sizeof ( buffer_t ); current->next = NULL; } else { fprintf ( stderr, "allocate: out of memory\n" ); fprintf ( stderr, " chunk_size = %u \n", chunk_size ); } } } if ( size <= current->size_remaining ) { available = (uint) current->buffer; odd = ( available & (align_size - 1) ); if ( odd ) { extra = align_size - odd; memory = (void *) ( (char *) memory + extra ); current->size_remaining -= extra; current->buffer += extra; } memory = current->buffer; memset ( memory, '\0', size ); current->size_remaining -= size; current->buffer += size; } if ( current->size_remaining < 0 ) { current->size_remaining = 0; } return memory; } // --------------------------------------------------------------------------- // save_string // // String can be null terminated or length specified. If length != 0, it // is used. // // Return NULL on failure. // --------------------------------------------------------------------------- PRIVATE char * save_string ( void *memory_set_handle, char *string, uint length ) { register buffer_set_t *buffer_set = (buffer_set_t *) memory_set_handle; register buffer_t *current; char *saved_string = NULL; uint size, chunk_size, max_size; void *malloc_pointer; if ( ! buffer_set ) { printf ( "save_string: null memory set \n" ); return saved_string; } if ( buffer_set->handle != memory_set_handle ) { printf ( "save_string: invalid handle: 0x%x \n", memory_set_handle ); return saved_string; } chunk_size = buffer_set->chunk_size; max_size = chunk_size - sizeof ( buffer_t ) - 4; current = buffer_set->current; if ( length ) { size = length + 1; } else { size = strlen ( string ) + 1; } if ( size > current->size_remaining ) { if ( size < max_size ) { // dump_buffer_set ( memory_set_handle ); malloc_pointer = malloc ( chunk_size ); if ( malloc_pointer ) { buffer_set->size += chunk_size; buffer_set->current = (buffer_t *) malloc_pointer; current->next = (buffer_t *) malloc_pointer; current = (buffer_t *) malloc_pointer; current->malloc_pointer = malloc_pointer; current->size_remaining = chunk_size - sizeof ( buffer_t ) - sizeof ( double ); current->buffer = (char *) current + sizeof ( buffer_t ); current->next = NULL; } else { fprintf ( stderr, "save_string: out of memory\n" ); fprintf ( stderr, " chunk_size = %u \n", chunk_size ); } } } if ( size <= current->size_remaining ) { saved_string = current->buffer; memcpy ( saved_string, string, size - 1 ); saved_string [ size - 1 ] = '\0'; current->size_remaining -= size; current->buffer += size; } return saved_string; } // --------------------------------------------------------------------------- // resave_string // // Attempt to reuse the memory. If not, call save_string. // // Return NULL on failure. // --------------------------------------------------------------------------- PRIVATE char * resave_string ( void *memory_set_handle, char *old_string, char *new_string ) { register buffer_set_t *buffer_set = (buffer_set_t *) memory_set_handle; register char *saved_string = NULL; uint old_length, new_length; if ( ! buffer_set ) { printf ( "resave_string: null memory set \n" ); return saved_string; } if ( buffer_set->handle != memory_set_handle ) { printf ( "resave_string: invalid handle: 0x%x \n", memory_set_handle ); return saved_string; } old_length = strlen ( old_string ); new_length = strlen ( new_string ); if ( new_length <= old_length ) { strcpy ( old_string, new_string ); saved_string = old_string; } else { saved_string = save_string ( memory_set_handle, new_string, new_length); } return saved_string; } // --------------------------------------------------------------------------- // get_size // // Total memory in the set. // // Return 0 on failure. // --------------------------------------------------------------------------- PRIVATE uint get_size ( void *memory_set_handle ) { buffer_set_t *buffer_set = (buffer_set_t *) memory_set_handle; if ( ! buffer_set ) { printf ( "get_size: null memory set \n" ); return 0; } if ( buffer_set->handle != memory_set_handle ) { printf ( "get_size: invalid handle: 0x%x \n", memory_set_handle ); return 0; } return buffer_set->size; } // --------------------------------------------------------------------------- // release // // Frees all the memory and structures. // // The string buffer begins with its buffer_set_t and buffer_t structures. So // releasing the malloc_pointer frees the structs and all the strings. // // --------------------------------------------------------------------------- PRIVATE void release ( void *memory_set_handle ) { register buffer_t *first; buffer_set_t *buffer_set = (buffer_set_t *) memory_set_handle; void *malloc_pointer; if ( ! buffer_set ) { printf ( "release_memory_set: null memory set \n" ); return; } if ( buffer_set->handle != memory_set_handle ) { printf ( "release_memory_set: invalid handle: 0x%x \n", memory_set_handle ); return; } for ( first = buffer_set->first; first && (first = first->next); ) { malloc_pointer = first->malloc_pointer; first->malloc_pointer = NULL; // invalidate it free ( malloc_pointer ); } buffer_set->handle = NULL; free ( buffer_set ); // first one last } // =========================================================================== // public methods // =========================================================================== // --------------------------------------------------------------------------- // MakeMemorySet // // If chunk_size is 0, a default size is used. 64K // // Returns the memory_set_t structure always. // The handle will be NULL on error. // // --------------------------------------------------------------------------- PUBLIC memory_set_t MakeMemorySet ( int chunk_size ) { register buffer_set_t *buffer_set = NULL; register buffer_t *current; void *malloc_pointer; memory_set_t memory_set = { allocate, save_string, resave_string, get_size, release, NULL }; if ( ! chunk_size ) { chunk_size = DEFAULT_MEMORY_CHUNK_SIZE; } else if ( chunk_size < MINIMUM_MEMORY_CHUNK_SIZE ) { chunk_size = MINIMUM_MEMORY_CHUNK_SIZE; } malloc_pointer = malloc ( chunk_size ); if ( malloc_pointer ) { buffer_set = (buffer_set_t *) malloc_pointer; current = (buffer_t *) ((char *) malloc_pointer + sizeof ( buffer_set_t )); buffer_set->first = current; buffer_set->current = current; buffer_set->chunk_size = chunk_size; buffer_set->size = chunk_size; buffer_set->handle = malloc_pointer; current->malloc_pointer = malloc_pointer; current->size_remaining = chunk_size - sizeof ( buffer_set_t ) - sizeof ( buffer_t ) - sizeof ( double ); current->buffer = (char *) current + sizeof ( buffer_t ); current->next = NULL; } else { fprintf ( stderr, "MakeMemorySet: out of memory\n" ); } memory_set.handle = malloc_pointer; return memory_set; } /************************************************************************* parser.c Version 6.50 Main entry point for the recognizer. Copyright (c) 2001-2014 SLK Systems, all rights reserved. 09-06-14 Added symbol name tracing 09-08-14 Just make a full array of all the scanned symbols 09-09-14 Combine everything into Parser 09-22-14 Use Parser_t with parse(), release() 11-14-14 Move MakeOptions out of MakeParser *************************************************************************/ #include #include #include "parser.h" #define HASH_SIZE 1013 // prime number: 211 503 1013 10007 40013 PRIVATE char Banner [] = "scc V6.43 (c) 2001-2014 SLK Systems"; PRIVATE char UsageMessage [] = "C language recognizer\n" "Usage: scc [ options ] file_name\n\n" " -a turn on all options that have no argument \n" " -d display parse derivation \n" " -db debug options set: d Tp Te \n" " -j jump to the editor on an error, then exit \n" " -p print symbol table \n" " -Dp do not prune the tree \n" " -Men set maximum allowed errors to n \n" " -Mwn set maximum allowed warnings to n \n" " -r recreate source code from parse tree \n" " -Sd show parse derivation from parse tree \n" " -Ss show all symbols list \n" " -St show parse tree \n" " -Tc trace names in actions \n" " -Te trace error handling \n" " -Tn trace symbol names in scanner \n" " -Tp trace peeking/scanning of tokens \n" " -Ts trace symbol lookup/installation \n" " -Tt trace states in actions \n" " -Ty trace typedef \n" " -Ta trace all \n"; // ------------------------------------------------------------------------ // get_file // // returns the file in a malloc buffer. // file is null terminated // ------------------------------------------------------------------------ PRIVATE char * get_file ( char *file_name ) { size_t file_size, size; FILE *fp; char *file = NULL; fp = fopen ( file_name, "rb" ); if ( fp ) { fseek ( fp, 0L, 2 ); file_size = ftell ( fp ); rewind ( fp ); file = (char *) malloc ( file_size + 4 ); if ( file ) { size = fread ( file, 1, file_size, fp ); if ( size == file_size ) { file [ file_size ] = '\0'; file [ file_size + 1 ] = '\0'; } else { free ( file ); file = NULL; } } fclose ( fp ); } return file; } // ------------------------------------------------------------------------ // MakeOptions // // A struct containing the command line arg results // ------------------------------------------------------------------------ PUBLIC Options_t MakeOptions ( int argc, char *argv [] ) { register char *p; Options_t options = { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; while ( --argc > 0 ) { // process command line options p = *++argv; switch (*p){ case '/': case '-': if ( *++p ) { switch ( *p ) { case 'a': options.DISPLAY = TRUE; options.JUMP_EDITOR = TRUE; options.NAMETABLE = TRUE; options.SHOW_DERIVATION = TRUE; options.SHOW_SYMBOLS_LIST = TRUE; options.SHOW_PARSE_TREE = TRUE; goto TRACE_ALL; break; case 'd': options.DISPLAY = TRUE; if ( *++p == 'b' ) { options.TRACE_ERRORS = TRUE; options.TRACE_PEEKER = TRUE; } else { --p; } break; case 'j': options.JUMP_EDITOR = TRUE; break; case 'p': options.NAMETABLE = TRUE; break; case 'r': options.RECREATE_SOURCE = TRUE; break; case 'D': if ( *++p ) { switch ( *p ) { case 'p': options.DO_NOT_PRUNE_TREE = TRUE; break; default: fputc ('D', stderr); fputc (*p, stderr); fputs (", invalid option\n", stderr); } } break; case 'M': if ( *++p ) { switch ( *p ) { case 'e': options.max_errors = atoi ( ++p ); break; case 'w': options.max_warnings = atoi ( ++p ); break; default: fputc ('M', stderr); fputc (*p, stderr); fputs (", invalid option\n", stderr); } } break; case 'S': while ( *++p ) { switch ( *p ) { case 'd': options.SHOW_DERIVATION = TRUE; break; case 's': options.SHOW_SYMBOLS_LIST = TRUE; break; case 't': options.SHOW_PARSE_TREE = TRUE; break; default: fputc ('S', stderr); fputc (*p, stderr); fputs (", invalid option\n", stderr); } } --p; break; case 'T': while ( *++p ) { switch ( *p ) { case 'a': TRACE_ALL: options.TRACE_ACTIONS = TRUE; options.TRACE_ERRORS = TRUE; options.TRACE_NAMES = TRUE; options.TRACE_PEEKER = TRUE; options.TRACE_SYMBOLS = TRUE; options.TRACE_TYPEDEF = TRUE; break; case 'c': options.TRACE_ACTIONS = TRUE; break; case 'e': options.TRACE_ERRORS = TRUE; break; case 'n': options.TRACE_NAMES = TRUE; break; case 'p': options.TRACE_PEEKER = TRUE; break; case 's': options.TRACE_SYMBOLS = TRUE; break; case 't': options.TRACE_STATES = TRUE; break; case 'y': options.TRACE_TYPEDEF = TRUE; break; default: fputc ('T', stderr); fputc (*p, stderr); fputs (", invalid option\n", stderr); } } --p; break; default: fputc (*p, stderr); fputs (", invalid option\n", stderr); } } break; default: options.file_name = *argv; } } return options; } // ----------------------------------------------------------------------- // parse // ----------------------------------------------------------------------- PRIVATE void parse ( void ) { SlkParse ( Action.slk, Tokens.slk, Error.slk, 0 ); } // ----------------------------------------------------------------------- // release // ----------------------------------------------------------------------- PRIVATE void release ( void ) { Symbols.release(); Tokens.release(); Action.release(); } // ------------------------------------------------------------------------ // MakeParser // // Constructs a global Parser for the program // ------------------------------------------------------------------------ PUBLIC Parser_t MakeParser ( char *input ) { Symbols = MakeSymbol ( HASH_SIZE ); Tokens = MakeToken ( input ); Error = MakeError(); if ( Options.max_errors ) { Error.set_max_errors ( Options.max_errors ); } if ( Options.max_warnings ) { Error.set_max_warnings ( Options.max_warnings ); } Action = MakeAction(); Parser.parse = parse; Parser.release = release; return Parser; } // ------------------------------------------------------------------------ // MAIN // Use -DMAIN=main in makefile to run this as a standalone program // This logic can be in another file. // ------------------------------------------------------------------------ PUBLIC int MAIN ( int argc, char *argv [] ) { uint errors, warnings; char *file_name; char *input; puts ( Banner ); Options = MakeOptions ( argc, argv ); // process command line options file_name = Options.file_name; if ( ! file_name ) { puts ( UsageMessage ); return -1; } input = get_file ( file_name ); if ( ! input ) { perror ( file_name ); return -1; } MakeParser ( input ); Parser.parse(); warnings = Error.get_total_warnings (); if ( warnings ) { printf ( "%5u warnings \n", warnings ); } errors = Error.get_total_errors (); if ( errors ) { if ( errors > 1 ) { --errors; // extra one from the first no_entry } printf ( "%5u errors \n", errors ); } if ( Options.NAMETABLE ) { Symbols.print_table(); } if ( Options.SHOW_SYMBOLS_LIST ) { Tokens.symbols.list.show_all(); } if ( Options.SHOW_DERIVATION ) { Action.show_parse_derivation (); } if ( Options.SHOW_PARSE_TREE ) { Action.show_tree (); } if ( Options.RECREATE_SOURCE ) { Action.show_source (); } Parser.release(); free ( input ); return 0; } /***************************************************************************** peeker.c Version 6.40 Interface to the actual lexical scanner for the compiler. This is needed to provide the peeking capability for the LL(k) parser. Doing it here rather than in the parser means that the parser is independent of the symbol table and any other attributes that may be used. This also allows the use of other scanners or generated scanners. Copyright (c) 2002-2014 SLK Systems, all rights reserved. 09-08-14 Just make a full array of all the scanned symbols 09-21-14 Merge List and Peeker arrays into one, no preload of tokens Needed to make scoping work for typedef ******************************************************************************/ #include #include #include #include "SlkConstants.h" #include "parser.h" #define CHUNK_SIZE 1024*4 typedef struct _peek { symbol_t *symbol; int line_number; } peek_t; PRIVATE uint Trace; PRIVATE Scanner_t Scanner; PRIVATE peek_t *Peeker; PRIVATE int Next, // Peeker index, next get(), peek(1) End, // Peeker last index in array Max, // Peeker max size of array List_next, // for the list of symbols Last_identifier; // for typedef, an index, no pointers PRIVATE symbol_t *get_current ( void ) { return Peeker[Next-1].symbol; } PRIVATE void repeat_last ( void ) { if ( Next > 0 ) --Next; } PRIVATE symbol_t *get_last_identifier(void) { return Peeker[Last_identifier].symbol; } PRIVATE void set_last_identifier ( symbol_t *symbol ) { Peeker[Last_identifier].symbol = symbol; } // ----------------------------------------------------------------------- // peek // // scan next token without consuming it // ----------------------------------------------------------------------- PRIVATE slk_size_t peek ( int level ) { peek_t peek; slk_size_t token; int index; static char id [32] = ""; // careful, causes stack overflow? index = Next + level - 1; if ( index == Max - 1 ) { Max += CHUNK_SIZE; Peeker = (peek_t *) realloc ( Peeker, Max * sizeof (peek_t)); if ( ! Peeker ) exit ( 8 ); } if ( index > End ) { peek.symbol = Scanner.get (); peek.line_number = Scanner.get_line_number(); Peeker [ index ] = peek; End = index; } else { peek = Peeker [ index ]; } token = peek.symbol->token; if ( Trace ) { *id = '\0'; if ( token == IDENTIFIER_ || token == TYPEDEF_NAME_ ) { strncpy ( id, peek.symbol->name, 31 ); } printf ( " peek(%u:line %u): %s %s\n", level, peek.line_number, SlkGetSymbolName(token), id); } return token; } // ----------------------------------------------------------------------- // get // // scan next token // Almost could just call peek ??? !Trace // ----------------------------------------------------------------------- PRIVATE slk_size_t get ( void ) { peek_t peek; slk_size_t token; int index; static char id [32] = ""; // careful, causes stack overflow? index = Next; if ( index == Max - 1 ) { Max += CHUNK_SIZE; Peeker = (peek_t *) realloc ( Peeker, Max * sizeof (peek_t)); if ( ! Peeker ) exit ( 8 ); } if ( index > End ) { peek.symbol = Scanner.get (); peek.line_number = Scanner.get_line_number(); Peeker [ index ] = peek; End = index; } else { peek = Peeker [ index ]; } if ( peek.symbol->token != END_OF_SLK_INPUT_ ) { ++Next; } token = peek.symbol->token; if ( token == IDENTIFIER_ || token == TYPEDEF_NAME_ ) { Last_identifier = index; } if ( Trace ) { *id = '\0'; if ( token == IDENTIFIER_ || token == TYPEDEF_NAME_ ) { strncpy ( id, peek.symbol->name, 31 ); } printf ( " get(%u:line %u): %s %s\n", index, peek.line_number, SlkGetSymbolName(token), id); } return token; } // ------------------------------------------------------------------------ // get_line_number // ------------------------------------------------------------------------ PRIVATE int get_line_number ( void ) { return Peeker[Next-1].line_number; } // ======================================================================= // List of symbols // ======================================================================= // ----------------------------------------------------------------------- // reset // // to reset the list of symbols to the beginning // ----------------------------------------------------------------------- PRIVATE void reset ( void ) { List_next = 0; } // ----------------------------------------------------------------------- // get_next // // get the ordered list of scanned symbols, use reset() to restart it // should only be done after parsing is completed // ----------------------------------------------------------------------- PRIVATE symbol_t * get_next ( void ) { peek_t peek; int index; symbol_t *symbol = NULL; index = List_next; if ( index <= End ) { ++List_next; peek = Peeker [ index ]; symbol = peek.symbol; } return symbol; } // ----------------------------------------------------------------------- // show_all // // show the ordered list of scanned symbols // should only be done after parsing is completed // ----------------------------------------------------------------------- PRIVATE void show_all ( void ) { peek_t peek; int index; symbol_t *symbol; slk_size_t token; for ( index = 0; index <= End; ++index ) { peek = Peeker [ index ]; symbol = peek.symbol; token = symbol->token; if ( token == IDENTIFIER_ || token == TYPEDEF_NAME_ ) { printf ( "%3u: %s (%u)\n", index, symbol->name, Symbols.get_table_scope() ); } else { printf ( "%3u: %s\n", index, symbol->name ); } } } // ----------------------------------------------------------------------- // release // ----------------------------------------------------------------------- PRIVATE void release ( void ) { free ( Peeker ); } // ----------------------------------------------------------------------- // Token // ----------------------------------------------------------------------- PUBLIC Tokens_t MakeToken ( char *input ) { register int index; peek_t peeker; Tokens_t this = { get_line_number, repeat_last, release, { get_current, get_last_identifier, set_last_identifier, { get_next, show_all, reset } }, { get, peek } }; Trace = Options.TRACE_PEEKER; Next = 0; End = -1; // less than Next List_next = 0; Max = CHUNK_SIZE; Peeker = (peek_t *) malloc ( Max * sizeof ( peek_t ) ); if ( ! Peeker ) exit ( 7 ); Scanner = MakeScanner ( input ); return this; } /***************************************************************************** scanner.c Version 6.20 Hand coded lexical scanner for the compiler. Tokens are set to ascii value, but fixed in the symbol manager Copyright (c) 2002-2014 SLK Systems, all rights reserved. ******************************************************************************/ #include #include #include #include "SlkConstants.h" #include "parser.h" PRIVATE char *Start_text, *End_text; PRIVATE uint Line_number, End_of_input, Trace; PRIVATE int get_line_number ( void ) { return Line_number; } // ------------------------------------------------------------------------ // install_name // // install without duplicates, return token // ------------------------------------------------------------------------ PRIVATE symbol_t * install_name ( char *start, register char *end, slk_size_t token ) { register symbol_t *symbol; char save; save = *end; *end = '\0'; if ( ! *start ) { printf ( "%u:scanner: zero length identifier", Line_number ); } symbol = Symbols.lookup ( start ); if ( ! symbol ) { symbol = Symbols.insert ( token, start ); // token ignored } *end = save; if ( Trace ) { printf ( " symbol: %s\t\t(%d)\n", symbol->name, symbol->token ); } return symbol; // from symbol table, not entry token } // ------------------------------------------------------------------------ // get // // return a token // ------------------------------------------------------------------------ PRIVATE symbol_t * get ( void ) { register char *end = End_text; register slk_size_t token = 0; symbol_t *symbol; loop { Start_text = end; switch ( *end++ ) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: break; case 9: goto white_space ; break; case 10: ++Line_number; goto white_space ; break; case 11: case 12: case 13: case 14: case 15: case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24: case 25: case 26: case 27: case 28: case 29: case 30: case 31: break; case 32: white_space : while ( isspace ( *end ) ) { if ( *end == '\n' ) { ++Line_number; } ++end; } break; case '!': if ( *end == '=' ) { ++end; token = ( NOT_EQUAL_ ); } else { token = ( '!' ); } break; case '\"': while ( *end ) { if ( *end == '\"' ) { ++end; while ( isspace ( *end ) ) { if ( *end == '\n' ) { ++Line_number; } ++end; } if ( *end != '\"' ) { // string concatonation break; } } if ( *end == '\\' ) { ++end; if ( *end == '\r' || *end == '\n' ) { ++Line_number; } } ++end; } token = ( STRING_ ); break; case '#': // skip # line while ( *end ) { if ( *end == '\n' ) { ++Line_number; ++end; break; } ++end; } break; case 36: // invalid_char break; case '%': if ( *end == '=' ) { ++end; token = ( PERCENT_EQUAL_ ); } else { token = ( '%' ); } break; case '&': if ( *end == '&' ) { ++end; token = ( AND_AND_ ); } else if ( *end == '=' ) { ++end; token = ( AND_EQUAL_ ); } else { token = ( '&' ); } break; case '\'': while ( *end ) { if ( *end == '\'' ) { if ( *(end-1) == '\\' && *(end-2) != '\\' ){ // do nothing, continue the loop, found \' } else { ++end; break; } } ++end; } token = ( CHARACTER_CONSTANT_ ); break; case 40: case 41: // valid_token_char token = *(end - 1); break; case '*': if ( *end == '=' ) { ++end; token = ( STAR_EQUAL_ ); } else { token = ( '*' ); } break; case '+': if ( *end == '+' ) { ++end; token = ( PLUS_PLUS_ ); } else if ( *end == '=' ) { ++end; token = ( PLUS_EQUAL_ ); } else { token = ( '+' ); } break; case 44: token = *(end - 1); break; case '-': if ( *end == '-' ) { ++end; token = ( MINUS_MINUS_ ); } else if ( *end == '>' ) { ++end; token = ( MINUS_GREATER_ ); } else if ( *end == '=' ) { ++end; token = ( MINUS_EQUAL_ ); } else { token = ( '-' ); } break; case '.': if ( *end == '.' && *++end == '.' ) { ++end; token = ( DOT_DOT_DOT_ ); } else if ( isdigit ( *end ) ) { goto float_constant; } else { token = ( '.' ); } break; case '/': if ( *end == '=' ) { ++end; token = ( SLASH_EQUAL_ ); } else if ( *end == '/' ) { // comment while ( *++end ) { if ( *end == '\n' ) { ++end; if ( *end == '\r' ) { // for DOS ++end; } ++Line_number; break; } } } else if ( *end == '*' ) { // comment while ( *++end ) { if ( *end == '\n' ) { ++Line_number; continue; } if ( *end == '*' && *(end + 1) == '/' ) { end += 2; break; } } } else { token = ( '/' ); } break; case 48: // digit case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: while ( isdigit (*end) ) { ++end; } if ( ! (*end == '.' || *end == 'e' || *end == 'E') ) { if ( *end == 'x' || *end == 'X' ) { // int constant ++end; } while ( isxdigit (*end) ) { ++end; } if ( *end == 'u' || *end == 'U' || *end == 'l' || *end == 'L' ) { ++end; } if ( *end == 'u' || *end == 'U' || *end == 'l' || *end == 'L' ) { ++end; } token = ( INTEGER_CONSTANT_ ); break; } if ( *end == '.' ) { float_constant : while ( isdigit (*++end) ) { } } if ( *end == 'e' || *end == 'E' ) { ++end; } if ( *end == '-' || *end == '+' ) { ++end; } while ( isdigit (*end) ) { ++end; } if ( *end == 'f' || *end == 'F' || *end == 'l' || *end == 'L' ) { ++end; } token = ( FLOATING_CONSTANT_ ); break; case 58: case 59: token = *(end - 1); break; case '<': if ( *end == '=' ) { ++end; token = ( LESS_EQUAL_ ); } else if ( *end == '<' ) { if ( *++end == '=' ) { ++end; token = ( LESS_LESS_EQUAL_ ); } else { token = ( LESS_LESS_ ); } } else { token = ( '<' ); } break; case '=': if ( *end == '=' ) { ++end; token = ( EQUAL_EQUAL_ ); } else { token = ( '=' ); } break; case '>': if ( *end == '=' ) { ++end; token = ( GREATER_EQUAL_ ); } else if ( *end == '>' ) { if ( *++end == '=' ) { ++end; token = ( GREATER_GREATER_EQUAL_ ); } else { token = ( GREATER_GREATER_ ); } } else { token = ( '>' ); } break; case 63: token = *(end - 1); break; case 64: break; case 65: // upper alpha case 66: case 67: case 68: case 69: case 70: case 71: case 72: case 73: case 74: case 75: case 76: case 77: case 78: case 79: case 80: case 81: case 82: case 83: case 84: case 85: case 86: case 87: case 88: case 89: case 90: goto identifier ; break; case 91: token = *(end - 1); break; case 92: break; case 93: token = *(end - 1); break; case '^': if ( *end == '=' ) { ++end; token = ( CARET_EQUAL_ ); } else { token = ( '^' ); } break; case 95: goto identifier ; break; case 96: break; case 97: // lower alpha case 98: case 99: case 100: case 101: case 102: case 103: case 104: case 105: case 106: case 107: case 108: case 109: case 110: case 111: case 112: case 113: case 114: case 115: case 116: case 117: case 118: case 119: case 120: case 121: case 122: identifier : while ( *end ) { if ( ! ( isalnum ( *end ) || *end == '_' ) ) { token = IDENTIFIER_; break; } ++end; } break; case 123: token = *(end - 1); break; break; case '|': if ( *end == '|' ) { ++end; token = ( OR_OR_ ); } else if ( *end == '=' ) { ++end; token = ( OR_EQUAL_ ); } else { token = ( '|' ); } break; case 125: case 126: token = *(end - 1); break; case 127: break; } if ( ! *end ) { // check for end of buffer if ( End_of_input ) { if ( ! token ) { token = END_OF_SLK_INPUT_; Start_text = "END_OF_SLK_INPUT"; --end; } } End_of_input = TRUE; } if ( token ) { symbol = install_name ( Start_text, end, token ); End_text = end; return symbol; } } } // ------------------------------------------------------------------------ // Scanner // ------------------------------------------------------------------------ PUBLIC Scanner_t MakeScanner ( char *input ) { Scanner_t scanner = { get_line_number, get }; Trace = Options.TRACE_NAMES; Start_text = input; End_text = input; Line_number = 1; End_of_input = FALSE; return scanner; } /************************************************************************* symbol.c Version 6.40 This module contains the symbol table manager. Copyright (c) 2002-2014 SLK Systems, all rights reserved. *************************************************************************/ #include #include #include #ifdef DEBUG #include #else #define assert(a) #endif #include "SlkTerminals.h" #include "parser.h" PRIVATE uint Symbol_table_size, Symbol_trace; PRIVATE memory_set_t Symbol_memory; PRIVATE uchar Symbol_table_scope; PRIVATE symbol_t **Symbol_table; // array of head nodes PRIVATE void set_new_table_scope ( void ) { ++Symbol_table_scope; } PRIVATE uchar get_table_scope ( void ) { return Symbol_table_scope; } // ----------------------------------------------------------------------- // hash // // 32 bit hashing function for the hashtable // ----------------------------------------------------------------------- PRIVATE uint hash ( char *string, uint table_size ) { register char *p = string - 1; register uint h = 0; register uint g; while ( *++p ) { h = (h << 4) + *p; g = (h & 0xF0000000); if ( g ) { h ^= (g >> 8); h ^= g; } } return ( h % table_size ); } // ----------------------------------------------------------------------- // lookup // // look up symbol in symbol table // ----------------------------------------------------------------------- PRIVATE symbol_t * lookup ( register char *name ) { register symbol_t *symbol; slk_size_t token; symbol = Symbol_table [ hash ( name, Symbol_table_size ) ]; for (; symbol; symbol = symbol->next ) { if ( ! ( symbol->deleted && (symbol->scope != Symbol_table_scope) ) ) { if ( ! ( strcmp ( name, symbol->name ) ) ) { symbol->deleted = FALSE; token = symbol->token; if ( Symbol_trace ) { printf ( " old symbol[%u]: %s (%d) \n", symbol->scope, symbol->name, symbol->token ); } break; // found it } } } return symbol; } // ----------------------------------------------------------------------- // insert // // insert a symbol into Symbol_table // ----------------------------------------------------------------------- PRIVATE symbol_t * insert ( slk_size_t token, char *name ) { register symbol_t *_new, *head; uint hash_value; _new = (symbol_t *) Symbol_memory.allocate ( Symbol_memory.handle, sizeof (symbol_t) ); _new->token = token; _new->name = Symbol_memory.save_string ( Symbol_memory.handle, name, 0 ); _new->scope = Symbol_table_scope; hash_value = hash ( name, Symbol_table_size ); head = Symbol_table [ hash_value ]; _new->next = head; // insert at head of list Symbol_table [ hash_value ] = _new; if ( Symbol_trace ) { printf ( " new symbol[%u]: %s (%d) \n", Symbol_table_scope, name, token ); } return ( _new ); } // ----------------------------------------------------------------------- // init_symbol_table with the terminals of the language // ----------------------------------------------------------------------- PRIVATE void init_symbol_table ( void ) { register Slk_terminal_t *terminal; for ( terminal = Slk_terminal; terminal->name; ++terminal ) { insert ( terminal->token, terminal->name ); } } // ----------------------------------------------------------------------- // release_table_scope // // remove all out of scope symbols from the symbol table by marking // them deleted. // --scope // ----------------------------------------------------------------------- PRIVATE void release_table_scope ( void ) { register symbol_t **head = Symbol_table, *symbol; uint hash_value; for ( hash_value = 0; hash_value < Symbol_table_size; ++hash_value ) { for ( symbol = *head++; symbol; symbol = symbol->next ) { if ( symbol->scope == Symbol_table_scope ) { symbol->deleted = TRUE; } } } --Symbol_table_scope; } // ----------------------------------------------------------------------- // print_table // // print out symbol table // ----------------------------------------------------------------------- PRIVATE void print_table ( void ) { register symbol_t **head = Symbol_table, *symbol; uint hash_value; uint total_symbols = 0, total_lists = 0, min_list_length = 10000, max_list_length = 0, avg_list_length = 0; printf ( "\n HASH SCOPE TOKEN NAME" "\n ------ ------- ------- ----------------\n" ); for ( hash_value = 0; hash_value < Symbol_table_size; ++hash_value ) { symbol = *head++; if ( symbol ) { uint list_length = 0; for (; symbol; symbol = symbol->next ) { if ( symbol->deleted ) { printf ( "%6d (%2d) %6d %s \n", hash_value, symbol->scope, symbol->token, symbol->name); } else { printf ( "%6d %6d %6d %s \n", hash_value, symbol->scope, symbol->token, symbol->name); } ++total_symbols; ++list_length; } if ( list_length < min_list_length ) { min_list_length = list_length; } if ( list_length > max_list_length ) { max_list_length = list_length; } avg_list_length += list_length; ++total_lists; } } avg_list_length /= total_lists; putchar ( '\n' ); printf ( " total symbols = %6u \n", total_symbols ); printf ( " total lists = %6u \n", total_lists ); printf ( " min list length = %6u \n", min_list_length ); printf ( " max list length = %6u \n", max_list_length ); printf ( " avg list length = %6u \n", avg_list_length ); } // ----------------------------------------------------------------------- // release // ----------------------------------------------------------------------- PRIVATE void release ( void ) { free ( Symbol_table ); Symbol_memory.release ( Symbol_memory.handle ); } // ----------------------------------------------------------------------- // Symbol // ----------------------------------------------------------------------- PUBLIC Symbols_t MakeSymbol ( uint table_size ) { Symbols_t symbols = { lookup, insert, release_table_scope, set_new_table_scope, get_table_scope, print_table, release }; Symbol_table_scope = 1; Symbol_trace = Options.TRACE_SYMBOLS; Symbol_table_size = table_size; Symbol_table = (symbol_t **) calloc ( table_size, sizeof (symbol_t *) ); Symbol_memory = MakeMemorySet ( 0 ); init_symbol_table (); return symbols; } /************************************************************************* tree.c Version 6.50 This module contains the parse tree routines Copyright (c) 2002-2014 SLK Systems, all rights reserved. *************************************************************************/ #include #include #include "SlkConstants.h" #include "parser.h" // #define PRINTF printf #define PRINTF(a,b) // #define PRINTF3 printf #define PRINTF3(a,b,c) typedef struct _node { struct _node *parent, // parent is the lhs of the production *child, // first symbol on rhs of production *sibling, // rest of the production is siblings *left_sibling; // to go backwards symbol_t *symbol; // symbol table entry slk_size_t token; } node_t; PRIVATE node_t *Root; PRIVATE int Display = 0; PRIVATE int Prune_tree = 0; PRIVATE memory_set_t Memory; typedef struct _stack { node_t **top , **end , *start [ 256 ]; } stack_t; PRIVATE stack_t Parse_stack = { Parse_stack.start + 255, Parse_stack.start + 255 }; #undef push #undef pop #define push(n,s) if (s.top > s.start) { *--s.top = n; }\ else { puts ("tree: stack overflow"); exit(1); } // #define pop(s) ( *s.top ? *s.top++ : 0 ) #define pop(s) ( s.top < s.end ? *s.top++ : *s.top ) // ----------------------------------------------------------------------- // private methods // ----------------------------------------------------------------------- PRIVATE node_t * make_node ( slk_size_t token ) { register node_t *node; node = (node_t *) Memory.allocate ( Memory.handle, sizeof (node_t) ); if ( node == NULL ) { puts ( "make_node: Memory.allocate failed\n" ); exit (1); } node->token = token; return ( node ); } PRIVATE void reduce ( slk_size_t production_number ) { register slk_size_t *rhs; register node_t *prev = NULL; node_t *child = NULL; node_t *this_child = NULL; slk_size_t *lhs; int length, rhs_length; if ( Display ) { puts ( SlkGetProductionName ( production_number ) ); } if ( Error.get_total_errors() ) { return; } lhs = SlkGetProductionArray ( production_number ); rhs_length = *lhs - 1; ++lhs; rhs = lhs + 1; for ( length = rhs_length; length > 0; --length, ++rhs ) { if ( SlkIsAction ( *rhs ) ) { continue; // skip actions } prev = make_node ( *rhs ); this_child = prev; for ( --length, ++rhs; length > 0; --length, ++rhs ) { if ( SlkIsAction ( *rhs ) ) { continue; // skip actions } prev->sibling = make_node ( *rhs ); prev->sibling->left_sibling = prev; prev = prev->sibling; } } for (; prev; prev = prev->left_sibling ) { if ( SlkIsNonterminal ( prev->token ) ) { child = pop ( Parse_stack ); if ( child ) { PRINTF ( "pop : %s\n", SlkGetSymbolName ( child->token ) ); prev->child = child; child->parent = prev; } else { PRINTF ( "pop : %s\n", "NULL" ); } } } if ( this_child ) { PRINTF ( "push: %s\n", SlkGetSymbolName ( this_child->token ) ); } else { PRINTF ( "push: %s\n", "NULL" ); } push ( this_child, Parse_stack ); if ( production_number == 1 ) { Root = make_node ( *lhs ); Root->child = this_child; this_child->parent = Root; } } PRIVATE node_t * make_LL_branch ( slk_size_t production_number ) { register slk_size_t *rhs; register node_t *prev = NULL; node_t *child = NULL; slk_size_t *lhs; int length, rhs_length; lhs = SlkGetProductionArray ( production_number ); rhs_length = *lhs - 1; ++lhs; rhs = lhs + 1; for ( length = rhs_length; length > 0; --length, ++rhs ) { if ( SlkIsAction ( *rhs ) ) { continue; // skip actions } prev = make_node ( *rhs ); child = prev; for ( --length, ++rhs; length > 0; --length, ++rhs ) { if ( SlkIsAction ( *rhs ) ) { continue; // skip actions } prev->sibling = make_node ( *rhs ); prev->sibling->left_sibling = prev; prev = prev->sibling; } } for (; prev; prev = prev->left_sibling ) { if ( SlkIsNonterminal ( prev->token ) ) { push ( prev, Parse_stack ); // copy parsing pushes } } return child; } PRIVATE void predict ( slk_size_t production_number ) { register slk_size_t *lhs; node_t *parent; node_t *child; if ( Display ) { puts ( SlkGetProductionName ( production_number ) ); } if ( Error.get_total_errors() ) { return; } lhs = SlkGetProductionArray ( production_number ); ++lhs; if ( ! Root ) { // first call Root = make_node ( *lhs ); push ( Root, Parse_stack ); } parent = pop ( Parse_stack ); child = make_LL_branch ( production_number ); if ( child && parent ) { parent->child = child; child->parent = parent; } } PRIVATE void delete_node ( register node_t *node ) { register node_t *left_sibling; // left side sibling back pointer node_t *parent; // lhs of production PRINTF ( "DELETE: %s\n", SlkGetSymbolName ( node->token ) ); left_sibling = node->left_sibling; if ( left_sibling ) { PRINTF ( " LEFT SIBLING: %s\n", SlkGetSymbolName ( left_sibling->token ) ); left_sibling->sibling = node->sibling; if ( node->sibling ) { node->sibling->left_sibling = left_sibling; } } else { parent = node->parent; if ( parent ) { PRINTF ( " PARENT: %s\n", SlkGetSymbolName ( parent->token ) ); parent->child = node->sibling; if ( node->sibling ) { PRINTF ( " SIBLING: %s\n", SlkGetSymbolName ( node->sibling->token )); node->sibling->parent = parent; node->sibling->left_sibling = NULL; } } else { printf ( "tree_t: no parent or left sibling for %s\n", SlkGetSymbolName ( node->token ) ); } } } PRIVATE void lift_child ( register node_t *parent ) // to lift up a child node to parent { // just rewrite the parent node register // then delete the child node_t *child = parent->child; if ( ! child ) { return; } parent->symbol = child->symbol; parent->token = child->token; if ( child->sibling ) { parent->child = child->sibling; child->sibling->parent = parent; child->sibling->left_sibling = NULL; } else if ( child->child ) { parent->child = child->child; child->child->parent = parent; } else { parent->child = NULL; } } PRIVATE void lift_single_rhs_symbol ( node_t *tree ) { register // remove chains of precedence productions node_t *child, *sibling; if ( ! tree ) { return; } PRINTF ( "tree: %s\n", SlkGetSymbolName ( tree->token ) ); if ( tree->parent ) { PRINTF ( " parent %s\n", SlkGetSymbolName ( tree->parent->token ) ); } if ( tree->left_sibling ) { PRINTF ( " left_sibling %s\n", SlkGetSymbolName ( tree->left_sibling->token ) ); } child = tree->child; if ( child ) { lift_single_rhs_symbol ( child ); if ( ! child->sibling ) { if ( tree != Root ) { lift_child ( tree ); // lift up child } } for ( sibling = child->sibling; sibling; sibling = sibling->sibling ){ lift_single_rhs_symbol ( sibling ); } } } PRIVATE void remove_null_productions ( node_t *tree ) { register node_t *child, *sibling; if ( ! tree ) { return; } child = tree->child; if ( child ) { remove_null_productions ( child ); for ( sibling = child->sibling; sibling; sibling = sibling->sibling ){ remove_null_productions ( sibling ); } } else if ( SlkIsNonterminal ( tree->token ) ) { // PRINTF ( "delete %s\n", SlkGetSymbolName ( tree->token ) ); delete_node ( tree ); } } PRIVATE void prune ( void ) { remove_null_productions ( Root ); // delete null productions lift_single_rhs_symbol ( Root ); // lift up children } PRIVATE void show_source_r ( node_t *tree ) { register node_t *child, *sibling; if ( tree == NULL ) { return; } if ( SlkIsTerminal ( tree->token ) ) { char *spaces = " "; switch ( tree->token ) { // pretty print it case LBRACE_: case RBRACE_: case SEMI_: spaces = "\n"; break; case DOT_: case COMMA_: spaces = ""; break; } printf ( "%s%s", tree->symbol->name, spaces ); } child = tree->child; if ( child ) { show_source_r ( child ); for ( sibling = child->sibling; sibling; sibling = sibling->sibling ) { show_source_r ( sibling ); } } } PRIVATE void print ( register node_t *node ) { printf ( "%s", SlkGetSymbolName ( node->token ) ); switch ( node->token ) { case IDENTIFIER_ : case TYPEDEF_ : case STRING_ : case INTEGER_CONSTANT_ : case CHARACTER_CONSTANT_ : case FLOATING_CONSTANT_ : case ENUMERATION_CONSTANT_ : printf ( " %s", node->symbol->name ); } putchar ( '\n' ); } PRIVATE void pre_order ( node_t *tree ) { register node_t *child, *sibling; if ( tree == NULL ) { return; } printf ( "%s", SlkGetSymbolName ( tree->token ) ); switch ( tree->token ) { case IDENTIFIER_ : case TYPEDEF_ : case STRING_ : case INTEGER_CONSTANT_ : case CHARACTER_CONSTANT_ : case FLOATING_CONSTANT_ : case ENUMERATION_CONSTANT_ : printf ( " %s", tree->symbol->name ); } putchar ( '\n' ); child = tree->child; if ( child ) { pre_order ( child ); for ( sibling = child->sibling; sibling; sibling = sibling->sibling ) { pre_order ( sibling ); } } } PRIVATE void show_source ( void ) { show_source_r ( Root ); } PRIVATE void show_tree ( void ) { pre_order ( Root ); } PRIVATE void show_parse_derivation_r ( node_t *tree ) { register node_t *child, *sibling; if ( tree == NULL ) { return; } printf ( "%s --> ", SlkGetSymbolName ( tree->token ) ); child = tree->child; if ( child != NULL ) { printf ( "%s ", SlkGetSymbolName ( child->token ) ); for ( sibling = child->sibling; sibling; sibling = sibling->sibling ) { printf ( "%s ", SlkGetSymbolName ( sibling->token ) ); } putchar ( '\n' ); if ( SlkIsNonterminal ( child->token ) ) { show_parse_derivation_r ( child ); } for ( sibling = child->sibling; sibling; sibling = sibling->sibling ) { if ( SlkIsNonterminal ( sibling->token ) ) { show_parse_derivation_r ( sibling ); } } } else { putchar ( '\n' ); } } PRIVATE void show_parse_derivation ( void ) { show_parse_derivation_r ( Root ); } // ----------------------------------------------------------------------- // add_symbols - put the symbols on the tree // // problem with stacking because cannot get at symbol at the right time // so just save the symbols linearly while scanning, // then decorate the tree with them here, in order // ----------------------------------------------------------------------- PRIVATE void add_symbols ( node_t *tree ) { register node_t *child, *sibling; if ( ! tree ) { return; } child = tree->child; if ( child ) { add_symbols ( child ); } if ( SlkIsTerminal ( tree->token ) && ! tree->symbol ) { tree->symbol = Tokens.symbols.list.get_next(); if ( tree->symbol ) { PRINTF ( "\n symbol name: %s \n", tree->symbol->name ); PRINTF ( " tree token: %s \n", SlkGetSymbolName ( tree->token )); } else { printf ( "no symbol for '%s'\n", SlkGetSymbolName ( tree->token ) ); } } if ( child ) { for ( sibling = child->sibling; sibling; sibling = sibling->sibling ){ add_symbols ( sibling ); } } } PRIVATE void finish_tree ( void ) { int errors; errors = Error.get_total_errors (); if ( ! errors ) { if ( Prune_tree ) { prune (); } add_symbols ( Root ); } } // ----------------------------------------------------------------------- // release // ----------------------------------------------------------------------- PRIVATE void release ( void ) { Memory.release ( Memory.handle ); } // ----------------------------------------------------------------------- // MakeTree // ----------------------------------------------------------------------- PUBLIC Tree_t MakeTree ( void ) { Tree_t tree = { predict, reduce, show_source, show_tree, show_parse_derivation, finish_tree, release }; Root = NULL; Display = Options.DISPLAY; Prune_tree = ! Options.DO_NOT_PRUNE_TREE; Memory = MakeMemorySet ( 0 ); return tree; }