#include "headers.h"

/*
How many statements should we allocate per language block?

Note that sizeof(Statement*), which probably equals 8, is multiplied by this
constant and the product of these numbers is then given to m_alloc as an
argument, so setting this to a high value won't use too much memory.
*/
#define DEFAULT_NUMBER_OF_STATEMENTS_TO_ALLOCATE 5

/* Reports that a Statement (or its line buffer) could not be allocated and terminates the process. */
attr_noreturn
static void unable_to_allocate_memory_for_statement(void) {
    output_nullt_string("Unable to allocate memory for statement.\n");
    exit(1);
}

/* Reports that memory needed while parsing a block could not be allocated and terminates the process. */
attr_noreturn
static void unable_to_allocate_memory_to_parse_block(void) {
    output_nullt_string("Unable to allocate memory to parse block.\n");
    exit(1);
}

/*
Interprets one already-tokenized statement in the context of `block`.

Two statement forms are recognised:
  - "<type> <name> = <expression...>": declares an initialized variable. The
    type must be one of u, s, f or b, but only u and f are implemented; any
    other valid type prints "Not implemented yet" and exits.
  - "output <name>": translates the named variable's expression into
    mid-level instructions (only once per variable).

Anything else is reported as an unclassified syntax error.

Returns ERROR_NUMBER_NO_ERROR on success, otherwise the error number of the
problem that was reported. Some failures (allocation failure, undeclared
variable in "output", unimplemented type) terminate the process instead of
returning.
*/
attr_nonnull
static ErrorNumber parse_statement(DataAndCode* const restrict dac, Statement* statement, Block* block) {
    #if 0
    u64 i;
    #endif

    /* just easier names to refer to */
    Token* tokens;
    u64 number_of_tokens;

    assert_comparison(statement, !=, NULL);
    assert_comparison(block, !=, NULL);

    number_of_tokens = ARROW(statement, length_of_tokens);
    tokens = statement->tokens;

    #if 0
    output_nullt_string("Tokens in statement:\n");
    for (i = 0; i < statement->length_of_tokens; ++i) {
        output_nullt_string(" ");
        output_string(tokens[i].string.string, tokens[i].string.length);
        output_newline();
    }
    #endif

    /*
    This branch handles declaring initialized variables, for example:
        u i = 0;
    */
    if (number_of_tokens > 3 && tokens[2].string.length == 1 && tokens[2].string.string[0] == '=') {
        Variable *variable, *already_declared_variable;

        /* The type must be a single character out of u, s, f and b. */
        if (
            tokens[0].string.length != 1 || (
            tokens[0].string.string[0] != 'u' &&
            tokens[0].string.string[0] != 's' &&
            tokens[0].string.string[0] != 'f' &&
            tokens[0].string.string[0] != 'b')
        ) {
            output_nullt_string("Type error on line ");
            output_u64(tokens[0].line_number);
            output_nullt_string(": invalid type: ‘");
            output_string(tokens[0].string.string, tokens[0].string.length);
            output_nullt_string(
                "’\nThe valid types are:\n"
                " u (unsigned 64-bit integer)\n Range: 0 to 18,446,744,073,709,551,615\n"
                " s (signed 64-bit integer)\n Range: -9,223,372,036,854,775,808 to +9,223,372,036,854,775,807\n"
                " f (double precision floating-point number)\n Range: about 10^-308 to about 10^308\n"
                " b (boolean)\n Possible values: true or false"
                "\n\n");
            return ERROR_NUMBER__TYPE_ERROR__INVALID_TYPE;
        }

        if (!is_valid_identifier(tokens[1].string.string, tokens[1].string.length)) {
            output_nullt_string("Identifier error on line ");
            output_u64(tokens[1].line_number);
            output_nullt_string(": invalid variable name: ‘");
            output_string(tokens[1].string.string, tokens[1].string.length);
            output_nullt_string("’\n");
            return ERROR_NUMBER__NAME_ERROR__INVALID_VARIABLE_NAME;
        }

        /* Refuse to declare the same name twice in this block. */
        already_declared_variable = (Variable*)(*hash_table_get_value_with_string_key(
            &block->variables_in_scope, tokens[1].string.string, tokens[1].string.length
        ));
        if (already_declared_variable) {
            output_nullt_string("Identifier error on line ");
            output_u64(tokens[1].line_number);
            output_nullt_string(": Variable ‘");
            output_string(tokens[1].string.string, tokens[1].string.length);
            output_nullt_string("’ already declared on line ");
            output_u64(already_declared_variable->line_declared_on);
            output_nullt_string(".\n");
            return ERROR_NUMBER__NAME_ERROR__VARIABLE_ALREADY_DECLARED;
        }

        variable = (Variable*)m_alloc(sizeof(Variable));
        if (unlikely(!variable)) {
            output_nullt_string("Unable to allocate memory for variable.\n");
            exit(1);
        }

        /* The name points into the statement's line buffer; it is not copied. */
        variable->line_declared_on = tokens[1].line_number;
        variable->name.string = tokens[1].string.string;
        variable->name.length = tokens[1].string.length;
        variable->evaluated = false;

        switch (ARRAY_INDEX(ARRAY_INDEX(tokens, 0).string.string, 0)) {
            case 'u': {
                ErrorNumber error_number;
                expression_parser_parse_expression(variable, statement, &error_number, block, TYPE_UINT);
                if (error_number != ERROR_NUMBER_NO_ERROR) {
                    m_free(variable);
                    return error_number;
                }
                break;
            }
            case 'f': {
                ErrorNumber error_number;
                expression_parser_parse_expression(variable, statement, &error_number, block, TYPE_FLOAT);
                if (error_number != ERROR_NUMBER_NO_ERROR) {
                    m_free(variable);
                    return error_number;
                }
                break;
            }
            default:
                /* 's' and 'b' pass the type check above but are not supported yet. */
                output_nullt_string("Not implemented yet. Please fix this.\n");
                exit(1);
        }

        assert_comparison(tokens[1].string.string, !=, NULL);
        assert_comparison(tokens[1].string.length, !=, 0);

        {
            /* The return value is only captured when DEBUG is set, for the assertion below. */
            #if DEBUG
            const bool return_value =
            #endif
            hash_table_insert_string(&block->variables_in_scope, tokens[1].string.string, tokens[1].string.length, variable, false);
            assert_comparison(return_value, !=, false);
        }
    }
    /* This branch handles: output <name>; */
    else if (number_of_tokens == 2 && strnequal(tokens[0].string.string, "output", tokens[0].string.length, 6)) {
        Variable* variable;

        variable = (Variable*)(*hash_table_get_value_with_string_key(&block->variables_in_scope, tokens[1].string.string, tokens[1].string.length));
        if (!variable) {
            output_nullt_string("Name error on line ");
            output_u64(tokens[1].line_number);
            output_nullt_string(": ‘");
            output_string(tokens[1].string.string, tokens[1].string.length);
            output_nullt_string("’ undeclared\n");
            exit(1);
        }

        if (ARROW(variable, evaluated)) {
            output_nullt_string("Variable has already been evaluated!\n");
            exit(1);
        } else {
            translate_expression_into_mid_level_instructions(dac, block, variable);
            ARROW_ASSIGN(variable, evaluated) = true;
        }
    }
    /* Unknown syntax error */
    else {
        if (tokens[0].line_number == tokens[number_of_tokens - 1].line_number) {
            output_nullt_string("Syntax error on line ");
            output_u64(tokens[0].line_number);
            output_nullt_string(":\n");
            output_string(statement->statement_line, statement->length_of_statement_line_string);
            output_newline();
        } else {
            output_nullt_string("Syntax error from line ");
            output_u64(tokens[0].line_number);
            output_nullt_string(" to ");
            output_u64(tokens[number_of_tokens - 1].line_number);
            output_nullt_string(":\n");
            output_string(statement->statement_line, statement->length_of_statement_line_string);
            output_newline();
        }
        return ERROR_NUMBER__SYNTAX_ERROR__UNCLASSIFIED_SYNTAX_ERROR;
    }

    return ERROR_NUMBER_NO_ERROR;
}

/*
Parses the statement parameter's statement_line member and sets the statement
parameter's tokens and length_of_tokens members appropriately.

The line is scanned twice: the first pass counts the tokens (skipping block
comments) so that the token array can be allocated with exactly the size it
needs -- no more, no less -- and the second pass fills that array in. Tokens
point into statement_line; nothing is copied.

line_number_at_start_of_statement is the (non-zero) source line the statement
starts on; it is used for the line numbers stored in the tokens and for error
messages.

Returns ERROR_NUMBER_NO_ERROR on success, or a syntax error number for an
unclosed block comment or an empty statement (in which case no token array is
allocated). Terminates the process if the token array can't be allocated.
*/
attr_nonnull
static ErrorNumber parse_statement_string_into_statement(Statement* const statement, u64 line_number_at_start_of_statement) {
    u64 i, token_number, line_number = line_number_at_start_of_statement, start_of_token_offset;

    /*
    This variable is used to test whether or not the parser is in a comment
    and also to store the starting line for the block comment.
    */
    u64 block_comment_starting_line;

    assert_comparison(statement, !=, NULL);
    assert_comparison(line_number_at_start_of_statement, !=, 0);

    block_comment_starting_line = 0;
    statement->length_of_tokens = 0;

    /* First pass: count the tokens. */
    for (i = 0; i < statement->length_of_statement_line_string; ++i) {
        if (statement->statement_line[i] == '\n') {
            ++line_number;
        }

        if (block_comment_starting_line) {
            if (statement->statement_line[i] == '/' && statement->statement_line[i - 1] == '*') {
                block_comment_starting_line = 0;
            }
            /*
            Continue regardless of whether the parser is still in the
            comment or just got out of it.
            */
            continue;
        }

        /* Start of a block comment (a slash immediately followed by a star). */
        if (statement->statement_line[i] == '/' && i + 1 != statement->length_of_statement_line_string && statement->statement_line[i + 1] == '*') {
            block_comment_starting_line = line_number;
        }

        /*
        A token ends at a delimiter whose previous character is not
        whitespace...
        */
        if (
            (statement->statement_line[i] == ' ' ||
            statement->statement_line[i] == '\t' ||
            statement->statement_line[i] == '\n' ||
            statement->statement_line[i] == ';') &&
            i && (
            statement->statement_line[i - 1] != ' ' &&
            statement->statement_line[i - 1] != '\t' &&
            statement->statement_line[i - 1] != '\n')
        ) {
            /* ...unless that delimiter directly follows the end of a block comment. */
            if (!(i > 1 && statement->statement_line[i - 2] == '*' && statement->statement_line[i - 1] == '/')) {
                ++statement->length_of_tokens;
            }
        }
    }

    if (block_comment_starting_line) {
        output_nullt_string("Syntax error: unclosed block comment");
        if (block_comment_starting_line != line_number) {
            output_nullt_string(" (it was opened on line ");
            output_u64(block_comment_starting_line);
            output_char(')');
        }
        output_char('\n');
        return ERROR_NUMBER__SYNTAX_ERROR__UNCLOSED_BLOCK_COMMENT_NOT_AT_BEGINNING_OF_LINE;
    }

    if (!statement->length_of_tokens) {
        output_nullt_string("Syntax error on line ");
        output_u64(line_number_at_start_of_statement);
        output_nullt_string(": unlike C, C++, Java, C#, D and other C-like languages, in the Shilto Language an empty statement is invalid.\n");
        return ERROR_NUMBER__SYNTAX_ERROR__EMPTY_STATEMENT;
    }

    statement->tokens = (Token*)m_alloc(sizeof(Token) * statement->length_of_tokens);
    if (unlikely(!statement->tokens)) {
        output_nullt_string("Unable to allocate memory for token.\n");
        exit(1);
    }

    /*
    Second pass: fill in the tokens.

    NOTE(review): this loop has no bound on i of its own; it only stops once
    length_of_tokens tokens have been stored, so it relies on finding exactly
    as many tokens as the first pass counted.
    */
    for (
        i = 0, token_number = 0, start_of_token_offset = 0,
        block_comment_starting_line = 0, line_number = line_number_at_start_of_statement
        ;;
        ++i
    ) {
        if (statement->statement_line[i] == '\n') {
            ++line_number;
        }

        if (block_comment_starting_line) {
            if (statement->statement_line[i] == '/' && statement->statement_line[i - 1] == '*') {
                block_comment_starting_line = 0;
            }
            ++start_of_token_offset;
            /*
            Continue regardless of whether the parser is still in the
            comment or just got out of it.
            */
            continue;
        }

        if (statement->statement_line[i] == '/' && i + 1 < statement->length_of_statement_line_string && statement->statement_line[i + 1] == '*') {
            block_comment_starting_line = line_number;
            ++start_of_token_offset;
            continue;
        }

        if (
            statement->statement_line[i] == ' ' ||
            statement->statement_line[i] == '\t' ||
            statement->statement_line[i] == '\n' ||
            statement->statement_line[i] == ';'
        ) {
            if (
                i && (
                statement->statement_line[i - 1] == ' ' ||
                statement->statement_line[i - 1] == '\t' ||
                statement->statement_line[i - 1] == '\n')
            ) {
                /* Run of whitespace: the next token starts one character later. */
                ++start_of_token_offset;
            } else if (!(i > 1 && statement->statement_line[i - 2] == '*' && statement->statement_line[i - 1] == '/')) {
                /* End of a token: record where it starts, how long it is and its line. */
                statement->tokens[token_number].string.string = statement->statement_line + start_of_token_offset;
                statement->tokens[token_number].string.length = i - start_of_token_offset;
                statement->tokens[token_number].line_number = line_number;
                ++token_number;
                if (token_number >= statement->length_of_tokens) break;
                start_of_token_offset = i + 1;
            } else {
                /* Delimiter directly after the end of a block comment. */
                ++start_of_token_offset;
            }
        }
    }

    return ERROR_NUMBER_NO_ERROR;
}

/*
Parses a block, a list of statements surrounded by curly braces. Blocks
include functions, if statements and loops.

As written, this reads semicolon-terminated statements from `file` until EOF:
  1. Whitespace and block comments before each statement are skipped.
  2. The statement's characters up to and including the ';' are copied into a
     new Statement, which is tokenized immediately.
  3. Once EOF is reached, every statement is parsed with parse_statement() and
     the block's mid-level instructions are translated to CPU instructions.

The outcome is stored in *error_number (ERROR_NUMBER_NO_ERROR on success).
The allocated block is returned in every case, including on error, where it
may be only partially filled in. Allocation failures terminate the process.

global_data is currently unused apart from the non-NULL assertion.
*/
Block* parse_block(FILE* restrict file, DataAndCode* restrict dac, ErrorNumber* error_number, GlobalData* global_data) {
    Block* block;
    char* buffer;
    int char_, last_char;
    u64 buffer_allocated_length, i, block_comment_starting_line, number_of_slash_stars_in_block_comment;
    u64 block_allocated_length, line_number, line_number_at_start_of_statement;

    (void)global_data;

    assert_comparison(file, !=, NULL);
    assert_comparison(dac, !=, NULL);
    assert_comparison(error_number, !=, NULL);
    assert_comparison(global_data, !=, NULL);

    block = (Block*)m_zero_initialized_alloc(sizeof(Block));
    if (unlikely(!block)) unable_to_allocate_memory_to_parse_block();

    buffer = m_alloc(100);
    if (unlikely(!buffer)) unable_to_allocate_memory_to_parse_block();

    block->statement_or_block_array = (Statement**)m_alloc(sizeof(Statement*) * DEFAULT_NUMBER_OF_STATEMENTS_TO_ALLOCATE);
    /* Check the pointer that was just allocated (this used to re-check buffer). */
    if (unlikely(!block->statement_or_block_array)) unable_to_allocate_memory_to_parse_block();

    line_number = 1;
    line_number_at_start_of_statement = 1;
    block_allocated_length = DEFAULT_NUMBER_OF_STATEMENTS_TO_ALLOCATE;
    buffer_allocated_length = 100;
    /*
    NOTE(review): this counter is only ever incremented; it is not reset when
    a block comment is closed, so it keeps counting across comments.
    */
    number_of_slash_stars_in_block_comment = 0;
    block->length = 0;

    while (1) {
        block_comment_starting_line = 0;
        last_char = '\0';

        /* Skip whitespace and block comments until the first character of a statement. */
        while (1) {
            char_ = fgetc(file);
            if (char_ == EOF) goto after;

            if (char_ == '\n') {
                ++line_number;
            }

            /* End of a block comment (star followed by slash). */
            if (last_char == '*' && char_ == '/') {
                block_comment_starting_line = 0;
                continue;
            }

            if (char_ == '/') {
                /* Peek at the next character to see whether a block comment starts here. */
                int next_char = fgetc(file);
                if (next_char == EOF) {
                    output_nullt_string("EOF\n");
                    exit(1);
                }

                if (next_char == '*') {
                    if (number_of_slash_stars_in_block_comment) {
                        output_nullt_string("Warning on line ");
                        output_u64(line_number);
                        output_nullt_string(": ");
                        if (number_of_slash_stars_in_block_comment == 1) {
                            output_nullt_string("‘/*’ in block comment\n");
                        } else if (number_of_slash_stars_in_block_comment == 2) {
                            output_nullt_string("another ‘/*’ in block comment\n");
                        } else {
                            output_nullt_string("a ");
                            output_u64(number_of_slash_stars_in_block_comment);
                            if (number_of_slash_stars_in_block_comment == 3) {
                                output_nullt_string("rd");
                            } else {
                                output_nullt_string("th");
                            }
                            output_nullt_string(" ‘/*’ in block comment\n");
                        }
                    } else {
                        block_comment_starting_line = line_number;
                    }
                    ++number_of_slash_stars_in_block_comment;
                    continue;
                } else {
                    /* Not a comment opener: push the peeked character back. */
                    if (ungetc(next_char, file) == EOF) {
                        output_nullt_string("Unable to ungetc.\n");
                        exit(1);
                    }
                }
                continue;
            }

            /* First non-whitespace character outside a comment: a statement starts here. */
            if (!block_comment_starting_line && char_ != ' ' && char_ != '\t' && char_ != '\n') break;

            last_char = char_;
        }

        if (number_of_slash_stars_in_block_comment > 1) {
            if (number_of_slash_stars_in_block_comment == 2) {
                output_nullt_string("Style warning: remove the ‘/*’ that is in the block comment!\n");
            } else {
                output_nullt_string("Style warning: remove all the ‘/*’s that are in the block comment!\n");
            }
        }

        line_number_at_start_of_statement = line_number;
        i = 0;

        /* Collect the statement's characters, up to and including the terminating ';'. */
        while (char_ != EOF) {
            buffer[i] = char_;

            if (char_ == '\n') {
                ++line_number;
            }

            /* Grow the buffer once its last slot has been used. */
            if (i == buffer_allocated_length - 1) {
                buffer_allocated_length *= 2;
                buffer = m_realloc(buffer, buffer_allocated_length);
                if (unlikely(!buffer)) unable_to_allocate_memory_to_parse_block();
            }

            if (char_ == ';') {
                ErrorNumber result;
                Statement* new_statement;

                /* Grow the statement array when it is full. */
                if (block->length == block_allocated_length) {
                    block_allocated_length *= 2;
                    block->statement_or_block_array = (Statement**)m_realloc(block->statement_or_block_array, sizeof(Statement*) * block_allocated_length);
                    if (unlikely(!block->statement_or_block_array)) unable_to_allocate_memory_to_parse_block();
                }

                new_statement = (Statement*)m_alloc(sizeof(Statement));
                if (unlikely(!new_statement)) unable_to_allocate_memory_for_statement();
                block->statement_or_block_array[block->length] = new_statement;

                /* The statement keeps its own copy of the text, including the ';'. */
                new_statement->length_of_statement_line_string = i + 1;
                new_statement->statement_line = m_alloc(i + 1);
                if (unlikely(!new_statement->statement_line)) unable_to_allocate_memory_for_statement();
                memcpy(new_statement->statement_line, buffer, i + 1);

                new_statement->type = STATEMENT_OR_BLOCK_TYPE__STATEMENT;

                result = parse_statement_string_into_statement(new_statement, line_number_at_start_of_statement);
                if (result != ERROR_NUMBER_NO_ERROR) {
                    /*
                    NOTE(review): block->length has not been incremented yet,
                    so new_statement is not counted in the returned block and
                    is not freed here.
                    */
                    m_free(buffer);
                    DEREF_ASSIGN(error_number) = result;
                    return block;
                }

                line_number_at_start_of_statement = line_number;
                ++block->length;
                break;
            }

            ++i;
            char_ = fgetc(file);
        }

        /* The file ended in the middle of a statement. */
        if (char_ == EOF) {
            /* Leave the last character that was read out of the echoed text. */
            --i;

            output_nullt_string("Syntax error: missing semicolon at the end of line ");
            output_u64(line_number - 1);
            output_nullt_string(":\n");
            output_string(buffer, i);
            /* Nice unicode arrow, isn't it? :) */
            output_nullt_string(" ◀――― ';'\n");
            m_free(buffer);
            DEREF_ASSIGN(error_number) = ERROR_NUMBER__SYNTAX_ERROR__MISSING_SEMICOLON;
            return block;
        }
    }

after:
    m_free(buffer);

    /* EOF was reached while still inside a block comment. */
    if (block_comment_starting_line) {
        output_nullt_string("Syntax error: unclosed block comment");
        if (block_comment_starting_line != line_number - 1) {
            output_nullt_string(" (it was opened on line ");
            output_u64(block_comment_starting_line);
            output_char(')');
        }
        output_char('\n');
        DEREF_ASSIGN(error_number) = ERROR_NUMBER__SYNTAX_ERROR__UNCLOSED_BLOCK_COMMENT_AT_BEGINNING_OF_LINE;
        return block;
    }

    hash_table_new(&block->variables_in_scope, 20, 4, 3.0);
    mid_level_instructions_init(&block->mid_level_instructions);

    /* Parse the collected statements in order, stopping at the first error. */
    for (i = 0; i < block->length; ++i) {
        ErrorNumber result = parse_statement(dac, block->statement_or_block_array[i], block);
        if (result != ERROR_NUMBER_NO_ERROR) {
            DEREF_ASSIGN(error_number) = result;
            return block;
        }
    }

    translate_mid_level_instructions_to_cpu_instructions(dac, &block->mid_level_instructions);

    DEREF_ASSIGN(error_number) = ERROR_NUMBER_NO_ERROR;
    return block;
}