#include #include #include #include #include #include #include namespace LFortran { Result<:translationunit_t> parse(Allocator &al, const std::string &s, diag::Diagnostics &diagnostics) { Parser p(al, diagnostics); try { p.parse(s); } catch (const parser_local::TokenizerError &e) { Error error; diagnostics.diagnostics.push_back(e.d); return error; } catch (const parser_local::ParserError &e) { Error error; diagnostics.diagnostics.push_back(e.d); return error; } Location l; if (p.result.size() == 0) { l.first=0; l.last=0; } else { l.first=p.result[0]->loc.first; l.last=p.result[p.result.size()-1]->loc.last; } return (AST::TranslationUnit_t*)AST::make_TranslationUnit_t(al, l, p.result.p, p.result.size()); } void Parser::parse(const std::string &input) { inp = input; if (inp.size() > 0) { if (inp[inp.size()-1] != '\n') inp.append("\n"); } else { inp.append("\n"); } m_tokenizer.set_string(inp); if (yyparse(*this) == 0) { return; } throw parser_local::ParserError("Parsing unsuccessful (internal compiler error)"); } Result<:vector>> tokens(Allocator &al, const std::string &input, diag::Diagnostics &diagnostics, std::vector *stypes, std::vector *locations) { Tokenizer t; t.set_string(input); std::vector tst; int token = yytokentype::END_OF_FILE + 1; // Something different from EOF while (token != yytokentype::END_OF_FILE) { YYSTYPE y; Location l; try { token = t.lex(al, y, l, diagnostics); } catch (const parser_local::TokenizerError &e) { diagnostics.diagnostics.push_back(e.d); return Error(); } tst.push_back(token); if (stypes) stypes->push_back(y); if (locations) locations->push_back(l); } return tst; } void cont1(const std::string &s, size_t &pos, bool &ws_or_comment) { ws_or_comment = true; bool in_comment = false; while (s[pos] != '\n') { if (s[pos] == '!') in_comment = true; if (!in_comment) { if (s[pos] != ' ' && s[pos] != '\t') { ws_or_comment = false; return; } } pos++; } pos++; } enum LineType { Comment, Statement, LabeledStatement, Continuation, EndOfFile }; // Determines the type of line // `pos` points to the first character (column) of the line // The line ends with either `\n` or `\0`. LineType determine_line_type(const unsigned char *pos) { int col=1; if (*pos == '\n') { // Empty line => classified as comment return LineType::Comment; } else if (*pos == '*' || *pos == 'c' || *pos == 'C' || *pos == '!') { // Comment return LineType::Comment; } else if (*pos == '\0') { return LineType::EndOfFile; } else { while (*pos == ' ') { pos++; col+=1; } if (*pos == '\n' || *pos == '\0') return LineType::Comment; if (*pos == '!' && col != 6) return LineType::Comment; if (col == 6) { if (*pos == ' ' || *pos == '0') { return LineType::Statement; } else { return LineType::Continuation; } } if (col <= 6) { return LineType::LabeledStatement; } else { return LineType::Statement; } } } void skip_rest_of_line(const std::string &s, size_t &pos) { while (pos < s.size() && s[pos] != '\n') { pos++; } pos++; // Skip the last '\n' } // Parses string, including possible continuation lines void parse_string(std::string &out, const std::string &s, size_t &pos) { char quote = s[pos]; LFORTRAN_ASSERT(quote == '"' || quote == '\''); out += s[pos]; pos++; while (pos < s.size() && ! (s[pos] == quote && s[pos+1] != quote)) { if (s[pos] == '\n') { pos++; pos += 6; continue; } if (s[pos] == quote && s[pos+1] == quote) { out += s[pos]; pos++; } out += s[pos]; pos++; } out += s[pos]; // Copy the last quote pos++; } bool is_num(char c) { return '0' <= c && c <= '9'; } void copy_label(std::string &out, const std::string &s, size_t &pos) { size_t col = 1; while (pos < s.size() && s[pos] != '\n' && col <= 6) { out += s[pos]; pos++; col++; } } void copy_rest_of_line(std::string &out, const std::string &s, size_t &pos) { while (pos < s.size() && s[pos] != '\n') { if (s[pos] == '"' || s[pos] == '\'') { parse_string(out, s, pos); } else if (s[pos] == '!') { skip_rest_of_line(s, pos); out += '\n'; return; } else { out += s[pos]; pos++; } } out += s[pos]; // Copy the last `\n' pos++; } // Checks that newlines are computed correctly bool check_newlines(const std::string &s, const std::vector &newlines) { std::vector newlines2; for (uint32_t pos=0; pos < s.size(); pos++) { if (s[pos] == '\n') newlines2.push_back(pos); } if (newlines2.size() != newlines.size()) return false; for (size_t i=0; i < newlines2.size(); i++) { if (newlines2[i] != newlines[i]) return false; } return true; } std::string fix_continuation(const std::string &s, LocationManager &lm, bool fixed_form) { if (fixed_form) { // `pos` is the position in the original code `s` // `out` is the final code (outcome) lm.get_newlines(s, lm.in_newlines); lm.out_start.push_back(0); lm.in_start.push_back(0); std::string out; size_t pos = 0; /* Note: * This should be a valid fixed form prescanner, except the following * features which are currently not implemented: * * * Continuation lines after comment(s) or empty lines (they will be * appended to the previous comment, and thus skipped) * * Characters after column 72 are included, but should be ignored * * White space is preserved (but should be removed) * * The parser together with this fixed form prescanner works as a fixed * form parser with some limitations. Due to the last point above, * white space is not ignored because it is needed for the parser, so * the following are not supported: * * * Extra space: `. and.`, `3.5 55 d0`, ... * * Missing space: `doi=1,5`, `callsome_subroutine(x)` * * It turns out most fixed form codes use white space as one would * expect, so it is not such a big problem and the fixes needed to do * in the fixed form Fortran code are relatively minor in practice. */ while (true) { const char *p = &s[pos]; LineType lt = determine_line_type((const unsigned char*)p); switch (lt) { case LineType::Comment : { // Skip skip_rest_of_line(s, pos); lm.out_start.push_back(out.size()); lm.in_start.push_back(pos); break; } case LineType::Statement : { // Copy from column 7 pos += 6; lm.out_start.push_back(out.size()); lm.in_start.push_back(pos); copy_rest_of_line(out, s, pos); break; } case LineType::LabeledStatement : { // Copy the label copy_label(out, s, pos); // Copy from column 7 lm.out_start.push_back(out.size()); lm.in_start.push_back(pos); copy_rest_of_line(out, s, pos); break; } case LineType::Continuation : { // Append from column 7 to previous line out = out.substr(0, out.size()-1); // Remove the last '\n' pos += 6; lm.out_start.push_back(out.size()); lm.in_start.push_back(pos); copy_rest_of_line(out, s, pos); break; } case LineType::EndOfFile : { break; } }; if (lt == LineType::EndOfFile) break; } lm.in_start.push_back(pos); lm.out_start.push_back(out.size()); return out; } else { // `pos` is the position in the original code `s` // `out` is the final code (outcome) lm.out_start.push_back(0); lm.in_start.push_back(0); std::string out; size_t pos = 0; bool in_comment = false; while (pos < s.size()) { if (s[pos] == '!') in_comment = true; if (in_comment && s[pos] == '\n') in_comment = false; if (!in_comment && s[pos] == '&') { size_t pos2=pos+1; bool ws_or_comment; cont1(s, pos2, ws_or_comment); if (ws_or_comment) lm.in_newlines.push_back(pos2-1); if (ws_or_comment) { while (ws_or_comment) { cont1(s, pos2, ws_or_comment); if (ws_or_comment) lm.in_newlines.push_back(pos2-1); } // `pos` will move by more than 1, close the old interval // lm.in_size.push_back(pos-lm.in_start[lm.in_start.size()-1]); // Move `pos` pos = pos2; if (s[pos] == '&') pos++; // Start a new interval (just the starts, the size will be // filled in later) lm.out_start.push_back(out.size()); lm.in_start.push_back(pos); } } else { if (s[pos] == '\n') lm.in_newlines.push_back(pos); } out += s[pos]; pos++; } // set the size of the last interval // lm.in_size.push_back(pos-lm.in_start[lm.in_start.size()-1]); LFORTRAN_ASSERT(check_newlines(s, lm.in_newlines)) // Add the position of EOF as the last \n, whether or not the original // file has it lm.in_start.push_back(pos); lm.out_start.push_back(out.size()); return out; } } #define T(tk, name) case (yytokentype::tk) : return name; std::string token2text(const int token) { if (0 < token && token < 256) { char t = token; return std::string(&t, 1); } switch (token) { T(END_OF_FILE, "end of file") T(TK_NEWLINE, "newline") T(TK_NAME, "identifier") T(TK_DEF_OP, "defined operator") T(TK_INTEGER, "integer") T(TK_REAL, "real") T(TK_BOZ_CONSTANT, "BOZ constant") T(TK_PLUS, "+") T(TK_MINUS, "-") T(TK_STAR, "*") T(TK_SLASH, "/") T(TK_COLON, ":") T(TK_SEMICOLON, ";") T(TK_COMMA, ",") T(TK_EQUAL, "=") T(TK_LPAREN, "(") T(TK_RPAREN, ")") T(TK_LBRACKET, "[") T(TK_RBRACKET, "]") T(TK_RBRACKET_OLD, "/)") T(TK_PERCENT, "%") T(TK_VBAR, "|") T(TK_STRING, "string") T(TK_COMMENT, "comment") T(TK_LABEL, "label") T(TK_DBL_DOT, "..") T(TK_DBL_COLON, "::") T(TK_POW, "**") T(TK_CONCAT, "//") T(TK_ARROW, "=>") T(TK_EQ, "==") T(TK_NE, "!=") T(TK_LT, "<") T(TK_LE, "<=") T(TK_GT, ">") T(TK_GE, ">=") T(TK_NOT, ".not.") T(TK_AND, ".and.") T(TK_OR, ".or.") T(TK_EQV, ".eqv.") T(TK_NEQV, ".neqv.") T(TK_TRUE, ".true.") T(TK_FALSE, ".false.") T(TK_FORMAT, "format") T(KW_ABSTRACT, "abstract") T(KW_ALL, "all") T(KW_ALLOCATABLE, "allocatable") T(KW_ALLOCATE, "allocate") T(KW_ASSIGN, "assign") T(KW_ASSIGNMENT, "assignment") T(KW_ASSOCIATE, "associate") T(KW_ASYNCHRONOUS, "asynchronous") T(KW_BACKSPACE, "backspace") T(KW_BIND, "bind") T(KW_BLOCK, "block") T(KW_CALL, "call") T(KW_CASE, "case") T(KW_CHANGE, "change") T(KW_CHANGE_TEAM, "changeteam") T(KW_CHARACTER, "character") T(KW_CLASS, "class") T(KW_CLOSE, "close") T(KW_CODIMENSION, "codimension") T(KW_COMMON, "common") T(KW_COMPLEX, "complex") T(KW_CONCURRENT, "concurrent") T(KW_CONTAINS, "contains") T(KW_CONTIGUOUS, "contiguous") T(KW_CONTINUE, "continue") T(KW_CRITICAL, "critical") T(KW_CYCLE, "cycle") T(KW_DATA, "data") T(KW_DEALLOCATE, "deallocate") T(KW_DEFAULT, "default") T(KW_DEFERRED, "deferred") T(KW_DIMENSION, "dimension") T(KW_DO, "do") T(KW_DOWHILE, "dowhile") T(KW_DOUBLE, "double") T(KW_DOUBLE_PRECISION, "doubleprecision") T(KW_ELEMENTAL, "elemental") T(KW_ELSE, "else") T(KW_ELSEIF, "elseif") T(KW_ELSEWHERE, "elsewhere") T(KW_END, "end") T(KW_END_DO, "end do") T(KW_ENDDO, "enddo") T(KW_END_IF, "end if") T(KW_ENDIF, "endif") T(KW_END_INTERFACE, "end interface") T(KW_ENDINTERFACE, "endinterface") T(KW_END_TYPE, "end type") T(KW_ENDTYPE, "endtype") T(KW_END_PROGRAM, "end program") T(KW_ENDPROGRAM, "endprogram") T(KW_END_MODULE, "end module") T(KW_ENDMODULE, "endmodule") T(KW_END_SUBMODULE, "end submodule") T(KW_ENDSUBMODULE, "endsubmodule") T(KW_END_BLOCK, "end block") T(KW_ENDBLOCK, "endblock") T(KW_END_BLOCK_DATA, "end block data") T(KW_ENDBLOCKDATA, "endblockdata") T(KW_END_SUBROUTINE, "end subroutine") T(KW_ENDSUBROUTINE, "endsubroutine") T(KW_END_FUNCTION, "end function") T(KW_ENDFUNCTION, "endfunction") T(KW_END_PROCEDURE, "end procedure") T(KW_ENDPROCEDURE, "endprocedure") T(KW_END_ENUM, "end enum") T(KW_ENDENUM, "endenum") T(KW_END_SELECT, "end select") T(KW_ENDSELECT, "endselect") T(KW_END_ASSOCIATE, "end associate") T(KW_ENDASSOCIATE, "endassociate") T(KW_END_FORALL, "end forall") T(KW_ENDFORALL, "endforall") T(KW_END_WHERE, "end where") T(KW_ENDWHERE, "endwhere") T(KW_END_CRITICAL, "end critical") T(KW_ENDCRITICAL, "endcritical") T(KW_END_FILE, "end file") T(KW_ENDFILE, "endfile") T(KW_END_TEAM, "end team") T(KW_ENDTEAM, "endteam") T(KW_ENTRY, "entry") T(KW_ENUM, "enum") T(KW_ENUMERATOR, "enumerator") T(KW_EQUIVALENCE, "equivalence") T(KW_ERRMSG, "errmsg") T(KW_ERROR, "error") T(KW_EVENT, "event") T(KW_EXIT, "exit") T(KW_EXTENDS, "extends") T(KW_EXTERNAL, "external") T(KW_FILE, "file") T(KW_FINAL, "final") T(KW_FLUSH, "flush") T(KW_FORALL, "forall") T(KW_FORMATTED, "formatted") T(KW_FORM, "form") T(KW_FORM_TEAM, "formteam") T(KW_FUNCTION, "function") T(KW_GENERIC, "generic") T(KW_GO, "go") T(KW_GOTO, "goto") T(KW_IF, "if") T(KW_IMAGES, "images") T(KW_IMPLICIT, "implicit") T(KW_IMPORT, "import") T(KW_IMPURE, "impure") T(KW_IN, "in") T(KW_INCLUDE, "include") T(KW_INOUT, "inout") T(KW_INQUIRE, "inquire") T(KW_INTEGER, "integer") T(KW_INTENT, "intent") T(KW_INTERFACE, "interface") T(KW_INTRINSIC, "intrinsic") T(KW_IS, "is") T(KW_KIND, "kind") T(KW_LEN, "len") T(KW_LOCAL, "local") T(KW_LOCAL_INIT, "local_init") T(KW_LOGICAL, "logical") T(KW_MEMORY, "memory") T(KW_MODULE, "module") T(KW_MOLD, "mold") T(KW_NAME, "name") T(KW_NAMELIST, "namelist") T(KW_NEW_INDEX, "new_index") T(KW_NOPASS, "nopass") T(KW_NON_INTRINSIC, "non_intrinsic") T(KW_NON_OVERRIDABLE, "non_overridable") T(KW_NON_RECURSIVE, "non_recursive") T(KW_NONE, "none") T(KW_NULLIFY, "nullify") T(KW_ONLY, "only") T(KW_OPEN, "open") T(KW_OPERATOR, "operator") T(KW_OPTIONAL, "optional") T(KW_OUT, "out") T(KW_PARAMETER, "parameter") T(KW_PASS, "pass") T(KW_POINTER, "pointer") T(KW_POST, "post") T(KW_PRECISION, "precision") T(KW_PRINT, "print") T(KW_PRIVATE, "private") T(KW_PROCEDURE, "procedure") T(KW_PROGRAM, "program") T(KW_PROTECTED, "protected") T(KW_PUBLIC, "public") T(KW_PURE, "pure") T(KW_QUIET, "quiet") T(KW_RANK, "rank") T(KW_READ, "read") T(KW_REAL, "real") T(KW_RECURSIVE, "recursive") T(KW_REDUCE, "reduce") T(KW_RESULT, "result") T(KW_RETURN, "return") T(KW_REWIND, "rewind") T(KW_SAVE, "save") T(KW_SELECT, "select") T(KW_SELECT_CASE, "selectcase") T(KW_SELECT_RANK, "selectrank") T(KW_SELECT_TYPE, "selecttype") T(KW_SEQUENCE, "sequence") T(KW_SHARED, "shared") T(KW_SOURCE, "source") T(KW_STAT, "stat") T(KW_STOP, "stop") T(KW_SUBMODULE, "submodule") T(KW_SUBROUTINE, "subroutine") T(KW_SYNC, "sync") T(KW_SYNC_ALL, "syncall") T(KW_SYNC_IMAGES, "synimages") T(KW_SYNC_MEMORY, "syncmemory") T(KW_SYNC_TEAM, "syncteam") T(KW_TARGET, "target") T(KW_TEAM, "team") T(KW_TEAM_NUMBER, "team_number") T(KW_THEN, "then") T(KW_TO, "to") T(KW_TYPE, "type") T(KW_UNFORMATTED, "unformatted") T(KW_USE, "use") T(KW_VALUE, "value") T(KW_VOLATILE, "volatile") T(KW_WAIT, "wait") T(KW_WHERE, "where") T(KW_WHILE, "while") T(KW_WRITE, "write") default : { std::cout << "TOKEN: " << token << std::endl; throw LFortranException("Token conversion not implemented yet."); } } } void Parser::handle_yyerror(const Location &loc, const std::string &msg) { std::string message; if (msg == "syntax is ambiguous") { message = "Internal Compiler Error: syntax is ambiguous in the parser"; } else if (msg == "syntax error") { LFortran::YYSTYPE yylval_; YYLTYPE yyloc_; this->m_tokenizer.cur = this->m_tokenizer.tok; int token = this->m_tokenizer.lex(this->m_a, yylval_, yyloc_, diag); if (token == yytokentype::END_OF_FILE) { message = "End of file is unexpected here"; } else if (token == yytokentype::TK_NEWLINE) { message = "Newline is unexpected here"; } else { std::string token_str = this->m_tokenizer.token(); std::string token_type = token2text(token); if (token_str == token_type) { message = "Token '" + token_str + "' is unexpected here"; } else { message = "Token '" + token_str + "' (of type '" + token2text(token) + "') is unexpected here"; } } } else { message = "Internal Compiler Error: parser returned unknown error"; } throw parser_local::ParserError(message, loc); } }