From 471ac26106860a09b2bdfeeca5e61bd71fab5cf7 Mon Sep 17 00:00:00 2001 From: Herb Sutter Date: Sun, 9 Jun 2024 16:55:15 -0700 Subject: [PATCH 1/3] Add `..` syntax to select a member function only With this commit, `x.f()` is still UFCS, but `x..f()` will now find only a member function. For now I like that the default is UFCS, but as we get more experience I'm open to changing the default so that `.` is member selection and `..` is UFCS (which would be a breaking change, but a mechanical one). Also: Remove the old version of `get_declaration_of` now that the new lookup seems stable. --- ...pure2-ufcs-member-access-and-chaining.cpp2 | 7 + .../msvc-2022-c++latest/MSVC-version.output | 2 +- source/lex.h | 13 +- source/parse.h | 15 +- source/sema.h | 267 ++---------------- source/to_cpp1.h | 12 +- 6 files changed, 63 insertions(+), 253 deletions(-) diff --git a/regression-tests/pure2-ufcs-member-access-and-chaining.cpp2 b/regression-tests/pure2-ufcs-member-access-and-chaining.cpp2 index decb3357a..3da15e797 100644 --- a/regression-tests/pure2-ufcs-member-access-and-chaining.cpp2 +++ b/regression-tests/pure2-ufcs-member-access-and-chaining.cpp2 @@ -21,6 +21,9 @@ main: () -> int = { 42.no_return(); res.no_return(); + + obj: mytype = (); + obj..hun(42); // explicit non-UFCS } no_return: (_) = { } @@ -41,3 +44,7 @@ get_i: (r:_) -> int = { // And a test for non-local UFCS, which shouldn't do a [&] capture f: (_)->int = 0; y: int = 0.f(); + +mytype: type = { + hun: (i: int) = { } +} diff --git a/regression-tests/test-results/msvc-2022-c++latest/MSVC-version.output b/regression-tests/test-results/msvc-2022-c++latest/MSVC-version.output index ba2c3c80f..ca1298b64 100644 --- a/regression-tests/test-results/msvc-2022-c++latest/MSVC-version.output +++ b/regression-tests/test-results/msvc-2022-c++latest/MSVC-version.output @@ -1,3 +1,3 @@ -Microsoft (R) C/C++ Optimizing Compiler Version 19.39.33523 for x86 +Microsoft (R) C/C++ Optimizing Compiler Version 19.40.33811 for x64 Copyright (C) Microsoft Corporation. All rights reserved. diff --git a/source/lex.h b/source/lex.h index b1fad1437..dc843ee07 100644 --- a/source/lex.h +++ b/source/lex.h @@ -83,6 +83,7 @@ enum class lexeme : std::int8_t { Semicolon, Comma, Dot, + DotDot, Ellipsis, QuestionMark, At, @@ -180,6 +181,7 @@ auto _as(lexeme l) break;case lexeme::Semicolon: return "Semicolon"; break;case lexeme::Comma: return "Comma"; break;case lexeme::Dot: return "Dot"; + break;case lexeme::DotDot: return "DotDot"; break;case lexeme::Ellipsis: return "Ellipsis"; break;case lexeme::QuestionMark: return "QuestionMark"; break;case lexeme::At: return "At"; @@ -845,9 +847,9 @@ auto lex_line( if ( i >= 3 - && (tokens[i-3] != "::" && tokens[i-3] != ".") + && (tokens[i-3] != "::" && tokens[i-3].type() != lexeme::Dot && tokens[i - 3].type() != lexeme::DotDot) && (tokens[i-2] == "unique" || tokens[i-2] == "shared") - && tokens[i-1] == "." + && tokens[i-1].type() == lexeme::Dot && tokens[i] == "new" ) { @@ -1402,10 +1404,11 @@ auto lex_line( //G //G punctuator: one of - //G '...' '.' + //G '.' '..' '...' break;case '.': - if (peek1 == '.' && peek2 == '.') { store(3, lexeme::Ellipsis); } - else { store(1, lexeme::Dot); } + if (peek1 == '.' && peek2 == '.') { store(3, lexeme::Ellipsis); } + else if (peek1 == '.') { store(2, lexeme::DotDot); } + else { store(1, lexeme::Dot); } //G '::' ':' break;case ':': diff --git a/source/parse.h b/source/parse.h index b8d404625..d92b0cbca 100644 --- a/source/parse.h +++ b/source/parse.h @@ -36,10 +36,6 @@ auto violates_lifetime_safety = false; auto is_prefix_operator(token const& tok) -> bool { - //if (to_passing_style(tok) != passing_style::invalid) { - // return true; - //} - switch (tok.type()) { break;case lexeme::Not: case lexeme::Minus: @@ -1682,7 +1678,7 @@ auto postfix_expression_node::get_first_token_ignoring_this() const expr->get_token() && *expr->get_token() == "this" && std::ssize(ops) == 1 - && ops[0].op->type() == lexeme::Dot + && (ops[0].op->type() == lexeme::Dot || ops[0].op->type() == lexeme::DotDot) ) { return ops[0].id_expr->get_token(); @@ -5820,6 +5816,7 @@ class parser //G postfix-expression '[' expression-list? ','? ']' //G postfix-expression '(' expression-list? ','? ')' //G postfix-expression '.' id-expression + //G postfix-expression '..' id-expression //G auto postfix_expression() -> std::unique_ptr @@ -5837,7 +5834,8 @@ class parser || curr().type() == lexeme::LeftBracket || curr().type() == lexeme::LeftParen || curr().type() == lexeme::Dot - ) + || curr().type() == lexeme::DotDot + ) ) { // * and & can't be unary operators if followed by a (, identifier, or literal @@ -5916,7 +5914,10 @@ class parser break; } } - else if (term.op->type() == lexeme::Dot) + else if ( + term.op->type() == lexeme::Dot + || term.op->type() == lexeme::DotDot + ) { term.id_expr = id_expression(); if (!term.id_expr) { diff --git a/source/sema.h b/source/sema.h index 546b0ec3a..eed9a85e1 100644 --- a/source/sema.h +++ b/source/sema.h @@ -419,26 +419,6 @@ class sema ) const -> declaration_sym const* { - //--------- START TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION --------- - auto result_old = static_cast(nullptr); - if (flag_internal_debug) - { - auto debug = false; // t == "xxx" && t.position().colno > 20; - if (debug) - { - std::cout << "\nlooking up " << t.as_string_view() << " (" << static_cast(&t) << ", " << look_beyond_current_function << ", " << include_implicit_this << ") from " << t.position().to_string(); - std::cout << " ... symbol table state is:\n"; - debug_print(std::cout); - } - - result_old = get_declaration_of_old(t, look_beyond_current_function, include_implicit_this ); - - if (debug) { - std::cout << "\n found -> " << static_cast(result_old); - } - } - //--------- END TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION ----------- - // Calculate result using declaration_of[] auto result = static_cast(nullptr); @@ -502,228 +482,36 @@ class sema } - //--------- START TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION --------- - // Now do a regression test violation check - // - if ( - flag_internal_debug - && result_old != result - ) - { - std::cerr << "\n Internal compiler error - see cppfront-ice-data.out for debug information\n\n"; + ////--------- START TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION --------- + //// Now do a regression test violation check + //// + //if ( + // flag_internal_debug + // && result_old != result + // ) + //{ + // std::cerr << "\n Internal compiler error - see cppfront-ice-data.out for debug information\n\n"; - auto out = std::ofstream{"cppfront-ice-data.out"}; + // auto out = std::ofstream{"cppfront-ice-data.out"}; - out << "g_d_o arguments:\n"; - out << " " << static_cast(&t) << " -> " << t.as_string_view() << " @ " << t.position().to_string() - << ", token order # " << t.get_global_token_order() << "\n"; - out << " look_beyond_current_function: " << std::boolalpha << look_beyond_current_function << "\n"; - out << " include_implicit_this: " << std::boolalpha << include_implicit_this << "\n"; + // out << "g_d_o arguments:\n"; + // out << " " << static_cast(&t) << " -> " << t.as_string_view() << " @ " << t.position().to_string() + // << ", token order # " << t.get_global_token_order() << "\n"; + // out << " look_beyond_current_function: " << std::boolalpha << look_beyond_current_function << "\n"; + // out << " include_implicit_this: " << std::boolalpha << include_implicit_this << "\n"; - out << "result_old: " << static_cast(result_old) << "\n"; - out << "result: " << static_cast(result ) << "\n\n"; + // out << "result_old: " << static_cast(result_old) << "\n"; + // out << "result: " << static_cast(result ) << "\n\n"; - debug_print(out); + // debug_print(out); - exit(EXIT_FAILURE); - } - //--------- END TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION ----------- + // exit(EXIT_FAILURE); + //} + ////--------- END TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION ----------- return result; } - //----------------------------------------------------------------------- - // Unoptimized version: Baseline to check against for regressions - // - auto get_declaration_of_old( - token const& t, - bool look_beyond_current_function = false, - bool include_implicit_this = false - ) const - -> declaration_sym const* - { - CPP2_SCOPE_TIMER("get_declaration_of - old"); - - // First find the position the query is coming from - // and remember its depth - auto i = symbols.cbegin(); - while ( - i != symbols.cend() - && i->get_global_token_order() < t.get_global_token_order() - ) - { - ++i; - } - - if (i == symbols.cbegin()) { - return nullptr; - } - - auto depth = 0; - - // If we found it exactly, we have its depth - if ( - i != symbols.cend() - && i->get_global_token_order() == t.get_global_token_order() - ) - { - depth = i->depth; - } - // Else we're at cend or at the entry for the following token, - // so move backward one entry to approximate the location - else - { - // OK since we already checked that we're not at cbegin - --i; - - // If we're now at cbegin, not found - if (i == symbols.cbegin()) { - return nullptr; - } - - // Else if this location is the end of a named non-object - // declaration, go to its beginning - if (i->is_declaration()) - { - auto find_start_of_name = static_cast(nullptr); - - if ( - !i->start - && !i->as_declaration().declaration->is_object() - ) - { - find_start_of_name = i->as_declaration().declaration->name(); - } - - if (find_start_of_name) - { - // We want to go backwards until we're at the corresponding - // declaration start, and there could be intervening nested - // declaration ends/starts to pass by - int relative_decl_depth = 1; - while (--i != symbols.cbegin()) { - if ( - i->is_declaration() - && !i->as_declaration().declaration->is_object() - ) - { - if (i->start) { - --relative_decl_depth; - if (relative_decl_depth == 0) { - // With proper nesting, this should be the name we're looking for - assert( - i->as_declaration().declaration->has_name( - find_start_of_name->as_string_view() - ) - ); - break; - } - } - else { - ++relative_decl_depth; - } - } - } - - // This shouldn't happen with correct nesting, so - // alternatively we could assert(i != symbols.cbegin()) - if (i == symbols.cbegin()) { - return nullptr; - } - } - } - - depth = i->depth; - } - - // Then look backward to find the first declaration of - // this name that is not deeper (in a nested scope) - // and is in the same function - using I = stable_vector::const_iterator; - auto advance = [](I& i, int n, I bound) { // TODO Use `std::ranges::advance` - auto in = i; - if (std::abs(n) >= std::abs(bound - i)) { - i = bound; - } - else { - std::advance(i, n); - } - return n - (i - in); - }; - advance(i, -int(i->position() > t.position()), symbols.cbegin()); - advance(i, 1, symbols.cend()); - while (advance(i, -1, symbols.begin()) == 0) - { - if ( - i->sym.index() == symbol::active::declaration - && i->depth <= depth - ) - { - auto const& decl = std::get(i->sym); - - // Conditionally look beyond the start of the current named (has identifier) - // function (an unnamed function is ok to look beyond) or enclosing scope - assert(decl.declaration); - if ( - ( - decl.declaration->type.index() == declaration_node::a_function - || decl.declaration->type.index() == declaration_node::a_type - || decl.declaration->type.index() == declaration_node::a_namespace - ) - && decl.declaration->identifier - && !look_beyond_current_function - ) - { - return nullptr; - } - - // If the name matches, this is it - if ( - decl.identifier - && *decl.identifier == t - ) - { - return &decl; - } - - // If we reached a 'move this' parameter, look it up in the type members - if ( - include_implicit_this - && decl.identifier - && *decl.identifier == "this" - ) - { - if (auto n = decl.declaration; - decl.parameter - && decl.parameter->pass == passing_style::move - && n - && n->parent_is_function() - && (n = n->parent_declaration)->parent_is_type() - && n->my_statement - && n->my_statement->compound_parent - && std::any_of( - n->my_statement->compound_parent->statements.begin(), - n->my_statement->compound_parent->statements.end(), - [&t, n](std::unique_ptr const& s) mutable { - return s - && s->statement.index() == statement_node::declaration - && (n = &*std::get(s->statement))->identifier - && n->identifier->to_string() == t; - }) - ) - { - return &decl; - } - return nullptr; - } - - depth = i->depth; - } - } - - return nullptr; - } - auto is_captured(token const& t) const -> bool @@ -1708,7 +1496,7 @@ class sema if ( decl->declaration->is_type() && n.ops[0].op - && n.ops[0].op->type() == lexeme::Dot + && (n.ops[0].op->type() == lexeme::Dot || n.ops[0].op->type() == lexeme::DotDot) ) { errors.emplace_back( @@ -2751,8 +2539,7 @@ class sema // If we found it exactly if ((*i)->declaration->has_name(t)) { - // SUCCESS: Record it and break - //declaration_of[&t] = { *i, in_current_function && (*i)->declaration->parent_is_function() }; + // SUCCESS: Break and record it break; } @@ -2788,8 +2575,10 @@ class sema // PARTIAL SUCCESS: Record the location of 'this' and keep going // found_this = *i; - prev_token_was_this = *prev2_token == "this" && *prev_token == "."; - //declaration_of[&t] = { *i, in_current_function, *prev2_token == "this" && *prev_token == "." }; + prev_token_was_this = + *prev2_token == "this" + && (prev_token->type() == lexeme::Dot || prev_token->type() == lexeme::DotDot) + ; } } } @@ -2814,7 +2603,7 @@ class sema auto started_member_access = prev_token - && prev_token->type() == lexeme::Dot; + && (prev_token->type() == lexeme::Dot || prev_token->type() == lexeme::DotDot); auto started_this_member_access = started_member_access && prev2_token diff --git a/source/to_cpp1.h b/source/to_cpp1.h index 79289105b..e14317983 100644 --- a/source/to_cpp1.h +++ b/source/to_cpp1.h @@ -272,6 +272,9 @@ class positional_printer ) -> void { +if (s.starts_with("..")) { + s == s; +} // Take ownership of (and reset) just_printed_line_directive value auto line_directive_already_done = std::exchange(just_printed_line_directive, false); @@ -3419,7 +3422,11 @@ class cppfront args.reset(); } - auto print = print_to_string(*i->id_expr, false /*not a local name*/, i->op->type() == lexeme::Dot); + auto print = print_to_string( + *i->id_expr, + false, // not a local name + i->op->type() == lexeme::Dot || i->op->type() == lexeme::DotDot // member access + ); suffix.emplace_back( print, i->id_expr->position() ); } @@ -3450,6 +3457,9 @@ class cppfront } suffix.emplace_back( ", ", i->op->position() ); } + else if( i->op->type() == lexeme::DotDot) { + suffix.emplace_back(".", i->op->position()); + } else { suffix.emplace_back( i->op->to_string(), i->op->position() ); } From 70fed4b2d85fd999bde13ee2b55910288f8bf89e Mon Sep 17 00:00:00 2001 From: Herb Sutter Date: Mon, 10 Jun 2024 12:40:32 -0700 Subject: [PATCH 2/3] Remove some debug code --- source/to_cpp1.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/source/to_cpp1.h b/source/to_cpp1.h index e14317983..ce24028cd 100644 --- a/source/to_cpp1.h +++ b/source/to_cpp1.h @@ -272,9 +272,6 @@ class positional_printer ) -> void { -if (s.starts_with("..")) { - s == s; -} // Take ownership of (and reset) just_printed_line_directive value auto line_directive_already_done = std::exchange(just_printed_line_directive, false); From 58002c976282bff1ec33e054c7cbb953d9f7036e Mon Sep 17 00:00:00 2001 From: Herb Sutter Date: Thu, 13 Jun 2024 10:21:32 -0700 Subject: [PATCH 3/3] Finish removing old g_d_o paths --- source/sema.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/source/sema.h b/source/sema.h index eed9a85e1..7ff07df67 100644 --- a/source/sema.h +++ b/source/sema.h @@ -449,13 +449,6 @@ class sema // Now we have the lookup result, but based on lookup constraints flags // we may decide it's unsuitable for this lookup and not use it... - // - // TODO: The initial versions of these conditions are written to make the - // new lookup results exactly match the prior lookup results, so we don't - // introduce regressions. However, not all these wrinkles may be needed - // or desirable, so after we know we didn't introduce regressions we can - // consider tweaking/simplifying them -- which is easier now that they're - // collected all in one place if ( result // If we were told not to look beyond the current function @@ -482,33 +475,6 @@ class sema } - ////--------- START TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION --------- - //// Now do a regression test violation check - //// - //if ( - // flag_internal_debug - // && result_old != result - // ) - //{ - // std::cerr << "\n Internal compiler error - see cppfront-ice-data.out for debug information\n\n"; - - // auto out = std::ofstream{"cppfront-ice-data.out"}; - - // out << "g_d_o arguments:\n"; - // out << " " << static_cast(&t) << " -> " << t.as_string_view() << " @ " << t.position().to_string() - // << ", token order # " << t.get_global_token_order() << "\n"; - // out << " look_beyond_current_function: " << std::boolalpha << look_beyond_current_function << "\n"; - // out << " include_implicit_this: " << std::boolalpha << include_implicit_this << "\n"; - - // out << "result_old: " << static_cast(result_old) << "\n"; - // out << "result: " << static_cast(result ) << "\n\n"; - - // debug_print(out); - - // exit(EXIT_FAILURE); - //} - ////--------- END TEMPORARY REGRESSION TEST CODE FOR G_D_O OPTIMIZATION VERIFICATION ----------- - return result; }