From 598c0259e1f67a678dd9cef8adf5d21500d10bc8 Mon Sep 17 00:00:00 2001 From: Krzysztof Bieganski Date: Wed, 7 Jun 2023 16:54:13 +0200 Subject: [PATCH] Format preprocessed token stream in multiple passes Currently, the formatter doesn't handle many scenarios involving preprocessor `ifdef`s/`endif`s interleaved with `begin`s, module headers, etc. This patch attempts to solve this problem by performing multiple passes of the formatting on preprocessed variants of the source. Each of these variants has a different set of preprocessor branches enabled. Together, they should cover the entire source (though that doesn't work in all cases yet). After several formatting passes for different variants of the AST, a correct and properly formatted file is produced. This is still work in progress, so not everything works, and the code isn't very clean. I'd love to get some early feedback on this. Signed-off-by: Krzysztof Bieganski --- common/text/text_structure.cc | 121 ++++++++++------ common/text/text_structure.h | 16 ++- common/text/token_info.h | 2 + verilog/CST/expression_test.cc | 2 +- verilog/analysis/BUILD | 3 + verilog/analysis/extractors_test.cc | 2 +- verilog/analysis/flow_tree.cc | 91 +++++++++++- verilog/analysis/flow_tree.h | 16 ++- verilog/analysis/verilog_analyzer.cc | 18 ++- verilog/analysis/verilog_analyzer_test.cc | 6 +- verilog/analysis/verilog_project.cc | 2 +- verilog/formatting/BUILD | 1 + verilog/formatting/align.cc | 1 + verilog/formatting/formatter.cc | 130 +++++++++++++----- verilog/formatting/formatter_test.cc | 2 +- verilog/formatting/token_annotator.cc | 9 ++ verilog/formatting/tree_unwrapper.cc | 116 +++++++++++----- verilog/preprocessor/verilog_preprocess.cc | 24 ++-- verilog/preprocessor/verilog_preprocess.h | 11 +- .../preprocessor/verilog_preprocess_test.cc | 26 ++-- 20 files changed, 448 insertions(+), 151 deletions(-) diff --git a/common/text/text_structure.cc b/common/text/text_structure.cc index 9ce7949810..ad17b754db 100644 --- a/common/text/text_structure.cc +++ b/common/text/text_structure.cc @@ -244,6 +244,11 @@ void TextStructureView::TrimTokensToSubstring(int left_offset, tokens_view_.begin(), tokens_view_.end(), view_trim_range.begin()); const auto iter_trim_end = std::lower_bound( iter_trim_begin, tokens_view_.end(), view_trim_range.end()); + const auto iter_trim_begin2 = + std::lower_bound(prep_tokens_view_.begin(), prep_tokens_view_.end(), + view_trim_range.begin()); + const auto iter_trim_end2 = std::lower_bound( + iter_trim_begin2, prep_tokens_view_.end(), view_trim_range.end()); // Copy subset of tokens to new token sequence. TokenSequence trimmed_stream(view_trim_range.begin(), view_trim_range.end()); @@ -278,9 +283,17 @@ void TextStructureView::TrimTokensToSubstring(int left_offset, const int new_index = old_index - index_difference; trimmed_view.push_back(trimmed_stream.begin() + new_index); } + TokenStreamView prep_trimmed_view; + prep_trimmed_view.reserve(std::distance(iter_trim_begin2, iter_trim_end2)); + for (auto token_iterator : make_range(iter_trim_begin2, iter_trim_end2)) { + const int old_index = std::distance(tokens_.cbegin(), token_iterator); + const int new_index = old_index - index_difference; + prep_trimmed_view.push_back(trimmed_stream.begin() + new_index); + } // Swap new-old arrays, which will cause old arrays to be deleted. tokens_view_.swap(trimmed_view); + prep_tokens_view_.swap(prep_trimmed_view); tokens_.swap(trimmed_stream); } @@ -431,23 +444,28 @@ absl::Status TextStructureView::InternalConsistencyCheck() const { return SyntaxTreeConsistencyCheck(); } +// TokenRange can be a container reference or iterator range. +template +static void CopyTokens(TokenSequence* destination, + const TokenRange& token_source) { + // Copy tokens up to this expansion point. + for (const auto& token : token_source) { + destination->push_back(token); + } +} + // TokenRange can be a container reference or iterator range. // TokenViewRange can be a container reference or iterator range. template -static void CopyTokensAndView(TokenSequence* destination, - std::vector* view_indices, - const TokenRange& token_source, - const TokenViewRange& view_source) { +static void CopyView(size_t base, std::vector* view_indices, + const TokenRange& token_source, + const TokenViewRange& view_source) { // Translate token_view's iterators into array indices, adjusting for the // number of pre-existing tokens. for (const auto& token_iter : view_source) { - view_indices->push_back(destination->size() + + view_indices->push_back(base + std::distance(token_source.begin(), token_iter)); } - // Copy tokens up to this expansion point. - for (const auto& token : token_source) { - destination->push_back(token); - } } // Incrementally copies a slice of tokens and expands a single subtree. @@ -457,29 +475,34 @@ static void CopyTokensAndView(TokenSequence* destination, // token_view_indices. Offset is the location of each expansion point. void TextStructureView::ConsumeDeferredExpansion( TokenSequence::const_iterator* next_token_iter, - TokenStreamView::const_iterator* next_token_view_iter, + const std::vector& next_token_view_iters, DeferredExpansion* expansion, TokenSequence* combined_tokens, - std::vector* token_view_indices, const char* offset) { + const std::vector& token_views, + const std::vector*>& token_view_indices, + const char* offset) { auto token_iter = *next_token_iter; - auto token_view_iter = *next_token_view_iter; - // Find the position up to each expansion point. - *next_token_iter = - std::lower_bound(token_iter, tokens_.cend(), offset, - [](const TokenInfo& token, const char* target) { - return std::distance(target, token.text().begin()) < 0; - }); - CHECK(*next_token_iter != tokens_.cend()); - *next_token_view_iter = std::lower_bound( - token_view_iter, tokens_view_.cend(), offset, - [](TokenStreamView::const_reference token_ref, const char* target) { - return std::distance(target, token_ref->text().begin()) < 0; - }); - CHECK(*next_token_view_iter != tokens_view_.cend()); - - // Copy tokens and partial view into output. - CopyTokensAndView(combined_tokens, token_view_indices, - make_range(token_iter, *next_token_iter), - make_range(token_view_iter, *next_token_view_iter)); + for (size_t i = 0; i < token_view_indices.size(); i++) { + auto token_view_iter = *next_token_view_iters[i]; + // Find the position up to each expansion point. + *next_token_iter = std::lower_bound( + token_iter, tokens_.cend(), offset, + [](const TokenInfo& token, const char* target) { + return std::distance(target, token.text().begin()) < 0; + }); + CHECK(*next_token_iter != tokens_.cend()); + *next_token_view_iters[i] = std::lower_bound( + token_view_iter, token_views[i]->cend(), offset, + [](TokenStreamView::const_reference token_ref, const char* target) { + return std::distance(target, token_ref->text().begin()) < 0; + }); + CHECK(*next_token_view_iters[i] != tokens_view_.cend()); + + // Copy tokens and partial view into output. + CopyView(combined_tokens->size(), token_view_indices[i], + make_range(token_iter, *next_token_iter), + make_range(token_view_iter, *next_token_view_iters[i])); + } + CopyTokens(combined_tokens, make_range(token_iter, *next_token_iter)); // Adjust locations of tokens in the expanded tree by pointing them // into the original text (contents_). @@ -498,8 +521,11 @@ void TextStructureView::ConsumeDeferredExpansion( // Don't want to splice it into result. sub_data.tokens_.pop_back(); } - CopyTokensAndView(combined_tokens, token_view_indices, sub_data.tokens_, - sub_data.tokens_view_); + for (std::vector* indices : token_view_indices) { + CopyView(combined_tokens->size(), indices, sub_data.tokens_, + sub_data.tokens_view_); + } + CopyTokens(combined_tokens, sub_data.tokens_); // Transfer ownership of transformed syntax tree to this object's tree. *expansion->expansion_point = std::move(sub_data.MutableSyntaxTree()); @@ -507,7 +533,9 @@ void TextStructureView::ConsumeDeferredExpansion( // Advance one past expansion point to skip over expanded token. ++*next_token_iter; - ++*next_token_view_iter; + for (auto next_token_view_iter : next_token_view_iters) { + ++*next_token_view_iter; + } } TextStructure::TextStructure(std::shared_ptr contents) @@ -530,19 +558,29 @@ void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) { // Gather indices and reconstruct iterators after there are no more // reallocations due to growing combined_tokens. std::vector combined_token_view_indices; + std::vector combined_prep_token_view_indices; auto token_iter = tokens_.cbegin(); auto token_view_iter = tokens_view_.cbegin(); + auto prep_token_view_iter = prep_tokens_view_.cbegin(); for (auto& expansion_entry : *expansions) { const auto offset = Contents().begin() + expansion_entry.first; - ConsumeDeferredExpansion(&token_iter, &token_view_iter, - &expansion_entry.second, &combined_tokens, - &combined_token_view_indices, offset); + ConsumeDeferredExpansion( + &token_iter, {&token_view_iter, &prep_token_view_iter}, + &expansion_entry.second, &combined_tokens, + {&tokens_view_, &prep_tokens_view_}, + {&combined_token_view_indices, &combined_prep_token_view_indices}, + offset); } // Copy the remaining tokens beyond the last expansion point. - CopyTokensAndView(&combined_tokens, &combined_token_view_indices, - make_range(token_iter, tokens_.cend()), - make_range(token_view_iter, tokens_view_.cend())); + CopyView(combined_tokens.size(), &combined_token_view_indices, + make_range(token_iter, tokens_.cend()), + make_range(token_view_iter, tokens_view_.cend())); + CopyView(combined_tokens.size(), &combined_prep_token_view_indices, + make_range(token_iter, tokens_.cend()), + make_range(prep_token_view_iter, prep_tokens_view_.cend())); + + CopyTokens(&combined_tokens, make_range(token_iter, tokens_.cend())); // Commit the newly expanded sequence of tokens. tokens_.swap(combined_tokens); @@ -553,6 +591,11 @@ void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) { for (const auto index : combined_token_view_indices) { tokens_view_.push_back(tokens_.cbegin() + index); } + prep_tokens_view_.clear(); + prep_tokens_view_.reserve(combined_prep_token_view_indices.size()); + for (const auto index : combined_prep_token_view_indices) { + prep_tokens_view_.push_back(tokens_.cbegin() + index); + } // Recalculate line-by-line token ranges. // TODO(fangism): Should be possible to update line_token_map_ incrementally diff --git a/common/text/text_structure.h b/common/text/text_structure.h index 816ef24247..973efbb028 100644 --- a/common/text/text_structure.h +++ b/common/text/text_structure.h @@ -99,6 +99,14 @@ class TextStructureView { TokenStreamView& MutableTokenStreamView() { return tokens_view_; } + const TokenStreamView& GetPreprocessedTokenStreamView() const { + return prep_tokens_view_; + } + + TokenStreamView& MutablePreprocessedTokenStreamView() { + return prep_tokens_view_; + } + // Creates a stream of modifiable iterators to the filtered tokens. // Uses tokens_view_ to create the iterators. TokenStreamReferenceView MakeTokenStreamReferenceView(); @@ -205,6 +213,8 @@ class TextStructureView { // Possibly modified view of the tokens_ token sequence. TokenStreamView tokens_view_; + TokenStreamView prep_tokens_view_; + // Index of token iterators that mark the beginnings of each line. // Lazily calculated on request. mutable std::vector lazy_line_token_map_; @@ -220,9 +230,11 @@ class TextStructureView { void ConsumeDeferredExpansion( TokenSequence::const_iterator* next_token_iter, - TokenStreamView::const_iterator* next_token_view_iter, + const std::vector& next_token_view_iter, DeferredExpansion* expansion, TokenSequence* combined_tokens, - std::vector* token_view_indices, const char* offset); + const std::vector& token_views, + const std::vector*>& token_view_indices, + const char* offset); // Resets all fields. Only needed in tests. void Clear(); diff --git a/common/text/token_info.h b/common/text/token_info.h index 7147a677ff..66d2091918 100644 --- a/common/text/token_info.h +++ b/common/text/token_info.h @@ -157,6 +157,8 @@ class TokenInfo { bool isEOF() const { return token_enum_ == TK_EOF; } + bool visible = true; // FIXME: do this differently, or put in protected + protected: // protected, as ExpectedTokenInfo accesses it. int token_enum_; diff --git a/verilog/CST/expression_test.cc b/verilog/CST/expression_test.cc index e4d254bf0b..e683c1b2c9 100644 --- a/verilog/CST/expression_test.cc +++ b/verilog/CST/expression_test.cc @@ -32,7 +32,7 @@ namespace verilog { namespace { -static constexpr VerilogPreprocess::Config kDefaultPreprocess; +static VerilogPreprocess::Config kDefaultPreprocess; using verible::SyntaxTreeSearchTestCase; using verible::TextStructureView; diff --git a/verilog/analysis/BUILD b/verilog/analysis/BUILD index f628ae249b..9a816a8250 100644 --- a/verilog/analysis/BUILD +++ b/verilog/analysis/BUILD @@ -79,10 +79,13 @@ cc_library( hdrs = ["flow_tree.h"], deps = [ "//common/text:token-stream-view", + "//common/util:interval-set", "//common/util:logging", "//common/util:status-macros", "//verilog/parser:verilog-token-enum", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) diff --git a/verilog/analysis/extractors_test.cc b/verilog/analysis/extractors_test.cc index 7df644efa0..ded7c77a08 100644 --- a/verilog/analysis/extractors_test.cc +++ b/verilog/analysis/extractors_test.cc @@ -26,7 +26,7 @@ namespace verilog { namespace analysis { namespace { -static constexpr VerilogPreprocess::Config kDefaultPreprocess; +static VerilogPreprocess::Config kDefaultPreprocess; TEST(CollectInterfaceNamesTest, NonModuleTests) { const std::pair> kTestCases[] = { diff --git a/verilog/analysis/flow_tree.cc b/verilog/analysis/flow_tree.cc index 68bdf0dd55..df45cfce94 100644 --- a/verilog/analysis/flow_tree.cc +++ b/verilog/analysis/flow_tree.cc @@ -1,4 +1,4 @@ -// Copyright 2017-2022 The Verible Authors. +// Copyright 2017-2023 The Verible Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -126,6 +126,10 @@ absl::Status FlowTree::MacroFollows( return absl::InvalidArgumentError("Error macro name can't be extracted."); } auto macro_iterator = conditional_iterator + 1; + if (macro_iterator->token_enum() == TK_SPACE) { + // FIXME: It's not always there? + macro_iterator++; + } if (macro_iterator->token_enum() != PP_Identifier) { return absl::InvalidArgumentError("Expected identifier for macro name."); } @@ -142,6 +146,10 @@ absl::Status FlowTree::AddMacroOfConditional( "Error no macro follows the conditional directive."); } auto macro_iterator = conditional_iterator + 1; + if (macro_iterator->token_enum() == TK_SPACE) { + // FIXME: It's not always there? + macro_iterator++; + } auto macro_identifier = macro_iterator->text(); if (conditional_macro_id_.find(macro_identifier) == conditional_macro_id_.end()) { @@ -162,6 +170,10 @@ int FlowTree::GetMacroIDOfConditional( return -1; } auto macro_iterator = conditional_iterator + 1; + if (macro_iterator->token_enum() == TK_SPACE) { + // FIXME: It's not always there? + macro_iterator++; + } auto macro_identifier = macro_iterator->text(); // It is always assumed that the macro already exists in the map. return conditional_macro_id_[macro_identifier]; @@ -176,6 +188,83 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) { return DepthFirstSearch(receiver, source_sequence_.begin()); } +absl::StatusOr FlowTree::MinCoverDefineVariants() { + auto status = GenerateControlFlowTree(); + if (!status.ok()) return status; + verible::IntervalSet covered; // Tokens covered by + // MinCoverDefineVariants. + verible::IntervalSet last_covered; // Tokens covered + // by the previous iterations. + DefineVariants define_variants; // The result – all define variants that + // should cover the entire source + DefineSet visited; // Visited defines are ones that are assumed to be defined + // or undefined (decided in a previous iteration) + const int64_t tok_count = static_cast(source_sequence_.size()); + while (!covered.Contains({0, tok_count})) { + DefineSet defines; // Define sets are moved into the define variants list, + // so we make a new one each iteration + visited.clear(); // We keep the visited set to avoid unnecessary + // allocations, but clear it each iteration + TokenSequenceConstIterator tok_it = source_sequence_.begin(); + while (tok_it < source_sequence_.end()) { + covered.Add(std::distance(source_sequence_.begin(), tok_it)); + if (tok_it->token_enum() == PP_ifdef || + tok_it->token_enum() == PP_ifndef || + tok_it->token_enum() == PP_elsif) { + const auto macro_id_it = tok_it + 2; + auto macro_text = macro_id_it->text(); + bool negated = tok_it->token_enum() == PP_ifndef; + // If this macro was already visited (either defined/undefined), we + // to stick to the same branch. TODO: handle `defines + if (visited.contains(macro_text)) { + bool assume_condition_is_true = + (negated ^ defines.contains(macro_text)); + tok_it = edges_[tok_it][assume_condition_is_true ? 0 : 1]; + } else { + // First time we see this macro; mark as visited + visited.insert(macro_text); + const auto if_it = edges_[tok_it][0]; + const auto if_idx = std::distance(source_sequence_.begin(), if_it); + const auto else_it = edges_[tok_it][1]; + const auto else_idx = + std::distance(source_sequence_.begin(), else_it); + if (!covered.Contains({if_idx, else_idx})) { + // If the `ifdef is not covered, we assume the condition is true + if (!negated) defines.insert(macro_text); + tok_it = if_it; + } else { + // Else we assume the condition is false + if (negated) defines.insert(macro_text); + tok_it = else_it; + } + } + } else { + const auto it = edges_.find(tok_it); + if (it == edges_.end() || it->second.empty()) { + // If there's no outgoing edge, just move to the next token. + tok_it++; + } else { + // Else jump + tok_it = edges_[tok_it][0]; + } + } + } + define_variants.push_back(std::move(defines)); + // To prevent an infinite loop, if nothing new was covered, break. + if (last_covered == covered) { + // TODO: If there are nested `ifdefs that contradict each other early in + // the source, this will prevent us from traversing the rest of the flow + // tree. It would be better to detect this case, assume that the + // contradicting part is covered, and continue the analysis. + VLOG(4) << "Giving up on finding all define variants"; + break; // FIXME: Perhaps we should error? + } + last_covered = covered; + } + VLOG(4) << "Done generating define variants. Coverage: " << covered; + return define_variants; +} + // Constructs the control flow tree, which determines the edge from each node // (token index) to the next possible childs, And save edge_from_iterator in // edges_. diff --git a/verilog/analysis/flow_tree.h b/verilog/analysis/flow_tree.h index c5ed13a8e3..906813f012 100644 --- a/verilog/analysis/flow_tree.h +++ b/verilog/analysis/flow_tree.h @@ -1,4 +1,4 @@ -// Copyright 2017-2022 The Verible Authors. +// Copyright 2017-2023 The Verible Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,11 @@ #include #include +#include "absl/container/flat_hash_set.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "common/text/token_stream_view.h" +#include "common/util/interval_set.h" namespace verilog { @@ -80,6 +83,17 @@ class FlowTree { // Generates all possible variants. absl::Status GenerateVariants(const VariantReceiver &receiver); + // Set of macro name defines. + using DefineSet = absl::flat_hash_set; + + // A list of macro name sets; each set represents a variant of the source; + // together they should cover the entire source. + using DefineVariants = std::vector; + + // Returns the minimum set of defines needed to generate token stream variants + // that cover the entire source. + absl::StatusOr MinCoverDefineVariants(); + // Returns all the used macros in conditionals, ordered with the same ID as // used in BitSets. const std::vector &GetUsedMacros() { diff --git a/verilog/analysis/verilog_analyzer.cc b/verilog/analysis/verilog_analyzer.cc index cbb0ccb861..d0f9d13e01 100644 --- a/verilog/analysis/verilog_analyzer.cc +++ b/verilog/analysis/verilog_analyzer.cc @@ -248,17 +248,13 @@ absl::Status VerilogAnalyzer::Analyze() { // Lex into tokens. RETURN_IF_ERROR(Tokenize()); - // Here would be one place to analyze the raw token stream. - FilterTokensForSyntaxTree(); - - // Disambiguate tokens using lexical context. - ContextualizeTokens(); - // pseudo-preprocess token stream. // Not all analyses will want to preprocess. { VerilogPreprocess preprocessor(preprocess_config_); - preprocessor_data_ = preprocessor.ScanStream(Data().GetTokenStreamView()); + verible::TokenStreamView lexed_streamview; + InitTokenStreamView(Data().TokenStream(), &lexed_streamview); + preprocessor_data_ = preprocessor.ScanStream(lexed_streamview); if (!preprocessor_data_.errors.empty()) { for (const auto& error : preprocessor_data_.errors) { rejected_tokens_.push_back(verible::RejectedToken{ @@ -283,11 +279,19 @@ absl::Status VerilogAnalyzer::Analyze() { LOG(INFO) << LinterTokenErrorMessage(warn_token, false); } } + MutableData().MutablePreprocessedTokenStreamView() = + preprocessor_data_.preprocessed_token_stream; // copy MutableData().MutableTokenStreamView() = preprocessor_data_.preprocessed_token_stream; // copy // TODO(fangism): could we just move, swap, or directly reference? } + // Here would be one place to analyze the raw token stream. + FilterTokensForSyntaxTree(); + + // Disambiguate tokens using lexical context. + ContextualizeTokens(); + auto generator = MakeTokenViewer(Data().GetTokenStreamView()); VerilogParser parser(&generator, filename_); parse_status_ = FileAnalyzer::Parse(&parser); diff --git a/verilog/analysis/verilog_analyzer_test.cc b/verilog/analysis/verilog_analyzer_test.cc index a56f5ec982..1da34b33c0 100644 --- a/verilog/analysis/verilog_analyzer_test.cc +++ b/verilog/analysis/verilog_analyzer_test.cc @@ -62,7 +62,7 @@ using verible::SyntaxTreeLeaf; using verible::TokenInfo; using verible::TokenInfoTestData; -static constexpr verilog::VerilogPreprocess::Config kDefaultPreprocess; +static verilog::VerilogPreprocess::Config kDefaultPreprocess; bool TreeContainsToken(const ConcreteSyntaxTree& tree, const TokenInfo& token) { const auto* matching_leaf = @@ -509,10 +509,10 @@ TEST(AnalyzeVerilogAutomaticMode, InferredModuleBodyMode) { } TEST(AnalyzeVerilogAutomaticMode, AutomaticWithFallback) { - static constexpr verilog::VerilogPreprocess::Config kNoBranchFilter{ + static verilog::VerilogPreprocess::Config kNoBranchFilter{ .filter_branches = false, }; - static constexpr verilog::VerilogPreprocess::Config kWithBranchFilter{ + static verilog::VerilogPreprocess::Config kWithBranchFilter{ .filter_branches = true, }; diff --git a/verilog/analysis/verilog_project.cc b/verilog/analysis/verilog_project.cc index 4a041d73ad..ca9d103d94 100644 --- a/verilog/analysis/verilog_project.cc +++ b/verilog/analysis/verilog_project.cc @@ -36,7 +36,7 @@ namespace verilog { // All files we process with the verilog project, essentially applications that // build a symbol table (project-tool, kythe-indexer) only benefit from // processing the same sequence of tokens a synthesis tool sees. -static constexpr verilog::VerilogPreprocess::Config kPreprocessConfig{ +static verilog::VerilogPreprocess::Config kPreprocessConfig{ .filter_branches = true, }; diff --git a/verilog/formatting/BUILD b/verilog/formatting/BUILD index b105c9e9ab..b9050fb2b8 100644 --- a/verilog/formatting/BUILD +++ b/verilog/formatting/BUILD @@ -163,6 +163,7 @@ cc_library( "//common/util:vector-tree-iterators", "//verilog/CST:declaration", "//verilog/CST:module", + "//verilog/analysis:flow-tree", "//verilog/analysis:verilog-analyzer", "//verilog/analysis:verilog-equivalence", "//verilog/parser:verilog-token-enum", diff --git a/verilog/formatting/align.cc b/verilog/formatting/align.cc index cd7b47a534..13550fd751 100644 --- a/verilog/formatting/align.cc +++ b/verilog/formatting/align.cc @@ -1563,6 +1563,7 @@ void TabularAlignTokenPartitions(const FormatStyle& style, const ExtractAlignmentGroupsFunction extract_alignment_groups = std::bind(alignment_partitioner, std::placeholders::_1, style); + VLOG(4) << "===> " << partition.Value() << std::endl; verible::TabularAlignTokens(style.column_limit, full_text, disabled_byte_ranges, extract_alignment_groups, &partition); diff --git a/verilog/formatting/formatter.cc b/verilog/formatting/formatter.cc index dafe89a799..994b03ebf9 100644 --- a/verilog/formatting/formatter.cc +++ b/verilog/formatting/formatter.cc @@ -20,6 +20,7 @@ #include #include +#include "absl/algorithm/container.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "common/formatting/format_token.h" @@ -46,6 +47,7 @@ #include "common/util/vector_tree_iterators.h" #include "verilog/CST/declaration.h" #include "verilog/CST/module.h" +#include "verilog/analysis/flow_tree.h" #include "verilog/analysis/verilog_analyzer.h" #include "verilog/analysis/verilog_equivalence.h" #include "verilog/formatting/align.h" @@ -53,6 +55,8 @@ #include "verilog/formatting/format_style.h" #include "verilog/formatting/token_annotator.h" #include "verilog/formatting/tree_unwrapper.h" +#include "verilog/parser/verilog_lexer.h" +#include "verilog/parser/verilog_token_classifications.h" #include "verilog/parser/verilog_token_enum.h" #include "verilog/preprocessor/verilog_preprocess.h" @@ -116,19 +120,29 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure, // Note: We cannot just Tokenize() and compare because Analyze() // performs additional transformations like expanding MacroArgs to // expression subtrees. - const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode( - formatted_output, filename, verilog::VerilogPreprocess::Config()); - const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus(); - const auto reparse_status = reanalyzer->ParseStatus(); + verilog::FlowTree control_flow_tree(text_structure.TokenStream()); + const auto define_variants = control_flow_tree.MinCoverDefineVariants(); + if (!define_variants.ok()) return define_variants.status(); + for (const FlowTree::DefineSet& defines : *define_variants) { + VerilogPreprocess::Config config{.filter_branches = true}; + for (auto define : defines) { + config.macro_definitions.emplace(define, std::nullopt); + } - if (!relex_status.ok() || !reparse_status.ok()) { - const auto& token_errors = reanalyzer->TokenErrorMessages(); - // Only print the first error. - if (!token_errors.empty()) { - return absl::DataLossError( - absl::StrCat("Error lex/parsing-ing formatted output. " - "Please file a bug.\nFirst error: ", - token_errors.front())); + const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode( + formatted_output, filename, config); + const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus(); + const auto reparse_status = reanalyzer->ParseStatus(); + + if (!relex_status.ok() || !reparse_status.ok()) { + const auto& token_errors = reanalyzer->TokenErrorMessages(); + // Only print the first error. + if (!token_errors.empty()) { + return absl::DataLossError( + absl::StrCat("Error lexing/parsing formatted output. " + "Please file a bug.\nFirst error: ", + token_errors.front())); + } } } @@ -199,10 +213,10 @@ static Status ReformatVerilog(absl::string_view original_text, } static absl::StatusOr> ParseWithStatus( - absl::string_view text, absl::string_view filename) { + absl::string_view text, absl::string_view filename, + const verilog::VerilogPreprocess::Config& preprocess_config = {}) { std::unique_ptr analyzer = - VerilogAnalyzer::AnalyzeAutomaticMode( - text, filename, verilog::VerilogPreprocess::Config()); + VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config); { // Lex and parse code. Exit on failure. const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus(); @@ -265,13 +279,36 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename, const FormatStyle& style, std::ostream& formatted_stream, const LineNumberSet& lines, const ExecutionControl& control) { - const auto analyzer = ParseWithStatus(text, filename); - if (!analyzer.ok()) return analyzer.status(); + // Prepare define variants + VerilogAnalyzer analyzer(text, filename); + if (Status tokenize_status = analyzer.Tokenize(); !tokenize_status.ok()) { + return tokenize_status; + } + FlowTree control_flow_tree(analyzer.Data().TokenStream()); + const auto define_variants = control_flow_tree.MinCoverDefineVariants(); + if (!define_variants.ok()) return define_variants.status(); + // Proceed with formatting for each variant + std::string text_to_format{text.begin(), text.end()}; + Status format_status; + for (const FlowTree::DefineSet& defines : *define_variants) { + // Set up preprocess config + VerilogPreprocess::Config config{.filter_branches = true}; + for (auto define : defines) { + config.macro_definitions.emplace(define, std::nullopt); + } + + const auto analyzer = ParseWithStatus(text_to_format, filename, config); + if (!analyzer.ok()) return analyzer.status(); + + const verible::TextStructureView& text_structure = analyzer->get()->Data(); + std::string formatted_text; + format_status = FormatVerilog(text_structure, filename, style, + &formatted_text, lines, control); + if (!format_status.ok()) break; + text_to_format = formatted_text; + } - const verible::TextStructureView& text_structure = analyzer->get()->Data(); - std::string formatted_text; - Status format_status = FormatVerilog(text_structure, filename, style, - &formatted_text, lines, control); + const absl::string_view formatted_text = text_to_format; // Commit formatted text to the output stream independent of status. formatted_stream << formatted_text; if (!format_status.ok()) return format_status; @@ -789,10 +826,26 @@ class ContinuationCommentAligner { Status Formatter::Format(const ExecutionControl& control) { const absl::string_view full_text(text_structure_.Contents()); const auto& token_stream(text_structure_.TokenStream()); + const auto& prep_token_view = + text_structure_.GetPreprocessedTokenStreamView(); // Initialize auxiliary data needed for TreeUnwrapper. UnwrapperData unwrapper_data(token_stream); + auto it1 = prep_token_view.begin(); + auto it2 = unwrapper_data.preformatted_tokens.begin(); + while (it1 != prep_token_view.end() && + it2 != unwrapper_data.preformatted_tokens.end()) { + // FIXME: do it in a cleaner way, do not const_cast. This could probably be + // done in TextStructure. + const_cast(it2->token)->visible = &**it1 == it2->token; + if (it2->token->text().begin() > (*it1)->text().begin()) { + ++it1; + } else { + ++it2; + } + } + // Partition input token stream into hierarchical set of UnwrappedLines. TreeUnwrapper tree_unwrapper(text_structure_, style_, unwrapper_data.preformatted_tokens); @@ -920,25 +973,34 @@ Status Formatter::Format(const ExecutionControl& control) { // TODO(fangism): Use different formatting strategies depending on // uwline.PartitionPolicy(). if (continuation_comment_aligner.HandleLine(uwline, &formatted_lines_)) { + } else if (IsPreprocessorControlFlow(verilog_tokentype( + uwline.TokensRange().begin()->TokenEnum()))) { + UnwrappedLine formatted_line = uwline; + formatted_line.SetIndentationSpaces(0); + formatted_lines_.emplace_back(formatted_line); } else if (uwline.PartitionPolicy() == PartitionPolicyEnum::kAlreadyFormatted) { // For partitions that were successfully aligned, do not search // line-wrapping, but instead accept the adjusted padded spacing. formatted_lines_.emplace_back(uwline); } else { - // In other case, default to searching for optimal line wrapping. - const auto optimal_solutions = - verible::SearchLineWraps(uwline, style_, control.max_search_states); - if (control.show_equally_optimal_wrappings && - optimal_solutions.size() > 1) { - verible::DisplayEquallyOptimalWrappings(control.Stream(), uwline, - optimal_solutions); - } - // Arbitrarily choose the first solution, if there are multiple. - formatted_lines_.push_back(optimal_solutions.front()); - if (!formatted_lines_.back().CompletedFormatting()) { - // Copy over any lines that did not finish wrap searching. - partially_formatted_lines.push_back(&uwline); + if (!uwline.TokensRange().front().token->visible) { + formatted_lines_.emplace_back(uwline); + } else { + // In other case, default to searching for optimal line wrapping. + const auto optimal_solutions = + verible::SearchLineWraps(uwline, style_, control.max_search_states); + if (control.show_equally_optimal_wrappings && + optimal_solutions.size() > 1) { + verible::DisplayEquallyOptimalWrappings(control.Stream(), uwline, + optimal_solutions); + } + // Arbitrarily choose the first solution, if there are multiple. + formatted_lines_.push_back(optimal_solutions.front()); + if (!formatted_lines_.back().CompletedFormatting()) { + // Copy over any lines that did not finish wrap searching. + partially_formatted_lines.push_back(&uwline); + } } } } diff --git a/verilog/formatting/formatter_test.cc b/verilog/formatting/formatter_test.cc index 6f7d28c22f..2cb1206aa0 100644 --- a/verilog/formatting/formatter_test.cc +++ b/verilog/formatting/formatter_test.cc @@ -54,7 +54,7 @@ absl::Status VerifyFormatting(const verible::TextStructureView& text_structure, namespace { -static constexpr VerilogPreprocess::Config kDefaultPreprocess; +static VerilogPreprocess::Config kDefaultPreprocess; using absl::StatusCode; using testing::HasSubstr; diff --git a/verilog/formatting/token_annotator.cc b/verilog/formatting/token_annotator.cc index 37e12ba74d..174c15c281 100644 --- a/verilog/formatting/token_annotator.cc +++ b/verilog/formatting/token_annotator.cc @@ -890,6 +890,15 @@ static WithReason BreakDecisionBetween( } } + static bool in_prep = false; + if (left.TokenEnum() == PP_Identifier && in_prep) { + return {SpacingOptions::kMustWrap, + "`end and `else should be on their own line except for comments."}; + } + auto right_tok = verilog_tokentype(left.TokenEnum()); + in_prep = IsPreprocessorControlFlow(right_tok) || + (in_prep && right_tok == PP_Identifier); + if (left.TokenEnum() == PP_else || left.TokenEnum() == PP_endif) { if (IsComment(FormatTokenType(right.format_token_enum))) { return {SpacingOptions::kUndecided, "Comment may follow `else and `end"}; diff --git a/verilog/formatting/tree_unwrapper.cc b/verilog/formatting/tree_unwrapper.cc index fe1bfc5958..206959beb8 100644 --- a/verilog/formatting/tree_unwrapper.cc +++ b/verilog/formatting/tree_unwrapper.cc @@ -146,6 +146,9 @@ enum TokenScannerState { // While encountering any string of consecutive newlines kHaveNewline, + // While encountering tokens related to preprocessor control flow + kPreprocessorControlFlow, + // Transition from newline to non-newline kNewPartition, @@ -163,6 +166,8 @@ TokenScannerStateStrings() { {"kStart", TokenScannerState::kStart}, {"kHaveNewline", TokenScannerState::kHaveNewline}, {"kNewPartition", TokenScannerState::kNewPartition}, + {"kPreprocessorControlFlow", + TokenScannerState::kPreprocessorControlFlow}, {"kEndWithNewline", TokenScannerState::kEndWithNewline}, {"kEndNoNewline", TokenScannerState::kEndNoNewline}, }); @@ -198,12 +203,15 @@ class TreeUnwrapper::TokenScanner { // Calls TransitionState using current_state_ and the tranition token_Type void UpdateState(verilog_tokentype token_type) { current_state_ = TransitionState(current_state_, token_type); + on_pp_identifier_ = token_type == PP_Identifier; seen_any_nonspace_ |= IsComment(token_type); } // Returns true if this is a state that should start a new token partition. bool ShouldStartNewPartition() const { return current_state_ == State::kNewPartition || + (current_state_ == State::kPreprocessorControlFlow && + !on_pp_identifier_) || (current_state_ == State::kEndWithNewline && seen_any_nonspace_); } @@ -212,6 +220,7 @@ class TreeUnwrapper::TokenScanner { // The current state of the TokenScanner. State current_state_ = kStart; + bool on_pp_identifier_ = false; bool seen_any_nonspace_ = false; // Transitions the TokenScanner given a TokenState and a verilog_tokentype @@ -241,8 +250,19 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState( const State& old_state, verilog_tokentype token_type) { VLOG(4) << "state transition on: " << old_state << ", token: " << verilog_symbol_name(token_type); + if (IsPreprocessorControlFlow(token_type)) { + const State new_state = kPreprocessorControlFlow; + VLOG(4) << "new state: " << new_state; + return new_state; + } State new_state = old_state; switch (old_state) { + case kPreprocessorControlFlow: { + if (token_type != PP_Identifier) { + new_state = kNewPartition; + } + break; + } case kStart: { if (IsNewlineOrEOF(token_type)) { new_state = kHaveNewline; @@ -499,6 +519,10 @@ static verible::TokenSequence::const_iterator StopAtLastNewlineBeforeTreeLeaf( bool break_while = false; while (!token_iter->isEOF() && !break_while) { VLOG(4) << "scan: " << TokenWithContext{*token_iter, context}; + if (!token_iter->visible) { + ++token_iter; + continue; + } switch (token_iter->token_enum()) { // TODO(b/144653479): this token-case logic is redundant with other // places; plumb that through to here instead of replicating it. @@ -1524,6 +1548,32 @@ static bool PartitionIsForcedIntoNewLine(const TokenPartitionTree& partition) { return ftokens.front().before.break_decision == SpacingOptions::kMustWrap; } +bool IsPreprocessorPartition(const TokenPartitionTree& partition) { + auto tok = partition.Value().TokensRange().front().TokenEnum(); + return IsPreprocessorControlFlow(verilog_tokentype(tok)) || + tok == PP_Identifier; +} + +TokenPartitionTree* MergeLeafIntoPreviousLeafIfNotPP( + TokenPartitionTree* const leaf) { + if (IsPreprocessorPartition(*leaf)) { + VLOG(5) << " not merging into previous partition."; + return nullptr; + } + VLOG(5) << " merge into previous partition."; + return MergeLeafIntoPreviousLeaf(leaf); +} + +TokenPartitionTree* MergeLeafIntoNextLeafIfNotPP( + TokenPartitionTree* const leaf) { + if (IsPreprocessorPartition(*leaf)) { + VLOG(5) << " not merging into next partition."; + return nullptr; + } + VLOG(5) << " merge into next partition."; + return MergeLeafIntoNextLeaf(leaf); +} + // Joins partition containing only a ',' (and optionally comments) with // a partition preceding or following it. static void AttachSeparatorToPreviousOrNextPartition( @@ -1561,6 +1611,8 @@ static void AttachSeparatorToPreviousOrNextPartition( return; } + if (IsPreprocessorPartition(partition[0])) return; + // Merge with previous partition if both partitions are in the same line in // original text const auto* previous_partition = PreviousLeaf(*partition); @@ -1571,8 +1623,7 @@ static void AttachSeparatorToPreviousOrNextPartition( absl::string_view original_text_between = verible::make_string_view_range( previous_token.Text().end(), separator->Text().begin()); if (!absl::StrContains(original_text_between, '\n')) { - VLOG(5) << " merge into previous partition."; - verible::MergeLeafIntoPreviousLeaf(partition); + MergeLeafIntoPreviousLeafIfNotPP(partition); return; } } @@ -1587,8 +1638,7 @@ static void AttachSeparatorToPreviousOrNextPartition( absl::string_view original_text_between = verible::make_string_view_range( separator->Text().end(), next_token.Text().begin()); if (!absl::StrContains(original_text_between, '\n')) { - VLOG(5) << " merge into next partition."; - verible::MergeLeafIntoNextLeaf(partition); + MergeLeafIntoNextLeafIfNotPP(partition); return; } } @@ -1598,16 +1648,14 @@ static void AttachSeparatorToPreviousOrNextPartition( if (partition->Value().TokensRange().size() == 1) { // Try merging with previous partition if (!PartitionIsForcedIntoNewLine(*partition)) { - VLOG(5) << " merge into previous partition."; - verible::MergeLeafIntoPreviousLeaf(partition); + MergeLeafIntoPreviousLeafIfNotPP(partition); return; } // Try merging with next partition if (next_partition != nullptr && !PartitionIsForcedIntoNewLine(*next_partition)) { - VLOG(5) << " merge into next partition."; - verible::MergeLeafIntoNextLeaf(partition); + MergeLeafIntoNextLeafIfNotPP(partition); return; } } @@ -1656,7 +1704,7 @@ static void AttachTrailingSemicolonToPreviousPartition( semicolon_partition->Value().SetIndentationSpaces( group->Value().IndentationSpaces()); } else { - verible::MergeLeafIntoPreviousLeaf(semicolon_partition); + MergeLeafIntoPreviousLeafIfNotPP(semicolon_partition); } VLOG(4) << "after moving semicolon:\n" << *partition; } @@ -1725,7 +1773,7 @@ static void AttachOpeningBraceToDeclarationsAssignmentOperator( } if (!PartitionIsForcedIntoNewLine(next)) { - verible::MergeLeafIntoNextLeaf(¤t); + MergeLeafIntoNextLeafIfNotPP(¤t); // There shouldn't be more matching partitions return; } @@ -1746,7 +1794,7 @@ static void ReshapeIfClause(const SyntaxTreeNode& node, // Then fuse the 'begin' partition with the preceding 'if (...)' auto& if_body_partition = partition.Children().back(); auto& begin_partition = if_body_partition.Children().front(); - verible::MergeLeafIntoPreviousLeaf(&begin_partition); + MergeLeafIntoPreviousLeafIfNotPP(&begin_partition); partition.Value().SetPartitionPolicy( if_body_partition.Value().PartitionPolicy()); // if seq_block body was empty, that leaves only 'end', so hoist. @@ -1791,7 +1839,7 @@ static void ReshapeElseClause(const SyntaxTreeNode& node, return; } - verible::MergeLeafIntoNextLeaf(&else_partition); + MergeLeafIntoNextLeafIfNotPP(&else_partition); } static void HoistOnlyChildPartition(TokenPartitionTree* partition) { @@ -1814,7 +1862,7 @@ static void PushEndIntoElsePartition(TokenPartitionTree* partition_ptr) { auto& partition = *partition_ptr; auto& if_clause_partition = partition.Children().front(); auto* end_partition = &RightmostDescendant(if_clause_partition); - auto* end_parent = verible::MergeLeafIntoNextLeaf(end_partition); + auto* end_parent = MergeLeafIntoNextLeafIfNotPP(end_partition); // if moving leaf results in any singleton partitions, hoist. if (end_parent != nullptr) { HoistOnlyChildPartition(end_parent); @@ -1971,7 +2019,7 @@ class MacroCallReshaper { if (semicolon_ == NextSibling(*paren_group_)) { if (!PartitionIsForcedIntoNewLine(*semicolon_)) { // Merge with ')' - verible::MergeLeafIntoPreviousLeaf(semicolon_); + MergeLeafIntoPreviousLeafIfNotPP(semicolon_); semicolon_ = nullptr; } } @@ -2224,7 +2272,7 @@ class MacroCallReshaper { return IsComment(verilog_tokentype(t.TokenEnum())); })) { // Merge - verible::MergeLeafIntoPreviousLeaf(r_paren_); + MergeLeafIntoPreviousLeafIfNotPP(r_paren_); r_paren_ = &argument_list_->Children().back(); } else { // Group @@ -2304,7 +2352,7 @@ class MacroCallReshaper { CHECK(!PartitionIsForcedIntoNewLine(*l_paren_)); verible::AdjustIndentationAbsolute(paren_group_, main_node_->Value().IndentationSpaces()); - verible::MergeLeafIntoNextLeaf(identifier_); + MergeLeafIntoNextLeafIfNotPP(identifier_); HoistOnlyChildPartition(main_node_); paren_group_ = main_node_; @@ -2637,7 +2685,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( // Push the "import..." down. { if (partition.Children().size() > 1) { - verible::MergeLeafIntoNextLeaf(&partition.Children().front()); + MergeLeafIntoNextLeafIfNotPP(&partition.Children().front()); AttachTrailingSemicolonToPreviousPartition(&partition); } break; @@ -2650,7 +2698,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto& target_instance_partition = partition; auto& children = target_instance_partition.Children(); // Attach ')' to the instance name - verible::MergeLeafIntoNextLeaf(PreviousSibling(children.back())); + MergeLeafIntoNextLeafIfNotPP(PreviousSibling(children.back())); verible::AdjustIndentationRelative(&children.back(), -style.wrap_spaces); break; @@ -2667,7 +2715,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto& last_prev = *ABSL_DIE_IF_NULL(PreviousSibling(last)); if (PartitionStartsWithCloseParen(last) && PartitionEndsWithOpenParen(last_prev)) { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } } // If there were any parameters or ports at all, expand. @@ -2687,7 +2735,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto& last_prev = *ABSL_DIE_IF_NULL(PreviousSibling(last)); if (PartitionStartsWithCloseParen(last) && PartitionEndsWithOpenParen(last_prev)) { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } } break; @@ -2702,7 +2750,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( case NodeEnum::kTaskPrototype: { auto& last = RightmostDescendant(partition); if (PartitionIsCloseParenSemi(last)) { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } break; } @@ -2719,7 +2767,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto& last = RightmostDescendant(partition); if (PartitionStartsWithCloseParen(last) || PartitionStartsWithSemicolon(last)) { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } } } @@ -2748,7 +2796,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto* group = verible::GroupLeafWithPreviousLeaf(&last); group->Value().SetPartitionPolicy(PartitionPolicyEnum::kStack); } else { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } } } @@ -2772,7 +2820,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto* group = verible::GroupLeafWithPreviousLeaf(&last); group->Value().SetPartitionPolicy(PartitionPolicyEnum::kStack); } else { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } } } else if (policy == PartitionPolicyEnum::kStack) { @@ -2872,7 +2920,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto& last = RightmostDescendant(partition); // TODO(fangism): why does test fail without this clause? if (PartitionStartsWithCloseParen(last)) { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } break; } @@ -2882,7 +2930,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( if (partition.Children().size() == 2) { auto& last = RightmostDescendant(partition); if (PartitionIsCloseBrace(last)) { - verible::MergeLeafIntoPreviousLeaf(&last); + MergeLeafIntoPreviousLeafIfNotPP(&last); } } break; @@ -2921,7 +2969,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( if (children.size() == 2 && verible::is_leaf(children.front()) /* left side */ && !PartitionIsForcedIntoNewLine(children.back())) { - verible::MergeLeafIntoNextLeaf(&children.front()); + MergeLeafIntoNextLeafIfNotPP(&children.front()); VLOG(4) << "after merge leaf (left-into-right):\n" << partition; } break; @@ -2950,7 +2998,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( AttachSeparatorsToListElementPartitions(&partition); // Merge the 'assign' keyword with the (first) x=y assignment. // TODO(fangism): reshape for multiple assignments. - verible::MergeLeafIntoNextLeaf(&partition.Children().front()); + MergeLeafIntoNextLeafIfNotPP(&partition.Children().front()); VLOG(4) << "after merging 'assign':\n" << partition; AdjustSubsequentPartitionsIndentation(&partition, style.wrap_spaces); VLOG(4) << "after adjusting partitions indentation:\n" << partition; @@ -2972,10 +3020,10 @@ void TreeUnwrapper::ReshapeTokenPartitions( VLOG(4) << "kForSpec got ';' at child " << dist1 << " and " << dist2; // Merge from back-to-front to keep indices valid. if (dist2 > 0 && (dist2 - dist1 > 1)) { - verible::MergeLeafIntoPreviousLeaf(&*iter2); + MergeLeafIntoPreviousLeafIfNotPP(&*iter2); } if (dist1 > 0) { - verible::MergeLeafIntoPreviousLeaf(&*iter1); + MergeLeafIntoPreviousLeafIfNotPP(&*iter1); } break; } @@ -3019,7 +3067,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( auto& begin_partition = LeftmostDescendant(seq_block_partition); VLOG(4) << "begin partition: " << begin_partition; CHECK(is_leaf(begin_partition)); - verible::MergeLeafIntoPreviousLeaf(&begin_partition); + MergeLeafIntoPreviousLeafIfNotPP(&begin_partition); VLOG(4) << "after merging 'begin' to predecessor:\n" << partition; // Flatten only the statement block so that the control partition // can retain its own partition policy. @@ -3035,11 +3083,11 @@ void TreeUnwrapper::ReshapeTokenPartitions( // merge "do" <- "begin" auto& begin_partition = LeftmostDescendant(seq_block_partition); - verible::MergeLeafIntoPreviousLeaf(&begin_partition); + MergeLeafIntoPreviousLeafIfNotPP(&begin_partition); // merge "end" -> "while" auto& end_partition = RightmostDescendant(seq_block_partition); - verible::MergeLeafIntoNextLeaf(&end_partition); + MergeLeafIntoNextLeafIfNotPP(&end_partition); // Flatten only the statement block so that the control partition // can retain its own partition policy. @@ -3053,7 +3101,7 @@ void TreeUnwrapper::ReshapeTokenPartitions( ->MatchesTagAnyOf({NodeEnum::kProceduralTimingControlStatement, NodeEnum::kSeqBlock})) { // Merge 'always' keyword with next sibling, and adjust subtree indent. - verible::MergeLeafIntoNextLeaf(&partition.Children().front()); + MergeLeafIntoNextLeafIfNotPP(&partition.Children().front()); verible::AdjustIndentationAbsolute( &partition.Children().front(), partition.Value().IndentationSpaces()); diff --git a/verilog/preprocessor/verilog_preprocess.cc b/verilog/preprocessor/verilog_preprocess.cc index b8e2b21f40..6326a0836f 100644 --- a/verilog/preprocessor/verilog_preprocess.cc +++ b/verilog/preprocessor/verilog_preprocess.cc @@ -44,10 +44,13 @@ namespace verilog { using verible::TokenGenerator; using verible::TokenStreamView; using verible::container::FindOrNull; +using verible::container::FindWithDefault; using verible::container::InsertOrUpdate; VerilogPreprocess::VerilogPreprocess(const Config& config) - : VerilogPreprocess(config, nullptr) {} + : VerilogPreprocess(config, nullptr) { + preprocess_data_.macro_definitions = config_.macro_definitions; +} VerilogPreprocess::VerilogPreprocess(const Config& config, FileOpener opener) : config_(config), file_opener_(std::move(opener)) { @@ -55,6 +58,7 @@ VerilogPreprocess::VerilogPreprocess(const Config& config, FileOpener opener) // place a toplevel 'conditional' that is always selected. // Thus we only need to test in `else and `endif to see if we underrun due // to unbalanced statements. + preprocess_data_.macro_definitions = config_.macro_definitions; conditional_block_.push( BranchBlock(true, true, verible::TokenInfo::EOFToken())); } @@ -288,8 +292,8 @@ absl::Status VerilogPreprocess::HandleMacroIdentifier( // Finding the macro definition. const absl::string_view sv = (*iter)->text(); - const auto* found = - FindOrNull(preprocess_data_.macro_definitions, sv.substr(1)); + const auto found = FindWithDefault(preprocess_data_.macro_definitions, + sv.substr(1), std::nullopt); if (!found) { preprocess_data_.errors.emplace_back( **iter, @@ -302,7 +306,7 @@ absl::Status VerilogPreprocess::HandleMacroIdentifier( verible::MacroCall macro_call; RETURN_IF_ERROR( ConsumeAndParseMacroCall(iter, generator, ¯o_call, *found)); - RETURN_IF_ERROR(ExpandMacro(macro_call, found)); + RETURN_IF_ERROR(ExpandMacro(macro_call, *found)); } auto& lexed = preprocess_data_.lexed_macros_backup.back(); if (!forward) return absl::OkStatus(); @@ -378,16 +382,16 @@ absl::Status VerilogPreprocess::ExpandText( // `MACRO([param1],[param2],...) absl::Status VerilogPreprocess::ExpandMacro( const verible::MacroCall& macro_call, - const verible::MacroDefinition* macro_definition) { + const verible::MacroDefinition& macro_definition) { const auto& actual_parameters = macro_call.positional_arguments; std::map subs_map; - if (macro_definition->IsCallable()) { - RETURN_IF_ERROR(macro_definition->PopulateSubstitutionMap(actual_parameters, - &subs_map)); + if (macro_definition.IsCallable()) { + RETURN_IF_ERROR( + macro_definition.PopulateSubstitutionMap(actual_parameters, &subs_map)); } - VerilogLexer lexer(macro_definition->DefinitionText().text()); + VerilogLexer lexer(macro_definition.DefinitionText().text()); verible::TokenSequence lexed_sequence; verible::TokenSequence expanded_lexed_sequence; // Populating the lexed token sequence. @@ -420,7 +424,7 @@ absl::Status VerilogPreprocess::ExpandMacro( for (auto& u : expanded_child) expanded_lexed_sequence.push_back(u); continue; } - if (macro_definition->IsCallable()) { + if (macro_definition.IsCallable()) { // Check if the last token is a formal parameter const auto* replacement = FindOrNull(subs_map, last_token.text()); if (replacement) { diff --git a/verilog/preprocessor/verilog_preprocess.h b/verilog/preprocessor/verilog_preprocess.h index 69d9759835..e9c5ce13ca 100644 --- a/verilog/preprocessor/verilog_preprocess.h +++ b/verilog/preprocessor/verilog_preprocess.h @@ -69,7 +69,8 @@ struct VerilogPreprocessError { // Information that results from preprocessing. struct VerilogPreprocessData { using MacroDefinition = verible::MacroDefinition; - using MacroDefinitionRegistry = std::map; + using MacroDefinitionRegistry = + std::map>; using TokenSequence = std::vector; // Resulting token stream after preprocessing @@ -94,6 +95,8 @@ struct VerilogPreprocessData { class VerilogPreprocess { using TokenStreamView = verible::TokenStreamView; using MacroDefinition = verible::MacroDefinition; + using MacroDefinitionRegistry = + std::map>; using MacroParameterInfo = verible::MacroParameterInfo; public: @@ -115,7 +118,9 @@ class VerilogPreprocess { // Expand macro definition bodies, this will relexes the macro body. bool expand_macros = false; - // TODO(hzeller): Provide a map of command-line provided +define+'s + + MacroDefinitionRegistry macro_definitions; + // TODO(hzeller): Provide a way to +define+ from the command line. }; explicit VerilogPreprocess(const Config& config); @@ -182,7 +187,7 @@ class VerilogPreprocess { void RegisterMacroDefinition(const MacroDefinition&); absl::Status ExpandText(const absl::string_view&); absl::Status ExpandMacro(const verible::MacroCall&, - const verible::MacroDefinition*); + const verible::MacroDefinition&); absl::Status HandleInclude(TokenStreamView::const_iterator, const StreamIteratorGenerator&); diff --git a/verilog/preprocessor/verilog_preprocess_test.cc b/verilog/preprocessor/verilog_preprocess_test.cc index cb72019ca2..af609c442c 100644 --- a/verilog/preprocessor/verilog_preprocess_test.cc +++ b/verilog/preprocessor/verilog_preprocess_test.cc @@ -37,7 +37,7 @@ namespace { using testing::ElementsAre; using testing::Pair; using testing::StartsWith; -using verible::container::FindOrNull; +using verible::container::FindWithDefault; using verible::file::CreateDir; using verible::file::JoinPath; using verible::file::testing::ScopedTestFile; @@ -171,8 +171,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionNoParamsNoValue) { const auto& definitions = tester.PreprocessorData().macro_definitions; EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_))); - auto macro = FindOrNull(definitions, "FOOOO"); - ASSERT_NE(macro, nullptr); + auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt); + ASSERT_TRUE(macro.has_value()); EXPECT_EQ(macro->DefinitionText().text(), ""); EXPECT_FALSE(macro->IsCallable()); EXPECT_TRUE(macro->Parameters().empty()); @@ -187,8 +187,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionNoParamsSimpleValue) { const auto& definitions = tester.PreprocessorData().macro_definitions; EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_))); - auto macro = FindOrNull(definitions, "FOOOO"); - ASSERT_NE(macro, nullptr); + auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt); + ASSERT_TRUE(macro.has_value()); EXPECT_EQ(macro->DefinitionText().text(), "\"bar\""); EXPECT_FALSE(macro->IsCallable()); EXPECT_TRUE(macro->Parameters().empty()); @@ -202,8 +202,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionOneParamWithValue) { const auto& definitions = tester.PreprocessorData().macro_definitions; EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_))); - auto macro = FindOrNull(definitions, "FOOOO"); - ASSERT_NE(macro, nullptr); + auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt); + ASSERT_TRUE(macro.has_value()); EXPECT_EQ(macro->DefinitionText().text(), "(x+1)"); EXPECT_TRUE(macro->IsCallable()); const auto& params = macro->Parameters(); @@ -221,8 +221,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionOneParamDefaultWithValue) { const auto& definitions = tester.PreprocessorData().macro_definitions; EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_))); - auto macro = FindOrNull(definitions, "FOOOO"); - ASSERT_NE(macro, nullptr); + auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt); + ASSERT_TRUE(macro.has_value()); EXPECT_EQ(macro->DefinitionText().text(), "(x+3)"); EXPECT_TRUE(macro->IsCallable()); const auto& params = macro->Parameters(); @@ -243,15 +243,15 @@ TEST(VerilogPreprocessTest, TwoMacroDefinitions) { EXPECT_THAT(definitions, ElementsAre(Pair("BAAAAR", testing::_), Pair("FOOOO", testing::_))); { - auto macro = FindOrNull(definitions, "BAAAAR"); - ASSERT_NE(macro, nullptr); + auto macro = FindWithDefault(definitions, "BAAAAR", std::nullopt); + ASSERT_TRUE(macro.has_value()); EXPECT_TRUE(macro->IsCallable()); const auto& params = macro->Parameters(); EXPECT_EQ(params.size(), 2); } { - auto macro = FindOrNull(definitions, "FOOOO"); - ASSERT_NE(macro, nullptr); + auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt); + ASSERT_TRUE(macro.has_value()); EXPECT_TRUE(macro->IsCallable()); const auto& params = macro->Parameters(); EXPECT_EQ(params.size(), 1);