Format preprocessed token stream in multiple passes
Currently, the formatter doesn't handle many scenarios involving preprocessor
`ifdef`s/`endif`s interleaved with `begin`s, module headers, etc. (#228, #241,
#267). This patch attempts to solve the problem by running multiple formatting
passes over preprocessed variants of the source. Each variant has a different
set of preprocessor branches enabled; together, the variants should cover the
entire source (though that doesn't yet work in all cases). After formatting the
AST of each variant in turn, a correct and properly formatted file is produced.
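
In outline, the new `FormatVerilog()` flow looks like this (condensed from the
formatter.cc change in this commit; error handling is trimmed, and every call
shown is one this patch adds or already uses):

  // Lex once and compute a minimal covering set of define variants.
  VerilogAnalyzer analyzer(text, filename);
  if (Status tokenize_status = analyzer.Tokenize(); !tokenize_status.ok()) {
    return tokenize_status;
  }
  FlowTree control_flow_tree(analyzer.Data().TokenStream());
  const auto define_variants = control_flow_tree.MinCoverDefineVariants();
  if (!define_variants.ok()) return define_variants.status();
  // Format once per variant, feeding each pass's output into the next pass,
  // so every enabled set of preprocessor branches eventually gets formatted.
  std::string text_to_format{text.begin(), text.end()};
  for (const FlowTree::DefineSet& defines : *define_variants) {
    VerilogPreprocess::Config config{.filter_branches = true};
    for (auto define : defines) {
      config.macro_definitions.emplace(define, std::nullopt);
    }
    const auto variant_analyzer = ParseWithStatus(text_to_format, filename, config);
    if (!variant_analyzer.ok()) return variant_analyzer.status();
    std::string formatted_text;
    const Status format_status = FormatVerilog(variant_analyzer->get()->Data(),
                                               filename, style, &formatted_text,
                                               lines, control);
    if (!format_status.ok()) break;
    text_to_format = formatted_text;
  }
  // text_to_format now holds the fully formatted source.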

This is still a work in progress, so not everything works and the code isn't
very clean. I'd love to get some early feedback on this.

Signed-off-by: Krzysztof Bieganski <[email protected]>
kbieganski committed May 24, 2023
1 parent 529b519 commit da412ef
Showing 15 changed files with 246 additions and 56 deletions.
2 changes: 1 addition & 1 deletion verilog/CST/expression_test.cc
@@ -32,7 +32,7 @@
namespace verilog {
namespace {

static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

using verible::SyntaxTreeSearchTestCase;
using verible::TextStructureView;
3 changes: 3 additions & 0 deletions verilog/analysis/BUILD
@@ -79,10 +79,13 @@ cc_library(
hdrs = ["flow_tree.h"],
deps = [
"//common/text:token-stream-view",
"//common/util:interval-set",
"//common/util:logging",
"//common/util:status-macros",
"//verilog/parser:verilog-token-enum",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
],
)
2 changes: 1 addition & 1 deletion verilog/analysis/extractors_test.cc
@@ -26,7 +26,7 @@
namespace verilog {
namespace analysis {
namespace {
static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

TEST(CollectInterfaceNamesTest, NonModuleTests) {
const std::pair<absl::string_view, std::set<std::string>> kTestCases[] = {
91 changes: 90 additions & 1 deletion verilog/analysis/flow_tree.cc
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -126,6 +126,10 @@ absl::Status FlowTree::MacroFollows(
return absl::InvalidArgumentError("Error macro name can't be extracted.");
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
if (macro_iterator->token_enum() != PP_Identifier) {
return absl::InvalidArgumentError("Expected identifier for macro name.");
}
@@ -142,6 +146,10 @@ absl::Status FlowTree::AddMacroOfConditional(
"Error no macro follows the conditional directive.");
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
auto macro_identifier = macro_iterator->text();
if (conditional_macro_id_.find(macro_identifier) ==
conditional_macro_id_.end()) {
@@ -162,6 +170,10 @@ int FlowTree::GetMacroIDOfConditional(
return -1;
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
auto macro_identifier = macro_iterator->text();
// It is always assumed that the macro already exists in the map.
return conditional_macro_id_[macro_identifier];
@@ -176,6 +188,83 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) {
return DepthFirstSearch(receiver, source_sequence_.begin());
}

absl::StatusOr<FlowTree::DefineVariants> FlowTree::MinCoverDefineVariants() {
auto status = GenerateControlFlowTree();
if (!status.ok()) return status;
verible::IntervalSet<int64_t> covered; // Tokens covered by
// MinCoverDefineVariants.
verible::IntervalSet<int64_t> last_covered; // Tokens covered
// by the previous iterations.
DefineVariants define_variants; // The result – all define variants that
// should cover the entire source
DefineSet visited; // Visited defines are ones that are assumed to be defined
// or undefined (decided in a previous iteration)
const int64_t tok_count = static_cast<int64_t>(source_sequence_.size());
while (!covered.Contains({0, tok_count})) {
DefineSet defines; // Define sets are moved into the define variants list,
// so we make a new one each iteration
visited.clear(); // We keep the visited set to avoid unnecessary
// allocations, but clear it each iteration
TokenSequenceConstIterator tok_it = source_sequence_.begin();
while (tok_it < source_sequence_.end()) {
covered.Add(std::distance(source_sequence_.begin(), tok_it));
if (tok_it->token_enum() == PP_ifdef ||
tok_it->token_enum() == PP_ifndef ||
tok_it->token_enum() == PP_elsif) {
const auto macro_id_it = tok_it + 2;
auto macro_text = macro_id_it->text();
bool negated = tok_it->token_enum() == PP_ifndef;
// If this macro was already visited (either defined or undefined), we need
// to stick to the same branch. TODO: handle `defines
if (visited.contains(macro_text)) {
bool assume_condition_is_true =
(negated ^ defines.contains(macro_text));
tok_it = edges_[tok_it][assume_condition_is_true ? 0 : 1];
} else {
// First time we see this macro; mark as visited
visited.insert(macro_text);
const auto if_it = edges_[tok_it][0];
const auto if_idx = std::distance(source_sequence_.begin(), if_it);
const auto else_it = edges_[tok_it][1];
const auto else_idx =
std::distance(source_sequence_.begin(), else_it);
if (!covered.Contains({if_idx, else_idx})) {
// If the `ifdef is not covered, we assume the condition is true
if (!negated) defines.insert(macro_text);
tok_it = if_it;
} else {
// Else we assume the condition is false
if (negated) defines.insert(macro_text);
tok_it = else_it;
}
}
} else {
const auto it = edges_.find(tok_it);
if (it == edges_.end() || it->second.empty()) {
// If there's no outgoing edge, just move to the next token.
tok_it++;
} else {
// Else jump
tok_it = edges_[tok_it][0];
}
}
}
define_variants.push_back(std::move(defines));
// To prevent an infinite loop, if nothing new was covered, break.
if (last_covered == covered) {
// TODO: If there are nested `ifdefs that contradict each other early in
// the source, this will prevent us from traversing the rest of the flow
// tree. It would be better to detect this case, assume that the
// contradicting part is covered, and continue the analysis.
VLOG(4) << "Giving up on finding all define variants";
break; // FIXME: Perhaps we should error?
}
last_covered = covered;
}
VLOG(4) << "Done generating define variants. Coverage: " << covered;
return define_variants;
}
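
For intuition, a small driver along the following lines (hypothetical: the
main(), the input literal, and the printing are illustrative and not part of
this patch; only the VerilogAnalyzer/FlowTree calls mirror the ones used here)
should report two variants for a single `ifdef/`else pair: {"FOO"}, which takes
the `ifdef branch, and the empty set, which takes the `else branch.

  #include <iostream>
  #include <string>

  #include "verilog/analysis/flow_tree.h"
  #include "verilog/analysis/verilog_analyzer.h"

  int main() {
    const std::string src =
        "`ifdef FOO\n"
        "wire a;\n"
        "`else\n"
        "wire b;\n"
        "`endif\n";
    verilog::VerilogAnalyzer analyzer(src, "<stdin>");
    if (!analyzer.Tokenize().ok()) return 1;
    verilog::FlowTree tree(analyzer.Data().TokenStream());
    const auto variants = tree.MinCoverDefineVariants();
    if (!variants.ok()) return 1;
    // Expected per the algorithm above: {"FOO"} first, then {}.
    for (const auto& defines : *variants) {
      std::cout << "variant:";
      for (const auto& name : defines) std::cout << ' ' << name;
      std::cout << '\n';
    }
    return 0;
  }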

// Constructs the control flow tree, which determines the edges from each node
// (token index) to its possible children, and saves each edge_from_iterator in
// edges_.
16 changes: 15 additions & 1 deletion verilog/analysis/flow_tree.h
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -20,8 +20,11 @@
#include <string>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "common/text/token_stream_view.h"
#include "common/util/interval_set.h"

namespace verilog {

@@ -80,6 +83,17 @@ class FlowTree {
// Generates all possible variants.
absl::Status GenerateVariants(const VariantReceiver &receiver);

// Set of macro name defines.
using DefineSet = absl::flat_hash_set<absl::string_view>;

// A list of macro name sets; each set represents a variant of the source;
// together they should cover the entire source.
using DefineVariants = std::vector<DefineSet>;

// Returns the minimum set of defines needed to generate token stream variants
// that cover the entire source.
absl::StatusOr<DefineVariants> MinCoverDefineVariants();
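
A sketch of how these two types are meant to be consumed, mirroring the callers
this patch adds in formatter.cc (the helper name MakeVariantConfigs is
illustrative, not part of the patch): each DefineSet becomes one preprocessor
configuration with branch filtering enabled.

  #include <optional>
  #include <utility>
  #include <vector>

  #include "verilog/analysis/flow_tree.h"
  #include "verilog/preprocessor/verilog_preprocess.h"

  namespace verilog {

  inline std::vector<VerilogPreprocess::Config> MakeVariantConfigs(
      const FlowTree::DefineVariants& variants) {
    std::vector<VerilogPreprocess::Config> configs;
    for (const FlowTree::DefineSet& defines : variants) {
      VerilogPreprocess::Config config{.filter_branches = true};
      for (auto define : defines) {
        // Define each macro with no body; that is enough to select a branch.
        config.macro_definitions.emplace(define, std::nullopt);
      }
      configs.push_back(std::move(config));
    }
    return configs;
  }

  }  // namespace verilog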

// Returns all the used macros in conditionals, ordered with the same ID as
// used in BitSets.
const std::vector<TokenSequenceConstIterator> &GetUsedMacros() {
6 changes: 3 additions & 3 deletions verilog/analysis/verilog_analyzer_test.cc
@@ -62,7 +62,7 @@ using verible::SyntaxTreeLeaf;
using verible::TokenInfo;
using verible::TokenInfoTestData;

static constexpr verilog::VerilogPreprocess::Config kDefaultPreprocess;
static verilog::VerilogPreprocess::Config kDefaultPreprocess;

bool TreeContainsToken(const ConcreteSyntaxTree& tree, const TokenInfo& token) {
const auto* matching_leaf =
@@ -509,10 +509,10 @@ TEST(AnalyzeVerilogAutomaticMode, InferredModuleBodyMode) {
}

TEST(AnalyzeVerilogAutomaticMode, AutomaticWithFallback) {
static constexpr verilog::VerilogPreprocess::Config kNoBranchFilter{
static verilog::VerilogPreprocess::Config kNoBranchFilter{
.filter_branches = false,
};
static constexpr verilog::VerilogPreprocess::Config kWithBranchFilter{
static verilog::VerilogPreprocess::Config kWithBranchFilter{
.filter_branches = true,
};

2 changes: 1 addition & 1 deletion verilog/analysis/verilog_project.cc
@@ -36,7 +36,7 @@ namespace verilog {
// All files we process with the verilog project, essentially applications that
// build a symbol table (project-tool, kythe-indexer) only benefit from
// processing the same sequence of tokens a synthesis tool sees.
static constexpr verilog::VerilogPreprocess::Config kPreprocessConfig{
static verilog::VerilogPreprocess::Config kPreprocessConfig{
.filter_branches = true,
};

1 change: 1 addition & 0 deletions verilog/formatting/BUILD
@@ -163,6 +163,7 @@ cc_library(
"//common/util:vector-tree-iterators",
"//verilog/CST:declaration",
"//verilog/CST:module",
"//verilog/analysis:flow-tree",
"//verilog/analysis:verilog-analyzer",
"//verilog/analysis:verilog-equivalence",
"//verilog/parser:verilog-token-enum",
78 changes: 57 additions & 21 deletions verilog/formatting/formatter.cc
@@ -20,6 +20,7 @@
#include <iterator>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "common/formatting/format_token.h"
@@ -46,13 +47,15 @@
#include "common/util/vector_tree_iterators.h"
#include "verilog/CST/declaration.h"
#include "verilog/CST/module.h"
#include "verilog/analysis/flow_tree.h"
#include "verilog/analysis/verilog_analyzer.h"
#include "verilog/analysis/verilog_equivalence.h"
#include "verilog/formatting/align.h"
#include "verilog/formatting/comment_controls.h"
#include "verilog/formatting/format_style.h"
#include "verilog/formatting/token_annotator.h"
#include "verilog/formatting/tree_unwrapper.h"
#include "verilog/parser/verilog_lexer.h"
#include "verilog/parser/verilog_token_enum.h"
#include "verilog/preprocessor/verilog_preprocess.h"

@@ -116,19 +119,29 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure,
// Note: We cannot just Tokenize() and compare because Analyze()
// performs additional transformations like expanding MacroArgs to
// expression subtrees.
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
formatted_output, filename, verilog::VerilogPreprocess::Config());
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
const auto reparse_status = reanalyzer->ParseStatus();
verilog::FlowTree control_flow_tree(text_structure.TokenStream());
const auto define_variants = control_flow_tree.MinCoverDefineVariants();
if (!define_variants.ok()) return define_variants.status();
for (const FlowTree::DefineSet& defines : *define_variants) {
VerilogPreprocess::Config config{.filter_branches = true};
for (auto define : defines) {
config.macro_definitions.emplace(define, std::nullopt);
}

if (!relex_status.ok() || !reparse_status.ok()) {
const auto& token_errors = reanalyzer->TokenErrorMessages();
// Only print the first error.
if (!token_errors.empty()) {
return absl::DataLossError(
absl::StrCat("Error lex/parsing-ing formatted output. "
"Please file a bug.\nFirst error: ",
token_errors.front()));
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
formatted_output, filename, config);
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
const auto reparse_status = reanalyzer->ParseStatus();

if (!relex_status.ok() || !reparse_status.ok()) {
const auto& token_errors = reanalyzer->TokenErrorMessages();
// Only print the first error.
if (!token_errors.empty()) {
return absl::DataLossError(
absl::StrCat("Error lexing/parsing formatted output. "
"Please file a bug.\nFirst error: ",
token_errors.front()));
}
}
}

@@ -199,10 +212,10 @@ static Status ReformatVerilog(absl::string_view original_text,
}

static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
absl::string_view text, absl::string_view filename) {
absl::string_view text, absl::string_view filename,
const verilog::VerilogPreprocess::Config& preprocess_config = {}) {
std::unique_ptr<VerilogAnalyzer> analyzer =
VerilogAnalyzer::AnalyzeAutomaticMode(
text, filename, verilog::VerilogPreprocess::Config());
VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config);
{
// Lex and parse code. Exit on failure.
const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus();
@@ -265,13 +278,36 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename,
const FormatStyle& style, std::ostream& formatted_stream,
const LineNumberSet& lines,
const ExecutionControl& control) {
const auto analyzer = ParseWithStatus(text, filename);
if (!analyzer.ok()) return analyzer.status();
// Prepare define variants
VerilogAnalyzer analyzer(text, filename);
if (Status tokenize_status = analyzer.Tokenize(); !tokenize_status.ok()) {
return tokenize_status;
}
FlowTree control_flow_tree(analyzer.Data().TokenStream());
const auto define_variants = control_flow_tree.MinCoverDefineVariants();
if (!define_variants.ok()) return define_variants.status();
// Proceed with formatting for each variant
std::string text_to_format{text.begin(), text.end()};
Status format_status;
for (const FlowTree::DefineSet& defines : *define_variants) {
// Set up preprocess config
VerilogPreprocess::Config config{.filter_branches = true};
for (auto define : defines) {
config.macro_definitions.emplace(define, std::nullopt);
}

const auto analyzer = ParseWithStatus(text_to_format, filename, config);
if (!analyzer.ok()) return analyzer.status();

const verible::TextStructureView& text_structure = analyzer->get()->Data();
std::string formatted_text;
format_status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
if (!format_status.ok()) break;
text_to_format = formatted_text;
}

const verible::TextStructureView& text_structure = analyzer->get()->Data();
std::string formatted_text;
Status format_status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
const absl::string_view formatted_text = text_to_format;
// Commit formatted text to the output stream independent of status.
formatted_stream << formatted_text;
if (!format_status.ok()) return format_status;
2 changes: 1 addition & 1 deletion verilog/formatting/formatter_test.cc
@@ -54,7 +54,7 @@ absl::Status VerifyFormatting(const verible::TextStructureView& text_structure,

namespace {

static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

using absl::StatusCode;
using testing::HasSubstr;
6 changes: 6 additions & 0 deletions verilog/formatting/token_annotator.cc
@@ -746,6 +746,12 @@ static WithReason<SpacingOptions> BreakDecisionBetween(
const FormatStyle& style, const PreFormatToken& left,
const PreFormatToken& right, const SyntaxTreeContext& left_context,
const SyntaxTreeContext& right_context) {
if (IsPreprocessorControlFlow(verilog_tokentype(right.TokenEnum()))) {
return {SpacingOptions::kPreserve,
"Use original spacing before prepocessor control (FIXME: for some "
"reason works better than kMustWrap)."};
}

// For now, leave everything inside [dimensions] alone.
if (InDeclaredDimensions(right_context)) {
// ... except for the spacing immediately around '[' and ']',