Skip to content

Commit da412ef

Browse files
committed
Format preprocessed token stream in multiple passes
Currently, the formatter doesn't handle many scenarios involving preprocessor `ifdef`s/`endif`s interleaved with `begin`s, module headers, etc. (#228, #241, #267). This patch attempts to solve this problem by performing multiple formatting passes over preprocessed variants of the source. Each of these variants has a different set of preprocessor branches enabled. Together, they should cover the entire source (though that doesn't work in all cases yet). After several formatting passes, one for each variant of the AST, a correct and properly formatted file is produced. This is still a work in progress, so not everything works, and the code isn't very clean. I'd love to get some early feedback on this. Signed-off-by: Krzysztof Bieganski <[email protected]>
1 parent 529b519 commit da412ef

15 files changed

+246
-56
lines changed

verilog/CST/expression_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
namespace verilog {
3333
namespace {
3434

35-
static constexpr VerilogPreprocess::Config kDefaultPreprocess;
35+
static VerilogPreprocess::Config kDefaultPreprocess;
3636

3737
using verible::SyntaxTreeSearchTestCase;
3838
using verible::TextStructureView;

verilog/analysis/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,13 @@ cc_library(
7979
hdrs = ["flow_tree.h"],
8080
deps = [
8181
"//common/text:token-stream-view",
82+
"//common/util:interval-set",
8283
"//common/util:logging",
8384
"//common/util:status-macros",
8485
"//verilog/parser:verilog-token-enum",
86+
"@com_google_absl//absl/container:flat_hash_set",
8587
"@com_google_absl//absl/status",
88+
"@com_google_absl//absl/status:statusor",
8689
"@com_google_absl//absl/strings",
8790
],
8891
)

verilog/analysis/extractors_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
namespace verilog {
2727
namespace analysis {
2828
namespace {
29-
static constexpr VerilogPreprocess::Config kDefaultPreprocess;
29+
static VerilogPreprocess::Config kDefaultPreprocess;
3030

3131
TEST(CollectInterfaceNamesTest, NonModuleTests) {
3232
const std::pair<absl::string_view, std::set<std::string>> kTestCases[] = {

verilog/analysis/flow_tree.cc

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2017-2022 The Verible Authors.
1+
// Copyright 2017-2023 The Verible Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -126,6 +126,10 @@ absl::Status FlowTree::MacroFollows(
126126
return absl::InvalidArgumentError("Error macro name can't be extracted.");
127127
}
128128
auto macro_iterator = conditional_iterator + 1;
129+
if (macro_iterator->token_enum() == TK_SPACE) {
130+
// FIXME: It's not always there?
131+
macro_iterator++;
132+
}
129133
if (macro_iterator->token_enum() != PP_Identifier) {
130134
return absl::InvalidArgumentError("Expected identifier for macro name.");
131135
}
@@ -142,6 +146,10 @@ absl::Status FlowTree::AddMacroOfConditional(
142146
"Error no macro follows the conditional directive.");
143147
}
144148
auto macro_iterator = conditional_iterator + 1;
149+
if (macro_iterator->token_enum() == TK_SPACE) {
150+
// FIXME: It's not always there?
151+
macro_iterator++;
152+
}
145153
auto macro_identifier = macro_iterator->text();
146154
if (conditional_macro_id_.find(macro_identifier) ==
147155
conditional_macro_id_.end()) {
@@ -162,6 +170,10 @@ int FlowTree::GetMacroIDOfConditional(
162170
return -1;
163171
}
164172
auto macro_iterator = conditional_iterator + 1;
173+
if (macro_iterator->token_enum() == TK_SPACE) {
174+
// FIXME: It's not always there?
175+
macro_iterator++;
176+
}
165177
auto macro_identifier = macro_iterator->text();
166178
// It is always assumed that the macro already exists in the map.
167179
return conditional_macro_id_[macro_identifier];
@@ -176,6 +188,83 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) {
176188
return DepthFirstSearch(receiver, source_sequence_.begin());
177189
}
178190

191+
// Greedily computes a list of macro-definition sets ("define variants") such
// that preprocessing the source once per set should, together, visit every
// token of the source.
//
// Each pass walks the control flow graph from the first token, recording every
// visited token index in `covered`. At a conditional directive (`ifdef,
// `ifndef, `elsif) it takes the "condition true" edge if that branch has not
// been fully covered by earlier tokens/passes, and the "condition false" edge
// otherwise. A macro that was already decided earlier in the same pass keeps
// the same decision so that the variant stays self-consistent.
//
// Returns the error of GenerateControlFlowTree() if the flow tree cannot be
// built, or InvalidArgumentError if a conditional directive is the last token
// of the stream. If a pass adds no new coverage the search gives up and the
// variants collected so far are returned; they may not cover the whole source.
absl::StatusOr<FlowTree::DefineVariants> FlowTree::MinCoverDefineVariants() {
  auto status = GenerateControlFlowTree();
  if (!status.ok()) return status;

  const TokenSequenceConstIterator source_begin = source_sequence_.begin();
  const TokenSequenceConstIterator source_end = source_sequence_.end();
  const int64_t tok_count = static_cast<int64_t>(source_sequence_.size());

  // Token indices visited so far by any pass.
  verible::IntervalSet<int64_t> covered;
  // Snapshot of `covered` at the end of the previous pass.
  verible::IntervalSet<int64_t> last_covered;
  // The result: all define variants that should cover the entire source.
  DefineVariants define_variants;
  // Macros whose state (defined or undefined) was already decided in the
  // current pass. Declared outside the loop to reuse its allocation.
  DefineSet visited;

  while (!covered.Contains({0, tok_count})) {
    // Define sets are moved into the result, so a new one is made each pass.
    DefineSet defines;
    visited.clear();

    TokenSequenceConstIterator tok_it = source_begin;
    while (tok_it < source_end) {
      covered.Add(std::distance(source_begin, tok_it));

      const auto directive = tok_it->token_enum();
      const bool is_conditional = directive == PP_ifdef ||
                                  directive == PP_ifndef ||
                                  directive == PP_elsif;
      if (!is_conditional) {
        const auto edge_it = edges_.find(tok_it);
        if (edge_it == edges_.end() || edge_it->second.empty()) {
          // No outgoing edge: just move to the next token.
          ++tok_it;
        } else {
          // Otherwise follow the (only) jump.
          tok_it = edge_it->second.front();
        }
        continue;
      }

      // Locate the macro name. The whitespace token between the directive and
      // the name is not present in every token stream, so skip it only when
      // it is actually there (same handling as the other conditional helpers
      // in this file).
      auto macro_id_it = tok_it + 1;
      if (macro_id_it < source_end && macro_id_it->token_enum() == TK_SPACE) {
        ++macro_id_it;
      }
      if (macro_id_it >= source_end) {
        return absl::InvalidArgumentError(
            "Error no macro follows the conditional directive.");
      }
      const auto macro_text = macro_id_it->text();
      const bool negated = directive == PP_ifndef;

      // Edge 0 is taken when the condition holds, edge 1 when it does not.
      const auto &branches = edges_[tok_it];

      // If this macro was already visited (either defined or undefined), we
      // need to stick to the same branch. TODO: handle `defines
      if (visited.contains(macro_text)) {
        const bool assume_condition_is_true =
            (negated != defines.contains(macro_text));
        tok_it = branches[assume_condition_is_true ? 0 : 1];
        continue;
      }

      // First time this macro is seen in this pass; mark it as decided.
      visited.insert(macro_text);
      const auto if_it = branches[0];
      const auto else_it = branches[1];
      const auto if_idx = std::distance(source_begin, if_it);
      const auto else_idx = std::distance(source_begin, else_it);
      if (!covered.Contains({if_idx, else_idx})) {
        // The conditional's body is not covered yet: assume the condition is
        // true, which for `ifdef/`elsif means the macro is defined.
        if (!negated) defines.insert(macro_text);
        tok_it = if_it;
      } else {
        // Body already covered: assume the condition is false, which for
        // `ifndef means the macro is defined.
        if (negated) defines.insert(macro_text);
        tok_it = else_it;
      }
    }

    define_variants.push_back(std::move(defines));

    // To prevent an infinite loop, stop if this pass covered nothing new.
    if (last_covered == covered) {
      // TODO: If there are nested `ifdefs that contradict each other early in
      // the source, this will prevent us from traversing the rest of the flow
      // tree. It would be better to detect this case, assume that the
      // contradicting part is covered, and continue the analysis.
      VLOG(4) << "Giving up on finding all define variants";
      break;  // FIXME: Perhaps we should error?
    }
    last_covered = covered;
  }
  VLOG(4) << "Done generating define variants. Coverage: " << covered;
  return define_variants;
}
267+
179268
// Constructs the control flow tree, which determines the edge from each node
180269
(token index) to the next possible children, and saves edge_from_iterator in
181270
// edges_.

verilog/analysis/flow_tree.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2017-2022 The Verible Authors.
1+
// Copyright 2017-2023 The Verible Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -20,8 +20,11 @@
2020
#include <string>
2121
#include <vector>
2222

23+
#include "absl/container/flat_hash_set.h"
2324
#include "absl/status/status.h"
25+
#include "absl/status/statusor.h"
2426
#include "common/text/token_stream_view.h"
27+
#include "common/util/interval_set.h"
2528

2629
namespace verilog {
2730

@@ -80,6 +83,17 @@ class FlowTree {
8083
// Generates all possible variants.
8184
absl::Status GenerateVariants(const VariantReceiver &receiver);
8285

86+
// Set of macro name defines.
87+
using DefineSet = absl::flat_hash_set<absl::string_view>;
88+
89+
// A list of macro name sets; each set represents a variant of the source;
90+
// together they should cover the entire source.
91+
using DefineVariants = std::vector<DefineSet>;
92+
93+
// Returns the minimum set of defines needed to generate token stream variants
94+
// that cover the entire source.
95+
absl::StatusOr<DefineVariants> MinCoverDefineVariants();
96+
8397
// Returns all the used macros in conditionals, ordered with the same ID as
8498
// used in BitSets.
8599
const std::vector<TokenSequenceConstIterator> &GetUsedMacros() {

verilog/analysis/verilog_analyzer_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ using verible::SyntaxTreeLeaf;
6262
using verible::TokenInfo;
6363
using verible::TokenInfoTestData;
6464

65-
static constexpr verilog::VerilogPreprocess::Config kDefaultPreprocess;
65+
static verilog::VerilogPreprocess::Config kDefaultPreprocess;
6666

6767
bool TreeContainsToken(const ConcreteSyntaxTree& tree, const TokenInfo& token) {
6868
const auto* matching_leaf =
@@ -509,10 +509,10 @@ TEST(AnalyzeVerilogAutomaticMode, InferredModuleBodyMode) {
509509
}
510510

511511
TEST(AnalyzeVerilogAutomaticMode, AutomaticWithFallback) {
512-
static constexpr verilog::VerilogPreprocess::Config kNoBranchFilter{
512+
static verilog::VerilogPreprocess::Config kNoBranchFilter{
513513
.filter_branches = false,
514514
};
515-
static constexpr verilog::VerilogPreprocess::Config kWithBranchFilter{
515+
static verilog::VerilogPreprocess::Config kWithBranchFilter{
516516
.filter_branches = true,
517517
};
518518

verilog/analysis/verilog_project.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ namespace verilog {
3636
// All files we process with the verilog project, essentially applications that
3737
// build a symbol table (project-tool, kythe-indexer) only benefit from
3838
// processing the same sequence of tokens a synthesis tool sees.
39-
static constexpr verilog::VerilogPreprocess::Config kPreprocessConfig{
39+
static verilog::VerilogPreprocess::Config kPreprocessConfig{
4040
.filter_branches = true,
4141
};
4242

verilog/formatting/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ cc_library(
163163
"//common/util:vector-tree-iterators",
164164
"//verilog/CST:declaration",
165165
"//verilog/CST:module",
166+
"//verilog/analysis:flow-tree",
166167
"//verilog/analysis:verilog-analyzer",
167168
"//verilog/analysis:verilog-equivalence",
168169
"//verilog/parser:verilog-token-enum",

verilog/formatting/formatter.cc

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <iterator>
2121
#include <vector>
2222

23+
#include "absl/algorithm/container.h"
2324
#include "absl/status/status.h"
2425
#include "absl/status/statusor.h"
2526
#include "common/formatting/format_token.h"
@@ -46,13 +47,15 @@
4647
#include "common/util/vector_tree_iterators.h"
4748
#include "verilog/CST/declaration.h"
4849
#include "verilog/CST/module.h"
50+
#include "verilog/analysis/flow_tree.h"
4951
#include "verilog/analysis/verilog_analyzer.h"
5052
#include "verilog/analysis/verilog_equivalence.h"
5153
#include "verilog/formatting/align.h"
5254
#include "verilog/formatting/comment_controls.h"
5355
#include "verilog/formatting/format_style.h"
5456
#include "verilog/formatting/token_annotator.h"
5557
#include "verilog/formatting/tree_unwrapper.h"
58+
#include "verilog/parser/verilog_lexer.h"
5659
#include "verilog/parser/verilog_token_enum.h"
5760
#include "verilog/preprocessor/verilog_preprocess.h"
5861

@@ -116,19 +119,29 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure,
116119
// Note: We cannot just Tokenize() and compare because Analyze()
117120
// performs additional transformations like expanding MacroArgs to
118121
// expression subtrees.
119-
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
120-
formatted_output, filename, verilog::VerilogPreprocess::Config());
121-
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
122-
const auto reparse_status = reanalyzer->ParseStatus();
122+
verilog::FlowTree control_flow_tree(text_structure.TokenStream());
123+
const auto define_variants = control_flow_tree.MinCoverDefineVariants();
124+
if (!define_variants.ok()) return define_variants.status();
125+
for (const FlowTree::DefineSet& defines : *define_variants) {
126+
VerilogPreprocess::Config config{.filter_branches = true};
127+
for (auto define : defines) {
128+
config.macro_definitions.emplace(define, std::nullopt);
129+
}
123130

124-
if (!relex_status.ok() || !reparse_status.ok()) {
125-
const auto& token_errors = reanalyzer->TokenErrorMessages();
126-
// Only print the first error.
127-
if (!token_errors.empty()) {
128-
return absl::DataLossError(
129-
absl::StrCat("Error lex/parsing-ing formatted output. "
130-
"Please file a bug.\nFirst error: ",
131-
token_errors.front()));
131+
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
132+
formatted_output, filename, config);
133+
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
134+
const auto reparse_status = reanalyzer->ParseStatus();
135+
136+
if (!relex_status.ok() || !reparse_status.ok()) {
137+
const auto& token_errors = reanalyzer->TokenErrorMessages();
138+
// Only print the first error.
139+
if (!token_errors.empty()) {
140+
return absl::DataLossError(
141+
absl::StrCat("Error lexing/parsing formatted output. "
142+
"Please file a bug.\nFirst error: ",
143+
token_errors.front()));
144+
}
132145
}
133146
}
134147

@@ -199,10 +212,10 @@ static Status ReformatVerilog(absl::string_view original_text,
199212
}
200213

201214
static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
202-
absl::string_view text, absl::string_view filename) {
215+
absl::string_view text, absl::string_view filename,
216+
const verilog::VerilogPreprocess::Config& preprocess_config = {}) {
203217
std::unique_ptr<VerilogAnalyzer> analyzer =
204-
VerilogAnalyzer::AnalyzeAutomaticMode(
205-
text, filename, verilog::VerilogPreprocess::Config());
218+
VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config);
206219
{
207220
// Lex and parse code. Exit on failure.
208221
const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus();
@@ -265,13 +278,36 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename,
265278
const FormatStyle& style, std::ostream& formatted_stream,
266279
const LineNumberSet& lines,
267280
const ExecutionControl& control) {
268-
const auto analyzer = ParseWithStatus(text, filename);
269-
if (!analyzer.ok()) return analyzer.status();
281+
// Prepare define variants
282+
VerilogAnalyzer analyzer(text, filename);
283+
if (Status tokenize_status = analyzer.Tokenize(); !tokenize_status.ok()) {
284+
return tokenize_status;
285+
}
286+
FlowTree control_flow_tree(analyzer.Data().TokenStream());
287+
const auto define_variants = control_flow_tree.MinCoverDefineVariants();
288+
if (!define_variants.ok()) return define_variants.status();
289+
// Proceed with formatting for each variant
290+
std::string text_to_format{text.begin(), text.end()};
291+
Status format_status;
292+
for (const FlowTree::DefineSet& defines : *define_variants) {
293+
// Set up preprocess config
294+
VerilogPreprocess::Config config{.filter_branches = true};
295+
for (auto define : defines) {
296+
config.macro_definitions.emplace(define, std::nullopt);
297+
}
298+
299+
const auto analyzer = ParseWithStatus(text_to_format, filename, config);
300+
if (!analyzer.ok()) return analyzer.status();
301+
302+
const verible::TextStructureView& text_structure = analyzer->get()->Data();
303+
std::string formatted_text;
304+
format_status = FormatVerilog(text_structure, filename, style,
305+
&formatted_text, lines, control);
306+
if (!format_status.ok()) break;
307+
text_to_format = formatted_text;
308+
}
270309

271-
const verible::TextStructureView& text_structure = analyzer->get()->Data();
272-
std::string formatted_text;
273-
Status format_status = FormatVerilog(text_structure, filename, style,
274-
&formatted_text, lines, control);
310+
const absl::string_view formatted_text = text_to_format;
275311
// Commit formatted text to the output stream independent of status.
276312
formatted_stream << formatted_text;
277313
if (!format_status.ok()) return format_status;

verilog/formatting/formatter_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ absl::Status VerifyFormatting(const verible::TextStructureView& text_structure,
5454

5555
namespace {
5656

57-
static constexpr VerilogPreprocess::Config kDefaultPreprocess;
57+
static VerilogPreprocess::Config kDefaultPreprocess;
5858

5959
using absl::StatusCode;
6060
using testing::HasSubstr;

0 commit comments

Comments
 (0)