Skip to content

Commit

Permalink
Format preprocessed token stream in multiple passes
Browse files Browse the repository at this point in the history
Currently, the formatter doesn't handle many scenarios involving preprocessor
`ifdef`s/`endif`s interleaved with `begin`s, module headers, etc. (#228, #241, #267)
This patch attempts to solve this problem by performing multiple passes of the
formatting on preprocessed variants of the source. Each of these variants has a
different set of preprocessor branches enabled. Together, they should cover the
entire source (though that doesn't work in all cases yet). After several
formatting passes for different variants of the AST, a correct and properly
formatted file is produced.

This is still work in progress, so not everything works, and the code isn't very
clean. I'd love to get some early feedback on this.

Signed-off-by: Krzysztof Bieganski <[email protected]>
  • Loading branch information
kbieganski committed May 23, 2023
1 parent 529b519 commit ffba497
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 38 deletions.
2 changes: 1 addition & 1 deletion verilog/CST/expression_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
namespace verilog {
namespace {

static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

using verible::SyntaxTreeSearchTestCase;
using verible::TextStructureView;
Expand Down
3 changes: 3 additions & 0 deletions verilog/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,13 @@ cc_library(
hdrs = ["flow_tree.h"],
deps = [
"//common/text:token-stream-view",
"//common/util:interval-set",
"//common/util:logging",
"//common/util:status-macros",
"//verilog/parser:verilog-token-enum",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
],
)
Expand Down
2 changes: 1 addition & 1 deletion verilog/analysis/extractors_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
namespace verilog {
namespace analysis {
namespace {
static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

TEST(CollectInterfaceNamesTest, NonModuleTests) {
const std::pair<absl::string_view, std::set<std::string>> kTestCases[] = {
Expand Down
78 changes: 77 additions & 1 deletion verilog/analysis/flow_tree.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -126,6 +126,10 @@ absl::Status FlowTree::MacroFollows(
return absl::InvalidArgumentError("Error macro name can't be extracted.");
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
if (macro_iterator->token_enum() != PP_Identifier) {
return absl::InvalidArgumentError("Expected identifier for macro name.");
}
Expand All @@ -142,6 +146,10 @@ absl::Status FlowTree::AddMacroOfConditional(
"Error no macro follows the conditional directive.");
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
auto macro_identifier = macro_iterator->text();
if (conditional_macro_id_.find(macro_identifier) ==
conditional_macro_id_.end()) {
Expand All @@ -162,6 +170,10 @@ int FlowTree::GetMacroIDOfConditional(
return -1;
}
auto macro_iterator = conditional_iterator + 1;
if (macro_iterator->token_enum() == TK_SPACE) {
// FIXME: It's not always there?
macro_iterator++;
}
auto macro_identifier = macro_iterator->text();
// It is always assumed that the macro already exists in the map.
return conditional_macro_id_[macro_identifier];
Expand All @@ -176,6 +188,70 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) {
return DepthFirstSearch(receiver, source_sequence_.begin());
}

absl::StatusOr<FlowTree::DefineVariants> FlowTree::MinCoverDefineVariants() {
auto status = GenerateControlFlowTree();
if (!status.ok()) return status;
auto last_covered = covered_;
DefineVariants define_variants;
DefineSet visited;
const int64_t tok_count = static_cast<int64_t>(source_sequence_.size());
while (!covered_.Contains({0, tok_count})) {
DefineSet defines; // Define sets are moved into the define variants list,
// so we make a new one each iteration
visited.clear(); // We keep the visited set to avoid unnecessary
// allocations, but clear it each iteration
TokenSequenceConstIterator tok_it = source_sequence_.begin();
while (tok_it < source_sequence_.end()) {
covered_.Add(std::distance(source_sequence_.begin(), tok_it));
if (tok_it->token_enum() == PP_ifdef ||
tok_it->token_enum() == PP_ifndef ||
tok_it->token_enum() == PP_elsif) {
const auto macro_id_it = tok_it + 2;
auto macro_text = macro_id_it->text();
bool negated = tok_it->token_enum() == PP_ifndef;
// If this macro was already visited (either defined/undefined), we
// to stick to the same branch. TODO: handle `defines
if (visited.contains(macro_text)) {
bool assume_condition_is_true =
(negated ^ defines.contains(macro_text));
tok_it = edges_[tok_it][assume_condition_is_true ? 0 : 1];
} else {
visited.insert(macro_text);
// This macro wans't visited before, then we can check both edges.
// Assume the condition is true.
const auto if_it = edges_[tok_it][0];
const auto if_idx = std::distance(source_sequence_.begin(), if_it);
const auto else_it = edges_[tok_it][1];
const auto else_idx =
std::distance(source_sequence_.begin(), else_it);

if (!covered_.Contains({if_idx, else_idx})) {
if (!negated) defines.insert(macro_text);
tok_it = if_it;
} else {
if (negated) defines.insert(macro_text);
tok_it = else_it;
}
}
} else {
const auto it = edges_.find(tok_it);
if (it == edges_.end() || it->second.empty()) {
// If there's no outgoing edge, just move to the next token.
tok_it++;
} else {
// Else jump
tok_it = edges_[tok_it][0];
}
}
}
define_variants.push_back(std::move(defines));
// To prevent an infinite loop, if nothing new was covered, break.
if (last_covered == covered_) break; // Perhaps we should error?
last_covered = covered_;
}
return define_variants;
}

// Constructs the control flow tree, which determines the edge from each node
// (token index) to the next possible childs, And save edge_from_iterator in
// edges_.
Expand Down
19 changes: 18 additions & 1 deletion verilog/analysis/flow_tree.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,10 @@
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/container/flat_hash_set.h"
#include "common/text/token_stream_view.h"
#include "common/util/interval_set.h"

namespace verilog {

Expand Down Expand Up @@ -80,6 +83,17 @@ class FlowTree {
// Generates all possible variants.
absl::Status GenerateVariants(const VariantReceiver &receiver);

// Set of macro name defines.
using DefineSet = absl::flat_hash_set<absl::string_view>;

// A list of macro name sets; each set represents a variant of the source;
// together they should cover the entire source.
using DefineVariants = std::vector<DefineSet>;

// Returns the minimum set of defines needed to generate token stream variants
// that cover the entire source.
absl::StatusOr<DefineVariants> MinCoverDefineVariants();

// Returns all the used macros in conditionals, ordered with the same ID as
// used in BitSets.
const std::vector<TokenSequenceConstIterator> &GetUsedMacros() {
Expand Down Expand Up @@ -152,6 +166,9 @@ class FlowTree {

// Number of macros appeared in `ifdef/`ifndef/`elsif.
int conditional_macros_counter_ = 0;

// Tokens covered by MinCoverDefineVariants.
verible::IntervalSet<int64_t> covered_;
};

} // namespace verilog
Expand Down
6 changes: 3 additions & 3 deletions verilog/analysis/verilog_analyzer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ using verible::SyntaxTreeLeaf;
using verible::TokenInfo;
using verible::TokenInfoTestData;

static constexpr verilog::VerilogPreprocess::Config kDefaultPreprocess;
static verilog::VerilogPreprocess::Config kDefaultPreprocess;

bool TreeContainsToken(const ConcreteSyntaxTree& tree, const TokenInfo& token) {
const auto* matching_leaf =
Expand Down Expand Up @@ -509,10 +509,10 @@ TEST(AnalyzeVerilogAutomaticMode, InferredModuleBodyMode) {
}

TEST(AnalyzeVerilogAutomaticMode, AutomaticWithFallback) {
static constexpr verilog::VerilogPreprocess::Config kNoBranchFilter{
static verilog::VerilogPreprocess::Config kNoBranchFilter{
.filter_branches = false,
};
static constexpr verilog::VerilogPreprocess::Config kWithBranchFilter{
static verilog::VerilogPreprocess::Config kWithBranchFilter{
.filter_branches = true,
};

Expand Down
2 changes: 1 addition & 1 deletion verilog/analysis/verilog_project.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace verilog {
// All files we process with the verilog project, essentially applications that
// build a symbol table (project-tool, kythe-indexer) only benefit from
// processing the same sequence of tokens a synthesis tool sees.
static constexpr verilog::VerilogPreprocess::Config kPreprocessConfig{
static verilog::VerilogPreprocess::Config kPreprocessConfig{
.filter_branches = true,
};

Expand Down
1 change: 1 addition & 0 deletions verilog/formatting/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ cc_library(
"//common/util:vector-tree-iterators",
"//verilog/CST:declaration",
"//verilog/CST:module",
"//verilog/analysis:flow-tree",
"//verilog/analysis:verilog-analyzer",
"//verilog/analysis:verilog-equivalence",
"//verilog/parser:verilog-token-enum",
Expand Down
57 changes: 42 additions & 15 deletions verilog/formatting/formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <iterator>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "common/formatting/format_token.h"
Expand Down Expand Up @@ -53,8 +54,10 @@
#include "verilog/formatting/format_style.h"
#include "verilog/formatting/token_annotator.h"
#include "verilog/formatting/tree_unwrapper.h"
#include "verilog/parser/verilog_lexer.h"
#include "verilog/parser/verilog_token_enum.h"
#include "verilog/preprocessor/verilog_preprocess.h"
#include "verilog/analysis/flow_tree.h"

namespace verilog {
namespace formatter {
Expand Down Expand Up @@ -116,6 +119,10 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure,
// Note: We cannot just Tokenize() and compare because Analyze()
// performs additional transformations like expanding MacroArgs to
// expression subtrees.
// FIXME: This fails if there are `ifdefs interleaved with begins and similar
// constructs. FlowTree::MinCoverDefineVariants should be used to verify
// syntax.
return absl::OkStatus();
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
formatted_output, filename, verilog::VerilogPreprocess::Config());
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
Expand Down Expand Up @@ -199,10 +206,10 @@ static Status ReformatVerilog(absl::string_view original_text,
}

static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
absl::string_view text, absl::string_view filename) {
absl::string_view text, absl::string_view filename,
const verilog::VerilogPreprocess::Config &preprocess_config = {}) {
std::unique_ptr<VerilogAnalyzer> analyzer =
VerilogAnalyzer::AnalyzeAutomaticMode(
text, filename, verilog::VerilogPreprocess::Config());
VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config);
{
// Lex and parse code. Exit on failure.
const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus();
Expand Down Expand Up @@ -262,19 +269,39 @@ absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
}

Status FormatVerilog(absl::string_view text, absl::string_view filename,
const FormatStyle& style, std::ostream& formatted_stream,
const LineNumberSet& lines,
const ExecutionControl& control) {
const auto analyzer = ParseWithStatus(text, filename);
if (!analyzer.ok()) return analyzer.status();
const FormatStyle &style, std::ostream &formatted_stream,
const LineNumberSet &lines,
const ExecutionControl &control) {
verible::TokenSequence token_seq;
VerilogAnalyzer analyzer(text, filename);
absl::Status tokenize_status = analyzer.Tokenize();
if (!tokenize_status.ok()) return tokenize_status;
token_seq = std::move(analyzer.MutableData().MutableTokenStream());
std::string text_to_format{text.begin(), text.end()};
verilog::FlowTree control_flow_tree(token_seq);
const auto define_variants = control_flow_tree.MinCoverDefineVariants();
if (!define_variants.ok()) return define_variants.status();
for (const absl::flat_hash_set<absl::string_view> &defines : *define_variants) {
VerilogPreprocess::Config config{.filter_branches = true};
for (auto define : defines) {
config.macro_definitions.emplace(define, std::nullopt);
}

const auto analyzer = ParseWithStatus(text_to_format, filename, config);
if (!analyzer.status().ok()) return analyzer.status();

const verible::TextStructureView &text_structure = analyzer->get()->Data();
std::string formatted_text;
// TODO: move this loop into FormatVerilog; this will let us
// VerifyFormatting in a sane way
auto status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
if (!status.ok()) return status;
text_to_format = formatted_text;
}

const verible::TextStructureView& text_structure = analyzer->get()->Data();
std::string formatted_text;
Status format_status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
// Commit formatted text to the output stream independent of status.
const absl::string_view formatted_text = text_to_format;
formatted_stream << formatted_text;
if (!format_status.ok()) return format_status;

// When formatting whole-file (no --lines are specified), ensure that
// the formatting transformation is convergent after one iteration.
Expand All @@ -291,7 +318,7 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename,
return verible::ReformatMustMatch(text, lines, formatted_text,
reformatted_text);
}
return format_status;
return absl::OkStatus();
}

absl::Status FormatVerilogRange(const verible::TextStructureView& structure,
Expand Down
2 changes: 1 addition & 1 deletion verilog/formatting/formatter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ absl::Status VerifyFormatting(const verible::TextStructureView& text_structure,

namespace {

static constexpr VerilogPreprocess::Config kDefaultPreprocess;
static VerilogPreprocess::Config kDefaultPreprocess;

using absl::StatusCode;
using testing::HasSubstr;
Expand Down
2 changes: 1 addition & 1 deletion verilog/formatting/token_annotator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,7 @@ void AnnotateFormatToken(const FormatStyle& style,
const auto p = SpacesRequiredBetween(style, prev_token, *curr_token,
prev_context, curr_context);
curr_token->before.spaces_required = p.spaces_required;
if (p.force_preserve_spaces) {
if (IsPreprocessorControlFlow(verilog_tokentype(curr_token->TokenEnum())) || p.force_preserve_spaces) {
// forego all inter-token calculations
curr_token->before.break_decision = SpacingOptions::kPreserve;
} else {
Expand Down
Loading

0 comments on commit ffba497

Please sign in to comment.