Skip to content

Refactor vtr_tokens #3135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 12, 2025
Merged
51 changes: 19 additions & 32 deletions libs/libarchfpga/src/read_xml_arch_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ static void LoadPinLoc(pugi::xml_node Locations,
for (int width = 0; width < type->width; ++width) {
for (int height = 0; height < type->height; ++height) {
for (e_side side : TOTAL_2D_SIDES) {
for (auto token : pin_locs->assignments[sub_tile_index][width][height][layer][side]) {
for (const std::string& token : pin_locs->assignments[sub_tile_index][width][height][layer][side]) {
auto pin_range = ProcessPinString<t_sub_tile*>(Locations,
&sub_tile,
token.c_str(),
Expand Down Expand Up @@ -741,109 +741,97 @@ static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
T type,
const char* pin_loc_string,
const pugiutil::loc_data& loc_data) {
int num_tokens;
auto tokens = GetTokensFromString(pin_loc_string, &num_tokens);
Tokens tokens(pin_loc_string);

int token_index = 0;
auto token = tokens[token_index];
size_t token_index = 0;

if (token.type != TOKEN_STRING || token.data != type->name) {
if (tokens[token_index].type != e_token_type::STRING || tokens[token_index].data != type->name) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"Wrong physical type name of the port: %s\n", pin_loc_string);
}

token_index++;
token = tokens[token_index];

if (token.type != TOKEN_DOT) {
if (tokens[token_index].type != e_token_type::DOT) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No dot is present to separate type name and port name: %s\n", pin_loc_string);
}

token_index++;
token = tokens[token_index];

if (token.type != TOKEN_STRING) {
if (tokens[token_index].type != e_token_type::STRING) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No port name is present: %s\n", pin_loc_string);
}

auto port = type->get_port(token.data);
auto port = type->get_port(tokens[token_index].data);
if (port == nullptr) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"Port %s for %s could not be found: %s\n",
type->name.c_str(), token.data,
type->name.c_str(), tokens[token_index].data.c_str(),
pin_loc_string);
}
int abs_first_pin_idx = port->absolute_first_pin_index;

token_index++;

// All the pins of the port are taken or the port has a single pin
if (token_index == num_tokens) {
freeTokens(tokens, num_tokens);
if (token_index == tokens.size()) {
return std::make_pair(abs_first_pin_idx, abs_first_pin_idx + port->num_pins);
}

token = tokens[token_index];

if (token.type != TOKEN_OPEN_SQUARE_BRACKET) {
if (tokens[token_index].type != e_token_type::OPEN_SQUARE_BRACKET) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No open square bracket present: %s\n", pin_loc_string);
}

token_index++;
token = tokens[token_index];

if (token.type != TOKEN_INT) {
if (tokens[token_index].type != e_token_type::INT) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No integer to indicate least significant pin index: %s\n", pin_loc_string);
}

int first_pin = vtr::atoi(token.data);
int first_pin = vtr::atoi(tokens[token_index].data);

token_index++;
token = tokens[token_index];

// Single pin is specified
if (token.type != TOKEN_COLON) {
if (token.type != TOKEN_CLOSE_SQUARE_BRACKET) {
if (tokens[token_index].type != e_token_type::COLON) {
if (tokens[token_index].type != e_token_type::CLOSE_SQUARE_BRACKET) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No closing bracket: %s\n", pin_loc_string);
}

token_index++;

if (token_index != num_tokens) {
if (token_index != tokens.size()) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"pin location should be completed, but more tokens are present: %s\n", pin_loc_string);
}

freeTokens(tokens, num_tokens);
return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + first_pin + 1);
}

token_index++;
token = tokens[token_index];

if (token.type != TOKEN_INT) {
if (tokens[token_index].type != e_token_type::INT) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No integer to indicate most significant pin index: %s\n", pin_loc_string);
}

int last_pin = vtr::atoi(token.data);
int last_pin = vtr::atoi(tokens[token_index].data);

token_index++;
token = tokens[token_index];

if (token.type != TOKEN_CLOSE_SQUARE_BRACKET) {
if (tokens[token_index].type != e_token_type::CLOSE_SQUARE_BRACKET) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"No closed square bracket: %s\n", pin_loc_string);
}

token_index++;

if (token_index != num_tokens) {
if (token_index != tokens.size()) {
archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
"pin location should be completed, but more tokens are present: %s\n", pin_loc_string);
}
Expand All @@ -852,7 +840,6 @@ static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
std::swap(first_pin, last_pin);
}

freeTokens(tokens, num_tokens);
return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + last_pin + 1);
}

Expand Down
167 changes: 50 additions & 117 deletions libs/libvtrutil/src/vtr_token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,149 +4,89 @@
* Tokenizer
*/

#include <cstring>
#include "vtr_token.h"

#include "vtr_assert.h"
#include "vtr_util.h"
#include "vtr_memory.h"
#include "vtr_token.h"

enum e_token_type GetTokenTypeFromChar(const enum e_token_type cur_token_type,
const char cur);

bool IsWhitespace(char c);

///@brief Returns true if character is whitespace between tokens
bool IsWhitespace(char c) {
switch (c) {
case ' ':
case '\t':
case '\r':
case '\n':
return true;
default:
return false;
}
}
#include <cctype>

///@brief Returns a token list of the text for a given string.
t_token* GetTokensFromString(const char* inString, int* num_tokens) {
const char* cur;
t_token* tokens;
int i, in_string_index, prev_in_string_index;
bool has_null;
enum e_token_type cur_token_type, new_token_type;
/// @brief Returns a token type of the given char
static e_token_type get_token_type_from_char(e_token_type cur_token_type, char cur);

*num_tokens = i = 0;
cur_token_type = TOKEN_NULL;
const t_token Tokens::null_token_{e_token_type::NULL_TOKEN, ""};

if (inString == nullptr) {
return nullptr;
};

cur = inString;

/* Count number of tokens */
while (*cur) {
new_token_type = GetTokenTypeFromChar(cur_token_type, *cur);
if (new_token_type != cur_token_type) {
cur_token_type = new_token_type;
if (new_token_type != TOKEN_NULL) {
i++;
}
}
++cur;
Tokens::Tokens(std::string_view inString) {
if (inString.empty()) {
return;
}
*num_tokens = i;

if (*num_tokens > 0) {
tokens = (t_token*)vtr::calloc(*num_tokens + 1, sizeof(t_token));
} else {
return nullptr;
}
e_token_type cur_token_type = e_token_type::NULL_TOKEN;
size_t in_string_index = 0;
size_t prev_in_string_index = 0;

/* populate tokens */
i = 0;
in_string_index = 0;
has_null = true;
prev_in_string_index = 0;
cur_token_type = TOKEN_NULL;

cur = inString;

while (*cur) {
new_token_type = GetTokenTypeFromChar(cur_token_type, *cur);
for (char cur : inString) {
e_token_type new_token_type = get_token_type_from_char(cur_token_type, cur);
if (new_token_type != cur_token_type) {
if (!has_null) {
tokens[i - 1].data[in_string_index - prev_in_string_index] = '\0'; /* NULL the end of the data string */
has_null = true;
if (cur_token_type != e_token_type::NULL_TOKEN) {
// Finalize the current token
t_token& current_token = tokens_.back();
current_token.data = inString.substr(prev_in_string_index,
in_string_index - prev_in_string_index);
}
if (new_token_type != TOKEN_NULL) {
tokens[i].type = new_token_type;
tokens[i].data = vtr::strdup(inString + in_string_index);
if (new_token_type != e_token_type::NULL_TOKEN) {
// Start a new token
t_token new_token;
new_token.type = new_token_type;
tokens_.push_back(new_token);
prev_in_string_index = in_string_index;
has_null = false;
i++;
}
cur_token_type = new_token_type;
}
++cur;
in_string_index++;
}

VTR_ASSERT(i == *num_tokens);

tokens[*num_tokens].type = TOKEN_NULL;
tokens[*num_tokens].data = nullptr;

/* Return the list */
return tokens;
// Finalize the last token if it exists
if (cur_token_type != e_token_type::NULL_TOKEN && !tokens_.empty()) {
t_token& current_token = tokens_.back();
current_token.data = inString.substr(prev_in_string_index,
in_string_index - prev_in_string_index);
}
}

///@brief Free (tokens)
void freeTokens(t_token* tokens, const int num_tokens) {
int i;
for (i = 0; i < num_tokens; i++) {
free(tokens[i].data);
const t_token& Tokens::operator[](size_t idx) const {
if (idx < tokens_.size()) {
return tokens_[idx];
} else {
return null_token_;
}
free(tokens);
}

///@brief Returns a token type of the given char
enum e_token_type GetTokenTypeFromChar(const enum e_token_type cur_token_type,
const char cur) {
if (IsWhitespace(cur)) {
return TOKEN_NULL;
static e_token_type get_token_type_from_char(e_token_type cur_token_type, char cur) {
if (std::isspace(cur)) {
return e_token_type::NULL_TOKEN;
} else {
if (cur == '[') {
return TOKEN_OPEN_SQUARE_BRACKET;
return e_token_type::OPEN_SQUARE_BRACKET;
} else if (cur == ']') {
return TOKEN_CLOSE_SQUARE_BRACKET;
return e_token_type::CLOSE_SQUARE_BRACKET;
} else if (cur == '{') {
return TOKEN_OPEN_SQUIG_BRACKET;
return e_token_type::OPEN_SQUIG_BRACKET;
} else if (cur == '}') {
return TOKEN_CLOSE_SQUIG_BRACKET;
return e_token_type::CLOSE_SQUIG_BRACKET;
} else if (cur == ':') {
return TOKEN_COLON;
return e_token_type::COLON;
} else if (cur == '.') {
return TOKEN_DOT;
} else if (cur >= '0' && cur <= '9' && cur_token_type != TOKEN_STRING) {
return TOKEN_INT;
return e_token_type::DOT;
} else if (cur >= '0' && cur <= '9' && cur_token_type != e_token_type::STRING) {
return e_token_type::INT;
} else {
return TOKEN_STRING;
return e_token_type::STRING;
}
}
}

///@brief Returns true if the token's type equals to token_type
bool checkTokenType(const t_token token, enum e_token_type token_type) {
if (token.type != token_type) {
return false;
}
return true;
}

///@brief Returns a 2D array representing the atof result of all the input string entries separated by whitespace
void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* instring) {
int i, j;
char *cur, *cur2, *copy, *final;
Expand All @@ -160,7 +100,7 @@ void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* in
cur = copy;
i = j = 0;
while (cur != final) {
while (IsWhitespace(*cur) && cur != final) {
while (std::isspace(*cur) && cur != final) {
if (j == max_j) {
i++;
j = 0;
Expand All @@ -171,7 +111,7 @@ void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* in
break;
}
cur2 = cur;
while (!IsWhitespace(*cur2) && cur2 != final) {
while (!std::isspace(*cur2) && cur2 != final) {
cur2++;
}
*cur2 = '\0';
Expand All @@ -187,13 +127,6 @@ void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* in
free(copy);
}

/* Date:July 2nd, 2013 *
* Author: Daniel Chen */
/**
* @brief Checks if the number of entries (separated by whitespace) matches the expected number (max_i * max_j)
*
* can be used before calling my_atof_2D
*/
bool check_my_atof_2D(const int max_i, const int max_j, const char* instring, int* num_entries) {
/* Check if max_i * max_j matches number of entries in instring */
const char* cur = instring;
Expand All @@ -202,10 +135,10 @@ bool check_my_atof_2D(const int max_i, const int max_j, const char* instring, in

/* First count number of entries in instring */
while (*cur != '\0') {
if (!IsWhitespace(*cur) && !in_str) {
if (!std::isspace(*cur) && !in_str) {
in_str = true;
entry_count++;
} else if (IsWhitespace(*cur)) {
} else if (std::isspace(*cur)) {
in_str = false;
}
cur++;
Expand Down
Loading