Skip to content

refactor: switch to quick_xml library #101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 22, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 11 additions & 39 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cpp-linter/Cargo.toml
Original file line number Diff line number Diff line change
@@ -25,11 +25,11 @@ lenient_semver = "0.4.2"
log = { version = "0.4.25", features = ["std"] }
openssl = { version = "0.10", features = ["vendored"], optional = true }
openssl-probe = { version = "0.1", optional = true }
quick-xml = {version = "0.37.2", features = ["serialize"]}
regex = "1.11.1"
reqwest = "0.12.12"
semver = "1.0.25"
serde = { version = "1.0.217", features = ["derive"] }
serde-xml-rs = "0.6.0"
serde_json = "1.0.137"
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"]}
tokio-macros = "2.4.0"
159 changes: 55 additions & 104 deletions cpp-linter/src/clang_tools/clang_format.rs
Original file line number Diff line number Diff line change
@@ -2,29 +2,26 @@
//! output.
use std::{
fs,
process::Command,
sync::{Arc, Mutex, MutexGuard},
};

use anyhow::{Context, Result};
use log::Level;
// non-std crates
use serde::Deserialize;
use serde_xml_rs::de::Deserializer;

// project-specific crates/modules
use super::MakeSuggestions;
use crate::{
cli::ClangParams,
common_fs::{get_line_cols_from_offset, FileObj},
common_fs::{get_line_count_from_offset, FileObj},
};

/// A Structure used to deserialize clang-format's XML output.
#[derive(Debug, Deserialize, PartialEq, Clone)]
#[serde(rename = "replacements")]
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct FormatAdvice {
/// A list of [`Replacement`]s that clang-tidy wants to make.
#[serde(rename = "$value")]
#[serde(rename(deserialize = "replacement"))]
pub replacements: Vec<Replacement>,

pub patched: Option<Vec<u8>>,
@@ -41,43 +38,18 @@
}

/// A single replacement that clang-format wants to make.
#[derive(Debug, Deserialize, PartialEq)]
#[derive(Debug, PartialEq, Eq, Default, Clone, Copy, Deserialize)]
pub struct Replacement {
/// The byte offset where the replacement will start.
pub offset: usize,

/// The amount of bytes that will be removed.
pub length: usize,

/// The bytes (UTF-8 encoded) that will be added at the [`Replacement::offset`] position.
#[serde(rename = "$value")]
pub value: Option<String>,
#[serde(rename = "@offset")]
pub offset: u32,

/// The line number described by the [`Replacement::offset`].
///
/// This value is not provided by the XML output, but we calculate it after
/// deserialization.
#[serde(default)]
pub line: usize,

/// The column number on the line described by the [`Replacement::offset`].
///
/// This value is not provided by the XML output, but we calculate it after
/// deserialization.
#[serde(default)]
pub cols: usize,
}

impl Clone for Replacement {
fn clone(&self) -> Self {
Replacement {
offset: self.offset,
length: self.length,
value: self.value.clone(),
line: self.line,
cols: self.cols,
}
}
pub line: u32,
}

/// Get a string that summarizes the given `--style`
@@ -122,8 +94,9 @@
}
let file_name = file.name.to_string_lossy().to_string();
cmd.arg(file.name.to_path_buf().as_os_str());
let mut patched = None;
if clang_params.format_review {
let patched = if !clang_params.format_review {
None
} else {
logs.push((
Level::Info,
format!(
@@ -140,12 +113,12 @@
.join(" ")
),
));
patched = Some(
Some(
cmd.output()
.with_context(|| format!("Failed to get fixes from clang-format: {file_name}"))?
.stdout,
);
}
)
};
cmd.arg("--output-replacements-xml");
logs.push((
log::Level::Info,
@@ -170,41 +143,40 @@
),
));
}
if output.stdout.is_empty() {
return Ok(logs);
}
let xml = String::from_utf8(output.stdout)
.with_context(|| format!("stdout from clang-format was not UTF-8 encoded: {file_name}"))?
.lines()
.collect::<Vec<&str>>()
.join("");
let config = serde_xml_rs::ParserConfig::new()
.trim_whitespace(false)
.whitespace_to_characters(true)
.ignore_root_level_whitespace(true);
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
let mut format_advice = FormatAdvice::deserialize(&mut Deserializer::new(event_reader))
.unwrap_or(FormatAdvice {
let mut format_advice = if !output.stdout.is_empty() {
let xml = String::from_utf8(output.stdout).with_context(|| {
format!("XML output from clang-format was not UTF-8 encoded: {file_name}")

Check warning on line 148 in cpp-linter/src/clang_tools/clang_format.rs

Codecov / codecov/patch

cpp-linter/src/clang_tools/clang_format.rs#L148

Added line #L148 was not covered by tests
})?;
quick_xml::de::from_str::<FormatAdvice>(&xml).with_context(|| {
format!("Failed to parse XML output from clang-format for {file_name}")
})?
} else {
FormatAdvice {

Check warning on line 154 in cpp-linter/src/clang_tools/clang_format.rs

Codecov / codecov/patch

cpp-linter/src/clang_tools/clang_format.rs#L154

Added line #L154 was not covered by tests
replacements: vec![],
patched: None,
});
}

Check warning on line 157 in cpp-linter/src/clang_tools/clang_format.rs

Codecov / codecov/patch

cpp-linter/src/clang_tools/clang_format.rs#L157

Added line #L157 was not covered by tests
};
format_advice.patched = patched;
if !format_advice.replacements.is_empty() {
let original_contents = fs::read(&file.name).with_context(|| {
format!(
"Failed to read file's original content before translating byte offsets: {file_name}",
)

Check warning on line 164 in cpp-linter/src/clang_tools/clang_format.rs

Codecov / codecov/patch

cpp-linter/src/clang_tools/clang_format.rs#L162-L164

Added lines #L162 - L164 were not covered by tests
})?;
// get line and column numbers from format_advice.offset
let mut filtered_replacements = Vec::new();
for replacement in &mut format_advice.replacements {
let (line_number, columns) = get_line_cols_from_offset(&file.name, replacement.offset);
let line_number = get_line_count_from_offset(&original_contents, replacement.offset);
replacement.line = line_number;
replacement.cols = columns;
for range in &ranges {
if range.contains(&line_number.try_into().unwrap_or(0)) {
filtered_replacements.push(replacement.clone());
if range.contains(&line_number) {
filtered_replacements.push(*replacement);
break;
}
}
if ranges.is_empty() {
// lines_changed_only is disabled
filtered_replacements.push(replacement.clone());
filtered_replacements.push(*replacement);
}
}
format_advice.replacements = filtered_replacements;
@@ -216,7 +188,13 @@
#[cfg(test)]
mod tests {
use super::{summarize_style, FormatAdvice, Replacement};
use serde::Deserialize;

#[test]
fn parse_blank_xml() {
let xml = String::new();
let result = quick_xml::de::from_str::<FormatAdvice>(&xml);
assert!(result.is_err());
}

#[test]
fn parse_xml() {
@@ -226,51 +204,24 @@
<replacement offset='147' length='0'> </replacement>
<replacement offset='161' length='0'></replacement>
<replacement offset='165' length='19'>&#10;&#10;</replacement>
</replacements>"#;
//since whitespace is part of the elements' body, we need to remove the LFs first
let xml = xml_raw.lines().collect::<Vec<&str>>().join("");
</replacements>"#
.as_bytes()
.to_vec();

let expected = FormatAdvice {
replacements: vec![
Replacement {
offset: 113,
length: 5,
value: Some(String::from("\n ")),
line: 0,
cols: 0,
},
Replacement {
offset: 147,
length: 0,
value: Some(String::from(" ")),
line: 0,
cols: 0,
},
Replacement {
offset: 161,
length: 0,
value: None,
line: 0,
cols: 0,
},
Replacement {
offset: 165,
length: 19,
value: Some(String::from("\n\n")),
line: 0,
cols: 0,
},
],
replacements: [113, 147, 161, 165]
.iter()
.map(|offset| Replacement {
offset: *offset,
..Default::default()
})
.collect(),
patched: None,
};
let config = serde_xml_rs::ParserConfig::new()
.trim_whitespace(false)
.whitespace_to_characters(true)
.ignore_root_level_whitespace(true);
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
let document =
FormatAdvice::deserialize(&mut serde_xml_rs::de::Deserializer::new(event_reader))
.unwrap();

let xml = String::from_utf8(xml_raw).unwrap();

let document = quick_xml::de::from_str::<FormatAdvice>(&xml).unwrap();
assert_eq!(expected, document);
}

11 changes: 6 additions & 5 deletions cpp-linter/src/clang_tools/clang_tidy.rs
Original file line number Diff line number Diff line change
@@ -276,16 +276,17 @@ pub fn run_clang_tidy(
cmd.args(["--line-filter", filter.as_str()]);
}
}
let mut original_content = None;
if clang_params.tidy_review {
let original_content = if !clang_params.tidy_review {
None
} else {
cmd.arg("--fix-errors");
original_content = Some(fs::read_to_string(&file.name).with_context(|| {
Some(fs::read_to_string(&file.name).with_context(|| {
format!(
"Failed to cache file's original content before applying clang-tidy changes: {}",
file_name.clone()
)
})?);
}
})?)
};
if !clang_params.style.is_empty() {
cmd.args(["--format-style", clang_params.style.as_str()]);
}
28 changes: 12 additions & 16 deletions cpp-linter/src/clang_tools/mod.rs
Original file line number Diff line number Diff line change
@@ -258,20 +258,18 @@ pub struct ReviewComments {
impl ReviewComments {
pub fn summarize(&self, clang_versions: &ClangVersions) -> String {
let mut body = format!("{COMMENT_MARKER}## Cpp-linter Review\n");
for t in 0u8..=1 {
for t in 0_usize..=1 {
let mut total = 0;
let (tool_name, tool_version) = if t == 0 {
("clang-format", clang_versions.format_version.as_ref())
} else {
("clang-tidy", clang_versions.tidy_version.as_ref())
};

let tool_total = if let Some(total) = self.tool_total[t as usize] {
total
} else {
// review was not requested from this tool or the tool was not used at all
if tool_version.is_none() {
// this tool was not used at all
continue;
};
}
let tool_total = self.tool_total[t].unwrap_or_default();

// If the tool's version is unknown, then we don't need to output this line.
// NOTE: If the tool was invoked at all, then the tool's version shall be known.
@@ -295,11 +293,11 @@ impl ReviewComments {
.as_str(),
);
}
if !self.full_patch[t as usize].is_empty() {
if !self.full_patch[t].is_empty() {
body.push_str(
format!(
"\n<details><summary>Click here for the full {tool_name} patch</summary>\n\n```diff\n{}```\n\n</details>\n",
self.full_patch[t as usize]
self.full_patch[t]
).as_str()
);
} else {
@@ -370,8 +368,7 @@ pub trait MakeSuggestions {
patch: &mut Patch,
summary_only: bool,
) -> Result<()> {
let tool_name = self.get_tool_name();
let is_tidy_tool = tool_name == "clang-tidy";
let is_tidy_tool = (&self.get_tool_name() == "clang-tidy") as usize;
let hunks_total = patch.num_hunks();
let mut hunks_in_patch = 0u32;
let file_name = file_obj
@@ -384,13 +381,13 @@ pub trait MakeSuggestions {
.to_buf()
.with_context(|| "Failed to convert patch to byte array")?
.to_vec();
review_comments.full_patch[is_tidy_tool as usize].push_str(
review_comments.full_patch[is_tidy_tool].push_str(
String::from_utf8(patch_buf.to_owned())
.with_context(|| format!("Failed to convert patch to string: {file_name}"))?
.as_str(),
);
review_comments.tool_total[is_tidy_tool as usize].get_or_insert(0);
if summary_only {
review_comments.tool_total[is_tidy_tool].get_or_insert(0);
return Ok(());
}
for hunk_id in 0..hunks_total {
@@ -447,9 +444,8 @@ pub trait MakeSuggestions {
review_comments.comments.push(comment);
}
}
review_comments.tool_total[is_tidy_tool as usize] = Some(
review_comments.tool_total[is_tidy_tool as usize].unwrap_or_default() + hunks_in_patch,
);
review_comments.tool_total[is_tidy_tool] =
Some(review_comments.tool_total[is_tidy_tool].unwrap_or_default() + hunks_in_patch);
Ok(())
}
}
2 changes: 1 addition & 1 deletion cpp-linter/src/common_fs/file_filter.rs
Original file line number Diff line number Diff line change
@@ -111,7 +111,7 @@ impl FileFilter {
|| (pat.is_dir() && file_name.starts_with(pat))
{
log::debug!(
"file {file_name:?} is in {}ignored with domain {pattern:?}.",
"file {file_name:?} is {}ignored with domain {pattern:?}.",
if is_ignored { "" } else { "not " }
);
return true;
50 changes: 27 additions & 23 deletions cpp-linter/src/common_fs/mod.rs
Original file line number Diff line number Diff line change
@@ -2,7 +2,6 @@
use std::fmt::Debug;
use std::fs;
use std::io::Read;
use std::path::{Component, Path};
use std::{ops::RangeInclusive, path::PathBuf};

@@ -220,24 +219,16 @@ impl FileObj {
}
}

/// Gets the line and column number from a given `offset` (of bytes) for given
/// `file_path`.
/// Gets the line number for a given `offset` (of bytes) from the given
/// buffer `contents`.
///
/// This computes the line and column numbers from a buffer of bytes read from the
/// `file_path`. In non-UTF-8 encoded files, this does not guarantee that a word
/// boundary exists at the returned column number. However, the `offset` given to this
/// function is expected to originate from diagnostic information provided by
/// clang-format or clang-tidy.
pub fn get_line_cols_from_offset(file_path: &PathBuf, offset: usize) -> (usize, usize) {
let mut file_buf = vec![0; offset];
fs::File::open(file_path)
.unwrap()
.read_exact(&mut file_buf)
.unwrap();
let lines = file_buf.split(|byte| byte == &b'\n');
let line_count = lines.clone().count();
let column_count = lines.last().unwrap_or(&[]).len() + 1; // +1 because not a 0 based count
(line_count, column_count)
/// The `offset` given to this function is expected to originate from
/// diagnostic information provided by clang-format. Any `offset` out of
/// bounds is clamped to the given `contents` buffer's length.
pub fn get_line_count_from_offset(contents: &[u8], offset: u32) -> u32 {
let offset = (offset as usize).min(contents.len());
let lines = contents[0..offset].split(|byte| byte == &b'\n');
lines.count() as u32
}

/// This was copied from [cargo source code](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61).
@@ -272,10 +263,10 @@ pub fn normalize_path(path: &Path) -> PathBuf {

#[cfg(test)]
mod test {
use std::env::current_dir;
use std::path::PathBuf;
use std::{env::current_dir, fs};

use super::{get_line_cols_from_offset, normalize_path, FileObj};
use super::{get_line_count_from_offset, normalize_path, FileObj};
use crate::cli::LinesChangedOnly;

// *********************** tests for normalized paths
@@ -317,12 +308,25 @@ mod test {

#[test]
fn translate_byte_offset() {
let (lines, cols) = get_line_cols_from_offset(&PathBuf::from("tests/demo/demo.cpp"), 144);
println!("lines: {lines}, cols: {cols}");
let contents = fs::read(PathBuf::from("tests/demo/demo.cpp")).unwrap();
let lines = get_line_count_from_offset(&contents, 144);
assert_eq!(lines, 13);
assert_eq!(cols, 5);
}

#[test]
fn get_line_count_edge_cases() {
// Empty content
assert_eq!(get_line_count_from_offset(&[], 0), 1);

// No newlines
assert_eq!(get_line_count_from_offset(b"abc", 3), 1);

// Consecutive newlines
assert_eq!(get_line_count_from_offset(b"a\n\nb", 3), 3);

// Offset beyond content length
assert_eq!(get_line_count_from_offset(b"a\nb\n", 10), 3);
}
// *********************** tests for FileObj::get_ranges()

#[test]
9 changes: 1 addition & 8 deletions cpp-linter/src/rest_api/github/mod.rs
Original file line number Diff line number Diff line change
@@ -314,15 +314,8 @@ mod test {
notes,
patched: None,
});
let replacements = vec![Replacement {
offset: 0,
length: 0,
value: Some(String::new()),
line: 1,
cols: 1,
}];
file.format_advice = Some(FormatAdvice {
replacements,
replacements: vec![Replacement { offset: 0, line: 1 }],
patched: None,
});
files.push(Arc::new(Mutex::new(file)));
2 changes: 1 addition & 1 deletion cpp-linter/src/rest_api/github/specific_api.rs
Original file line number Diff line number Diff line change
@@ -169,7 +169,7 @@ impl GithubApiClient {
let file = file.lock().unwrap();
if let Some(format_advice) = &file.format_advice {
// assemble a list of line numbers
let mut lines: Vec<usize> = Vec::new();
let mut lines = Vec::new();
for replacement in &format_advice.replacements {
if !lines.contains(&replacement.line) {
lines.push(replacement.line);