Skip to content

refactor: switch to quick_xml library #101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 11 additions & 39 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cpp-linter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ lenient_semver = "0.4.2"
log = { version = "0.4.25", features = ["std"] }
openssl = { version = "0.10", features = ["vendored"], optional = true }
openssl-probe = { version = "0.1", optional = true }
quick-xml = {version = "0.37.2", features = ["serialize"]}
regex = "1.11.1"
reqwest = "0.12.12"
semver = "1.0.25"
serde = { version = "1.0.217", features = ["derive"] }
serde-xml-rs = "0.6.0"
serde_json = "1.0.137"
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"]}
tokio-macros = "2.4.0"
Expand Down
129 changes: 39 additions & 90 deletions cpp-linter/src/clang_tools/clang_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
//! output.

use std::{
fs,
process::Command,
sync::{Arc, Mutex, MutexGuard},
};

use anyhow::{Context, Result};
use log::Level;
// non-std crates
use serde::Deserialize;
use serde_xml_rs::de::Deserializer;

// project-specific crates/modules
use super::MakeSuggestions;
Expand All @@ -19,12 +18,10 @@
common_fs::{get_line_cols_from_offset, FileObj},
};

/// A Structure used to deserialize clang-format's XML output.
#[derive(Debug, Deserialize, PartialEq, Clone)]
#[serde(rename = "replacements")]
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct FormatAdvice {
/// A list of [`Replacement`]s that clang-tidy wants to make.
#[serde(rename = "$value")]
#[serde(rename(deserialize = "replacement"))]
pub replacements: Vec<Replacement>,

pub patched: Option<Vec<u8>>,
Expand All @@ -41,43 +38,25 @@
}

/// A single replacement that clang-format wants to make.
#[derive(Debug, Deserialize, PartialEq)]
#[derive(Debug, PartialEq, Eq, Default, Clone, Copy, Deserialize)]
pub struct Replacement {
/// The byte offset where the replacement will start.
pub offset: usize,

/// The amount of bytes that will be removed.
pub length: usize,

/// The bytes (UTF-8 encoded) that will be added at the [`Replacement::offset`] position.
#[serde(rename = "$value")]
pub value: Option<String>,
#[serde(rename = "@offset")]
pub offset: u32,

/// The line number described by the [`Replacement::offset`].
///
/// This value is not provided by the XML output, but we calculate it after
/// deserialization.
#[serde(default)]
pub line: usize,
pub line: u32,

/// The column number on the line described by the [`Replacement::offset`].
///
/// This value is not provided by the XML output, but we calculate it after
/// deserialization.
#[serde(default)]
pub cols: usize,
}

impl Clone for Replacement {
fn clone(&self) -> Self {
Replacement {
offset: self.offset,
length: self.length,
value: self.value.clone(),
line: self.line,
cols: self.cols,
}
}
pub cols: u32,
}

/// Get a string that summarizes the given `--style`
Expand Down Expand Up @@ -173,38 +152,36 @@
if output.stdout.is_empty() {
return Ok(logs);
}
let xml = String::from_utf8(output.stdout)
.with_context(|| format!("stdout from clang-format was not UTF-8 encoded: {file_name}"))?
.lines()
.collect::<Vec<&str>>()
.join("");
let config = serde_xml_rs::ParserConfig::new()
.trim_whitespace(false)
.whitespace_to_characters(true)
.ignore_root_level_whitespace(true);
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
let mut format_advice = FormatAdvice::deserialize(&mut Deserializer::new(event_reader))
.unwrap_or(FormatAdvice {
replacements: vec![],
patched: None,
});
let xml = String::from_utf8(output.stdout).with_context(|| {
format!("XML output from clang-format was not UTF-8 encoded: {file_name}")

Check warning on line 156 in cpp-linter/src/clang_tools/clang_format.rs

View check run for this annotation

Codecov / codecov/patch

cpp-linter/src/clang_tools/clang_format.rs#L156

Added line #L156 was not covered by tests
})?;
let mut format_advice = quick_xml::de::from_str::<FormatAdvice>(&xml).unwrap_or(FormatAdvice {
replacements: vec![],
patched: None,
});
format_advice.patched = patched;
if !format_advice.replacements.is_empty() {
let original_contents = fs::read(&file.name).with_context(|| {
format!(
"Failed to read file's original content before translating byte offsets: {file_name}",
)

Check warning on line 167 in cpp-linter/src/clang_tools/clang_format.rs

View check run for this annotation

Codecov / codecov/patch

cpp-linter/src/clang_tools/clang_format.rs#L165-L167

Added lines #L165 - L167 were not covered by tests
})?;
// get line and column numbers from format_advice.offset
let mut filtered_replacements = Vec::new();
for replacement in &mut format_advice.replacements {
let (line_number, columns) = get_line_cols_from_offset(&file.name, replacement.offset);
let (line_number, columns) =
get_line_cols_from_offset(&original_contents, replacement.offset);
replacement.line = line_number;
replacement.cols = columns;
for range in &ranges {
if range.contains(&line_number.try_into().unwrap_or(0)) {
filtered_replacements.push(replacement.clone());
if range.contains(&line_number) {
filtered_replacements.push(*replacement);
break;
}
}
if ranges.is_empty() {
// lines_changed_only is disabled
filtered_replacements.push(replacement.clone());
filtered_replacements.push(*replacement);
}
}
format_advice.replacements = filtered_replacements;
Expand All @@ -216,7 +193,6 @@
#[cfg(test)]
mod tests {
use super::{summarize_style, FormatAdvice, Replacement};
use serde::Deserialize;

#[test]
fn parse_xml() {
Expand All @@ -226,51 +202,24 @@
<replacement offset='147' length='0'> </replacement>
<replacement offset='161' length='0'></replacement>
<replacement offset='165' length='19'>&#10;&#10;</replacement>
</replacements>"#;
//since whitespace is part of the elements' body, we need to remove the LFs first
let xml = xml_raw.lines().collect::<Vec<&str>>().join("");
</replacements>"#
.as_bytes()
.to_vec();

let expected = FormatAdvice {
replacements: vec![
Replacement {
offset: 113,
length: 5,
value: Some(String::from("\n ")),
line: 0,
cols: 0,
},
Replacement {
offset: 147,
length: 0,
value: Some(String::from(" ")),
line: 0,
cols: 0,
},
Replacement {
offset: 161,
length: 0,
value: None,
line: 0,
cols: 0,
},
Replacement {
offset: 165,
length: 19,
value: Some(String::from("\n\n")),
line: 0,
cols: 0,
},
],
replacements: [113, 147, 161, 165]
.iter()
.map(|offset| Replacement {
offset: *offset,
..Default::default()
})
.collect(),
patched: None,
};
let config = serde_xml_rs::ParserConfig::new()
.trim_whitespace(false)
.whitespace_to_characters(true)
.ignore_root_level_whitespace(true);
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
let document =
FormatAdvice::deserialize(&mut serde_xml_rs::de::Deserializer::new(event_reader))
.unwrap();

let xml = String::from_utf8(xml_raw).unwrap();

let document = quick_xml::de::from_str::<FormatAdvice>(&xml).unwrap();
assert_eq!(expected, document);
}

Expand Down
20 changes: 8 additions & 12 deletions cpp-linter/src/common_fs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use std::fmt::Debug;
use std::fs;
use std::io::Read;
use std::path::{Component, Path};
use std::{ops::RangeInclusive, path::PathBuf};

Expand Down Expand Up @@ -228,15 +227,11 @@ impl FileObj {
/// boundary exists at the returned column number. However, the `offset` given to this
/// function is expected to originate from diagnostic information provided by
/// clang-format or clang-tidy.
pub fn get_line_cols_from_offset(file_path: &PathBuf, offset: usize) -> (usize, usize) {
let mut file_buf = vec![0; offset];
fs::File::open(file_path)
.unwrap()
.read_exact(&mut file_buf)
.unwrap();
let lines = file_buf.split(|byte| byte == &b'\n');
let line_count = lines.clone().count();
let column_count = lines.last().unwrap_or(&[]).len() + 1; // +1 because not a 0 based count
pub fn get_line_cols_from_offset(contents: &[u8], offset: u32) -> (u32, u32) {
let lines = contents[0..offset as usize].split(|byte| byte == &b'\n');
let line_count = lines.clone().count() as u32;
// here we `cols.len() + 1` because columns is not a 0-based count
let column_count = lines.last().map(|cols| cols.len() + 1).unwrap_or(1) as u32;
(line_count, column_count)
}

Expand Down Expand Up @@ -272,8 +267,8 @@ pub fn normalize_path(path: &Path) -> PathBuf {

#[cfg(test)]
mod test {
use std::env::current_dir;
use std::path::PathBuf;
use std::{env::current_dir, fs};

use super::{get_line_cols_from_offset, normalize_path, FileObj};
use crate::cli::LinesChangedOnly;
Expand Down Expand Up @@ -317,7 +312,8 @@ mod test {

#[test]
fn translate_byte_offset() {
let (lines, cols) = get_line_cols_from_offset(&PathBuf::from("tests/demo/demo.cpp"), 144);
let contents = fs::read(PathBuf::from("tests/demo/demo.cpp")).unwrap();
let (lines, cols) = get_line_cols_from_offset(&contents, 144);
println!("lines: {lines}, cols: {cols}");
assert_eq!(lines, 13);
assert_eq!(cols, 5);
Expand Down
2 changes: 0 additions & 2 deletions cpp-linter/src/rest_api/github/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,6 @@ mod test {
});
let replacements = vec![Replacement {
offset: 0,
length: 0,
value: Some(String::new()),
line: 1,
cols: 1,
}];
Expand Down
2 changes: 1 addition & 1 deletion cpp-linter/src/rest_api/github/specific_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ impl GithubApiClient {
let file = file.lock().unwrap();
if let Some(format_advice) = &file.format_advice {
// assemble a list of line numbers
let mut lines: Vec<usize> = Vec::new();
let mut lines = Vec::new();
for replacement in &format_advice.replacements {
if !lines.contains(&replacement.line) {
lines.push(replacement.line);
Expand Down
Loading