// Copyright 2023-2024 Hugo Osvaldo Barrera
//
// SPDX-License-Identifier: ISC
//! Utilities for handling XML data.
use std::borrow::Cow;
use std::str::FromStr;
use http::status::InvalidStatusCode;
use http::StatusCode;
use roxmltree::Node;
use crate::dav::{check_status, WebDavError};
use crate::encoding::normalise_percent_encoded;
use crate::names;
use crate::PropertyName;
/// Check all the statuses in a `multistatus` response.
///
/// Every `<DAV:status>` node found among the descendants of `root` is parsed and checked.
/// Processing stops at the first node that fails.
///
/// # Errors
///
/// - If any of the `<DAV:status>` nodes is missing the status text, returns
///   [`WebDavError::InvalidResponse`].
///
/// - If the text inside a `<DAV:status>` node is not a valid status line, returns
///   [`WebDavError::InvalidStatusCode`].
///
/// - If any of the statuses are non-success, returns [`WebDavError::BadStatusCode`].
pub fn check_multistatus(root: Node) -> Result<(), WebDavError> {
    let statuses = root
        .descendants()
        .filter(|node| node.tag_name() == names::STATUS);
    for status in statuses {
        // Build the error lazily: `ok_or` would construct the error value for every status
        // node, including the well-formed ones.
        let status_line = status.text().ok_or_else(|| {
            WebDavError::InvalidResponse("missing text inside 'DAV:status'".into())
        })?;
        check_status(parse_statusline(status_line)?)?;
    }
    Ok(())
}
/// Extracts the [`StatusCode`] from a status line string.
///
/// The input is split on single space characters and the second field is handed to
/// [`StatusCode::from_str`]. For the input `HTTP/1.1 200 OK` that field is `200`.
///
/// # See also
///
/// - The [status element](https://www.rfc-editor.org/rfc/rfc2518#section-12.9.1.2)
/// - [Status-Line](https://www.rfc-editor.org/rfc/rfc2068#section-6.1)
///
/// # Errors
///
/// If the input string does not match a status line: when there is no second field, an empty
/// string is parsed instead and the resulting error is returned.
pub fn parse_statusline(status_line: &str) -> Result<StatusCode, InvalidStatusCode> {
    // Field 0 is the protocol version; field 1 is the numeric code. Anything after that
    // (the reason phrase) is irrelevant here.
    let code = status_line.split(' ').nth(1).unwrap_or("");
    StatusCode::from_str(code)
}
/// Render an empty (self-closing) XML node for `name`.
///
/// The node's namespace is emitted as a default `xmlns` attribute on the element itself.
pub(crate) fn render_xml(name: &PropertyName) -> String {
    let (tag, ns) = (name.name(), name.namespace());
    format!("<{tag} xmlns=\"{ns}\"/>")
}
/// Render an XML node with optional text.
///
/// - With `None`, an empty self-closing element is produced.
/// - With `Some(text)`, the text is passed through `escape_xml_entities` and wrapped in an
///   opening and a closing tag.
///
/// In both cases the namespace is emitted as a default `xmlns` attribute.
#[must_use]
pub fn render_xml_with_text(name: &PropertyName, text: Option<&str>) -> String {
    let (tag, ns) = (name.name(), name.namespace());
    text.map_or_else(
        || format!("<{tag} xmlns=\"{ns}\"/>"),
        |raw| {
            let body = escape_xml_entities(raw);
            format!("<{tag} xmlns=\"{ns}\">{body}</{tag}>")
        },
    )
}
/// Find an `href` node and return its normalised text value.
///
/// The first node yielded by `node.descendants()` whose tag name equals `names::HREF` is
/// used. Its text is passed through `normalise_percent_encoded`.
///
/// # Errors
///
/// - Returns [`WebDavError::InvalidResponse`] if no `href` node is found, or if the node has no
///   text.
/// - If `normalise_percent_encoded` fails, its error is converted into a [`WebDavError`].
pub(crate) fn get_normalised_href<'a>(node: &'a Node) -> Result<Cow<'a, str>, WebDavError> {
    // `ok_or_else` defers building the error value to the failure path.
    let href_node = node
        .descendants()
        .find(|candidate| candidate.tag_name() == crate::names::HREF)
        .ok_or_else(|| WebDavError::InvalidResponse("missing href in response".into()))?;
    let text = href_node
        .text()
        .ok_or_else(|| WebDavError::InvalidResponse("missing text in href".into()))?;
    // Hrefs may be percent encoded: https://www.rfc-editor.org/rfc/rfc4918#section-8.3.1
    normalise_percent_encoded(text).map_err(WebDavError::from)
}
/// Escape characters into XML entities.
///
/// The five characters with predefined XML entities are replaced:
///
/// | character | replacement |
/// |-----------|-------------|
/// | `"`       | `&quot;`    |
/// | `'`       | `&apos;`    |
/// | `<`       | `&lt;`      |
/// | `>`       | `&gt;`      |
/// | `&`       | `&amp;`     |
///
/// All other characters are copied through unchanged.
///
/// Returns [`Cow::Borrowed`] when `input` contains none of these characters, so no allocation
/// happens in that case; otherwise returns [`Cow::Owned`] with the escaped copy.
pub(crate) fn escape_xml_entities(input: &str) -> Cow<'_, str> {
    // Allocated lazily, on the first character that needs escaping.
    let mut escaped: Option<String> = None;
    // Byte offset just past the last character already copied into `escaped`.
    let mut copied_up_to = 0;
    for (i, c) in input.char_indices() {
        let entity = match c {
            '"' => "&quot;",
            '\'' => "&apos;",
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            _ => continue,
        };
        let out = escaped.get_or_insert_with(|| String::with_capacity(input.len()));
        // Copy the untouched run preceding this character, then the entity itself.
        out.push_str(&input[copied_up_to..i]);
        out.push_str(entity);
        // `i` and `len_utf8` both count bytes, so this stays on a char boundary.
        copied_up_to = i + c.len_utf8();
    }
    match escaped {
        Some(mut out) => {
            out.push_str(&input[copied_up_to..]);
            Cow::Owned(out)
        }
        None => Cow::Borrowed(input),
    }
}
/// Return the text of a property node with its line endings corrected.
///
/// The first node yielded by `node.descendants()` whose tag name equals `property` is used.
/// Its text is passed through [`normalise_newlines`].
///
/// # Errors
///
/// Returns [`WebDavError::InvalidResponse`] if no matching node is found, or if the matching
/// node has no text.
#[inline]
pub(crate) fn get_newline_corrected_text(
    node: &Node,
    property: &PropertyName<'_, '_>,
) -> Result<String, WebDavError> {
    node.descendants()
        .find(|candidate| candidate.tag_name() == *property)
        // Errors are built lazily so that the `format!` calls only run on failure.
        .ok_or_else(|| {
            WebDavError::InvalidResponse(format!("missing {} in response", property.name()).into())
        })?
        .text()
        .ok_or_else(|| {
            WebDavError::InvalidResponse(format!("missing text in property {property:?}").into())
        })
        // "\r\n" is usually converted into "\n" during parsing. This needs to be undone.
        //
        // See: https://github.com/RazrFalcon/roxmltree/issues/102
        // See: https://www.w3.org/TR/xml/#sec-line-ends
        // See: https://www.rfc-editor.org/rfc/rfc4791#section-9.6
        .map(normalise_newlines)
}
/// Normalise newlines so that every `\n` is preceded by a `\r`.
///
/// A `\n` that already directly follows a `\r` is left as it is; a bare `\n` becomes `\r\n`.
/// A `\r` that is not directly followed by `\n` is never touched.
///
/// # Examples
///
/// ```rust
/// # use libdav::xmlutils::normalise_newlines;
/// // These inputs return the same value unchanged:
/// assert_eq!(normalise_newlines("hello\r\nworld"), "hello\r\nworld");
/// assert_eq!(normalise_newlines("hello\r\r\nworld"), "hello\r\r\nworld");
/// assert_eq!(normalise_newlines("hello\rworld"), "hello\rworld");
/// assert_eq!(normalise_newlines("hello\r\nworld\r\n"), "hello\r\nworld\r\n");
/// // These add a missing \r:
/// assert_eq!(normalise_newlines("hello\nworld"), "hello\r\nworld");
/// assert_eq!(normalise_newlines("hello\r\nworld\n"), "hello\r\nworld\r\n");
/// ```
#[must_use]
pub fn normalise_newlines(orig: &str) -> String {
    let mut normalised = String::with_capacity(orig.len());
    let mut remaining = orig;
    // Peel off one line at a time; `line` never contains the terminating '\n'.
    while let Some((line, tail)) = remaining.split_once('\n') {
        // Drop at most one trailing '\r' so that an existing "\r\n" is not doubled.
        normalised.push_str(line.strip_suffix('\r').unwrap_or(line));
        normalised.push_str("\r\n");
        remaining = tail;
    }
    // Whatever follows the final '\n' (possibly nothing) is copied verbatim.
    normalised.push_str(remaining);
    normalised
}
#[cfg(test)]
mod test {
use std::borrow::Cow;
use crate::{
names,
xmlutils::{escape_xml_entities, get_newline_corrected_text},
};
#[test]
fn get_newline_corrected_text_without_returns() {
let without_returns ="<ns0:multistatus xmlns:ns0=\"DAV:\" xmlns:ns1=\"urn:ietf:params:xml:ns:caldav\"><ns0:response><ns0:href>/user/calendars/qdBEnN9jwjQFLry4/1ehsci7nhH31.ics</ns0:href><ns0:propstat><ns0:status>HTTP/1.1 200 OK</ns0:status><ns0:prop><ns0:getetag>\"2d2c827debd802fb3844309b53254b90dd7fd900\"</ns0:getetag><ns1:calendar-data>BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\nBEGIN:VEVENT\nSUMMARY:hello\\, testing\nDTSTART:19970714T170000Z\nDTSTAMP:19970610T172345Z\nUID:92gDWceCowpO\nEND:VEVENT\nEND:VCALENDAR\n</ns1:calendar-data></ns0:prop></ns0:propstat></ns0:response></ns0:multistatus>";
let expected = "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nSUMMARY:hello\\, testing\r\nDTSTART:19970714T170000Z\r\nDTSTAMP:19970610T172345Z\r\nUID:92gDWceCowpO\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n";
let doc = roxmltree::Document::parse(without_returns).unwrap();
let responses = doc
.root_element()
.descendants()
.find(|node| node.tag_name() == names::RESPONSE)
.unwrap();
assert_eq!(
get_newline_corrected_text(&responses, &names::CALENDAR_DATA).unwrap(),
expected
);
}
#[test]
fn get_newline_corrected_text_with_returns() {
let with_returns= "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<multistatus xmlns=\"DAV:\" xmlns:C=\"urn:ietf:params:xml:ns:caldav\">\n <response>\n <href>/dav/calendars/user/vdirsyncer@fastmail.com/UvrlExcG9Jp0gEzQ/2H8kQfNQj8GP.ics</href>\n <propstat>\n <prop>\n <getetag>\"4d92fc1c8bdc18bbf83caf34eeab7e7167eb292e\"</getetag>\n <C:calendar-data><![CDATA[BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nUID:jSayX7OSdp3V\r\nDTSTAMP:19970610T172345Z\r\nDTSTART:19970714T170000Z\r\nSUMMARY:hello\\, testing\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n]]></C:calendar-data>\n </prop>\n <status>HTTP/1.1 200 OK</status>\n </propstat>\n </response>\n</multistatus>\n";
let expected = "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nUID:jSayX7OSdp3V\r\nDTSTAMP:19970610T172345Z\r\nDTSTART:19970714T170000Z\r\nSUMMARY:hello\\, testing\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n";
let doc = roxmltree::Document::parse(with_returns).unwrap();
let responses = doc
.root_element()
.descendants()
.find(|node| node.tag_name() == names::RESPONSE)
.unwrap();
assert_eq!(
get_newline_corrected_text(&responses, &names::CALENDAR_DATA).unwrap(),
expected
);
}
// Tests for escape_xml_entities
#[test]
fn escape_xml_entities_basic_substitution() {
let input = "This is a <test> with \"quotes\" and &.";
let expected = "This is a <test> with "quotes" and &.";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
}
#[test]
fn escape_xml_entities_multibyte_characters() {
let input = "你好";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_multibyte_characters_and_tags() {
let input = "你好 <test>";
let expected = "你好 <test>";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
}
#[test]
fn escape_xml_entities_slash_no_change() {
let input = "Path/to/file";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_at_symbol_no_change() {
let input = "user@example.com";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_emoji_and_special_characters() {
let input = "😀 & <>";
let expected = "😀 & <>";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
}
#[test]
fn escape_xml_entities_empty_string() {
let input = "";
let expected = "";
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_no_special_characters() {
let input = "Just some normal text";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
#[test]
fn escape_xml_entities_url_encoded_slash() {
let input = "http://example.com/path%2Fto%2Ffile";
let expected = input;
let result = escape_xml_entities(input);
assert_eq!(result.to_string(), expected);
assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
}
}