// libdav/xmlutils.rs

// Copyright 2023-2024 Hugo Osvaldo Barrera
//
// SPDX-License-Identifier: ISC

//! Utilities for handling XML data.
use std::borrow::Cow;
use std::str::FromStr;

use http::status::InvalidStatusCode;
use http::StatusCode;
use roxmltree::Node;

use crate::dav::{check_status, WebDavError};
use crate::encoding::normalise_percent_encoded;
use crate::names;
use crate::PropertyName;

/// Verify every `<DAV:status>` element found under `root`.
///
/// All descendants of `root` whose tag name equals [`names::STATUS`] are visited in document
/// order; the first failure aborts the walk and is returned.
///
/// # Errors
///
/// - [`WebDavError::InvalidResponse`] if a `<DAV:status>` node carries no text.
///
/// - [`WebDavError::InvalidStatusCode`] if the text of a `<DAV:status>` node cannot be parsed
///   as a status line.
///
/// - [`WebDavError::BadStatusCode`] if any parsed status is not a success status.
pub fn check_multistatus(root: Node) -> Result<(), WebDavError> {
    root.descendants()
        .filter(|candidate| candidate.tag_name() == names::STATUS)
        .try_for_each(|status_node| -> Result<(), WebDavError> {
            let line = status_node.text().ok_or_else(|| {
                WebDavError::InvalidResponse("missing text inside 'DAV:status'".into())
            })?;
            let code = parse_statusline(line)?;
            check_status(code)?;
            Ok(())
        })
}

/// Extract the [`StatusCode`] from an HTTP status line.
///
/// The line is split on single spaces and the second field is parsed as the status code, so
/// for `HTTP/1.1 200 OK` the result is `200`. The first field (the protocol version) and
/// anything after the code (the reason phrase) are ignored.
///
/// # See also
///
/// - The [status element](https://www.rfc-editor.org/rfc/rfc2518#section-12.9.1.2)
/// - [Status-Line](https://www.rfc-editor.org/rfc/rfc2068#section-6.1)
///
/// # Errors
///
/// If the second space-separated field is absent or is not a valid status code.
pub fn parse_statusline(status_line: &str) -> Result<StatusCode, InvalidStatusCode> {
    // A missing field is mapped to the empty string so that it fails inside
    // `StatusCode::from_str` and yields the same error type as a malformed code.
    let code_field = status_line.split(' ').nth(1).unwrap_or("");
    StatusCode::from_str(code_field)
}

/// Render `name` as a self-closing XML element that declares its own default namespace.
///
/// The output has the form `<local-name xmlns="namespace"/>`. Neither part is escaped.
pub(crate) fn render_xml(name: &PropertyName) -> String {
    let (tag, namespace) = (name.name(), name.namespace());
    format!("<{tag} xmlns=\"{namespace}\"/>")
}

/// Render `name` as an XML element, optionally wrapping text content.
///
/// With `None` the element is self-closing; with `Some(text)` the text is passed through
/// `escape_xml_entities` and placed between an opening and a closing tag. In both cases the
/// element declares the property's namespace through an `xmlns` attribute.
#[must_use]
pub fn render_xml_with_text(name: &PropertyName, text: Option<&str>) -> String {
    let (tag, namespace) = (name.name(), name.namespace());
    if let Some(content) = text {
        let body = escape_xml_entities(content);
        format!("<{tag} xmlns=\"{namespace}\">{body}</{tag}>")
    } else {
        format!("<{tag} xmlns=\"{namespace}\"/>")
    }
}

/// Locate the first `href` element under `node` and return its text, normalised.
///
/// # Errors
///
/// - [`WebDavError::InvalidResponse`] if no `href` element is found, or if the one found has
///   no text.
/// - The error from `normalise_percent_encoded`, converted into a [`WebDavError`], if
///   normalisation fails.
pub(crate) fn get_normalised_href<'a>(node: &'a Node) -> Result<Cow<'a, str>, WebDavError> {
    let raw_href = node
        .descendants()
        .find(|candidate| candidate.tag_name() == crate::names::HREF)
        .ok_or_else(|| WebDavError::InvalidResponse("missing href in response".into()))?
        .text()
        .ok_or_else(|| WebDavError::InvalidResponse("missing text in href".into()))?;

    // Hrefs may be percent encoded: https://www.rfc-editor.org/rfc/rfc4918#section-8.3.1
    Ok(normalise_percent_encoded(raw_href)?)
}

/// Replace the five XML-reserved characters with their predefined entities.
///
/// `"`, `'`, `<`, `>` and `&` become `&quot;`, `&apos;`, `&lt;`, `&gt;` and `&amp;`
/// respectively; every other character is copied through untouched.
///
/// When `input` contains none of those characters it is returned as [`Cow::Borrowed`] without
/// allocating; otherwise a new string is built and returned as [`Cow::Owned`].
pub(crate) fn escape_xml_entities(input: &str) -> Cow<str> {
    /// Map a reserved character to its entity, or `None` for any other character.
    fn entity_for(c: char) -> Option<&'static str> {
        Some(match c {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&apos;",
            _ => return None,
        })
    }

    // Byte offset of the first character that needs escaping; without one, the input can be
    // handed back as-is.
    let first_reserved = match input.find(|c: char| entity_for(c).is_some()) {
        Some(offset) => offset,
        None => return Cow::Borrowed(input),
    };

    // Everything before the first reserved character is copied in one go; only the remainder
    // needs to be inspected character by character.
    let (clean_prefix, remainder) = input.split_at(first_reserved);
    let mut escaped = String::with_capacity(input.len());
    escaped.push_str(clean_prefix);
    for c in remainder.chars() {
        match entity_for(c) {
            Some(entity) => escaped.push_str(entity),
            None => escaped.push(c),
        }
    }

    Cow::Owned(escaped)
}

/// Return the text of the first `property` element under `node`, with `\r\n` line endings.
///
/// The element is searched for in `node.descendants()`. Its text is passed through
/// [`normalise_newlines`], so every line in the returned string ends in `\r\n`.
///
/// # Errors
///
/// Returns [`WebDavError::InvalidResponse`] if no element matching `property` is found, or if
/// the matching element contains no text.
#[inline]
pub(crate) fn get_newline_corrected_text(
    node: &Node,
    property: &PropertyName<'_, '_>,
) -> Result<String, WebDavError> {
    node.descendants()
        .find(|node| node.tag_name() == *property)
        // The error messages are built lazily: formatting them allocates, and that work
        // should only happen on the failure path.
        .ok_or_else(|| {
            WebDavError::InvalidResponse(
                format!("missing {} in response", property.name()).into(),
            )
        })?
        .text()
        .ok_or_else(|| {
            WebDavError::InvalidResponse(
                format!("missing text in property {property:?}").into(),
            )
        })
        // "\r\n" is usually converted into "\n" during parsing. This needs to be undone.
        //
        // See: https://github.com/RazrFalcon/roxmltree/issues/102
        // See: https://www.w3.org/TR/xml/#sec-line-ends
        // See: https://www.rfc-editor.org/rfc/rfc4791#section-9.6
        .map(normalise_newlines)
}

/// Ensure that every line feed in `orig` is preceded by a carriage return.
///
/// A bare `\n` becomes `\r\n`; an existing `\r\n` is kept as it is. A `\r` that is not
/// directly followed by `\n` is left untouched.
///
/// # Examples
///
/// ```rust
/// # use libdav::xmlutils::normalise_newlines;
/// // These inputs return the same value unchanged:
/// assert_eq!(normalise_newlines("hello\r\nworld"), "hello\r\nworld");
/// assert_eq!(normalise_newlines("hello\r\r\nworld"),  "hello\r\r\nworld");
/// assert_eq!(normalise_newlines("hello\rworld"), "hello\rworld");
/// assert_eq!(normalise_newlines("hello\r\nworld\r\n"), "hello\r\nworld\r\n");
/// // These add a missing \r:
/// assert_eq!(normalise_newlines("hello\nworld"), "hello\r\nworld");
/// assert_eq!(normalise_newlines("hello\r\nworld\n"), "hello\r\nworld\r\n");
/// ```
#[must_use]
pub fn normalise_newlines(orig: &str) -> String {
    let mut normalised = String::with_capacity(orig.len());
    let mut segments = orig.split('\n');

    // `split` always yields at least one segment, even for an empty input.
    let mut pending = segments.next().unwrap_or("");
    for following in segments {
        // `pending` was terminated by a `\n`. Drop at most one trailing `\r` so that an
        // existing `\r\n` is not doubled, then emit the canonical terminator.
        normalised.push_str(pending.strip_suffix('\r').unwrap_or(pending));
        normalised.push_str("\r\n");
        pending = following;
    }

    // Whatever follows the last `\n` has no terminator and is copied verbatim.
    normalised.push_str(pending);
    normalised
}

#[cfg(test)]
mod test {
    use std::borrow::Cow;

    use crate::{
        names,
        xmlutils::{escape_xml_entities, get_newline_corrected_text},
    };

    #[test]
    fn get_newline_corrected_text_without_returns() {
        let without_returns ="<ns0:multistatus xmlns:ns0=\"DAV:\" xmlns:ns1=\"urn:ietf:params:xml:ns:caldav\"><ns0:response><ns0:href>/user/calendars/qdBEnN9jwjQFLry4/1ehsci7nhH31.ics</ns0:href><ns0:propstat><ns0:status>HTTP/1.1 200 OK</ns0:status><ns0:prop><ns0:getetag>\"2d2c827debd802fb3844309b53254b90dd7fd900\"</ns0:getetag><ns1:calendar-data>BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\nBEGIN:VEVENT\nSUMMARY:hello\\, testing\nDTSTART:19970714T170000Z\nDTSTAMP:19970610T172345Z\nUID:92gDWceCowpO\nEND:VEVENT\nEND:VCALENDAR\n</ns1:calendar-data></ns0:prop></ns0:propstat></ns0:response></ns0:multistatus>";
        let expected = "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nSUMMARY:hello\\, testing\r\nDTSTART:19970714T170000Z\r\nDTSTAMP:19970610T172345Z\r\nUID:92gDWceCowpO\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n";

        let doc = roxmltree::Document::parse(without_returns).unwrap();
        let responses = doc
            .root_element()
            .descendants()
            .find(|node| node.tag_name() == names::RESPONSE)
            .unwrap();
        assert_eq!(
            get_newline_corrected_text(&responses, &names::CALENDAR_DATA).unwrap(),
            expected
        );
    }

    #[test]
    fn get_newline_corrected_text_with_returns() {
        let with_returns= "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<multistatus xmlns=\"DAV:\" xmlns:C=\"urn:ietf:params:xml:ns:caldav\">\n  <response>\n    <href>/dav/calendars/user/vdirsyncer@fastmail.com/UvrlExcG9Jp0gEzQ/2H8kQfNQj8GP.ics</href>\n    <propstat>\n      <prop>\n        <getetag>\"4d92fc1c8bdc18bbf83caf34eeab7e7167eb292e\"</getetag>\n        <C:calendar-data><![CDATA[BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nUID:jSayX7OSdp3V\r\nDTSTAMP:19970610T172345Z\r\nDTSTART:19970714T170000Z\r\nSUMMARY:hello\\, testing\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n]]></C:calendar-data>\n      </prop>\n      <status>HTTP/1.1 200 OK</status>\n    </propstat>\n  </response>\n</multistatus>\n";
        let expected = "BEGIN:VCALENDAR\r\nVERSION:2.0\r\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\r\nBEGIN:VEVENT\r\nUID:jSayX7OSdp3V\r\nDTSTAMP:19970610T172345Z\r\nDTSTART:19970714T170000Z\r\nSUMMARY:hello\\, testing\r\nEND:VEVENT\r\nEND:VCALENDAR\r\n";

        let doc = roxmltree::Document::parse(with_returns).unwrap();
        let responses = doc
            .root_element()
            .descendants()
            .find(|node| node.tag_name() == names::RESPONSE)
            .unwrap();
        assert_eq!(
            get_newline_corrected_text(&responses, &names::CALENDAR_DATA).unwrap(),
            expected
        );
    }

    // Tests for escape_xml_entities

    #[test]
    fn escape_xml_entities_basic_substitution() {
        let input = "This is a <test> with \"quotes\" and &.";
        let expected = "This is a &lt;test&gt; with &quot;quotes&quot; and &amp;.";
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
    }

    #[test]
    fn escape_xml_entities_multibyte_characters() {
        let input = "你好";
        let expected = input;
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
    }

    #[test]
    fn escape_xml_entities_multibyte_characters_and_tags() {
        let input = "你好 <test>";
        let expected = "你好 &lt;test&gt;";
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
    }

    #[test]
    fn escape_xml_entities_slash_no_change() {
        let input = "Path/to/file";
        let expected = input;
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
    }

    #[test]
    fn escape_xml_entities_at_symbol_no_change() {
        let input = "user@example.com";
        let expected = input;
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
    }

    #[test]
    fn escape_xml_entities_emoji_and_special_characters() {
        let input = "😀 & <>";
        let expected = "😀 &amp; &lt;&gt;";
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Owned(_)));
    }

    #[test]
    fn escape_xml_entities_empty_string() {
        let input = "";
        let expected = "";
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
    }

    #[test]
    fn escape_xml_entities_no_special_characters() {
        let input = "Just some normal text";
        let expected = input;
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
    }

    #[test]
    fn escape_xml_entities_url_encoded_slash() {
        let input = "http://example.com/path%2Fto%2Ffile";
        let expected = input;
        let result = escape_xml_entities(input);
        assert_eq!(result.to_string(), expected);
        assert!(matches!(escape_xml_entities(input), Cow::Borrowed(_)));
    }
}
// End of libdav/xmlutils.rs