1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
// Copyright 2023-2024 Hugo Osvaldo Barrera
//
// SPDX-License-Identifier: EUPL-1.2

//! Implements reading entries from a remote webcal resource.
//!
//! Webcal is a de-facto standard, and is basically a single icalendar file hosted via http(s).
//!
//! See the [Webcal wikipedia page](https://en.wikipedia.org/wiki/Webcal).

use async_trait::async_trait;
use http::{uri::Scheme, StatusCode, Uri};
use http_body_util::BodyExt;
use hyper_rustls::{HttpsConnector, HttpsConnectorBuilder};
use hyper_util::{
    client::legacy::{connect::HttpConnector, Client},
    rt::TokioExecutor,
};

use crate::{
    base::{
        CalendarProperty, Collection, FetchedItem, IcsItem, Item, ItemRef, ListedProperty, Storage,
    },
    disco::{DiscoveredCollection, Discovery},
    simple_component::Component,
    CollectionId, Error, ErrorKind, Etag, Href, Result,
};

/// A storage which exposes the items in a remote icalendar resource.
///
/// A webcal storage contains exactly one collection, which contains all the entries found in the
/// remote resource. The id of this single collection is specified via the `collection_name`
/// argument.
///
/// This storage is a bit of an odd one (since in reality, there's no concept of collections in
/// webcal). The extra abstraction layer is here merely to match the format of other storages.
///
/// Operations which would modify the remote resource, as well as all property operations, return
/// [`ErrorKind::Unsupported`].
///
/// # Href
///
/// The `href` of the only available collection is the path component of the URL. The `href` of
/// each item is the value returned by its `ident()` method.
// TODO: If an alternative href is provided, it should be used as a path on the same host.
//       Note that discovery will only support the one matching the input URL.
pub struct WebCalStorage {
    /// The URL of the remote icalendar resource. Must be HTTP or HTTPS.
    url: Uri,
    /// The id given to the single collection available. Its href is the path of `url`.
    collection_name: CollectionId,
    /// HTTP client used to fetch the remote resource.
    http_client: Client<HttpsConnector<HttpConnector>, String>,
}

impl WebCalStorage {
    /// Creates a storage backed by the icalendar resource at `url`.
    ///
    /// `collection_name` is used as the id of the single collection exposed by this storage.
    ///
    /// # Errors
    ///
    /// - [`ErrorKind::InvalidInput`] if `url` has no scheme, or a scheme other than `http` or
    ///   `https`.
    /// - If loading the native root certificates fails.
    pub fn new(url: Uri, collection_name: CollectionId) -> Result<WebCalStorage> {
        // Validate the scheme up front, so that unsupported URLs are rejected before any
        // certificates are loaded.
        let https_only = match url.scheme().map(Scheme::as_str) {
            Some("https") => true,
            Some("http") => false,
            // TODO: support webcal and webcals
            Some(_) => {
                return Err(Error::new(
                    ErrorKind::InvalidInput,
                    "URL scheme must be http or https",
                ));
            }
            None => {
                return Err(Error::new(
                    ErrorKind::InvalidInput,
                    "webcal URL requires a scheme/protocol",
                ));
            }
        };

        let with_roots = HttpsConnectorBuilder::new().with_native_roots()?;
        let with_schemes = if https_only {
            with_roots.https_only()
        } else {
            with_roots.https_or_http()
        };
        let connector = with_schemes.enable_http1().build();
        let http_client = Client::builder(TokioExecutor::new()).build(connector);

        Ok(WebCalStorage {
            url,
            collection_name,
            http_client,
        })
    }

    /// Helper method to fetch a URL and return its body as a String.
    ///
    /// Be warned! This swallows headers (including `Etag`!).
    async fn fetch_raw(&self, url: &Uri) -> Result<String> {
        let response = self
            .http_client
            // TODO: upstream should impl IntoURL for &Uri
            .get(url.clone())
            .await
            .map_err(|e| Error::new(ErrorKind::Io, e))?;

        match response.status() {
            StatusCode::NOT_FOUND | StatusCode::GONE => {
                return Err(Error::new(
                    ErrorKind::DoesNotExist,
                    "The remote resource does not exist.",
                ))
            }
            StatusCode::OK => {}
            code => {
                return Err(Error::new(
                    ErrorKind::Io,
                    format!("request returned {code}"),
                ))
            }
        }

        let (_head, body) = response.into_parts();
        let data = body
            .collect()
            .await
            .map_err(|e| Error::new(ErrorKind::Io, e))?
            .to_bytes();

        // TODO: handle non-UTF-8 data (e.g.: Content-Type/charset).
        // TODO: can I avoid making a copy of the entire response here?
        String::from_utf8(data.to_vec()).map_err(|e| Error::new(ErrorKind::InvalidData, e))
    }
}

#[async_trait]
impl Storage<IcsItem> for WebCalStorage {
    /// Checks that the remove resource exists and whether it looks like an icalendar resource.
    async fn check(&self) -> Result<()> {
        // TODO: Should map status codes to io::Error. if 404 -> NotFound, etc.
        let raw = self.fetch_raw(&self.url).await?;

        if !raw.starts_with("BEGIN:VCALENDAR") {
            return Err(Error::new(
                ErrorKind::InvalidData,
                "response for URL doesn't look like a calendar",
            ));
        }
        Ok(())
    }

    /// Returns the single collection exposed by this storage.
    ///
    /// The collection's href is the path of the storage's URL, and its id is the collection name
    /// originally specified. No request is made to the remote resource.
    async fn discover_collections(&self) -> Result<Discovery> {
        // TODO: shouldn't I check that the collection actually exists?
        let href = self.url.path().to_string();
        let id = self.collection_name.clone();
        let only_collection = DiscoveredCollection::new(href, id);

        Ok(vec![only_collection].into())
    }

    /// Unsupported for this storage type.
    async fn create_collection(&self, _: &str) -> Result<Collection> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "creating collections via webcal is not supported",
        ))
    }

    /// Unsupported for this storage type.
    async fn destroy_collection(&self, _: &str) -> Result<()> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "destroying collections via webcal is not supported",
        ))
    }

    /// Enumerates items in this collection.
    ///
    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. If some
    /// items need to be read as well, it is generally best to use
    /// [`WebCalStorage::get_all_items`] instead.
    async fn list_items(&self, _collection: &str) -> Result<Vec<ItemRef>> {
        let raw = self.fetch_raw(&self.url).await?;

        // TODO: it would be best if the parser could operate on a stream, although that might
        //       complicate copying VTIMEZONEs inline if they are at the end of the stream.
        let refs = Component::parse(&raw)
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .into_split_collection()
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .iter()
            .map(|c| {
                let item = IcsItem::from(c.to_string());
                let hash = item.hash();

                ItemRef {
                    href: item.ident(),
                    etag: hash.into(),
                }
            })
            .collect();

        Ok(refs)
    }

    /// Returns a single item from the collection.
    ///
    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. It is
    /// strongly recommended to use [`WebCalStorage::get_all_items`] instead.
    async fn get_item(&self, href: &str) -> Result<(IcsItem, Etag)> {
        let raw = self.fetch_raw(&self.url).await?;

        // TODO: it would be best if the parser could operate on a stream, although that might
        //       complicate inlining VTIMEZONEs that are at the end.
        let item = Component::parse(&raw)
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .into_split_collection()
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .iter()
            .find_map(|c| {
                let item = IcsItem::from(c.to_string());
                if item.ident() == href {
                    Some(item)
                } else {
                    None
                }
            })
            .ok_or_else(|| Error::from(ErrorKind::DoesNotExist))?;

        let hash = item.hash();
        Ok((item, hash.into()))
    }

    /// Returns multiple items from the collection.
    ///
    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. It is
    /// generally best to use [`WebCalStorage::get_all_items`] instead.
    async fn get_many_items(&self, hrefs: &[&str]) -> Result<Vec<FetchedItem<IcsItem>>> {
        let raw = self.fetch_raw(&self.url).await?;

        // TODO: it would be best if the parser could operate on a stream, although that might
        //       complicate inlining VTIMEZONEs that are at the end.

        Component::parse(&raw)
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .into_split_collection()
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .iter()
            .filter_map(|c| {
                let item = IcsItem::from(c.to_string());
                if hrefs.contains(&(item.ident().as_ref())) {
                    Some(Ok(FetchedItem {
                        href: item.ident(),
                        etag: item.hash().into(),
                        item,
                    }))
                } else {
                    None
                }
            })
            .collect()
    }

    /// Fetch all items in the collection.
    ///
    /// Performs a single HTTP(s) request to fetch all items.
    async fn get_all_items(&self, _collection: &str) -> Result<Vec<FetchedItem<IcsItem>>> {
        let raw = self.fetch_raw(&self.url).await?;

        // TODO: it would be best if the parser could operate on a stream, although that might
        //       complicate inlining VTIMEZONEs that are at the end.
        let components = Component::parse(&raw)
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?
            .into_split_collection()
            .map_err(|e| Error::new(ErrorKind::InvalidData, e))?;

        components
            .iter()
            .map(|c| {
                let item = IcsItem::from(c.to_string());
                Ok(FetchedItem {
                    href: item.ident(),
                    etag: item.hash().into(),
                    item,
                })
            })
            .collect()
    }

    /// Unsupported for this storage type.
    async fn add_item(&self, _collection: &str, _: &IcsItem) -> Result<ItemRef> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "adding items via webcal is not supported",
        ))
    }

    /// Unsupported for this storage type.
    async fn update_item(&self, _: &str, _: &Etag, _: &IcsItem) -> Result<Etag> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "updating items via webcal is not supported",
        ))
    }

    /// Unsupported for this storage type.
    async fn set_property(&self, _: &str, _: CalendarProperty, _: &str) -> Result<()> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "setting metadata via webcal is not supported",
        ))
    }

    /// Unsupported for this storage type.
    async fn unset_property(&self, _: &str, _: CalendarProperty) -> Result<()> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "unsetting metadata via webcal is not supported",
        ))
    }

    /// Unsupported for this storage type.
    async fn get_property(&self, _: &str, _: CalendarProperty) -> Result<Option<String>> {
        // TODO: return None?
        Err(Error::new(
            ErrorKind::Unsupported,
            "getting metadata via webcal is not supported",
        ))
    }

    async fn delete_item(&self, _: &str, _: &Etag) -> Result<()> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "deleting items via webcal is not supported",
        ))
    }

    /// Returns the id of the collection with the given href.
    ///
    /// The only valid href is the path of this storage's URL.
    ///
    /// # Errors
    ///
    /// [`ErrorKind::DoesNotExist`] if `collection_href` is any other value.
    fn collection_id(&self, collection_href: &str) -> Result<CollectionId> {
        if collection_href != self.url.path() {
            return Err(ErrorKind::DoesNotExist.into());
        }

        Ok(self.collection_name.clone())
    }

    fn href_for_collection_id(&self, id: &CollectionId) -> Result<Href> {
        if id == &self.collection_name {
            Ok(self.url.path().to_string())
        } else {
            Err(Error::new(
                ErrorKind::Unsupported,
                "discovery of arbitrary collections is not supported",
            ))
        }
    }

    async fn list_properties(&self, _: &str) -> Result<Vec<ListedProperty<CalendarProperty>>> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "webcal does not support properties",
        ))
    }
}

#[cfg(test)]
mod test {
    use http::Uri;

    use crate::{base::Storage, webcal::WebCalStorage};

    // FIXME: only run this test with a dedicated flag for networked test.
    // FIXME: use a webcal link hosted by me.
    // TODO: these are just validation tests and not suitable as a keeper.
    /// Smoke test against a public calendar: checks the resource, discovers its collection, and
    /// verifies that listing items and fetching them (one by one and in bulk) agree.
    #[tokio::test]
    #[ignore = "uses internet resource"]
    async fn test_dummy() {
        let url = Uri::try_from("https://www.officeholidays.com/ics/netherlands").unwrap();
        // The storage reports the path of its URL (not the collection name) as the href of its
        // only collection.
        let collection = url.path().to_string();
        let storage = WebCalStorage::new(url, "holidays".parse().unwrap()).unwrap();
        storage.check().await.unwrap();

        let discovery = storage.discover_collections().await.unwrap();
        assert_eq!(
            &collection.as_str(),
            &discovery.collections().first().unwrap().href()
        );

        let item_refs = storage.list_items(&collection).await.unwrap();

        for item_ref in &item_refs {
            let (_item, etag) = storage.get_item(&item_ref.href).await.unwrap();
            // Might fail if upstream file mutates between requests.
            assert_eq!(etag, item_ref.etag);
        }

        let hrefs: Vec<&str> = item_refs.iter().map(|r| r.href.as_ref()).collect();
        let many = storage.get_many_items(&hrefs).await.unwrap();

        assert_eq!(many.len(), hrefs.len());
        assert_eq!(many.len(), item_refs.len());
        // TODO: compare their contents and etags, though these should all match.
    }
}