domain/base/
scan.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
//! Parsing of data from its presentation format.
//!
//! This module provides the basic machinery to parse DNS data from its
//! standard textual representation, known as the presentation format or,
//! perhaps more commonly, zonefile format. To distinguish this process from
//! parsing data from its binary wire format, we call this process
//! _scanning._
//!
//! The module provides two important traits which should sound familiar to
//! anyone who has used Serde before: [`Scan`] and [`Scanner`]. A type that
//! knows how to create a value from its presentation format implements
//! [`Scan`]. It uses an implementation of the [`Scanner`] trait as the source
//! of data in presentation format.
//!
//! This module provides a simple scanner that uses a sequence of strings as
//! its source and can be used to, for instance, read record data from
//! command line arguments. A “proper” scanner is included in the
#![cfg_attr(feature = "zonefile", doc = "[zonefile][crate::zonefile]")]
#![cfg_attr(not(feature = "zonefile"), doc = "zonefile")]
//! module.
#![allow(clippy::manual_range_contains)] // Hard disagree.
#![allow(unused_imports)] // XXX

use crate::base::charstr::{CharStr, CharStrBuilder};
use crate::base::name::{Name, ToName};
use crate::base::wire::{Compose, Composer};
use core::convert::{TryFrom, TryInto};
use core::iter::Peekable;
use core::marker::PhantomData;
use core::{fmt, str};
use octseq::str::Str;
use octseq::{
    EmptyBuilder, FreezeBuilder, FromBuilder, OctetsBuilder, ShortBuf,
    Truncate,
};
#[cfg(feature = "std")]
use std::error;

use super::Ttl;

//============ Scanning Traits ===============================================

//------------ Scan ---------------------------------------------------------

/// A type that can be created from its presentation format.
///
/// Implementing the trait for a type adds scanning to it: the value is
/// produced from the tokens handed out by a [`Scanner`].
///
/// This trait is generic over the specific scanner, allowing types to limit
/// their implementation to scanners with certain properties.
pub trait Scan<S: Scanner>: Sized {
    /// Reads a value from the provided scanner.
    ///
    /// An implementation should read as many tokens as it needs from the
    /// scanner. It can assume that they are all available – the scanner will
    /// produce an error if it runs out of tokens prematurely.
    ///
    /// The implementation does not need to keep reading until the end of
    /// tokens. It is the responsibility of the user to make sure there are
    /// no stray tokens at the end of an entry.
    ///
    /// Finally, if an implementation needs to read tokens until the end of
    /// the entry, it can use [`Scanner::continues`] to check if there are
    /// still tokens left.
    ///
    /// If an implementation encounters an error in the presentation data,
    /// it should report it using [`ScannerError::custom`] unless any of the
    /// other methods of [`ScannerError`] seem more appropriate.
    fn scan(scanner: &mut S) -> Result<Self, S::Error>;
}

// Implements `Scan` for an unsigned integer type.
//
// The value is read from a single token consisting of unescaped decimal
// digits only. Any other symbol results in the custom error "expected
// decimal number". A value that does not fit into the target type results
// in the custom error "decimal number overflow" – this covers both the
// multiplication by ten and the addition of the next digit.
macro_rules! impl_scan_unsigned {
    ( $type:ident) => {
        impl<S: Scanner> Scan<S> for $type {
            fn scan(scanner: &mut S) -> Result<Self, S::Error> {
                let mut res: $type = 0;
                scanner.scan_symbols(|ch| {
                    res = res.checked_mul(10).ok_or_else(|| {
                        S::Error::custom("decimal number overflow")
                    })?;
                    // A decimal digit is always below ten, so the cast
                    // cannot lose information for any target type.
                    let digit = ch.into_digit(10).map_err(|_| {
                        S::Error::custom("expected decimal number")
                    })? as $type;
                    // The addition can overflow, too: think "259" for `u8`.
                    res = res.checked_add(digit).ok_or_else(|| {
                        S::Error::custom("decimal number overflow")
                    })?;
                    Ok(())
                })?;
                Ok(res)
            }
        }
    };
}

impl_scan_unsigned!(u8);
impl_scan_unsigned!(u16);
impl_scan_unsigned!(u32);
impl_scan_unsigned!(u64);
impl_scan_unsigned!(u128);

impl<S: Scanner> Scan<S> for Ttl {
    /// Scans a TTL given as a decimal number of seconds.
    ///
    /// The token must consist of unescaped decimal digits only, otherwise
    /// the custom error "expected decimal number" is returned. If the value
    /// does not fit into a `u32`, the custom error "decimal number overflow"
    /// is returned.
    fn scan(scanner: &mut S) -> Result<Self, <S as Scanner>::Error> {
        let mut secs: u32 = 0;
        scanner.scan_symbols(|ch| {
            secs = secs
                .checked_mul(10)
                .ok_or_else(|| S::Error::custom("decimal number overflow"))?;
            let digit = ch
                .into_digit(10)
                .map_err(|_| S::Error::custom("expected decimal number"))?;
            // Adding the digit can overflow even if the multiplication
            // didn’t, e.g., for "4294967299".
            secs = secs
                .checked_add(digit)
                .ok_or_else(|| S::Error::custom("decimal number overflow"))?;
            Ok(())
        })?;
        Ok(Ttl::from_secs(secs))
    }
}

//------------ Scanner -------------------------------------------------------

/// A type that can produce tokens of data in presentation format.
///
/// The presentation format is a relatively simple text format that provides
/// a sequence of _entries_ each consisting of a sequence of _tokens._ An
/// implementation of the `Scanner` trait provides access to the tokens of a
/// single entry.
///
/// Most methods of the trait process a single token for the caller.
/// Exceptions are those methods suffixed with `_entry`, which process all
/// the remaining tokens of the entry. In addition,
/// [`has_space`][Scanner::has_space] reports whether the token was prefixed
/// with white space (which is relevant in some cases), and
/// [`continues`][Scanner::continues] reports whether there are more tokens
/// in the entry. If it returns `false`, all the other token and entry
/// methods will return an error. That is, calling these methods assumes that
/// the caller requires at least one more token.
///
/// Because an implementation may be able to optimize the process of
/// converting tokens into output data types, there are a number of methods
/// for different output. Each of these methods assumes that the next token
/// (or the remaining tokens in the entry) is required to contain the
/// presentation format of the given type and should produce an error
/// if that is not the case.
///
/// This allows for instance to optimize the creation of domain names and
/// avoid copying around data in the most usual cases.
///
/// As a consequence, an implementation gets to choose how to return tokens.
/// This mostly concerns the octets types to be used, but also allows it to
/// creatively employ the [name::Chain](crate::base::name::Chain) type to
/// deal with a zone’s changing origin.
pub trait Scanner {
    /// The type of octet sequences returned by the scanner.
    type Octets: AsRef<[u8]>;

    /// The octets builder used internally and returned upon request.
    type OctetsBuilder: OctetsBuilder
        + AsRef<[u8]>
        + AsMut<[u8]>
        + Truncate
        + FreezeBuilder<Octets = Self::Octets>;

    /// The type of a domain name returned by the scanner.
    type Name: ToName;

    /// The error type of the scanner.
    type Error: ScannerError;

    /// Returns whether the next token is preceded by white space.
    fn has_space(&self) -> bool;

    /// Returns whether there are more tokens in the entry.
    ///
    /// This method takes a `&mut self` to allow implementations to peek on
    /// request.
    fn continues(&mut self) -> bool;

    /// Scans a token into a sequence of symbols.
    ///
    /// Each symbol is passed to the caller via the closure and can be
    /// processed there.
    fn scan_symbols<F>(&mut self, op: F) -> Result<(), Self::Error>
    where
        F: FnMut(Symbol) -> Result<(), Self::Error>;

    /// Scans the remainder of the entry as symbols.
    ///
    /// Each symbol is passed to the caller via the closure and can be
    /// processed there.
    fn scan_entry_symbols<F>(&mut self, op: F) -> Result<(), Self::Error>
    where
        F: FnMut(EntrySymbol) -> Result<(), Self::Error>;

    /// Converts the symbols of a token into an octets sequence.
    ///
    /// Each symbol is passed to the provided converter which can return
    /// octet slices to be used to construct the returned value. When the
    /// token is complete, the converter is called again to ask for any
    /// remaining data to be added.
    fn convert_token<C: ConvertSymbols<Symbol, Self::Error>>(
        &mut self,
        convert: C,
    ) -> Result<Self::Octets, Self::Error>;

    /// Converts the symbols of the remainder of the entry into an octets
    /// sequence.
    ///
    /// Each entry symbol is passed to the provided converter which can
    /// return octet slices to be used to construct the returned value. When
    /// the entry is complete, the converter is called again to ask for any
    /// remaining data to be added.
    fn convert_entry<C: ConvertSymbols<EntrySymbol, Self::Error>>(
        &mut self,
        convert: C,
    ) -> Result<Self::Octets, Self::Error>;

    /// Scans a token into an octets sequence.
    ///
    /// The returned sequence has all symbols converted into their octets.
    /// It can be of any length.
    fn scan_octets(&mut self) -> Result<Self::Octets, Self::Error>;

    /// Scans a token as a borrowed ASCII string.
    ///
    /// If the next token contains non-ASCII characters, returns an error.
    /// The string is given to the caller via the provided closure.
    fn scan_ascii_str<F, T>(&mut self, op: F) -> Result<T, Self::Error>
    where
        F: FnOnce(&str) -> Result<T, Self::Error>;

    /// Scans a token into a domain name.
    fn scan_name(&mut self) -> Result<Self::Name, Self::Error>;

    /// Scans a token into a character string.
    ///
    /// Note that character strings have a length limit.  If you want a
    /// sequence of indefinite length, use [`scan_octets`][Self::scan_octets]
    /// instead.
    fn scan_charstr(&mut self) -> Result<CharStr<Self::Octets>, Self::Error>;

    /// Scans a token as a UTF-8 string.
    fn scan_string(&mut self) -> Result<Str<Self::Octets>, Self::Error>;

    /// Scans a sequence of character strings until the end of the entry.
    ///
    /// The returned octets will contain the sequence of character strings in
    /// wire format.
    fn scan_charstr_entry(&mut self) -> Result<Self::Octets, Self::Error>;

    /// Scans an optional unknown rdata marker.
    ///
    /// If the next token is `\#`, i.e., an unquoted, escaped hash sign,
    /// consumes the token and returns `Ok(true)`. If the next token is
    /// anything else or if there is no next token, does nothing and returns
    /// `Ok(false)`. If there is an error, returns an error.
    fn scan_opt_unknown_marker(&mut self) -> Result<bool, Self::Error>;

    /// Returns an empty octets builder.
    ///
    /// This builder can be used to create octets sequences in cases where
    /// the other methods can’t be used.
    fn octets_builder(&mut self) -> Result<Self::OctetsBuilder, Self::Error>;
}

//------------ ScannerError --------------------------------------------------

// Declares the `ScannerError` trait with the given supertraits.
//
// The trait is declared through a macro because its supertraits depend on
// the `std` feature: with it, the trait requires `std::error::Error`,
// without it only `fmt::Debug` and `fmt::Display`. See the two invocations
// below.
macro_rules! declare_error_trait {
    (ScannerError: Sized $(+ $($supertrait:ident)::+)*) => {
        /// A type providing error information for a scanner.
        pub trait ScannerError: Sized $(+ $($supertrait)::+)* {
            /// Creates a new error wrapping a supplied error message.
            fn custom(msg: &'static str) -> Self;

            /// Creates an error when more tokens were expected in the entry.
            fn end_of_entry() -> Self;

            /// Creates an error when an octets buffer is too short.
            fn short_buf() -> Self;

            /// Creates an error when there are trailing tokens.
            fn trailing_tokens() -> Self;
        }
    }
}

#[cfg(feature = "std")]
declare_error_trait!(ScannerError: Sized + error::Error);

#[cfg(not(feature = "std"))]
declare_error_trait!(ScannerError: Sized + fmt::Debug + fmt::Display);

#[cfg(feature = "std")]
impl ScannerError for std::io::Error {
    // Every error except the end of entry is reported with the catch-all
    // kind `Other`; running out of tokens maps onto `UnexpectedEof`.

    fn custom(msg: &'static str) -> Self {
        let kind = std::io::ErrorKind::Other;
        Self::new(kind, msg)
    }

    fn end_of_entry() -> Self {
        let kind = std::io::ErrorKind::UnexpectedEof;
        Self::new(kind, "unexpected end of entry")
    }

    fn short_buf() -> Self {
        // The `ShortBuf` value itself becomes the wrapped error.
        let kind = std::io::ErrorKind::Other;
        Self::new(kind, ShortBuf)
    }

    fn trailing_tokens() -> Self {
        let kind = std::io::ErrorKind::Other;
        Self::new(kind, "trailing data")
    }
}

//------------ ConvertSymbols ------------------------------------------------

/// A type that helps convert the symbols in presentation format.
///
/// This trait is used by [`Scanner::convert_token`] with [`Symbol`]s and
/// [`Scanner::convert_entry`] with [`EntrySymbol`]s.
///
/// For each symbol, [`process_symbol`][ConvertSymbols::process_symbol] is
/// called. When the end of token or entry is reached,
/// [`process_tail`][ConvertSymbols::process_tail] is called, giving the
/// implementer a chance to return any remaining data.
pub trait ConvertSymbols<Sym, Error> {
    /// Processes the next symbol.
    ///
    /// If the method returns some data, it will be appended to the output
    /// octets sequence.
    fn process_symbol(&mut self, symbol: Sym)
        -> Result<Option<&[u8]>, Error>;

    /// Processes the end of the token or entry.
    ///
    /// If the method returns some data, it will be appended to the output
    /// octets sequence.
    fn process_tail(&mut self) -> Result<Option<&[u8]>, Error>;
}

//============ Zone file symbol ==============================================

//------------ Symbol --------------------------------------------------------

/// The zone file representation of a single character.
///
/// This is either a regular character or an escape sequence. See the variants
/// for more details.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Symbol {
    /// An unescaped Unicode character.
    Char(char),

    /// A character escaped via a preceding backslash.
    ///
    /// This escape sequence is only allowed for printable ASCII characters.
    /// The variant holds the octet value of the escaped character.
    SimpleEscape(u8),

    /// A raw octet escaped using the decimal escape sequence.
    ///
    /// This escape sequence consists of a backslash followed by exactly three
    /// decimal digits with the value of the octet.
    DecimalEscape(u8),
}

impl Symbol {
    /// Reads a symbol from a character source.
    ///
    /// Returns the next symbol in the source, `Ok(None)` if the source has
    /// been exhausted, or an error if there wasn’t a valid symbol.
    pub fn from_chars<C: Iterator<Item = char>>(
        chars: &mut C,
    ) -> Result<Option<Self>, SymbolCharsError> {
        #[inline]
        fn bad_escape() -> SymbolCharsError {
            SymbolCharsError(SymbolCharsEnum::BadEscape)
        }

        #[inline]
        fn short_input() -> SymbolCharsError {
            SymbolCharsError(SymbolCharsEnum::ShortInput)
        }

        let ch = match chars.next() {
            Some(ch) => ch,
            None => return Ok(None),
        };
        if ch != '\\' {
            return Ok(Some(Symbol::Char(ch)));
        }
        match chars.next() {
            Some(ch) if ch.is_ascii_digit() => {
                let ch = ch.to_digit(10).unwrap() * 100;
                let ch2 = match chars.next() {
                    Some(ch) => match ch.to_digit(10) {
                        Some(ch) => ch * 10,
                        None => return Err(bad_escape()),
                    },
                    None => return Err(short_input()),
                };
                let ch3 = match chars.next() {
                    Some(ch) => match ch.to_digit(10) {
                        Some(ch) => ch,
                        None => return Err(bad_escape()),
                    },
                    None => return Err(short_input()),
                };
                let res = ch + ch2 + ch3;
                if res > 255 {
                    return Err(bad_escape());
                }
                Ok(Some(Symbol::DecimalEscape(res as u8)))
            }
            Some(ch) => {
                let ch = u8::try_from(ch).map_err(|_| bad_escape())?;
                if ch < 0x20 || ch > 0x7e {
                    Err(bad_escape())
                } else {
                    Ok(Some(Symbol::SimpleEscape(ch)))
                }
            }
            None => Err(short_input()),
        }
    }

    /// Reads a symbol from the given position in an octets slice.
    ///
    /// Returns the symbol and the index of the end of the symbol in the
    /// slice.
    pub fn from_slice_index(
        octets: &[u8],
        pos: usize,
    ) -> Result<Option<(Symbol, usize)>, SymbolOctetsError> {
        #[inline]
        fn bad_utf8() -> SymbolOctetsError {
            SymbolOctetsError(SymbolOctetsEnum::BadUtf8)
        }

        #[inline]
        fn bad_escape() -> SymbolOctetsError {
            SymbolOctetsError(SymbolOctetsEnum::BadEscape)
        }

        #[inline]
        fn short_input() -> SymbolOctetsError {
            SymbolOctetsError(SymbolOctetsEnum::ShortInput)
        }

        let c1 = match octets.get(pos) {
            Some(c1) => *c1,
            None => return Ok(None),
        };
        let pos = pos + 1;

        if c1 == b'\\' {
            // Escape sequence

            // Get the next octet.
            let c2 = match octets.get(pos) {
                Some(c2) => *c2,
                None => return Err(short_input()),
            };
            let pos = pos + 1;

            if c2.is_ascii_control() {
                // Only printable ASCII characters allowed.
                return Err(bad_escape());
            } else if !c2.is_ascii_digit() {
                // Simple escape.
                return Ok(Some((Symbol::SimpleEscape(c2), pos)));
            }

            // Get two more octets.
            let c3 = match octets.get(pos) {
                Some(c) if c.is_ascii_digit() => *c,
                Some(_) => return Err(bad_escape()),
                None => return Err(short_input()),
            };
            let pos = pos + 1;
            let c4 = match octets.get(pos) {
                Some(c) if c.is_ascii_digit() => *c,
                Some(_) => return Err(bad_escape()),
                None => return Err(short_input()),
            };
            let pos = pos + 1;

            Ok(Some((
                Symbol::DecimalEscape(
                    u8::try_from(
                        (u32::from(c2 - b'0') * 100)
                            + (u32::from(c3 - b'0') * 10)
                            + (u32::from(c4 - b'0')),
                    )
                    .map_err(|_| bad_escape())?,
                ),
                pos,
            )))
        } else {
            // UTF-8 encoded character.
            //
            // Looks like there’s nothing in the standard library to help us
            // do this.

            // ASCII is single byte.
            if c1 < 128 {
                return Ok(Some((Symbol::Char(c1.into()), pos)));
            }

            // Second-to-left but must be 1.
            if c1 & 0b0100_0000 == 0 {
                return Err(bad_utf8());
            }

            // Get the next octet, check that it is valid.
            let c2 = match octets.get(pos) {
                Some(c2) => *c2,
                None => return Err(short_input()),
            };
            let pos = pos + 1;
            if c2 & 0b1100_0000 != 0b1000_0000 {
                return Err(bad_utf8());
            }

            // If c1’s third-to-left bit is 0, we have the two octet case.
            if c1 & 0b0010_0000 == 0 {
                return Ok(Some((
                    Symbol::Char(
                        (u32::from(c2 & 0b0011_1111)
                            | (u32::from(c1 & 0b0001_1111) << 6))
                            .try_into()
                            .map_err(|_| bad_utf8())?,
                    ),
                    pos,
                )));
            }

            // Get the next octet, check that it is valid.
            let c3 = match octets.get(pos) {
                Some(c3) => *c3,
                None => return Err(short_input()),
            };
            let pos = pos + 1;
            if c3 & 0b1100_0000 != 0b1000_0000 {
                return Err(bad_utf8());
            }

            // If c1’s fourth-to-left bit is 0, we have the three octet case.
            if c1 & 0b0001_0000 == 0 {
                return Ok(Some((
                    Symbol::Char(
                        (u32::from(c3 & 0b0011_1111)
                            | (u32::from(c2 & 0b0011_1111) << 6)
                            | (u32::from(c1 & 0b0001_1111) << 12))
                            .try_into()
                            .map_err(|_| bad_utf8())?,
                    ),
                    pos,
                )));
            }

            // Get the next octet, check that it is valid.
            let c4 = match octets.get(pos) {
                Some(c4) => *c4,
                None => return Err(short_input()),
            };
            let pos = pos + 1;
            if c4 & 0b1100_0000 != 0b1000_0000 {
                return Err(bad_utf8());
            }

            Ok(Some((
                Symbol::Char(
                    (u32::from(c4 & 0b0011_1111)
                        | (u32::from(c3 & 0b0011_1111) << 6)
                        | (u32::from(c2 & 0b0011_1111) << 12)
                        | (u32::from(c1 & 0b0000_1111) << 18))
                        .try_into()
                        .map_err(|_| bad_utf8())?,
                ),
                pos,
            )))
        }
    }

    /// Provides the best symbol for an octet.
    ///
    /// The function will use the simple escape sequence for octet values that
    /// represent ASCII spaces, quotes, backslashes, and semicolons and the
    /// plain ASCII value for all other printable ASCII characters. Any other
    /// value is escaped using the decimal escape sequence.
    #[must_use]
    pub fn from_octet(ch: u8) -> Self {
        if ch == b' ' || ch == b'"' || ch == b'\\' || ch == b';' {
            Symbol::SimpleEscape(ch)
        } else if !(0x20..0x7F).contains(&ch) {
            Symbol::DecimalEscape(ch)
        } else {
            Symbol::Char(ch as char)
        }
    }

    /// Provides the best symbol for an octet inside a quoted string.
    ///
    /// The function will only escape a double quote and backslash using a
    /// simple escape and all non-printable characters using decimal escapes.
    #[must_use]
    pub fn quoted_from_octet(ch: u8) -> Self {
        if ch == b'"' || ch == b'\\' {
            Symbol::SimpleEscape(ch)
        } else if !(0x20..0x7F).contains(&ch) {
            Symbol::DecimalEscape(ch)
        } else {
            Symbol::Char(ch as char)
        }
    }

    /// Provides the best symbol for an octet inside a `Display` impl.
    ///
    /// The function will only escape a backslash using a simple escape and
    /// all non-printable characters using decimal escapes.
    #[must_use]
    pub fn display_from_octet(ch: u8) -> Self {
        if ch == b'\\' {
            Symbol::SimpleEscape(ch)
        } else if !(0x20..0x7F).contains(&ch) {
            Symbol::DecimalEscape(ch)
        } else {
            Symbol::Char(ch as char)
        }
    }

    /// Converts the symbol into an octet if it represents one.
    ///
    /// Both domain names and character strings operate on bytes instead of
    /// (Unicode) characters. These bytes can be represented by printable
    /// ASCII characters (that is, U+0020 to U+007E), both plain or through
    /// a simple escape, or by a decimal escape.
    ///
    /// This method returns such an octet or an error if the symbol doesn’t
    /// have value representing an octet. Note that it will succeed for an
    /// ASCII space character U+0020 which may be used as a word separator
    /// in some cases.
    pub fn into_octet(self) -> Result<u8, BadSymbol> {
        match self {
            Symbol::Char(ch) => {
                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
                    Ok(ch as u8)
                } else {
                    Err(BadSymbol(BadSymbolEnum::NonAscii))
                }
            }
            Symbol::SimpleEscape(ch) | Symbol::DecimalEscape(ch) => Ok(ch),
        }
    }

    /// Converts the symbol into an octet if it is printable ASCII.
    ///
    /// This is similar to [`into_octet`][Self::into_octet] but returns an
    /// error when the resulting octet is not a printable ASCII character,
    /// i.e., an octet of value 0x20 up to and including 0x7E.
    pub fn into_ascii(self) -> Result<u8, BadSymbol> {
        match self {
            Symbol::Char(ch) => {
                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
                    Ok(ch as u8)
                } else {
                    Err(BadSymbol(BadSymbolEnum::NonAscii))
                }
            }
            Symbol::SimpleEscape(ch) | Symbol::DecimalEscape(ch) => {
                if ch >= 0x20 && ch <= 0x7E {
                    Ok(ch)
                } else {
                    Err(BadSymbol(BadSymbolEnum::NonAscii))
                }
            }
        }
    }

    /// Converts the symbol into a `char`.
    ///
    /// This will fail for a decimal escape sequence which doesn’t actually
    /// represent a character.
    pub fn into_char(self) -> Result<char, BadSymbol> {
        match self {
            Symbol::Char(ch) => Ok(ch),
            Symbol::SimpleEscape(ch) if ch >= 0x20 && ch < 0x7F => {
                Ok(ch.into())
            }
            _ => Err(BadSymbol(BadSymbolEnum::NonUtf8)),
        }
    }

    /// Converts the symbol representing a digit into its integer value.
    pub fn into_digit(self, base: u32) -> Result<u32, BadSymbol> {
        if let Symbol::Char(ch) = self {
            match ch.to_digit(base) {
                Some(ch) => Ok(ch),
                None => Err(BadSymbol(BadSymbolEnum::NonDigit)),
            }
        } else {
            Err(BadSymbol(BadSymbolEnum::Escape))
        }
    }

    /// Returns whether the symbol can occur as part of a word.
    ///
    /// This is true for all symbols other than unescaped ASCII space and
    /// horizontal tabs, opening and closing parentheses, semicolon, and
    /// double quote.
    #[must_use]
    pub fn is_word_char(self) -> bool {
        match self {
            Symbol::Char(ch) => {
                ch != ' '
                    && ch != '\t'
                    && ch != '\r'
                    && ch != '\n'
                    && ch != '('
                    && ch != ')'
                    && ch != ';'
                    && ch != '"'
            }
            _ => true,
        }
    }
}

//--- From

impl From<char> for Symbol {
    fn from(ch: char) -> Symbol {
        Symbol::Char(ch)
    }
}

//--- Display

impl fmt::Display for Symbol {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Symbol::Char(ch) => write!(f, "{}", ch),
            Symbol::SimpleEscape(ch) => write!(f, "\\{}", ch as char),
            Symbol::DecimalEscape(ch) => write!(f, "\\{:03}", ch),
        }
    }
}

//------------ EntrySymbol ---------------------------------------------------

/// The symbols encountered in the remainder of an entry.
///
/// A value is either a regular symbol that is part of a token or the
/// marker for the end of a token.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EntrySymbol {
    /// A regular in-token symbol.
    Symbol(Symbol),

    /// The end of a token.
    EndOfToken,
}

//--- From

impl From<Symbol> for EntrySymbol {
    /// Wraps a regular symbol into an entry symbol.
    fn from(symbol: Symbol) -> Self {
        Self::Symbol(symbol)
    }
}

//------------ Symbols -------------------------------------------------------

/// An iterator over the symbols in a char sequence.
///
/// The iterator stops if a character cannot be converted into symbols. You
/// can check if that happened via the [`ok`][Self::ok] method.
#[derive(Clone, Debug)]
pub struct Symbols<Chars> {
    /// The chars of the sequence or the error that ended the iteration.
    ///
    /// This is a result so we can fuse the iterator on error: once a
    /// conversion has failed, the error is kept here and no further
    /// symbols are produced.
    chars: Result<Chars, SymbolCharsError>,
}

impl<Chars> Symbols<Chars> {
    /// Creates a new symbols iterator atop a char iterator.
    pub fn new(chars: Chars) -> Self {
        Self { chars: Ok(chars) }
    }

    /// Checks whether there was an error converting symbols.
    ///
    /// Consumes the iterator and returns the error that stopped it, if
    /// there was one.
    pub fn ok(self) -> Result<(), SymbolCharsError> {
        match self.chars {
            Ok(_) => Ok(()),
            Err(err) => Err(err),
        }
    }

    /// Runs a closure on a new symbols iterator and checks it afterwards.
    ///
    /// The iterator is created atop `chars` and handed to `op`. If `op`
    /// fails, its error is returned. Otherwise, if the iterator stopped
    /// because of a conversion error, that error is converted into `E` and
    /// returned. Only if neither happened, the value produced by `op` is
    /// returned.
    pub fn with<F, T, E>(chars: Chars, op: F) -> Result<T, E>
    where
        F: FnOnce(&mut Self) -> Result<T, E>,
        E: From<SymbolCharsError>,
    {
        let mut symbols = Self::new(chars);
        let value = op(&mut symbols)?;
        match symbols.ok() {
            Ok(()) => Ok(value),
            Err(err) => Err(err.into()),
        }
    }
}

impl<Chars: Iterator<Item = char>> Iterator for Symbols<Chars> {
    type Item = Symbol;

    fn next(&mut self) -> Option<Self::Item> {
        // After an earlier failure there are no chars left to look at.
        let chars = self.chars.as_mut().ok()?;
        match Symbol::from_chars(chars) {
            Ok(symbol) => symbol,
            Err(err) => {
                // Remember the error so all further calls return `None`.
                self.chars = Err(err);
                None
            }
        }
    }
}

//------------ IterScanner ---------------------------------------------------

/// A simple scanner atop an iterator of strings.
///
/// The type is generic over the iterator as well as the octets sequence to
/// use for returned data. The type’s associated octets builder is used to
/// create values.
pub struct IterScanner<Iter: Iterator, Octets> {
    /// The source of tokens of the scanner.
    ///
    /// The iterator is made peekable so we can look at the next token
    /// without taking it.
    iter: Peekable<Iter>,

    /// The marker for the output octets sequence type.
    marker: PhantomData<Octets>,
}

impl<Iter: Iterator, Octets> IterScanner<Iter, Octets> {
    /// Creates a new scanner from an iterator.
    pub fn new<I: IntoIterator<IntoIter = Iter>>(iter: I) -> Self {
        let iter = iter.into_iter().peekable();
        Self {
            iter,
            marker: PhantomData,
        }
    }

    /// Returns whether the iterator is exhausted.
    ///
    /// This only peeks at the next token and does not consume it.
    pub fn is_exhausted(&mut self) -> bool {
        let upcoming = self.iter.peek();
        upcoming.is_none()
    }
}

impl<Iter, Item, Octets> Scanner for IterScanner<Iter, Octets>
where
    Item: AsRef<str>,
    Iter: Iterator<Item = Item>,
    Octets: FromBuilder,
    <Octets as FromBuilder>::Builder: EmptyBuilder + Composer,
{
    type Octets = Octets;
    type OctetsBuilder = <Octets as FromBuilder>::Builder;
    type Name = Name<Octets>;
    type Error = StrError;

    fn has_space(&self) -> bool {
        false
    }

    fn continues(&mut self) -> bool {
        self.iter.peek().is_some()
    }

    fn scan_symbols<F>(&mut self, mut op: F) -> Result<(), Self::Error>
    where
        F: FnMut(Symbol) -> Result<(), Self::Error>,
    {
        let token = match self.iter.next() {
            Some(token) => token,
            None => return Err(StrError::end_of_entry()),
        };
        for sym in Symbols::new(token.as_ref().chars()) {
            op(sym)?;
        }
        Ok(())
    }

    fn scan_entry_symbols<F>(&mut self, mut op: F) -> Result<(), Self::Error>
    where
        F: FnMut(EntrySymbol) -> Result<(), Self::Error>,
    {
        for token in &mut self.iter {
            for sym in Symbols::new(token.as_ref().chars()) {
                op(sym.into())?;
            }
            op(EntrySymbol::EndOfToken)?;
        }
        Ok(())
    }

    fn convert_token<C: ConvertSymbols<Symbol, Self::Error>>(
        &mut self,
        mut convert: C,
    ) -> Result<Self::Octets, Self::Error> {
        let token = match self.iter.next() {
            Some(token) => token,
            None => return Err(StrError::end_of_entry()),
        };
        let mut res = <Octets as FromBuilder>::Builder::empty();

        for sym in Symbols::new(token.as_ref().chars()) {
            if let Some(data) = convert.process_symbol(sym)? {
                res.append_slice(data).map_err(Into::into)?;
            }
        }

        if let Some(data) = convert.process_tail()? {
            res.append_slice(data).map_err(Into::into)?;
        }

        Ok(<Octets as FromBuilder>::from_builder(res))
    }

    fn convert_entry<C: ConvertSymbols<EntrySymbol, Self::Error>>(
        &mut self,
        mut convert: C,
    ) -> Result<Self::Octets, Self::Error> {
        let mut res = <Octets as FromBuilder>::Builder::empty();
        for token in &mut self.iter {
            for sym in Symbols::new(token.as_ref().chars()) {
                if let Some(data) = convert.process_symbol(sym.into())? {
                    res.append_slice(data).map_err(Into::into)?;
                }
            }
        }
        if let Some(data) = convert.process_tail()? {
            res.append_slice(data).map_err(Into::into)?;
        }
        Ok(<Octets as FromBuilder>::from_builder(res))
    }

    fn scan_octets(&mut self) -> Result<Self::Octets, Self::Error> {
        let token = match self.iter.next() {
            Some(token) => token,
            None => return Err(StrError::end_of_entry()),
        };
        let mut res = <Octets as FromBuilder>::Builder::empty();
        for sym in Symbols::new(token.as_ref().chars()) {
            match sym.into_octet() {
                Ok(ch) => res.append_slice(&[ch]).map_err(Into::into)?,
                Err(_) => return Err(StrError::custom("bad symbol")),
            }
        }
        Ok(<Octets as FromBuilder>::from_builder(res))
    }

    fn scan_ascii_str<F, T>(&mut self, op: F) -> Result<T, Self::Error>
    where
        F: FnOnce(&str) -> Result<T, Self::Error>,
    {
        let res = self.scan_string()?;
        if res.is_ascii() {
            op(&res)
        } else {
            Err(StrError::custom("non-ASCII characters"))
        }
    }

    fn scan_name(&mut self) -> Result<Self::Name, Self::Error> {
        let token = match self.iter.next() {
            Some(token) => token,
            None => return Err(StrError::end_of_entry()),
        };
        Name::from_symbols(Symbols::new(token.as_ref().chars()))
            .map_err(|_| StrError::custom("invalid domain name"))
    }

    fn scan_charstr(&mut self) -> Result<CharStr<Self::Octets>, Self::Error> {
        let token = match self.iter.next() {
            Some(token) => token,
            None => return Err(StrError::end_of_entry()),
        };
        let mut res =
            CharStrBuilder::<<Octets as FromBuilder>::Builder>::new();
        for sym in Symbols::new(token.as_ref().chars()) {
            match sym.into_octet() {
                Ok(ch) => res.append_slice(&[ch])?,
                Err(_) => return Err(StrError::custom("bad symbol")),
            }
        }
        Ok(res.finish())
    }

    fn scan_string(&mut self) -> Result<Str<Self::Octets>, Self::Error> {
        let token = match self.iter.next() {
            Some(token) => token,
            None => return Err(StrError::end_of_entry()),
        };
        let mut res = <Octets as FromBuilder>::Builder::empty();
        let mut buf = [0u8; 4];
        for sym in Symbols::new(token.as_ref().chars()) {
            match sym.into_char() {
                Ok(ch) => res
                    .append_slice(ch.encode_utf8(&mut buf).as_bytes())
                    .map_err(Into::into)?,
                Err(_) => return Err(StrError::custom("bad symbol")),
            }
        }
        Ok(Str::from_utf8(<Octets as FromBuilder>::from_builder(res))
            .unwrap())
    }

    fn scan_charstr_entry(&mut self) -> Result<Self::Octets, Self::Error> {
        // XXX This implementation is probably a bit too lazy.
        let mut res = <Octets as FromBuilder>::Builder::empty();
        while self.iter.peek().is_some() {
            self.scan_charstr()?.compose(&mut res).map_err(Into::into)?;
        }
        Ok(<Octets as FromBuilder>::from_builder(res))
    }

    fn scan_opt_unknown_marker(&mut self) -> Result<bool, Self::Error> {
        match self.iter.peek() {
            Some(token) if token.as_ref() == "\\#" => Ok(true),
            _ => Ok(false),
        }
    }

    fn octets_builder(&mut self) -> Result<Self::OctetsBuilder, Self::Error> {
        Ok(<Octets as FromBuilder>::Builder::empty())
    }
}

//============ Error Types ===================================================

//------------ SymbolCharsError ----------------------------------------------

/// An error happened when reading a symbol.
///
/// This is the error produced when converting a sequence of chars into
/// symbols.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SymbolCharsError(SymbolCharsEnum);

/// The reasons for a [`SymbolCharsError`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SymbolCharsEnum {
    /// An illegal escape sequence was encountered.
    BadEscape,

    /// Unexpected end of input.
    ///
    /// This can only happen in a decimal escape sequence.
    ShortInput,
}

impl SymbolCharsError {
    /// Creates a “bad escape” variant of the error.
    pub(crate) const fn bad_escape() -> Self {
        SymbolCharsError(SymbolCharsEnum::BadEscape)
    }

    /// Creates a “short input” variant of the error.
    pub(crate) const fn short_input() -> Self {
        SymbolCharsError(SymbolCharsEnum::ShortInput)
    }

    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        let SymbolCharsError(kind) = self;
        match kind {
            SymbolCharsEnum::BadEscape => "illegal escape sequence",
            SymbolCharsEnum::ShortInput => "unexpected end of input",
        }
    }
}

//--- Display and Error

impl fmt::Display for SymbolCharsError {
    /// Writes the static description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = self.as_str();
        f.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for SymbolCharsError {}

//------------ SymbolOctetsError ---------------------------------------------

/// An error happened when reading a symbol.
///
/// In addition to the escape sequence problems, this error also covers
/// illegal UTF-8 sequences.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SymbolOctetsError(SymbolOctetsEnum);

/// The reasons for a [`SymbolOctetsError`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SymbolOctetsEnum {
    /// An illegal UTF-8 sequence was encountered.
    BadUtf8,

    /// An illegal escape sequence was encountered.
    BadEscape,

    /// Unexpected end of input.
    ///
    /// This can only happen in a decimal escape sequence.
    ShortInput,
}

impl SymbolOctetsError {
    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        let SymbolOctetsError(kind) = self;
        match kind {
            SymbolOctetsEnum::BadUtf8 => "illegal UTF-8 sequence",
            SymbolOctetsEnum::BadEscape => "illegal escape sequence",
            SymbolOctetsEnum::ShortInput => "unexpected end of data",
        }
    }
}

//--- Display and Error

impl fmt::Display for SymbolOctetsError {
    /// Writes the static description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = self.as_str();
        f.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for SymbolOctetsError {}

//------------ BadSymbol -----------------------------------------------------

/// A symbol with an unexpected value was encountered.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BadSymbol(BadSymbolEnum);

/// The reasons for a [`BadSymbol`] error.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum BadSymbolEnum {
    /// A non-ASCII character was encountered.
    NonAscii,

    /// A non-UTF8 character was encountered.
    NonUtf8,

    /// A non-digit character was encountered.
    NonDigit,

    /// An unexpected escape sequence was encountered.
    Escape,
}

impl BadSymbol {
    /// Creates a “non-ASCII” variant of the error.
    pub(crate) fn non_ascii() -> Self {
        BadSymbol(BadSymbolEnum::NonAscii)
    }

    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        let BadSymbol(kind) = self;
        match kind {
            BadSymbolEnum::NonAscii => "non-ASCII symbol",
            BadSymbolEnum::NonUtf8 => "invalid UTF-8 sequence",
            BadSymbolEnum::NonDigit => "expected digit",
            BadSymbolEnum::Escape => "unexpected escape sequence",
        }
    }
}

//--- Display and Error

impl fmt::Display for BadSymbol {
    /// Writes the static description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = self.as_str();
        f.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for BadSymbol {}

#[cfg(feature = "std")]
impl From<BadSymbol> for std::io::Error {
    /// Wraps the error into an IO error of kind `Other`.
    fn from(err: BadSymbol) -> Self {
        Self::new(std::io::ErrorKind::Other, err)
    }
}

//------------ StrError ------------------------------------------------------

/// A simple scanner error that just wraps a static str.
///
/// The wrapped string is the message shown when displaying the error.
#[derive(Debug)]
pub struct StrError(&'static str);

impl ScannerError for StrError {
    fn custom(msg: &'static str) -> Self {
        Self(msg)
    }

    fn end_of_entry() -> Self {
        Self("unexpected end of entry")
    }

    fn short_buf() -> Self {
        Self("short buffer")
    }

    fn trailing_tokens() -> Self {
        Self("trailing data")
    }
}

impl From<ShortBuf> for StrError {
    /// Converts a short buffer error into its scanner error counterpart.
    fn from(_: ShortBuf) -> Self {
        <Self as ScannerError>::short_buf()
    }
}

impl fmt::Display for StrError {
    /// Writes the wrapped message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(msg) = self;
        f.write_str(msg)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for StrError {}

//============ Testing =======================================================

#[cfg(test)]
#[cfg(feature = "std")]
mod test {
    use super::*;

    /// Checks `Symbol::from_slice_index` for plain chars and both kinds
    /// of escape sequences.
    #[test]
    fn symbol_from_slice_index() {
        // Every char other than the backslash is expected to be returned
        // as a plain symbol spanning its UTF-8 encoding.
        //
        // NOTE(review): the half-open range leaves out `char::MAX` itself.
        let mut scratch = [0u8; 4];
        for ch in ('\0'..char::MAX).filter(|&ch| ch != '\\') {
            let encoded = ch.encode_utf8(&mut scratch).as_bytes();
            assert_eq!(
                Symbol::from_slice_index(encoded, 0),
                Ok(Some((Symbol::Char(ch), ch.len_utf8()))),
                "char '{}'",
                ch,
            );
        }

        // A backslash followed by a non-digit ASCII char from `'0'` up to
        // but excluding DEL is expected to be a two octet simple escape.
        for ch in ('0'..'\x7f').filter(|ch| !ch.is_ascii_digit()) {
            let input = format!("\\{}", ch);
            assert_eq!(
                Symbol::from_slice_index(input.as_bytes(), 0),
                Ok(Some((Symbol::SimpleEscape(ch as u8), 2))),
                "sequence \"\\{}\"",
                ch
            );
        }

        // A backslash followed by three decimal digits is expected to be a
        // four octet decimal escape for every possible octet value.
        for octet in 0u8..=255 {
            let input = format!("\\{:03}", octet);
            assert_eq!(
                Symbol::from_slice_index(input.as_bytes(), 0),
                Ok(Some((Symbol::DecimalEscape(octet), 4))),
                "sequence \"\\{:03}\"",
                octet
            );
        }
    }
}