1
use crate::{
2
    de::{Position, Span},
3
    error::SpannedResult,
4
};
5
use alloc::string::String;
6

            
7
impl Position {
8
    /// Given a Position and a string, return the 0-indexed grapheme index into the
9
    /// string at that position, or [None] if the Position is out of bounds of the string.
10
    #[must_use]
11
2715
    pub fn grapheme_index(&self, s: &str) -> Option<usize> {
12
        use unicode_segmentation::UnicodeSegmentation;
13
2715
        let mut line_no = 1;
14
2715
        let mut col_no = 1;
15
2715

            
16
2715
        if (self.line, self.col) == (1, 1) {
17
222
            return Some(0);
18
2493
        }
19
2493

            
20
2493
        let mut i = 0;
21
2493

            
22
2493
        // Slightly non-intuitive arithmetic: a zero-length string at line 1, col 1 -> 0
23
2493

            
24
2493
        if (line_no, col_no) == (self.line, self.col) {
25
            return Some(i);
26
2493
        }
27

            
28
91184
        for ch in s.graphemes(true) {
29
91184
            if (line_no, col_no) == (self.line, self.col) {
30
2204
                return Some(i);
31
88980
            }
32

            
33
            // "\n" and "\r\n" each come through the iterator as a single grapheme
34
88980
            if matches!(ch, "\n" | "\r\n") {
35
239
                line_no += 1;
36
239
                col_no = 1;
37
88741
            } else {
38
88741
                col_no += 1;
39
88741
            }
40

            
41
88980
            i += 1;
42
        }
43

            
44
        // ...and a string of length 7 at line 1, col 8 -> 7
45
289
        if (line_no, col_no) == (self.line, self.col) {
46
285
            return Some(i);
47
4
        }
48
4

            
49
4
        None
50
2715
    }
51
}
52

            
53
impl Span {
54
    /// Given a `Span` and a string, form the resulting string selected exclusively (as in `[start..end`]) by the `Span`
55
    /// or [`None`] if the span is out of bounds of the string at either end.
56
    #[must_use]
57
297
    pub fn substring_exclusive(&self, s: &str) -> Option<String> {
58
        use alloc::vec::Vec;
59
        use unicode_segmentation::UnicodeSegmentation;
60

            
61
297
        if let (Some(start), Some(end)) = (self.start.grapheme_index(s), self.end.grapheme_index(s))
62
        {
63
296
            Some(s.graphemes(true).collect::<Vec<&str>>()[start..end].concat())
64
        } else {
65
1
            None
66
        }
67
297
    }
68

            
69
    /// Given a `Span` and a string, form the resulting string selected inclusively (as in `[start..=end]`) by the `Span`
70
    /// or [`None`] if the span is out of bounds of the string at either end.
71
    #[must_use]
72
1053
    pub fn substring_inclusive(&self, s: &str) -> Option<String> {
73
        use alloc::vec::Vec;
74
        use unicode_segmentation::UnicodeSegmentation;
75

            
76
1053
        if let (Some(start), Some(end)) = (self.start.grapheme_index(s), self.end.grapheme_index(s))
77
        {
78
1053
            Some(s.graphemes(true).collect::<Vec<&str>>()[start..=end].concat())
79
        } else {
80
            None
81
        }
82
1053
    }
83
}
84

            
85
/// Given a string `ron`, a [`SpannedResult`], and a substring, verify that trying to parse `ron` results in an error
86
/// equal to the [`SpannedResult`] with a Span that exclusively (as in `[start..end]`) selects that substring.
87
/// Note that there are two versions of this helper, inclusive and exclusive. This is because while the parser cursor
88
/// arithmetic that computes span positions always produces exclusive spans (as in `[start..end]`),
89
/// when doing validation against a target substring, the inclusive check including the final grapheme that triggered
90
/// the error is often a more intuitive target to check against.
91
/// Meanwhile, if the parser threw an EOF, for example, there is no final grapheme to check, and so
92
/// only the exclusive check would produce a meaningful result.
93
#[allow(clippy::unwrap_used)]
94
#[allow(clippy::missing_panics_doc)]
95
10
pub fn check_error_span_exclusive<T: serde::de::DeserializeOwned + PartialEq + core::fmt::Debug>(
96
10
    ron: &str,
97
10
    check: SpannedResult<T>,
98
10
    substr: &str,
99
10
) {
100
10
    let res_str = crate::de::from_str::<T>(ron);
101
10
    assert_eq!(res_str, check);
102

            
103
10
    let res_bytes = crate::de::from_bytes::<T>(ron.as_bytes());
104
10
    assert_eq!(res_bytes, check);
105

            
106
    #[cfg(feature = "std")]
107
    {
108
10
        let res_reader = crate::de::from_reader::<&[u8], T>(ron.as_bytes());
109
10
        assert_eq!(res_reader, check);
110
    }
111

            
112
10
    assert_eq!(
113
10
        check.unwrap_err().span.substring_exclusive(ron).unwrap(),
114
10
        substr
115
10
    );
116
10
}
117

            
118
/// Given a string `ron`, a [`SpannedResult`], and a substring, verify that trying to parse `ron` results in an error
119
/// equal to the [`SpannedResult`] with a Span that inclusively (as in `[start..=end`]) selects that substring.
120
/// See [`check_error_span_exclusive`] for the rationale behind both versions of this helper.
121
#[allow(clippy::unwrap_used)]
122
#[allow(clippy::missing_panics_doc)]
123
18
pub fn check_error_span_inclusive<T: serde::de::DeserializeOwned + PartialEq + core::fmt::Debug>(
124
18
    ron: &str,
125
18
    check: SpannedResult<T>,
126
18
    substr: &str,
127
18
) {
128
18
    let res_str = crate::de::from_str::<T>(ron);
129
18
    assert_eq!(res_str, check);
130

            
131
18
    let res_bytes = crate::de::from_bytes::<T>(ron.as_bytes());
132
18
    assert_eq!(res_bytes, check);
133

            
134
    #[cfg(feature = "std")]
135
    {
136
18
        let res_reader = crate::de::from_reader::<&[u8], T>(ron.as_bytes());
137
18
        assert_eq!(res_reader, check);
138
    }
139

            
140
18
    assert_eq!(
141
18
        check.unwrap_err().span.substring_inclusive(ron).unwrap(),
142
18
        substr
143
18
    );
144
18
}
145

            
146
#[cfg(test)]
mod tests {
    use crate::de::{Position, Span};

    /// Shorthand for the 1-based position `line:col`.
    fn at(line: usize, col: usize) -> Position {
        Position { line, col }
    }

    /// Shorthand for the span running from `start` to `end`, each given as `(line, col)`.
    fn between(start: (usize, usize), end: (usize, usize)) -> Span {
        Span {
            start: at(start.0, start.1),
            end: at(end.0, end.1),
        }
    }

    #[test]
    fn ascii_basics() {
        let text = "hello\nworld";

        // first char / first col, last char on first line ('o'), start of second line ('w')
        let cases: [((usize, usize), Option<usize>); 3] =
            [((1, 1), Some(0)), ((1, 5), Some(4)), ((2, 1), Some(6))];
        for &((line, col), expected) in &cases {
            assert_eq!(at(line, col).grapheme_index(text), expected);
        }

        // span across the `\n`
        let across_newline = between((1, 4), (2, 2));
        assert_eq!(across_newline.substring_exclusive(text).unwrap(), "lo\nw");
    }

    #[test]
    fn multibyte_greek() {
        let text = "αβγ\ndeux\n三四五\r\nend";

        // Beta, then 三, then the 'e' of "end"
        let cases: [((usize, usize), Option<usize>); 3] =
            [((1, 2), Some(1)), ((3, 1), Some(9)), ((4, 1), Some(13))];
        for &((line, col), expected) in &cases {
            assert_eq!(at(line, col).grapheme_index(text), expected);
        }

        // span from α to start of “deux”
        let first_line = between((1, 1), (2, 1));
        assert_eq!(first_line.substring_exclusive(text).unwrap(), "αβγ\n");
    }

    #[test]
    fn combining_mark_cluster() {
        // é  ==  [0x65, 0xCC, 0x81] in UTF-8
        let text = "e\u{0301}x\n";

        // grapheme #1 (“é”), grapheme #2 (“x”), and column 4 which is past EOL
        let cases: [((usize, usize), Option<usize>); 3] =
            [((1, 1), Some(0)), ((1, 2), Some(1)), ((1, 4), None)];
        for &((line, col), expected) in &cases {
            assert_eq!(at(line, col).grapheme_index(text), expected);
        }

        // span covering the whole “é” cluster
        let cluster = between((1, 1), (1, 2));
        assert_eq!(cluster.substring_exclusive(text).unwrap(), "e\u{0301}");
    }

    #[test]
    fn zwj_emoji_cluster() {
        let text = "👩‍👩‍👧‍👧 and 👨‍👩‍👦";

        // The family emoji is the first grapheme on the line, so whatever follows it
        // sits at index 1.
        let cases: [((usize, usize), Option<usize>); 2] = [((1, 1), Some(0)), ((1, 2), Some(1))];
        for &((line, col), expected) in &cases {
            assert_eq!(at(line, col).grapheme_index(text), expected);
        }

        // Span selecting only the first emoji
        let first_emoji = between((1, 1), (1, 2));
        assert_eq!(first_emoji.substring_exclusive(text).unwrap(), "👩‍👩‍👧‍👧");

        // Span selecting only the second emoji
        let second_emoji = between((1, 7), (1, 8));
        assert_eq!(second_emoji.substring_exclusive(text).unwrap(), "👨‍👩‍👦");
    }

    #[test]
    fn mixed_newlines() {
        let text = "one\r\ntwo\nthree\r\n";

        // start of “two” (line numbers are 1-based), then start of “three”
        let cases: [((usize, usize), Option<usize>); 2] = [((2, 1), Some(4)), ((3, 1), Some(8))];
        for &((line, col), expected) in &cases {
            assert_eq!(at(line, col).grapheme_index(text), expected);
        }

        // span “two\n”
        let second_line = between((2, 1), (3, 1));
        assert_eq!(second_line.substring_exclusive(text).unwrap(), "two\n");

        // span “two\nthree”
        let two_lines = between((2, 1), (3, 6));
        assert_eq!(two_lines.substring_exclusive(text).unwrap(), "two\nthree");
    }

    #[test]
    fn oob_and_error_paths() {
        let text = "short";

        // line past EOF, then column past EOL
        for &(line, col) in &[(2, 1), (1, 10)] {
            assert_eq!(at(line, col).grapheme_index(text), None);
        }

        // span with either endpoint oob → None
        let past_the_end = between((1, 1), (2, 1));
        assert_eq!(past_the_end.substring_exclusive(text), None);
    }

    #[test]
    fn whole_text_span() {
        let text = "αβγ\nδεζ";

        let everything = between((1, 1), (2, 4));
        assert_eq!(everything.substring_exclusive(text).unwrap(), text);
    }

    #[test]
    fn span_substring_helper() {
        let passage = "In the first place, there are two sorts of bets, or toh.11 There is the
single axial bet in the center between the principals (toh ketengah), and
there is the cloud of peripheral ones around the ring between members
of the audience (toh kesasi). ";

        // Each span runs from the start of one line to the start of the next, so it
        // selects that whole line including its trailing newline.
        let first_line = between((1, 1), (2, 1));
        assert_eq!(
            first_line.substring_exclusive(passage).unwrap(),
            "In the first place, there are two sorts of bets, or toh.11 There is the\n"
        );

        let second_line = between((2, 1), (3, 1));
        assert_eq!(
            second_line.substring_exclusive(passage).unwrap(),
            "single axial bet in the center between the principals (toh ketengah), and\n"
        );
    }
}