From df58de52f46534ecc301acf08549c99bb6237e41 Mon Sep 17 00:00:00 2001
From: Richard Newman
Date: Thu, 15 Mar 2018 07:13:27 -0700
Subject: [PATCH] Correctly parse and unescape quotes etc. inside EDN strings. (#434) (#589)

---
Review note: the edn.rustpeg hunk below was amended during review (\n, \t and \r now
unescape to the characters they denote; the parts are concatenated without an extra
copy). The blob ids on the index line were not regenerated.

 edn/src/edn.rustpeg | 23 ++++++++++++++++-------
 edn/tests/tests.rs  |  8 ++++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/edn/src/edn.rustpeg b/edn/src/edn.rustpeg
index 3e0e8674..729d8176 100644
--- a/edn/src/edn.rustpeg
+++ b/edn/src/edn.rustpeg
@@ -69,14 +69,23 @@ pub float -> SpannedValue = f:$(sign? digit+ ("." digit+)? ([eE] sign? digit+)?)
 
 number -> SpannedValue = ( bigint / basedinteger / hexinteger / octalinteger / integer / float )
 
-// TODO: \newline, \return, \space and \tab
-special_char = quote / tab
-quote = "\\\""
-tab = "\\tab"
-char = [^"] / special_char
+// TODO: standalone characters: \, \newline, \return, \space and \tab.
 
-pub text -> SpannedValue = "\"" t:$( char* ) "\""
-    { SpannedValue::Text(t.to_string()) }
+// A backslash escape inside a string, yielding the text it stands for rather than the letter
+// after the backslash: `\n`, `\t` and `\r` become newline, tab and carriage return.
+string_special_char -> &'input str = "\\" c:$([\\"ntr])
+    { match c { "n" => "\n", "t" => "\t", "r" => "\r", _ => c } }
+string_normal_chars -> &'input str = $([^"\\]+)
+
+// This is what we need to do in order to unescape. We can't just match the entire string slice:
+// we get a Vec<&str> from rust-peg, where some of the parts might be unescaped special characters,
+// and we join it together to form an output string.
+// E.g., input  = r#"\"foo\\\\bar\""#
+//       output = [quote, "foo", backslash, backslash, "bar", quote]
+//       result = r#""foo\\bar""#
+// For the typical case, string_normal_chars will match multiple, leading to a single-element vec.
+pub text -> SpannedValue = "\"" t:((string_special_char / string_normal_chars)*) "\""
+    { SpannedValue::Text(t.concat()) }
 
 // RFC 3339 timestamps. #inst "1985-04-12T23:20:50.52Z"
 // We accept an arbitrary depth of decimals.
diff --git a/edn/tests/tests.rs b/edn/tests/tests.rs
index b2e37597..48692467 100644
--- a/edn/tests/tests.rs
+++ b/edn/tests/tests.rs
@@ -327,6 +327,14 @@ fn test_text() {
 
     assert!(text("\"").is_err());
     assert!(text("nil").is_err());
+
+    // Escaped quotes and backslashes come back unescaped; the span covers the raw input.
+    let escaped_input = r#""This string contains a \" and a \\""#;
+    let expected_text = r#"This string contains a " and a \"#;
+    assert_eq!(parse::value(escaped_input).unwrap(), ValueAndSpan {
+        inner: SpannedValue::Text(expected_text.to_string()),
+        span: Span(0, escaped_input.len() as u32),
+    });
 }
 
 #[test]