Correctly parse and unescape quotes etc. inside EDN strings. (#434) (#589)

This commit is contained in:
Richard Newman 2018-03-15 07:13:27 -07:00 committed by GitHub
parent ea52e214af
commit df58de52f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 7 deletions

View file

@ -69,14 +69,20 @@ pub float -> SpannedValue = f:$(sign? digit+ ("." digit+)? ([eE] sign? digit+)?)
// Any numeric literal. `/` is ordered choice, so the alternatives are tried left to right and the
// first one that matches is used.
number -> SpannedValue = ( bigint / basedinteger / hexinteger / octalinteger / integer / float )
// TODO: \newline, \return, \space and \tab
special_char = quote / tab
quote = "\\\""
tab = "\\tab"
char = [^"] / special_char
// TODO: standalone characters: \<char>, \newline, \return, \space and \tab.
pub text -> SpannedValue = "\"" t:$( char* ) "\""
{ SpannedValue::Text(t.to_string()) }
// One escape sequence inside a string literal: a backslash followed by one of `\`, `"`, `n`, `t`, `r`.
// Yields the text the escape stands for: `\\` and `\"` give the escaped character itself, while
// `\n`, `\t` and `\r` give newline, tab and carriage return (the escapes EDN strings support).
string_special_char -> &'input str = "\\" c:$([\\"ntr])
{
    match c {
        "n" => "\n",
        "t" => "\t",
        "r" => "\r",
        // `\` and `"` stand for themselves.
        other => other,
    }
}
// A run of one or more characters that need no unescaping: anything except `"` and `\`.
string_normal_chars -> &'input str = $([^"\\]+)
// Unescaping means the string's value is not simply a slice of the input, so the body is matched
// piecewise: rust-peg hands us a Vec<&str> in which each element is either the result of one
// escape sequence (string_special_char) or a run of ordinary characters (string_normal_chars),
// and we concatenate the pieces to form the output string.
// E.g., input  = r#""\"foo\\\\bar\"""#
//       pieces = [quote, "foo", backslash, backslash, "bar", quote]
//       result = r#""foo\\bar""#
// For a typical non-empty string without escapes, string_normal_chars consumes the whole body in
// one go, giving a single-element vec.
pub text -> SpannedValue = "\"" t:((string_special_char / string_normal_chars)*) "\""
{ SpannedValue::Text(t.concat()) }
// RFC 3339 timestamps. #inst "1985-04-12T23:20:50.52Z"
// We accept an arbitrary depth of decimals.

View file

@ -327,6 +327,14 @@ fn test_text() {
assert!(text("\"").is_err());
assert!(text("nil").is_err());
let raw_edn = r#""This string contains a \" and a \\""#;
let raw_string = r#"This string contains a " and a \"#;
assert_eq!(parse::value(raw_edn).unwrap(),
ValueAndSpan {
inner: SpannedValue::Text(raw_string.to_string()),
span: Span(0, raw_edn.len() as u32)
});
}
#[test]