// Copyright 2015 Nicholas Allegra (comex). // Licensed under the Apache License, Version 2.0 or // the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. //! Parse strings like, and escape strings for, POSIX shells. //! //! Same idea as (but implementation not directly based on) the Python shlex module. //! //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. //! //! ## Warning //! //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they //! cannot be quoted portably). //! //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or //! even scripts `source`d from interactive shells). //! //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce //! ugly outputs (which may not be copy-pastable). //! //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin //! of an interactive shell, you should stop, because control characters can lead to arbitrary //! command injection. //! //! For more information, and for information about more minor issues, please see [quoting_warning]. //! //! ## Compatibility //! //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**; //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not //! POSIX-compatible but close enough). //! //! It also aims to be compatible with Python `shlex` and C `wordexp`. #![cfg_attr(not(feature = "std"), no_std)] extern crate alloc; use alloc::vec::Vec; use alloc::borrow::Cow; use alloc::string::String; #[cfg(test)] use alloc::vec; #[cfg(test)] use alloc::borrow::ToOwned; pub mod bytes; #[cfg(all(doc, not(doctest)))] #[path = "quoting_warning.md"] pub mod quoting_warning; /// An iterator that takes an input string and splits it into the words using the same syntax as /// the POSIX shell. /// /// See [`bytes::Shlex`]. pub struct Shlex<'a>(bytes::Shlex<'a>); impl<'a> Shlex<'a> { pub fn new(in_str: &'a str) -> Self { Self(bytes::Shlex::new(in_str.as_bytes())) } } impl<'a> Iterator for Shlex<'a> { type Item = String; fn next(&mut self) -> Option { self.0.next().map(|byte_word| { // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8. unsafe { String::from_utf8_unchecked(byte_word) } }) } } impl<'a> core::ops::Deref for Shlex<'a> { type Target = bytes::Shlex<'a>; fn deref(&self) -> &Self::Target { &self.0 } } impl<'a> core::ops::DerefMut for Shlex<'a> { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } /// Convenience function that consumes the whole string at once. Returns None if the input was /// erroneous. pub fn split(in_str: &str) -> Option> { let mut shl = Shlex::new(in_str); let res = shl.by_ref().collect(); if shl.had_error { None } else { Some(res) } } /// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts). /// /// By default, the only error that can be returned is [`QuoteError::Nul`]. If you call /// `allow_nul(true)`, then no errors can be returned at all. Any error variants added in the /// future will not be enabled by default; they will be enabled through corresponding non-default /// [`Quoter`] options. /// /// ...In theory. In the unlikely event that additional classes of inputs are discovered that, /// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk /// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by /// default. #[non_exhaustive] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum QuoteError { /// The input contained a nul byte. In most cases, shells fundamentally [cannot handle strings /// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted. But if /// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let /// them pass through. Nul, } impl core::fmt::Display for QuoteError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"), } } } #[cfg(feature = "std")] impl std::error::Error for QuoteError {} /// A more configurable interface to quote strings. If you only want the default settings you can /// use the convenience functions [`try_quote`] and [`try_join`]. /// /// The bytes equivalent is [`bytes::Quoter`]. #[derive(Default, Debug, Clone)] pub struct Quoter { inner: bytes::Quoter, } impl Quoter { /// Create a new [`Quoter`] with default settings. #[inline] pub fn new() -> Self { Self::default() } /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not /// allowed and will result in an error of [`QuoteError::Nul`]. #[inline] pub fn allow_nul(mut self, allow: bool) -> Self { self.inner = self.inner.allow_nul(allow); self } /// Convenience function that consumes an iterable of words and turns it into a single string, /// quoting words when necessary. Consecutive words will be separated by a single space. pub fn join<'a, I: IntoIterator>(&self, words: I) -> Result { // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8. self.inner.join(words.into_iter().map(|s| s.as_bytes())) .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) }) } /// Given a single word, return a string suitable to encode it as a shell argument. pub fn quote<'a>(&self, in_str: &'a str) -> Result, QuoteError> { Ok(match self.inner.quote(in_str.as_bytes())? { Cow::Borrowed(out) => { // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. unsafe { core::str::from_utf8_unchecked(out) }.into() } Cow::Owned(out) => { // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. unsafe { String::from_utf8_unchecked(out) }.into() } }) } } impl From for Quoter { fn from(inner: bytes::Quoter) -> Quoter { Quoter { inner } } } impl From for bytes::Quoter { fn from(quoter: Quoter) -> bytes::Quoter { quoter.inner } } /// Convenience function that consumes an iterable of words and turns it into a single string, /// quoting words when necessary. Consecutive words will be separated by a single space. /// /// Uses default settings except that nul bytes are passed through, which [may be /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. /// /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). /// /// (That configuration never returns `Err`, so this function does not panic.) /// /// The bytes equivalent is [bytes::join]. #[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")] pub fn join<'a, I: IntoIterator>(words: I) -> String { Quoter::new().allow_nul(true).join(words).unwrap() } /// Convenience function that consumes an iterable of words and turns it into a single string, /// quoting words when necessary. Consecutive words will be separated by a single space. /// /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. /// /// Equivalent to [`Quoter::new().join(words)`](Quoter). /// /// The bytes equivalent is [bytes::try_join]. pub fn try_join<'a, I: IntoIterator>(words: I) -> Result { Quoter::new().join(words) } /// Given a single word, return a string suitable to encode it as a shell argument. /// /// Uses default settings except that nul bytes are passed through, which [may be /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. /// /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter). /// /// (That configuration never returns `Err`, so this function does not panic.) /// /// The bytes equivalent is [bytes::quote]. #[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")] pub fn quote(in_str: &str) -> Cow { Quoter::new().allow_nul(true).quote(in_str).unwrap() } /// Given a single word, return a string suitable to encode it as a shell argument. /// /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. /// /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter). /// /// (That configuration never returns `Err`, so this function does not panic.) /// /// The bytes equivalent is [bytes::try_quote]. pub fn try_quote(in_str: &str) -> Result, QuoteError> { Quoter::new().quote(in_str) } #[cfg(test)] static SPLIT_TEST_ITEMS: &'static [(&'static str, Option<&'static [&'static str]>)] = &[ ("foo$baz", Some(&["foo$baz"])), ("foo baz", Some(&["foo", "baz"])), ("foo\"bar\"baz", Some(&["foobarbaz"])), ("foo \"bar\"baz", Some(&["foo", "barbaz"])), (" foo \nbar", Some(&["foo", "bar"])), ("foo\\\nbar", Some(&["foobar"])), ("\"foo\\\nbar\"", Some(&["foobar"])), ("'baz\\$b'", Some(&["baz\\$b"])), ("'baz\\\''", None), ("\\", None), ("\"\\", None), ("'\\", None), ("\"", None), ("'", None), ("foo #bar\nbaz", Some(&["foo", "baz"])), ("foo #bar", Some(&["foo"])), ("foo#bar", Some(&["foo#bar"])), ("foo\"#bar", None), ("'\\n'", Some(&["\\n"])), ("'\\\\n'", Some(&["\\\\n"])), ]; #[test] fn test_split() { for &(input, output) in SPLIT_TEST_ITEMS { assert_eq!(split(input), output.map(|o| o.iter().map(|&x| x.to_owned()).collect())); } } #[test] fn test_lineno() { let mut sh = Shlex::new("\nfoo\nbar"); while let Some(word) = sh.next() { if word == "bar" { assert_eq!(sh.line_no, 3); } } } #[test] #[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))] fn test_quote() { // This is a list of (unquoted, quoted) pairs. // But it's using a single long (raw) string literal with an ad-hoc format, just because it's // hard to read if we have to put the test strings through Rust escaping on top of the escaping // being tested. (Even raw string literals are noisy for short strings). // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences. let tests = r#" <> => <''> => => <'foo bar'> <"foo bar'"> => <"\"foo bar'\""> <'foo bar'> => <"'foo bar'"> <"> => <'"'> <"'> => <"\"'"> => <'hello!world'> <'hello!world> => <"'hello"'!world'> <'hello!> => <"'hello"'!'> => <'hello ''^ world'> => => <'!world'"'"> <{a, b}> => <'{a, b}'> => <'NL'> <^> => <'^'> => => <'NLx''^'> => <'NL''^x'> => <'NL ''^x'> <{a,b}> => <'{a,b}'> => <'a,b'> <'$> => <"'"'$'> <"^> => <'"''^'> "#; let mut ok = true; for test in tests.trim().split('\n') { let parts: Vec = test .replace("NL", "\n") .split("=>") .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned()) .collect(); assert!(parts.len() == 2); let unquoted = &*parts[0]; let quoted_expected = &*parts[1]; let quoted_actual = try_quote(&parts[0]).unwrap(); if quoted_expected != quoted_actual { #[cfg(not(feature = "std"))] panic!("FAIL: for input <{}>, expected <{}>, got <{}>", unquoted, quoted_expected, quoted_actual); #[cfg(feature = "std")] println!("FAIL: for input <{}>, expected <{}>, got <{}>", unquoted, quoted_expected, quoted_actual); ok = false; } } assert!(ok); } #[test] #[allow(deprecated)] fn test_join() { assert_eq!(join(vec![]), ""); assert_eq!(join(vec![""]), "''"); assert_eq!(join(vec!["a", "b"]), "a b"); assert_eq!(join(vec!["foo bar", "baz"]), "'foo bar' baz"); } #[test] fn test_fallible() { assert_eq!(try_join(vec!["\0"]), Err(QuoteError::Nul)); assert_eq!(try_quote("\0"), Err(QuoteError::Nul)); }