// Copyright 2018 Tomasz Miąsko
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
// or the MIT license <LICENSE-MIT>, at your option.
//
//! Process command line according to parsing rules of Unix shell as specified
//! in [Shell Command Language in POSIX.1-2008][posix-shell].
//!
//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html

#![cfg_attr(not(feature = "std"), no_std)]
#![forbid(unsafe_code)]

#[cfg(feature = "std")]
extern crate core;

use core::fmt;
use core::mem;

#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc;

#[cfg(not(feature = "std"))]
use alloc::string::String;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

#[cfg(not(feature = "std"))]
use alloc::borrow::Cow;
#[cfg(feature = "std")]
use std::borrow::Cow;

/// Error reported when a command line cannot be tokenized.
///
/// The parser in this file produces it only when the input ends while a
/// single- or double-quoted section is still open.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("missing closing quote")
    }
}

// `std::error::Error` only exists when the standard library is available.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}

/// States of the tokenizer used by `split`.
enum State {
    /// Between words: skipping delimiters, no word has been started.
    Delimiter,
    /// Skipping the contents of a comment.
    Comment,
    /// A backslash was seen while between words; the word has not started yet.
    Backslash,
    /// Collecting an unquoted part of a word.
    Unquoted,
    /// A backslash was seen in an unquoted part of a word.
    UnquotedBackslash,
    /// Collecting characters between single quotes.
    SingleQuoted,
    /// Collecting characters between double quotes.
    DoubleQuoted,
    /// A backslash was seen between double quotes.
    DoubleQuotedBackslash,
}

/// Splits command line into separate arguments, in much the same way Unix shell
/// would, but without many of expansion the shell would perform.
///
/// The split functionality is compatible with behaviour of Unix shell, but with
/// word expansions limited to quote removal, and without special token
/// recognition rules for operators.
///
/// The result is exactly the same as one obtained from Unix shell as long as
/// those unsupported features are not present in input: no operators, no
/// variable assignments, no tilde expansion, no parameter expansion, no command
/// substitution, no arithmetic expansion, no pathname expansion.
///
/// In case those unsupported shell features are present, the syntax that
/// introduce them is interpreted literally.
///
/// # Errors
///
/// When input contains unmatched quote, an error is returned.
///
/// # Compatibility with other implementations
///
/// It should be fully compatible with g_shell_parse_argv from GLib, except that
/// in GLib it is an error not to have any words after tokenization.
///
/// It is also very close to shlex.split available in Python standard library,
/// when used in POSIX mode with support for comments. Though, shlex
/// implementation diverges from POSIX, and from implementation contained herein
/// in three aspects. First, it doesn't support line continuations.
/// Second, inside double quotes, the backslash characters retains its special
/// meaning as an escape character only when followed by \\ or \", whereas POSIX
/// specifies that it should retain its special meaning when followed by: $, \`,
/// \", \\, or a newline. Third, it treats carriage return as one of delimiters.
///
/// # Examples
///
/// Building an executable using compiler obtained from CC environment variable
/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
/// rule for C used in GNU Make:
///
/// ```rust,no_run
/// use std::env::var;
/// use std::process::Command;
///
/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
///
/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
///
/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
///
/// Command::new(cc)
///     .args(cflags)
///     .args(cppflags)
///     .args(&["-c", "a.c", "-o", "a.out"])
///     .spawn()
///     .expect("failed to start subprocess")
///     .wait()
///     .expect("failed to wait for subprocess");
/// ```
pub fn split(s: &str) -> Result<Vec<String>, ParseError> {
    use State::*;

    let mut words = Vec::new();
    let mut word = String::new();
    let mut chars = s.chars();
    let mut state = Delimiter;

    loop {
        let c = chars.next();
        state = match state {
            Delimiter => match c {
                None => break,
                Some('\'') => SingleQuoted,
                Some('\"') => DoubleQuoted,
                Some('\\') => Backslash,
                Some('\t') | Some(' ') | Some('\n') => Delimiter,
                Some('#') => Comment,
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            Backslash => match c {
                None => {
                    word.push('\\');
                    words.push(mem::replace(&mut word, String::new()));
                    break;
                }
                Some('\n') => Delimiter,
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            Unquoted => match c {
                None => {
                    words.push(mem::replace(&mut word, String::new()));
                    break;
                }
                Some('\'') => SingleQuoted,
                Some('\"') => DoubleQuoted,
                Some('\\') => UnquotedBackslash,
                Some('\t') | Some(' ') | Some('\n') => {
                    words.push(mem::replace(&mut word, String::new()));
                    Delimiter
                }
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            UnquotedBackslash => match c {
                None => {
                    word.push('\\');
                    words.push(mem::replace(&mut word, String::new()));
                    break;
                }
                Some('\n') => Unquoted,
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            SingleQuoted => match c {
                None => return Err(ParseError),
                Some('\'') => Unquoted,
                Some(c) => {
                    word.push(c);
                    SingleQuoted
                }
            },
            DoubleQuoted => match c {
                None => return Err(ParseError),
                Some('\"') => Unquoted,
                Some('\\') => DoubleQuotedBackslash,
                Some(c) => {
                    word.push(c);
                    DoubleQuoted
                }
            },
            DoubleQuotedBackslash => match c {
                None => return Err(ParseError),
                Some('\n') => DoubleQuoted,
                Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
                    word.push(c);
                    DoubleQuoted
                }
                Some(c) => {
                    word.push('\\');
                    word.push(c);
                    DoubleQuoted
                }
            },
            Comment => match c {
                None => break,
                Some('\n') => Delimiter,
                Some(_) => Comment,
            },
        }
    }

    Ok(words)
}

/// Quoting strategy chosen by `escape_style` for a word.
enum EscapeStyle {
    /// No escaping: the word can be used as is.
    None,
    /// Wrap the whole word in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash: needed when the word itself
    /// contains a single quote.
    Mixed,
}

/// Determines escaping style to use.
///
/// Returns `EscapeStyle::SingleQuoted` for an empty string,
/// `EscapeStyle::Mixed` when the string contains a single quote,
/// `EscapeStyle::SingleQuoted` when it contains any other character that is
/// (or may be) special to the shell, and `EscapeStyle::None` otherwise.
fn escape_style(s: &str) -> EscapeStyle {
    // An empty word has to be written as '' to remain an argument at all.
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;

    for c in s.chars() {
        match c {
            // A single quote cannot be placed between single quotes, so the
            // mixed style is required regardless of what else is present.
            '\'' => return EscapeStyle::Mixed,
            // Characters that POSIX requires to be quoted to keep their
            // literal meaning, plus those that may need quoting depending on
            // context. Note that the tilde is the ASCII `~` (U+007E), which
            // would otherwise be subject to tilde expansion.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t'
            | '\n' | '*' | '?' | '[' | '#' | '~' | '=' | '%' => {
                special = true;
            }
            _ => {}
        }
    }

    if special {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::None
    }
}

/// Escapes special characters in a string, so that it will retain its literal
/// meaning when used as a part of command in Unix shell.
///
/// It tries to avoid introducing any unnecessary quotes or escape characters,
/// but specifics regarding quoting style are left unspecified.
///
/// The input is returned borrowed when no quoting is necessary; otherwise a
/// newly allocated, quoted string is returned.
pub fn quote(s: &str) -> Cow<str> {
    // We are going somewhat out of the way to provide
    // minimal amount of quoting in typical cases.
    match escape_style(s) {
        EscapeStyle::None => Cow::Borrowed(s),
        EscapeStyle::SingleQuoted => Cow::Owned(format!("'{}'", s)),
        EscapeStyle::Mixed => {
            // At least the input plus the two enclosing quotes.
            let mut quoted = String::with_capacity(s.len() + 2);
            quoted.push('\'');
            for c in s.chars() {
                if c == '\'' {
                    // Close the quoted section, emit an escaped quote, and
                    // reopen the quoted section.
                    quoted.push_str("'\\''");
                } else {
                    quoted.push(c);
                }
            }
            quoted.push('\'');
            Cow::Owned(quoted)
        }
    }
}

/// Builds a single command line, suitable for execution in a Unix shell, out
/// of a sequence of arguments.
///
/// Every argument is passed through [`quote`] so that it keeps its literal
/// meaning when parsed by a Unix shell, and the quoted arguments are separated
/// by a single space. An empty sequence yields an empty string.
///
/// Note: This function is essentially an inverse of [`split`].
///
/// # Examples
///
/// Logging executed commands in format that can be easily copied and pasted
/// into an actual shell:
///
/// ```rust,no_run
/// fn execute(args: &[&str]) {
///     use std::process::Command;
///     println!("Executing: {}", shell_words::join(args));
///     Command::new(&args[0])
///         .args(&args[1..])
///         .spawn()
///         .expect("failed to start subprocess")
///         .wait()
///         .expect("failed to wait for subprocess");
/// }
///
/// execute(&["python", "-c", "print('Hello world!')"]);
/// ```
///
/// [`quote`]: fn.quote.html
/// [`split`]: fn.split.html
pub fn join<I, S>(words: I) -> String
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut command = String::new();
    for (position, argument) in words.into_iter().enumerate() {
        // Separator goes before every argument except the first one.
        if position != 0 {
            command.push(' ');
        }
        command.push_str(&quote(argument.as_ref()));
    }
    command
}

/// Unit tests for `split`, `quote` and `join`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair splits successfully into
    /// exactly the expected words, reporting the offending input otherwise.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n  actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n  actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            ("     a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a  ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        // Unterminated single quote.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quote.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash is literal inside single quotes, so the quote stays open.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ending right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n  b"), "'a \n  b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}