command_line.rb in erbtex-0.3.0

- old
+ new
@@ -2,139 +2,109 @@
 
 module ErbTeX
   class NoInputFile < StandardError; end
 
   class CommandLine
-    attr_reader :command_line, :marked_command_line, :input_file
-    attr_reader :progname, :input_path, :output_dir, :run_dir
+    attr_reader :erbtex_name, :tex_program, :tex_options
+    attr_reader :tex_commands, :input_file
 
-    def initialize(command_line)
-      @command_line = command_line
-      @input_file = @marked_command_line = nil
-      @run_dir = Dir.pwd
-      find_output_dir
-      find_progname
-      find_input_file
-      find_input_path
-      mark_command_line
-    end
+    def initialize(argv)
+      # Note: argv will be the command line arguments after processing by the
+      # shell, so if we see things such as '&', '~', '\' in the args, these were
+      # quoted by the user on the command line and need no special treatment
+      # here. For example, '~/junk' on the command line will show up here as
+      # '/home/ded/junk'. If we see '~/junk', that means the user has quoted the
+      # ~ on the command line with something like '\~/junk', so we should assume
+      # that the user wants to keep it that way. Likewise, an arg with spaces in
+      # it will have been quoted by the user to be seen as a single argument.
+      # When we output these for use by the shell in the system command, we
+      # should apply shellquote to everything so that the receiving shell sees
+      # the args in the same way.
 
-    def find_progname
-      @progname = @command_line.split(' ')[0]
-    end
+      @erbtex_name = File.basename($0)
 
-    def find_output_dir
-      args = @command_line.split(' ')
-      # There is an -output-comment option, so -output-d is the shortest
-      # unambiguous way to write the -output-directory option.  It can use
-      # one or two dashes at the beginning, and the argument can be
-      # separated from it with an '=' or white space.
-      have_out_dir = false
-      out_dir = nil
-      args.each do |a|
-        if have_out_dir
-          # Found -output-directory on last pass without an equals sign
-          out_dir = a
-        end
-        if a =~ /^--?output-d(irectory)?=(\S+)/
-          out_dir = $2
-        elsif a =~ /^--?output-d(irectory)?$/
-          # Next arg is the out_dir
-          have_out_dir = true
-        end
+      # Find the tex_commands
+      @tex_commands = []
+      if argv.any? { |a| a =~ /\A\\/ }
+        # The first arg starting with '\' and every arg after it should be
+        # interpreted as TeX commands, even if the later ones don't start with '\'
+        @tex_commands = argv.drop_while { |a| a !~ /\A\\/ }
+        first_tex_command_k = argv.size - @tex_commands.size
+        argv = argv[0..first_tex_command_k - 1]
       end
-      if out_dir.nil?
-        if File.writable?(Dir.pwd)
-          @output_dir = Dir.pwd
-        else
-          @output_dir = File.expand_path(ENV['TEXMFOUTPUT'])
-        end
-      else
-        @output_dir = File.expand_path(out_dir)
-      end
-    end
 
-    def find_input_file
-      # Remove the initial command from the command line
-      cmd = @command_line.split(/\s+/)[1..-1].join(' ')
-      cmd = cmd.gsub(/\s+--?[-a-zA-Z]+(=\S+)?/, ' ')
-      infile_re = %r{(\\input\s+)?(([-.~_/A-Za-z0-9]+)(\.[a-z]+)?)\s*$}
-      if cmd =~ infile_re
-        @input_file = "#{$2}"
-        if @input_file =~ /\.tex(\.erb)?$/
-          @input_file = @input_file
-        else
-          @input_file += ".tex"
-        end
-      elsif cmd =~ %r{(\\input\s+)?(["'])((?:\\?.)*?)\2} #"
-        # The re above captures single- or double-quoted strings with
-        # the insides in $3
-        @input_file = "#{$3}"
-        if @input_file !~ /\.tex$/
-          @input_file += ".tex#{$1}"
-        end
-      else
-        @input_file = nil
+      # Look for our --invoke=tex_command option
+      @tex_program = 'pdflatex'
+      if argv.any? { |a| a =~ /\A--invoke=(\w+)/ }
+        @tex_program = $1
+        argv.reject! { |a| a =~ /\A--invoke=(\w+)/ }
       end
-    end
 
-    def find_input_path
-      # If input_file is absolute, don't look further
-      if @input_file =~ /^\//
-        @input_path = @input_file
-      elsif @input_file.nil?
-        @input_path = nil
-      else
-        # The following cribbed from kpathsea.rb
-        @progname.untaint
-        @input_file.untaint
-        kpsewhich = "kpsewhich -progname=\"#{@progname}\" -format=\"tex\" \"#{@input_file}\""
-        lines = ""
-        IO.popen(kpsewhich) do |io|
-          lines = io.readlines
-        end
-        if $? == 0
-          @input_path = lines[0].chomp.untaint
-        else
-          raise NoInputFile, "Can't find #{@input_file} in TeX search path; try kpsewhich -format=tex #{@input_file}."
-        end
+      # The last argument, provided it does not start with a '-' or '&', is
+      # taken to be the name of the input_file.
+      if !argv.empty? && argv[-1] !~ /\A[-&]/
+        @input_file = CommandLine.expand_input_file(argv.pop)
       end
+
+      # What remains in argv should be the tex program's '-options', which
+      # should be passed through untouched. With these, we can form the full
+      # command line for TeX processing.
+      @tex_options = argv.dup
     end
 
-    def new_command_line(new_progname, new_infile)
-      ncl = @marked_command_line.sub('^p^', new_progname)
-      # Quote the new_infile in case it has spaces
-      if new_infile
-        ncl = ncl.sub('^f^', "'#{new_infile}'")
-      end
-      ncl
+    def tex_command(tex_file = input_file)
+      "#{tex_program} " \
+      "#{tex_options.shelljoin} " \
+      "#{tex_commands.shelljoin} " \
+      "#{tex_file}"
+        .strip.squeeze(' ')
     end
 
-    def mark_command_line
-      # Replace input file with '^f^'
-      infile_re = %r{(\\input\s+)?(([-.~_/A-Za-z0-9]+)(\.[a-z]+)?)\s*$}
-      quoted_infile_re = %r{(\\input\s+)?(["'])((?:\\?.)*?)\2} #"
-      if @input_file.nil?
-        @marked_command_line = @command_line
-      elsif @command_line =~ infile_re
-        @marked_command_line = @command_line.sub(infile_re, "#{$1}^f^")
-      elsif @command_line =~ quoted_infile_re
-        @marked_command_line = @command_line.sub(quoted_infile_re, "#{$1}^f^")
+    # Return the name of the input file based on the name given in the command
+    # line. Try to find the right extension for the input file if none is given.
+    def self.expand_input_file(input_file)
+      full_ext = input_file[/\A(.*)(\.[\w.]+)\z/, 2]
+      if full_ext.nil? || full_ext.empty?
+        if File.exist?("#{input_file}.tex.erb")
+          "#{input_file}.tex.erb"
+        elsif File.exist?("#{input_file}.tex")
+          "#{input_file}.tex"
+        elsif File.exist?("#{input_file}.erb")
+          "#{input_file}.erb"
+        else
+          input_file
+        end
       else
-        @marked_command_line = @command_line
+        input_file
       end
-      # Replace progname with '^p^'
-      @marked_command_line = @marked_command_line.lstrip
-      @marked_command_line = @marked_command_line.sub(/\S+/, '^p^')
     end
   end
 end
 
 # NOTES:
-
 # The following text is from the Web2C documentation at
 # http://tug.org/texinfohtml/web2c.html#Output-file-location
 #
+# 4.1 TeX invocation
+#
+#   TeX, Metafont, and MetaPost process the command line (described here)
+#   and determine their memory dump (fmt) file in the same way (*note Memory
+#   dumps::).  Synopses:
+#
+#   tex [OPTION]... [TEXNAME[.tex]] [TEX-COMMANDS]
+#   tex [OPTION]... \FIRST-LINE
+#   tex [OPTION]... &FMT ARGS
+#
+#   TeX searches the usual places for the main input file TEXNAME (*note
+#   (kpathsea)Supported file formats::), extending TEXNAME with '.tex' if
+#   necessary.  To see all the relevant paths, set the environment variable
+#   'KPATHSEA_DEBUG' to '-1' before running the program.
+#
+#   After TEXNAME is read, TeX processes any remaining TEX-COMMANDS on
+#   the command line as regular TeX input.  Also, if the first non-option
+#   argument begins with a TeX escape character (usually '\'), TeX processes
+#   all non-option command-line arguments as a line of regular TeX input.
+
 # 3.4 Output file location
 #
 # All the programs generally follow the usual convention for output
 # files. Namely, they are placed in the directory current when the
 # program is run, regardless of any input file location; or, in a few