#!/usr/bin/env python # -*- coding: utf-8 -*- import sys, re, os, signal import traceback if 'PYGMENTS_PATH' in os.environ: sys.path.insert(0, os.environ['PYGMENTS_PATH']) dirname = os.path.dirname base_dir = dirname(dirname(dirname(os.path.abspath(__file__)))) sys.path.append(base_dir + "/vendor") sys.path.append(base_dir + "/vendor/pygments-main") sys.path.append(base_dir + "/vendor/simplejson") import pygments from pygments import lexers, formatters, styles, filters from threading import Lock try: import json except ImportError: import simplejson as json def _convert_keys(dictionary): if not isinstance(dictionary, dict): return dictionary return dict((str(k), _convert_keys(v)) for k, v in dictionary.items()) def _write_error(error): res = {"error": error} out_header = json.dumps(res).encode('utf-8') bits = _get_fixed_bits_from_header(out_header) sys.stdout.write(bits + "\n") sys.stdout.flush() sys.stdout.write(out_header + "\n") sys.stdout.flush() return def _get_fixed_bits_from_header(out_header): size = len(out_header) return "".join(map(lambda y:str((size>>y)&1), range(32-1, -1, -1))) def _signal_handler(signal, frame): """ Handle the signal given in the first argument, exiting gracefully """ sys.exit(0) class Mentos(object): """ Interacts with pygments.rb to provide access to pygments functionality """ def __init__(self): pass def return_lexer(self, lexer, args, inputs, code=None): """ Accepting a variety of possible inputs, return a Lexer object. The inputs argument should be a hash with at least one of the following keys: - 'lexer' ("python") - 'mimetype' ("text/x-ruby") - 'filename' ("yeaaah.py") The code argument should be a string, such as "import derp". The code guessing method is not especially great. It is advised that clients pass in a literal lexer name whenever possible, which provides the best probability of match (100 percent). """ if lexer: if inputs: return lexers.get_lexer_by_name(lexer, **inputs) else: return lexers.get_lexer_by_name(lexer) if inputs: if 'lexer' in inputs: return lexers.get_lexer_by_name(inputs['lexer'], **inputs) elif 'mimetype' in inputs: return lexers.get_lexer_for_mimetype(inputs['mimetype'], **inputs) elif 'filename' in inputs: name = inputs['filename'] # If we have code and a filename, pygments allows us to guess # with both. This is better than just guessing with code. if code: return lexers.guess_lexer_for_filename(name, code, **inputs) else: return lexers.get_lexer_for_filename(name, **inputs) # If all we got is code, try anyway. if code: return lexers.guess_lexer(code, **inputs) else: return None def highlight_text(self, code, lexer, formatter_name, args, kwargs): """ Highlight the relevant code, and return a result string. The default formatter is html, but alternate formatters can be passed in via the formatter_name argument. Additional paramters can be passed as args or kwargs. """ # Default to html if we don't have the formatter name. if formatter_name: _format_name = str(formatter_name) else: _format_name = "html" # Return a lexer object lexer = self.return_lexer(lexer, args, kwargs, code) # Make sure we sucessfuly got a lexer if lexer: formatter = pygments.formatters.get_formatter_by_name(str.lower(_format_name), **kwargs) # Do the damn thing. res = pygments.highlight(code, lexer, formatter) return res else: _write_error("No lexer") def get_data(self, method, lexer, args, kwargs, text=None): """ Based on the method argument, determine the action we'd like pygments to do. Then return the data generated from pygments. """ if kwargs: formatter_name = kwargs.get("formatter", None) opts = kwargs.get("options", {}) # Ensure there's a 'method' key before proceeeding if method: res = None # Now check what that method is. For the get methods, pygments # itself returns generators, so we make them lists so we can serialize # easier. if method == 'get_all_styles': res = json.dumps(list(pygments.styles.get_all_styles())) elif method == 'get_all_filters': res = json.dumps(list(pygments.filters.get_all_filters())) elif method == 'get_all_lexers': res = json.dumps(list(pygments.lexers.get_all_lexers())) elif method == 'get_all_formatters': res = [ [ft.__name__, ft.name, ft.aliases] for ft in pygments.formatters.get_all_formatters() ] res = json.dumps(res) elif method == 'highlight': try: text = text.decode('utf-8') except UnicodeDecodeError: # The text may already be encoded text = text res = self.highlight_text(text, lexer, formatter_name, args, _convert_keys(opts)) elif method == 'css': kwargs = _convert_keys(kwargs) fmt = pygments.formatters.get_formatter_by_name(args[0], **kwargs) res = fmt.get_style_defs(args[1]) elif method == 'lexer_name_for': lexer = self.return_lexer(None, args, kwargs, text) if lexer: # We don't want the Lexer itself, just the name. # Take the first alias. res = lexer.aliases[0] else: _write_error("No lexer") else: _write_error("Invalid method " + method) return res def _send_data(self, res, method): # Base header. We'll build on this, adding keys as necessary. base_header = {"method": method} res_bytes = len(res) + 1 base_header["bytes"] = res_bytes out_header = json.dumps(base_header).encode('utf-8') # Following the protocol, send over a fixed size represenation of the # size of the JSON header bits = _get_fixed_bits_from_header(out_header) # Send it to Rubyland sys.stdout.write(bits + "\n") sys.stdout.flush() # Send the header. sys.stdout.write(out_header + "\n") sys.stdout.flush() # Finally, send the result sys.stdout.write(res + "\n") sys.stdout.flush() def _get_ids(self, text): start_id = text[:8] end_id = text[-8:] return start_id, end_id def _check_and_return_text(self, text, start_id, end_id): # Sanity check. id_regex = re.compile('[A-Z]{8}') if not id_regex.match(start_id) and not id_regex.match(end_id): _write_error("ID check failed. Not an ID.") if not start_id == end_id: _write_error("ID check failed. ID's did not match.") # Passed the sanity check. Remove the id's and return text = text[10:-10] return text def _parse_header(self, header): method = header["method"] args = header.get("args", []) kwargs = header.get("kwargs", {}) lexer = kwargs.get("lexer", None) return (method, args, kwargs, lexer) def start(self): """ Main loop, waiting for inputs on stdin. When it gets some data, it goes to work. mentos exposes most of the "High-level API" of pygments. It always expects and requires a JSON header of metadata. If there is data to be pygmentized, this header will be followed by the text to be pygmentized. The header is of form: { "method": "highlight", "args": [], "kwargs": {"arg1": "v"}, "bytes": 128, "fd": "8"} """ lock = Lock() while True: # The loop begins by reading off a simple 32-arity string # representing an integer of 32 bits. This is the length of # our JSON header. size = sys.stdin.read(32) lock.acquire() try: # Read from stdin the amount of bytes we were told to expect. header_bytes = int(size, 2) # Sanity check the size size_regex = re.compile('[0-1]{32}') if not size_regex.match(size): _write_error("Size received is not valid.") line = sys.stdin.read(header_bytes) header = json.loads(line) method, args, kwargs, lexer = self._parse_header(header) _bytes = 0 if lexer: lexer = str(lexer) # Read more bytes if necessary if kwargs: _bytes = kwargs.get("bytes", 0) # Read up to the given number bytes (possibly 0) text = sys.stdin.read(_bytes) # Sanity check the return. if _bytes: start_id, end_id = self._get_ids(text) text = self._check_and_return_text(text, start_id, end_id) # Get the actual data from pygments. res = self.get_data(method, lexer, args, kwargs, text) # Put back the sanity check values. if method == "highlight": res = start_id + " " + res + " " + end_id self._send_data(res, method) except: tb = traceback.format_exc() _write_error(tb) finally: lock.release() def main(): # Signal handlers to trap signals. signal.signal(signal.SIGINT, _signal_handler) signal.signal(signal.SIGTERM, _signal_handler) if sys.platform != "win32": signal.signal(signal.SIGHUP, _signal_handler) mentos = Mentos() if sys.platform == "win32": # disable CRLF import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) else: # close fd's inherited from the ruby parent import resource maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1] if maxfd == resource.RLIM_INFINITY: maxfd = 65536 for fd in range(3, maxfd): try: os.close(fd) except: pass mentos.start() if __name__ == "__main__": main()