#!/usr/bin/env python
"""Convert xcodebuild or xctool-json-reporter logs to compile_commands.json.

The script scans a build log for ``CompileC`` and ``ProcessPCH`` sections,
extracts the compiler invocation found in each section and writes one JSON
compilation-database record per ``CompileC`` section.
"""

import os
import sys
import json
import argparse
import re
import shlex
import glob
import itertools

try:
    import StringIO  # Python 2
except ImportError:
    # Python 3: ``io.StringIO`` is a drop-in for the only use in this file.
    import io as StringIO


# Regular-expression fragments (hence the escaped '+') naming the compiler
# executables whose command lines are recognized in the log.
SUPPORTED_COMPILERS = [
    r"clang",
    r"clang\+\+",
    r"llvm-cpp-4.2",
    r"llvm-g\+\+",
    r"llvm-g\+\+-4.2",
    r"llvm-gcc",
    r"llvm-gcc-4.2",
    r"arm-apple-darwin10-llvm-g\+\+-4.2",
    r"arm-apple-darwin10-llvm-gcc-4.2",
    r"i686-apple-darwin10-llvm-g\+\+-4.2",
    r"i686-apple-darwin10-llvm-gcc-4.2",
    r"gcc",
    r"g\+\+",
    r"c\+\+",
    r"cc",
]

DEFAULT_INPUT_FILE = "xcodebuild.log"
JSON_COMPILATION_DATABASE = "compile_commands.json"

# Matches any character that forces an argument to be quoted: space, double
# quote or backslash.
_find_unsafe = re.compile(r'[ "\\]').search


def clang_quote(s):
    r"""Return a minimally-escaped version of the string *s* that
    clang-based tools understand.
    (See http://clang.llvm.org/docs/JSONCompilationDatabase.html)

    ex: clang_quote('aa') == r'aa'
        clang_quote('a"a') == r'"a\"a"'
        clang_quote('--my-option=a"a') == r'"--my-option=a\"a"'

    An empty (or otherwise falsy) *s* is rendered as ``""``.
    """
    if not s:
        return '""'
    if _find_unsafe(s) is None:
        return s
    # Backslashes must be doubled first so the backslashes introduced when
    # escaping the double quotes are not escaped a second time.
    return '"' + s.replace('\\', '\\\\').replace('"', '\\"') + '"'


# Matches any shell quoting character: single quote, double quote, backslash.
_find_quoting = re.compile(r"""['"\\]""").search


def tokenize_command(c):
    """Return the list of shell arguments in *c*.

    Commands without any quoting character are split on whitespace directly;
    anything else goes through ``shlex.split`` (an explicit StringIO wrapper
    is used for compatibility with Python 2).

    Runs of whitespace never produce empty tokens, so both code paths agree
    on how arguments are separated.
    """
    if _find_quoting(c) is None:
        return c.split()
    return shlex.split(StringIO.StringIO(c))


# global table to map pth files to (source) pch files
_pch_dictionary = {}


def register_source_for_pth_file(clang_command, directory):
    """Record which source file a precompiled-header command compiles.

    The argument following ``-c`` is taken as the source file and the
    argument following ``-o`` as the precompiled output, provided the latter
    ends in ``.pch.pth`` or ``.pch.pch``.  When both are found the pair is
    stored in the module-level table read by ``get_source_for_pth_file``.

    *directory* is accepted for symmetry with ``process_clang_command`` and
    is not used.
    """
    tokens = iter(tokenize_command(clang_command))
    src_file = ""
    pth_file = ""
    for tok in tokens:
        if tok == '-c':
            # A dangling '-c' at the end of the command changes nothing.
            src_file = next(tokens, src_file)
        elif tok == '-o':
            out_file = next(tokens, "")
            if out_file.endswith(('.pch.pth', '.pch.pch')):
                pth_file = out_file
    if src_file and pth_file:
        _pch_dictionary[pth_file] = src_file


def get_source_for_pth_file(file):
    """Return the source registered for the precompiled header *file*.

    *file* is looked up with a ``.pth`` suffix first, then with a ``.pch``
    suffix.  Returns ``None`` when neither is registered.
    """
    src_file = _pch_dictionary.get(file + '.pth')
    if src_file is not None:
        return src_file
    return _pch_dictionary.get(file + '.pch')


def process_clang_command(clang_command, directory):
    """Build a compilation-database record from a compiler command line.

    Every argument is re-quoted with ``clang_quote``.  The argument of
    ``-include`` is replaced by its registered source file when the named
    file does not exist on disk; if no source is registered the process
    exits with status 3.

    Returns a dict with the keys ``directory``, ``command`` and ``file``, or
    ``None`` when the command contains no ``-c <source>`` argument.
    """
    tokens = iter(tokenize_command(clang_command))
    command = []
    source_file = None
    for tok in tokens:
        if tok == '-include':
            include_file = next(tokens, None)
            if include_file is None:
                break  # dangling option at the end of the command: drop it
            if not os.path.isfile(include_file):
                src_file = get_source_for_pth_file(include_file)
                if src_file is None:
                    sys.stderr.write(
                        "cannot find original pch source file for %s\n"
                        % include_file)
                    sys.exit(3)
                include_file = src_file
            command.append(tok)
            command.append(clang_quote(include_file))
        elif tok == '-c':
            next_file = next(tokens, None)
            if next_file is None:
                break  # dangling option at the end of the command: drop it
            source_file = next_file
            command.append(tok)
            command.append(clang_quote(source_file))
        else:
            command.append(clang_quote(tok))

    if source_file is None:
        # Without a source file there is nothing to key the record on.
        return None
    return {"directory": directory,
            "command": " ".join(command),
            "file": os.path.normpath(source_file)}


def read_directory(line):
    """Return the target of a ``cd <dir>`` log line, or ``""`` otherwise."""
    tokens = tokenize_command(line)
    if len(tokens) >= 2 and tokens[0] == "cd":
        return tokens[1]
    return ""


def _json_command_lines(line):
    """Return the lines of the ``command`` field of one JSON log record."""
    command = json.loads(line).get('command', '')
    if not isinstance(command, str):
        # Python 2: json yields ``unicode``; the rest of the script uses str.
        command = command.encode("utf8")
    return command.splitlines(True)


def _iter_log_lines(stream, json_input_mode):
    """Return an iterator over the log lines contained in *stream*."""
    if not json_input_mode:
        return iter(stream)
    # from_iterable keeps the input lazy instead of reading it all up front.
    return itertools.chain.from_iterable(
        _json_command_lines(line) for line in stream)


def _never_excluded(directory):
    """Default exclusion predicate: keep every directory."""
    return False


def convert(input_file, output_file, is_excluded=None):
    """Convert the build log *input_file* into the JSON file *output_file*.

    An input name ending in ``.json`` is read as one JSON object per line,
    whose ``command`` field holds the log text; any other input is read as a
    plain log.

    For each ``CompileC`` or ``ProcessPCH`` line, the following line is read
    as the ``cd`` line giving the working directory, and the next line that
    looks like a supported compiler invocation is then either converted to a
    record (``CompileC``) or registered as a precompiled-header source
    (``ProcessPCH``).

    *is_excluded*, when given, is called with the working directory; a truthy
    result skips the section.
    """
    json_input_mode = input_file.endswith('.json')
    find_clang_command = re.compile(
        "(" + "|".join(SUPPORTED_COMPILERS) + ") .* -c .* -o ").search
    if is_excluded is None:
        is_excluded = _never_excluded

    with open(output_file, "w") as output:
        with open(input_file, "r") as log:
            lines = _iter_log_lines(log, json_input_mode)
            output.write("[")
            separator = "\n"
            try:
                while True:
                    log_line = next(lines)
                    is_compile = "CompileC" in log_line
                    if not is_compile and "ProcessPCH" not in log_line:
                        continue

                    directory = read_directory(next(lines))
                    if is_excluded(directory):
                        continue

                    # Skip ahead to the compiler invocation of this section.
                    log_line = next(lines)
                    while not find_clang_command(log_line):
                        log_line = next(lines)

                    if not is_compile:
                        register_source_for_pth_file(log_line, directory)
                        continue

                    record = process_clang_command(log_line, directory)
                    if record is None:
                        sys.stderr.write(
                            "skipping compile command without a -c source "
                            "file: %s\n" % log_line.strip())
                        continue
                    output.write(separator)
                    output.write(json.dumps(record, indent=2))
                    separator = ",\n"
            except StopIteration:
                # End of the log, possibly in the middle of a section.
                pass
            output.write("\n]\n")


def main():
    """Parse the command line and run the conversion.

    Exits with status 1 when the input file does not exist.
    """
    arg_parser = argparse.ArgumentParser(
        description='Converts xcodebuild logs or xctool-json-reporter logs '
                    'to compile_commands.json')
    arg_parser.add_argument(
        "-e", "-exclude", dest="exclusion",
        help="Directory exclusion pattern (regular expression)")
    arg_parser.add_argument(
        "-o", "-output", dest="output_file",
        help="Output json file (default: ./%s)" % JSON_COMPILATION_DATABASE)
    arg_parser.add_argument(
        metavar="FILE", nargs='?', dest="input_file",
        help="Input log file (default: ./%s)" % DEFAULT_INPUT_FILE)
    args = arg_parser.parse_args()

    input_file = args.input_file or DEFAULT_INPUT_FILE
    output_file = args.output_file or JSON_COMPILATION_DATABASE

    if not os.path.isfile(input_file):
        sys.stderr.write("Error: %s not found.\n" % input_file)
        sys.exit(1)

    if args.exclusion:
        # ``match`` anchors the pattern at the start of the directory.
        is_excluded = re.compile(args.exclusion).match
    else:
        is_excluded = None

    convert(input_file, output_file, is_excluded)


if __name__ == '__main__':
    main()