Sha256: f93bb164f64c00be1cb2d2413ae42ee764b6c7e8a9f9c3d9bed454ee339319c6

Contents?: true

Size: 652 Bytes

Versions: 29

Compression:

Stored size: 652 Bytes

Contents

#!/usr/bin/env python
"""Sort lines (or tab-separated records) by their MD5 digest.

Gives a deterministic pseudo-random ordering (e.g. for train/test splits).

Options:
  -k N   hash only the N-th (1-based) tab-separated field of each line
         instead of the whole line
  -p     prepend each output line with its MD5 hex digest and a tab

brendan o'connor - anyall.org - gist.github.com/brendano
"""

import hashlib
import optparse
import sys


def record_digest(line, k=None):
    """Return the MD5 hex digest used as the sort key for one record.

    line: one input record as a byte string, with or without its trailing
          newline.
    k:    1-based index of the tab-separated field to hash; any falsy value
          (None, False, 0) hashes the whole line.

    Raises IndexError if the record has fewer than k fields.
    """
    # Strip the newline only when it is really there, so a final line that
    # lacks one does not lose its last character before hashing.
    text = line[:-1] if line.endswith(b"\n") else line
    if k:
        text = text.split(b"\t")[k - 1]
    return hashlib.md5(text).hexdigest()


def md5sort(lines, k=None, prepend=False):
    """Return the records of `lines` ordered by MD5 digest.

    lines:   iterable of byte-string records.
    k:       passed through to record_digest (field to hash, 1-based).
    prepend: when true, each output record is prefixed with its hex digest
             followed by a tab.

    Every returned record ends with a newline, so an unterminated final
    input line cannot merge with the record that follows it after sorting.
    Records with equal digests keep their input order (the sort is stable).
    """
    # Compute each digest once and reuse it for both sorting and the prefix.
    keyed = [(record_digest(line, k), line) for line in lines]
    keyed.sort(key=lambda pair: pair[0])

    result = []
    for digest, line in keyed:
        if not line.endswith(b"\n"):
            line += b"\n"
        if prepend:
            line = digest.encode("ascii") + b"\t" + line
        result.append(line)
    return result


def main(argv=None):
    """Command-line entry point: sort stdin by MD5 and write it to stdout.

    argv: argument list without the program name; None means sys.argv[1:].
    """
    parser = optparse.OptionParser()
    parser.add_option('-k', type='int', default=False,
                      help="hash only the K-th (1-based) tab-separated field")
    parser.add_option('-p', action='store_true',
                      help="prepend each line with its md5 digest and a tab")
    opts, _ = parser.parse_args(argv)

    # Work on raw bytes: Python 3 exposes them via .buffer, while Python 2's
    # standard streams already carry byte strings.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    for record in md5sort(stdin.readlines(), opts.k, opts.p):
        stdout.write(record)


if __name__ == "__main__":
    main()

Version data entries

29 entries across 29 versions & 3 rubygems

Version Path
ul-wukong-4.1.1 bin/md5sort
ul-wukong-4.1.0 bin/md5sort
mrflip-wukong-0.1.0 bin/md5sort
wukong-4.0.0 bin/md5sort
wukong-3.0.1 bin/md5sort
wukong-3.0.0 bin/md5sort
wukong-3.0.0.pre3 bin/md5sort
wukong-3.0.0.pre2 bin/md5sort
wukong-3.0.0.pre bin/md5sort
wukong-2.0.2 bin/md5sort
wukong-2.0.1 bin/md5sort
wukong-2.0.0 bin/md5sort
wukong-1.5.4 bin/md5sort
wukong-1.5.3 bin/md5sort
wukong-1.5.2 bin/md5sort
wukong-1.5.1 bin/md5sort
wukong-1.5.0 bin/md5sort
wukong-1.4.12 bin/md5sort
wukong-1.4.11 bin/md5sort
wukong-1.4.10 bin/md5sort