#!/usr/bin/env python

import re
import os
import sys

# Temporary files exchanged with the external programs; both are created
# in the current working directory and used by run().
levenout = '__leven.tmp'    # written by leven / leven-r, parsed afterwards
datfile = '__data.tmp'      # data file generated from the main input file

######## begin PATH ########

# Locate the 'leven' executable by walking upwards from the directory that
# contains this script.  At every level the directory itself and its 'bin'
# subdirectory are checked (plus a few 'windows' variants on Windows).  If
# the executable is found, its directory is put at the front of PATH so the
# commands started with os.system() in run() can be found.

_is_windows = sys.platform[:3] == 'win'

target = 'leven.exe' if _is_windows else 'leven'

bindir = ''

p = os.path.realpath(sys.argv[0])
while not bindir:
    p1 = os.path.dirname(p)
    if p == p1:
        # reached the file system root without finding the executable
        break
    p = p1
    pb = os.path.join(p, 'bin')
    if _is_windows:
        pw = os.path.join(p, 'windows')
        pp = [p, pb, pw, os.path.join(pb, 'windows'), os.path.join(pw, 'bin')]
    else:
        pp = [p, pb]
    for d in pp:
        if os.path.isfile(os.path.join(d, target)):
            bindir = d
            break

if bindir:
    sys.stdout.write('\nAttaching directory "%s" at front of PATH\n\n' % bindir)
    # PATH may be unset; in that case do not append a separator, because an
    # empty PATH entry is interpreted as the current directory on POSIX.
    oldpath = os.environ.get('PATH', '')
    if oldpath:
        os.environ['PATH'] = bindir + os.path.pathsep + oldpath
    else:
        os.environ['PATH'] = bindir

######## end PATH ########

def run(infile, outfile, tcols,
        levenargs='', featurefile=None, verbose=2, featureargs='',
        errorfile=None, keeptemp=False, overwrite=False):
    """Compute Levenshtein differences between the columns of a data table.

    The tab-separated file `infile` (first line: column headers) is
    converted to a temporary data file, the external program 'leven' (or
    'features' followed by 'leven-r' when `featurefile` is given) is run on
    it, and the original columns plus one extra column per compared column
    pair are written, tab-separated, to `outfile`.

    Temporary files are created in the current working directory.

    Parameters:
        infile      -- name of the tab-separated input file
        outfile     -- name of the result file
        tcols       -- string with 1-based numbers of the test columns,
                       separated by any non-digit characters; if it holds
                       no valid column number, all columns are test columns
        levenargs   -- extra arguments, inserted verbatim into the
                       leven / leven-r command line
        featurefile -- if given, 'features' is run with this file first and
                       'leven-r' is used instead of 'leven'
        verbose     -- > 0: print data and test columns;
                       > 1: also print the commands that are run;
                       > 2: also print where the result was saved
        featureargs -- extra arguments, inserted verbatim into the
                       'features' command line
        errorfile   -- if given, passed to 'features' with option -e
        keeptemp    -- if true, the temporary files are not removed
        overwrite   -- if true, existing outfile/errorfile are removed;
                       otherwise an existing file is reported on stderr
                       and the function returns without doing anything

    Returns None.

    Raises IndexError if the output of leven holds fewer result sets than
    there are data rows in `infile`.
    """

    # clean-up of failed run
    for f in (levenout, datfile, datfile + '.ftr', 'features.table.out'):
        try:
            os.remove(f)
        except OSError:
            # file does not exist (or cannot be removed): nothing to clean
            pass

    # do output files already exist?
    files = [outfile]
    if errorfile:
        files.append(errorfile)
    for f in files:
        if os.path.isfile(f):
            if overwrite:
                os.remove(f)
            else:
                sys.stderr.write('\n\nERROR: File "%s" exists\n\n\n' % f)
                return

    # read main input file
    oldcols = []
    with open(infile, 'r') as fpmain:
        # first line: column headers
        heads = [h.strip() for h in fpmain.readline().split('\t')]
        nColumns = len(heads)

        # rest: data rows; surplus cells are ignored, and rows that are
        # too short are padded with empty cells (an empty cell is simply
        # not written to the data file below)
        for line in fpmain:
            oldcol = [w.strip() for w in line.split('\t')[:nColumns]]
            oldcol.extend([''] * (nColumns - len(oldcol)))
            oldcols.append(oldcol)

    # print column headers
    if verbose > 0:
        sys.stdout.write('\nDATA COLUMNS:\n')
        for i in range(nColumns):
            sys.stdout.write('%4i\t%s\n' % (i + 1, heads[i]))
        sys.stdout.write('\n')

    # which columns are test columns? if none, then all
    argcols = ''
    tests = [False] * nColumns
    for number in re.findall(r'[0-9]+', tcols):
        c = int(number)
        if 1 <= c <= nColumns:
            if not tests[c - 1]:
                tests[c - 1] = True
                argcols += ' -C %i' % c
        else:
            sys.stderr.write('\nWARNING - Invalid column number: %i (ignored)\n\n' % c)
    if not any(tests):
        tests = [True] * nColumns

    # print headers of test columns
    if verbose > 0:
        sys.stdout.write('TEST COLUMNS:\n')
        for i in range(nColumns):
            if tests[i]:
                sys.stdout.write('%4i\t%s\n' % (i + 1, heads[i]))
        sys.stdout.write('\n')

    # column pairs that appear in the output: every pair in which at least
    # one of the two columns is a test column
    pairs = [(i, j)
             for i in range(nColumns)
             for j in range(i + 1, nColumns)
             if tests[i] or tests[j]]

    # create more heads: one for each reported pair
    heads.extend([heads[i] + '__' + heads[j] for i, j in pairs])

    # create data file for 'leven' program: for each row an item number,
    # followed by one line for each non-empty cell
    with open(datfile, 'w') as fpout:
        for cnt, oldcol in enumerate(oldcols):
            fpout.write('%i\n' % (cnt + 1))
            for i, w in enumerate(oldcol):
                if w:
                    fpout.write('[%i]- %s\n' % (i + 1, w))

    # if features, process them
    if featurefile:
        cmd = 'features -c -g '
        if errorfile:
            cmd += '-e %s ' % errorfile
        cmd += featureargs + ' ' + featurefile + ' ' + datfile
        if verbose > 1:
            sys.stdout.write('\nRunning: %s\n\n' % cmd)
        os.system(cmd)

    # calculate all Levenshtein differences
    if featurefile:
        cmd = 'leven-r -q -P -n %i%s -o %s -s features.table.out %s %s.ftr' % (
            nColumns, argcols, levenout, levenargs, datfile)
    else:
        cmd = 'leven -q -P -n %i%s -o %s %s %s' % (
            nColumns, argcols, levenout, levenargs, datfile)
    if verbose > 1:
        sys.stdout.write('\nRunning: %s\n\n' % cmd)
    os.system(cmd)

    # parse results

    def dostore(store):
        # values of the reported pairs, in the order of the extra heads
        return [store[i][j] for i, j in pairs]

    # A result line holds two column numbers and a value.  A run of such
    # lines forms the result set for one data row; any other line ends it.
    # The store is not reset between sets, as in the original code.
    r = re.compile(r"^\s*(\d+)\s+(\d+)\s+(\S+)\s*$")
    results = []
    store = [[None] * nColumns for i in range(nColumns)]
    data = False
    with open(levenout, 'r') as fpin:
        for l in fpin:
            rr = r.search(l)
            if rr:
                i = int(rr.group(1)) - 1
                j = int(rr.group(2)) - 1
                store[i][j] = store[j][i] = rr.group(3)
                data = True
            elif data:
                results.append(dostore(store))
                data = False
    if data:
        results.append(dostore(store))

    # fail before the output file is created, instead of half-way writing it
    if len(results) < len(oldcols):
        raise IndexError('"%s" holds %i result sets, expected %i' % (
            levenout, len(results), len(oldcols)))

    # save results to file
    with open(outfile, 'w') as fpout:
        fpout.write('\t'.join(heads) + '\n')
        for i in range(len(oldcols)):
            fpout.write('\t'.join(oldcols[i]) + '\t' + '\t'.join(results[i]) + '\n')

    # clean-up
    if not keeptemp:
        os.remove(levenout)
        os.remove(datfile)
        if featurefile:
            os.remove('features.table.out')
            os.remove(datfile + '.ftr')

    if verbose > 2:
        sys.stdout.write('Result saved as: %s' % outfile)

    sys.stdout.write('\n\nDONE\n\n\n')

if __name__ == '__main__':

    # Command line interface: parse the options, then call run() with the
    # single positional argument as input file.

    import getopt

    usage = """
Usage: %(prog)s
    [-a leven_arguments] [-c column_numbers]
    [-e error_file] [-f feature_file] [-F feature_arguments]
    [-o output_file] [-r] [-t] [-v 0|1|2|3]
    input_file

    -r : replace existing files
    -t : keep temporary files
    -v : verbosity level

Examples:

    %(prog)s sample.data

    %(prog)s -o results.txt -f features-example.txt -c '2 4 6 8' -a '-N 2' -F '-a -x' -v 3 sample.data

""" % {'prog': sys.argv[0]}

    # default values of all options
    opts = {
        '-a': '',
        '-c': '',
        '-e': None,
        '-f': None,
        '-F': '',
        '-o': 'out.data',
        '-r': False,
        '-t': False,
        '-v': 3,
    }

    try:
        o, args = getopt.getopt(sys.argv[1:], 'a:c:o:e:f:F:rtv:')
        for k, v in o:
            if k == '-v':
                opts[k] = int(v)
            elif k in ('-r', '-t'):
                # flags without argument
                opts[k] = True
            else:
                opts[k] = v
    except (getopt.GetoptError, ValueError) as err:
        # unknown option, missing option argument, or non-integer -v:
        # report it and show the usage instead of a traceback
        sys.stderr.write('\nERROR: %s\n' % err)
        sys.stderr.write(usage)
        sys.exit(1)

    if len(args) != 1:
        sys.stderr.write(usage)
        sys.exit(1)

    run(args[0], opts['-o'], opts['-c'],
        levenargs=opts['-a'],
        featurefile=opts['-f'],
        verbose=opts['-v'],
        keeptemp=opts['-t'],
        featureargs=opts['-F'],
        errorfile=opts['-e'],
        overwrite=opts['-r'])
