#!/usr/bin/env python
"""
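Convert the output of the program 'features' into a tab-separated table.

Usage: tokens2tab tokenfile substfile   (the script name is illustrative)

'tokenfile' consists of blocks that start with a line 'TOKEN <number>',
followed by lines of the form '<count> <text>' and closed by a blank line.
For every block the text with the highest count is used as the label.

'substfile' holds, after an optional line starting with 'F:', the number
of tokens followed by one value per line for the lower triangle of a
symmetric table. Blank lines and lines starting with '#' are ignored.

The table is written to standard output with quoted labels as header row
and first column. The first label is always "indel".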
"""

__author__ = "Peter Kleiweg"
__version__ = "0.2"
__date__ = "2010/03/18"

import cgitb; cgitb.enable(format="text")

import os, sys

# Name under which the script was invoked; only used in the usage message.
progname = os.path.basename(sys.argv[0])

# Exactly two positional arguments are required.
if len(sys.argv) != 3:
    sys.stderr.write("""
Usage: %s tokenfile substfile

'tokenfile' is file as saved by programma 'features' with option -t
'substfile' is file 'features.table.out' as saved by programma 'features'


""" % progname)
    # A wrong invocation is an error: exit with a non-zero status so that
    # calling shells and makefiles can detect it (a bare sys.exit() would
    # report success).
    sys.exit(1)

tokenfile = sys.argv[1]
weightfile = sys.argv[2]

def _quote_token(text):
    """Return text stripped, with '"' doubled, wrapped in double quotes."""
    return '"' + text.strip().replace('"', '""') + '"'


# Labels for the table; index 0 is the fixed "indel" entry.
tokens = ['"indel"']
n = 0        # number of TOKEN headers seen so far
state = 0    # 0: looking for a TOKEN header, 1: inside a token block
count = 0    # highest count seen in the current block
token = ''   # text that goes with that highest count

# 'with' makes sure the file is closed even when parsing fails.
with open(tokenfile, 'r') as fp:
    for line in fp:
        if state == 0:
            if line.startswith('TOKEN'):
                n += 1
                i = int(line.split()[1])
                # Headers must be numbered 1, 2, 3, ... without gaps.
                # Raised explicitly: an assert would vanish under -O.
                if i != n:
                    raise ValueError(
                        "%s: expected TOKEN %d, found TOKEN %d"
                        % (tokenfile, n, i))
                state = 1
                count = 0
                token = ''
        else:
            if not line.strip():
                # A blank line closes the current block.
                tokens.append(_quote_token(token))
                state = 0
            else:
                # Lines inside a block are '<count> <text>'; keep the text
                # with the highest count (the first one wins on a tie).
                i, j = line.split(None, 1)
                i = int(i)
                if i > count:
                    token = j
                    count = i

# A last block that is not followed by a blank line ends at end of file;
# without this it would be silently dropped.
if state == 1:
    tokens.append(_quote_token(token))
    state = 0

# Number of tokens read from the token file (the leading "indel" entry
# is not counted).
length = len(tokens) - 1

# Collect the meaningful lines: blank lines and lines starting with '#'
# are skipped. 'with' closes the file even when reading fails.
lines = []
with open(weightfile, 'r') as fp:
    for line in fp:
        line = line.strip()
        if line and line[0] != '#':
            lines.append(line)

# An optional leading 'F:' line is not part of the table data.
if lines and lines[0].startswith('F:'):
    lines.pop(0)

# Fail with a clear message instead of an IndexError on an empty file.
if not lines:
    raise ValueError("%s: no table size found" % weightfile)

# The first remaining line holds the table size, which has to agree with
# the number of tokens. Raised explicitly: an assert would vanish under -O.
i = int(lines[0])
if i != length:
    raise ValueError(
        "%s: table size %d does not match %d tokens in %s"
        % (weightfile, i, length, tokenfile))

# Square table of value strings, one row and one column per label
# (including the "indel" entry at index 0). Cells that are never filled,
# such as the diagonal, stay '0'.
size = length + 1
d = [['0'] * size for _ in range(size)]

# lines[0] is the table size, so the values start at lines[1]. They are
# consumed row by row for the cells below the diagonal and mirrored to the
# cells above it.
n = 0
for row in range(1, size):
    for col in range(row):
        n += 1
        value = lines[n]
        d[row][col] = value
        d[col][row] = value

# Header row (with an empty first cell), then one labelled row per token.
emit = sys.stdout.write
emit('\t' + '\t'.join(tokens) + '\n')
for label, cells in zip(tokens, d):
    emit(label + '\t' + '\t'.join(cells) + '\n')
