import subprocess
from pygcn import load_data
import numpy as np

dataset = 'protein_vidal_connected'

# Run node2vec on the full network and then on each train/test/valid split,
# rewriting every embedding output in place as a row-sorted table with one
# extra trailing column of zeros.
#
# The full-network pass must stay first: the split passes read the
# full-network ``.content`` file it produces to obtain the row count.
for full_net, version in [(True, ''), (False, 'train'), (False, 'test'), (False, 'valid')]:
    print(full_net, version)
    make_file_int = True

    if full_net:
        outfile = 'data/{}/{}.content'.format(dataset, dataset)
        infile = 'data/{}/{}.cites'.format(dataset, dataset)
    else:
        train_pct = '0.40'
        outfile = 'data/{0}/{0}_{2}_{1}.content'.format(dataset, train_pct, version)
        infile = 'data/{0}/{0}_{2}_{1}.cites'.format(dataset, train_pct, version)
        # Row count of the full-network content file; ndmin=2 keeps the count
        # correct even if that file holds a single row.
        n = np.loadtxt('data/{}/{}.content'.format(dataset, dataset), ndmin=2).shape[0]

    if make_file_int:
        # Rewrite the input edge file in place using plain integer formatting.
        a = np.loadtxt(infile, dtype=int)
        np.savetxt(infile, a, fmt='%d')

    # Pass the command as an argument list: a single command string without
    # shell=True is treated as the executable name on POSIX and never runs.
    cmd = ['python', 'node2vec/src/main.py', '--input', infile, '--output', outfile]
    if not full_net:
        # Split runs additionally receive the full-network row count.
        cmd += ['--n', str(n)]
    # check_call raises CalledProcessError on a non-zero exit status, so a
    # failed run cannot be followed by post-processing of stale output.
    subprocess.check_call(cmd)

    # Skip the first line of the file node2vec wrote (presumably a header
    # line -- confirm against node2vec's output format).
    content = np.loadtxt(outfile, skiprows=1, ndmin=2)
    b = np.zeros((content.shape[0], 1))
    # Order rows by their first column.
    order = np.argsort(content[:, 0])
    content = content[order]
    # Append the zero column (presumably a placeholder label column for the
    # .content format -- confirm with the consumer of these files).
    a = np.concatenate((content, b), axis=1)
    # First column is written as an integer, all remaining columns as floats.
    np.savetxt(outfile, a, fmt='%d ' + '%f ' * (a.shape[1] - 1))