import json


# Convert the TACRED test split into a flat list of
# {'rel': <relation label>, 'sent': <sentence with entity markers>} records
# and write it to ./tacred_test_data.json.


def _wrap_span(tokens, start, end, tag):
    """Wrap the inclusive token span ``start..end`` in ``[TAG]``/``[/TAG]``.

    The opening marker is glued to the front of ``tokens[start]`` and the
    closing marker to the back of ``tokens[end]``; ``tokens`` is modified
    in place. A single-token span (``start == end``) receives both markers.
    """
    tokens[start] = '[' + tag + ']' + tokens[start]
    tokens[end] = tokens[end] + '[/' + tag + ']'


content = []

# JSON text is UTF-8; state the encoding explicitly so decoding does not
# depend on the platform's locale codec (e.g. cp1252 on Windows).
with open('./tacred/test.json', encoding='utf-8') as f:
    examples = json.load(f)

for example in examples:
    # Debug probe: dump the raw record of one specific sentence before any
    # markers are inserted.
    if 'Tuesday that her uncle' in ' '.join(example['token']):
        print(example)

    # Work on a copy so the loaded record keeps its original tokens.
    tokens = list(example['token'])
    # Subject first, then object: the same order in which the markers were
    # always applied, so spans sharing a boundary token nest identically.
    _wrap_span(tokens, example['subj_start'], example['subj_end'], 'SUBJ')
    _wrap_span(tokens, example['obj_start'], example['obj_end'], 'OBJ')
    content.append({'rel': example['relation'], 'sent': ' '.join(tokens)})

# json.dump escapes non-ASCII by default, so the output bytes are the same
# under any ASCII-compatible encoding; utf-8 is named for determinism.
with open('./tacred_test_data.json', 'w', encoding='utf-8') as f:
    json.dump(content, f, indent=2)

# Distinct relation labels seen in the split.
rel_list = {record['rel'] for record in content}

# NOTE: everything between the triple quotes below is a bare string literal,
# not a docstring -- it is evaluated as a no-op expression, so none of this
# code runs. It holds three disabled steps:
#   1. Read ./tacred_exp.json, drop every whitespace-separated piece of
#      item['sent'] that starts with 'OBJ' or 'SUBJ', append ' .' when the
#      result ends in a letter, and write the sentences to with_exp_list.txt.
#   2. Read ./tacred/train.json, remove the subject/object token spans from
#      each example, and for sentences found in with_exp_list store the
#      marked subject, marked object and full sentence in exp.json.
#   3. Re-read ./tacred_exp.json, replace the SUBJ.../OBJ... placeholders
#      with the marked spans from step 2 (items without a match are skipped
#      via the KeyError handler) and write tacred_train_data_3.json.
# NOTE(review): the letter set used for the "ends in a letter" check omits
# the capital letter 'O' in both places it appears -- confirm whether that
# was intended before re-enabling this code.
'''
with_exp_list = []
with open('./tacred_exp.json') as f:
    json_file = json.load(f)
    for item in json_file:
        sent = item['sent'].split(' ')
        new_sen_list = []
        for sen in sent:
            if sen.startswith('OBJ') or sen.startswith('SUBJ'):
                continue
            else:
                new_sen_list.append(sen)
        new_sen_list = ' '.join(new_sen_list)
        if new_sen_list[-1] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNPQRSTUVWXYZ':
            new_sen_list = new_sen_list + ' .'
        with_exp_list.append(new_sen_list)

with open('with_exp_list.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(with_exp_list))

with_exp_dict = {}
with open('./tacred/train.json') as f:
    file = json.loads(f.read())
    for line in file:
        temp_tokens = []
        subj = " ".join(line['token'][line['subj_start']:line['subj_end']+1])
        obj = " ".join(line['token'][line['obj_start']:line['obj_end']+1])
        for i in range(len(line['token'])):
            if i >= line['subj_start'] and i <= line['subj_end']:
                continue
            elif i >= line['obj_start'] and i <= line['obj_end']:
                continue
            else:
                temp_tokens.append(line['token'][i])
        if 'then joined the senatorial campaign of John Warner' in ' '.join(line['token']):
            print(' '.join(line['token']))
            print(' '.join(temp_tokens))
        if ' '.join(temp_tokens) in with_exp_list:
            print('obj', obj)
            with_exp_dict[' '.join(temp_tokens)] = ('[SUBJ]' + subj + '[/SUBJ]', '[OBJ]' + obj + '[/OBJ]', ' '.join(line['token']))

json_file = json.dumps(with_exp_dict, indent=2)
with open('exp.json','w') as f:
    f.write(json_file)
print()
print()
new_item = []
cannot_write_item = []
with open('./tacred_exp.json') as f:
    json_file = json.load(f)
    for item in json_file:
        sent = item['sent'].split(' ')
        new_sen_list = []
        SUBJ = ""
        OBJ = ""
        for sen in sent:
            if sen.startswith('OBJ'):
                OBJ = sen
            elif sen.startswith('SUBJ'):
                SUBJ = sen
            else:
                new_sen_list.append(sen)
        new_sen = ' '.join(new_sen_list)
        if new_sen[-1] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNPQRSTUVWXYZ':
            new_sen = new_sen + ' .'
        try:
            new_subj, new_obj, _ = with_exp_dict[new_sen]
            sent = item['sent'].replace(SUBJ, new_subj)
            sent = sent.replace(OBJ, new_obj)
            item['sent'] = sent
            print(sent)
            new_item.append(item)
        except KeyError:
            pass


json_file = json.dumps(new_item, indent=2)
with open('tacred_train_data_3.json','w') as f:
    f.write(json_file)
'''