import random

from pathlib import Path

def load_tgts(path):
    '''
    Read target terms from a text file, one term per line.

    `path` must expose `open(mode=...)` (e.g. a `pathlib.Path`).  Each
    line is stripped of surrounding whitespace and lower-cased; blank
    lines are kept as empty strings.  Returns the terms as a list in
    file order.
    '''
    tgts = []
    with path.open(mode='r') as handle:
        for raw_line in handle:
            tgts.append(raw_line.strip().lower())
    return tgts

def singularize(s):
    '''
    Return a naive singular form of the noun `s`.

    'children' maps to 'child'; any other word ending in 's' has that
    final 's' dropped; everything else is returned unchanged.
    '''
    # The only irregular plural handled explicitly.
    if s == 'children':
        return 'child'
    return s[:-1] if s.endswith('s') else s


def pluralize(s):
    '''
    Return a naive plural form of the noun `s`.

    Rules are tried in order and the first match wins:
      1. `WOMAN_RE` matches  -> its matches are replaced with 'women'
      2. `MAN_RE` matches    -> its matches are replaced with 'men'
      3. ends in 'y' preceded by a non-vowel -> 'y' becomes 'ies'
      4. ends in 'ch', 'sh' or 's'           -> append 'es'
      5. otherwise                           -> append 's'

    NOTE(review): `WOMAN_RE` and `MAN_RE` are not defined in the visible
    part of this file; they are assumed to be module-level compiled
    regular expressions -- confirm they exist before calling this.
    '''
    # The "woman" pattern is consulted before the "man" pattern.
    if WOMAN_RE.search(s) is not None:
        return WOMAN_RE.sub('women', s)
    if MAN_RE.search(s) is not None:
        return MAN_RE.sub('men', s)

    # Consonant + 'y' (e.g. "secretary") takes the "-ies" ending.
    if s.endswith('y') and s[-2] not in 'aeiou':
        return s[:-1] + 'ies'

    # These endings take "-es" rather than a bare "-s".
    if s.endswith(('ch', 'sh', 's')):
        return s + 'es'

    return s + 's'

def truncate_lists(list1, list2):
    '''
    Shrink `list1` and `list2` to a common length (the shorter of the
    two) by discarding randomly chosen items.

    The relative order of the surviving items is preserved.  Returns a
    `(list1, list2)` tuple of new lists; the inputs are not modified.
    '''
    def keep_random(items, count):
        # Sample (index, item) pairs, then sort them so the survivors
        # come back in their original order (indices are unique, so the
        # sort never has to compare the items themselves).
        picked = random.sample(list(enumerate(items)), count)
        picked.sort()
        return [item for (_, item) in picked]

    target_len = min(len(list1), len(list2))
    shortened1 = keep_random(list1, target_len)
    shortened2 = keep_random(list2, target_len)
    return (shortened1, shortened2)


def fill_template(template, term):
    '''
    Fill `template` with `term` and an appropriate indefinite article,
    then capitalize the first character of the result.

    `template` is a `str.format` string that may use the `{article}`
    and `{term}` fields.  The article is chosen with a simple spelling
    heuristic, applied case-insensitively:

      * 'an' if the term starts with a vowel letter or with 'honor'
        (silent 'h'),
      * except that terms starting with 'european' or 'ukrainian'
        (consonant "y" sound) take 'a',
      * 'a' otherwise.

    Returns the filled-in sentence; an empty result is returned as-is.
    '''
    # Compare on a lower-cased copy so that "european" / "Honor ..." are
    # treated the same as "European" / "honor ..."; the term itself is
    # inserted into the sentence unchanged.
    lowered = term.lower()
    vowel_sound = lowered.startswith(('honor', 'a', 'e', 'i', 'o', 'u'))
    consonant_sound = lowered.startswith(('european', 'ukrainian'))
    article = 'an' if vowel_sound and not consonant_sound else 'a'

    sentence = template.format(article=article, term=term)
    # Slicing (rather than indexing) keeps an empty sentence from raising.
    return sentence[:1].upper() + sentence[1:]

# Sentence templates for a singular noun, intended for `str.format`.
# `{term}` receives the noun; `{article}` (used by the first four
# templates only) receives the indefinite article chosen for that noun.
SINGULAR_NOUN_TEMPLATES = (
    'This is {article} {term}.',
    'That is {article} {term}.',
    'There is {article} {term}.',
    'Here is {article} {term}.',
    'The {term} is here.',
    'The {term} is there.',
)

if __name__ == "__main__":
    # Script entry point: load the two occupation word lists, balance
    # their sizes, then print the terms followed by the templated
    # sentences, each as a quoted, comma-terminated line.

    f_path = Path("/home/lily/yt325/150/project/data/winobias/female_occupations.txt")
    m_path = Path("/home/lily/yt325/150/project/data/winobias/male_occupations.txt")

    f_tgts = load_tgts(f_path)
    # Lines of the male file are split on whitespace and flattened, so
    # every word becomes its own target (the female lines are kept whole).
    m_tgts = [word for line in load_tgts(m_path) for word in line.split()]
    f_tgts, m_tgts = truncate_lists(f_tgts, m_tgts)

    # First the bare terms: female group, blank line, male group, blank line.
    for group in (f_tgts, m_tgts):
        for tgt in group:
            print(f"\"{tgt}\",")
        print()

    # Then every template filled with every term, in the same group order.
    for group in (f_tgts, m_tgts):
        for tgt in group:
            sentences = [fill_template(template, tgt) for template in SINGULAR_NOUN_TEMPLATES]
            for sentence in sentences:
                print(f"\"{sentence}\",")
        print()
