import argparse
from ruamel.yaml import YAML
from ruamel.yaml.scalarstring import LiteralScalarString as LSS
import json, os

"""
Functionality to convert SWE-bench style datasets to multiple single task instance YAMLs.
"""


def convert_to_literal_string(d):
    """
    Wrap every multi-line string value of ``d`` in LiteralScalarString.

    The dict is modified in place and nested dicts are handled recursively.
    A string is treated as multi-line when it contains a newline character;
    its line endings are normalised to a bare newline before wrapping.
    Returns the same dict object that was passed in.
    """
    for name in list(d):
        entry = d[name]
        if isinstance(entry, dict):
            # Descend into nested mappings; they are updated in place.
            convert_to_literal_string(entry)
            continue
        if not isinstance(entry, str) or '\n' not in entry:
            continue
        normalised = entry.replace('\r\n', '\n').replace('\r', '\n')
        d[name] = LSS(normalised)
    return d


def save_yaml(instance, filename):
    """
    Save a single task instance as a YAML file.

    Args:
        instance: Mapping describing one task instance. ``repo`` and
            ``problem_statement`` are required (the latter is stored under
            the ``query`` key); ``version``, ``base_commit`` and
            ``environment_setup_commit`` are optional and stored as None
            when absent.
        filename: Path of the YAML file to write; opened in write mode, so
            an existing file is overwritten.

    Raises:
        KeyError: If ``repo`` or ``problem_statement`` is missing.
    """
    data = {
        'repo': instance['repo'],
        'query': instance['problem_statement'],
        'version': instance.get('version'),
        'base_commit': instance.get('base_commit'),
        'environment_setup_commit': instance.get('environment_setup_commit'),
    }
    # Multi-line values are wrapped so they are dumped as literal scalars.
    data = convert_to_literal_string(data)
    yaml = YAML()
    yaml.indent(mapping=2, sequence=4, offset=2)
    # Explicit UTF-8: the locale default encoding may be unable to represent
    # non-ASCII characters that appear in problem statements.
    with open(filename, 'w', encoding='utf-8') as file:
        yaml.dump(data, file)


def convert_dataset_to_yamls(dataset_path, folder=None):
    """
    Convert a single dataset into multiple YAML files, one per task instance.

    Args:
        dataset_path: Path to a JSON file containing an iterable of task
            instances; each instance must provide an ``instance_id`` key,
            which becomes the YAML file's base name.
        folder: Directory the YAML files are written into. When None, the
            bare ``<instance_id>.yaml`` name is used, i.e. the file lands in
            the current working directory.
    """
    # Use a context manager so the dataset file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(dataset_path, encoding='utf-8') as dataset_file:
        data = json.load(dataset_file)
    for d in data:
        filename = f"{d['instance_id']}.yaml"
        if folder is not None:
            filename = os.path.join(folder, filename)
        save_yaml(d, filename)


if __name__ == '__main__':
    # Command-line entry point: convert the dataset at `dataset_path` into
    # per-instance YAML files, optionally placing them under `--folder`.
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset_path', type=str, help='Path to dataset to convert to single instance YAMLs')
    parser.add_argument('--folder', type=str, help='Folder to save YAMLs to', default=None)
    args = parser.parse_args()

    # --folder is optional and defaults to None; os.path.exists(None) raises
    # TypeError, so only touch the filesystem when a folder was given.
    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    if args.folder is not None:
        os.makedirs(args.folder, exist_ok=True)

    convert_dataset_to_yamls(args.dataset_path, args.folder)