import os

from datasets import load_dataset

# Directory passed to `load_dataset` as its `cache_dir`.
dataset_path = "/dtu/p1/sebulo/data/c4"

# Load the "en" configuration, "train" split, of allenai/c4, downloading and
# caching it under `dataset_path` if it is not already present there.
# On any failure the error is printed and `data` is set to None instead of
# letting the exception propagate.
try:
    # exist_ok=True replaces the previous exists()-then-makedirs() pair, which
    # could raise FileExistsError if another process created the directory in
    # between the check and the call, and would then be misreported below as
    # a download failure.
    os.makedirs(dataset_path, exist_ok=True)
    data = load_dataset("allenai/c4", "en", split="train", cache_dir=dataset_path)
except Exception as e:
    # Deliberately broad: this is a best-effort load at script top level.
    print(f"Failed to download the dataset: {e}")
    data = None
else:
    print("Dataset downloaded and loaded from local copy.")
