#!/usr/bin/env python
# coding: utf-8

# Ripley K-function features on the immuno regions dataset, scored with k-fold classification.

# In[2]:


import gudhi as gd
import numpy as np
from mma import *
from classif_helper import *
from sklearn.neighbors import KernelDensity
from sklearn.svm import SVC
from os.path import expanduser
from os import walk
from pandas import read_csv
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from os.path import exists




# `cpu_count` is only imported from joblib further down the file; import it
# here explicitly so this line does not depend on a star import exporting it.
from joblib import cpu_count
from argparse import ArgumentParser

# Number of CPUs reported by joblib (printed for information).
n_jobs = cpu_count()
print(f"Ncpu = {n_jobs}")

# Command-line interface: -k/--k is later forwarded as `k` to kfold_acc.
p = ArgumentParser()
p.add_argument(
    "-k", "--k", default=5, type=int,
    help="value passed as k to kfold_acc (default: 5)",
)
args = p.parse_args()
print("Arguments", args)


# In[3]:






# In[4]:


# Root directory holding one sub-directory of CSV files per label.
DATASET_PATH = expanduser("~/Datasets/1.5mmRegions/")


def get_regions(dataset_path=None, label_names=("FoxP3", "CD8", "CD68")):
    """Load every file found under each label directory as a scaled array.

    Parameters
    ----------
    dataset_path : str, optional
        Directory containing one sub-directory per label. Defaults to the
        module-level ``DATASET_PATH``.
    label_names : iterable of str, optional
        Names of the label sub-directories to read, in output order.

    Returns
    -------
    X : list of numpy.ndarray
        One array per file: ``np.array(read_csv(file)) / 1500``.
    labels : list of str
        The label (directory name) of each entry of ``X``, in the same order.

    Notes
    -----
    A label directory that does not exist contributes no entries, because
    ``os.walk`` ignores listing errors by default.
    """
    from os.path import join  # local import keeps this block self-contained

    if dataset_path is None:
        dataset_path = DATASET_PATH

    X, labels = [], []
    for label in label_names:
        for root, dirs, files in walk(join(dataset_path, label)):
            # Sort in place so traversal order (and therefore the order of X)
            # is reproducible instead of filesystem-dependent.
            dirs.sort()
            for name in sorted(files):
                # Build the path from `root`, not from the label directory:
                # walk() recurses, so `name` may live in a sub-directory.
                # NOTE(review): the 1500 divisor presumably rescales
                # coordinates of a 1.5 mm region to [0, 1] -- confirm units.
                X.append(np.array(read_csv(join(root, name))) / 1500)
                labels.append(label)
    return X, labels
# Load the regions and their string labels, then encode the labels.
# NOTE(review): LabelEncoder is not imported explicitly in this file; it
# presumably arrives through one of the star imports -- confirm.
X, labels = get_regions()
label_encoder = LabelEncoder()
elabels = label_encoder.fit_transform(labels)


# In[7]:


import ripleyk
from joblib import Parallel, delayed, cpu_count


# In[8]:


from os import makedirs

# Number of radii at which Ripley's K is evaluated. It also keys the cache
# file name, so changing it triggers a fresh computation.
num = 100
radii = list(np.linspace(start=0, stop=1, num=num))

# Features are cached on disk because computing them for every region is slow.
cache_path = f"npy/ripley_immuno_num{num}.npy"
if exists(cache_path):
    to_learn = np.load(cache_path)
else:
    # Create the cache directory up front so a missing folder is detected
    # before the expensive computation rather than at save time.
    makedirs("npy", exist_ok=True)
    # One call per region: first two columns of x are passed as d1/d2, with a
    # bounding radius of 1.
    # NOTE(review): tqdm is not imported explicitly in this file; it presumably
    # arrives through one of the star imports -- confirm.
    to_learn = Parallel(n_jobs=5)(
        delayed(ripleyk.calculate_ripley)(radii, 1, d1=x[:, 0], d2=x[:, 1])
        for x in tqdm(X)
    )
    to_learn = np.array(to_learn)
    np.save(cache_path, to_learn)

# In[ ]:


# Candidate classifiers: a random forest, three SVMs that differ only in
# their kernel, and an XGBoost model with default settings.
rfc = RandomForestClassifier(n_jobs=8, n_estimators=500)
svmpc, svmlc, svmexpc = (
    SVC(kernel=kernel_name) for kernel_name in ("poly", "linear", "rbf")
)
xgbc = XGBClassifier()

# Evaluate every classifier on the Ripley features with the k given on the
# command line.
classifiers = [rfc, svmpc, svmlc, svmexpc, xgbc]
features = np.array(to_learn)
results = kfold_acc(classifiers, features, elabels, k=args.k)




# Append this run's scores to the "results" log file. The context manager
# guarantees the handle is closed even if a write raises.
with open("results", "a") as f:
    # `num` is reported as the number of radii actually used, so this header
    # does not depend on a separately defined `num` variable.
    f.write(f"Immuno + Ripley, num={len(radii)}, k={args.k} : \n")
    for result in results:
        # Format through an f-string so non-string results (e.g. floats)
        # are written instead of raising TypeError.
        f.write(f"{result}\n")
    f.write("\n")
# In[ ]:




