#pragma once
#ifndef NO_MANUAL_VECTORIZATION
#ifdef __SSE__
#define USE_SSE
#ifdef __AVX__
#define USE_AVX
#endif
#endif
#endif


// TODO: Include guards
#include "HashBasedBooleanSet.h"
#include "GorderPriorityQueue.h"
#include "ExplicitSet.h"

#include <vector>
#include <queue> // for std::priority_queue
#include <algorithm> // for std::min
#include <utility> // for std::pair
#include <chrono>
#include <unordered_map>
#include <map>
#include <limits>

#include <random> // for benchmark testing

#include <iomanip>

#include "SpaceInterface.h"


#include <cstring>

// TODO: For cache reasons consider moving the data partition away from the

// TODO: change greedysearch function signature to void greedysearch(internal_heap_loc_t& cur_node, distance_t& cur_dist)
// that way we don't pay the extra repeat distance function evaluation costs
// TODO: consider abstracting the internal heap into a NodeHeap class

// TODO: Change all the duplicated functions into template functions that accept an ENUM NODE_TYPE: either HIGHWAY_NODE or DATA_NODE
// TODO: Write my own priority queue, which supports iteration


template<typename distance_t, typename label_t>
class HNSW {
    public:
        typedef unsigned int internal_heap_loc_t;
        typedef std::pair< distance_t, internal_heap_loc_t > d_node_t;
        typedef std::pair< distance_t, label_t > d_label_t;

        struct CompareNodes {
            constexpr bool operator()(d_node_t const &a, d_node_t const &b)
                                        const noexcept {return a.first < b.first;} 
        };
        typedef std::priority_queue< d_node_t , std::vector< d_node_t >, CompareNodes > PriorityQueue;

        typedef ExplicitSet VisitedSet;

    private:
        // Algorithm parameters
        size_t M; // number of links for bottom level
        size_t M_highway; // number of links for higher, "highway navigation" style levels

        // Memory layout parameters
        char* index_memory; // flat memory space for all nodes and links
        char* data_partition; // partition of index memory reserved for DataNodes
        char* highway_partition; // partition of index memory reserved for HighwayNodes

        size_t max_num_data_nodes; // maximum number of nodes in the bottom (full kNN graph) level
        size_t max_num_highway_nodes; // maximum number of nodes in higher levels

        size_t num_levels; // number of levels in the heirarchy

        size_t cur_num_data_nodes; // current number of nodes in the bottom (full kNN graph) level
        size_t cur_num_highway_nodes; // current number of nodes in higher levels

        size_t data_size; // size of a data point (for this particular SpaceInterface)
        size_t data_node_size; // size of a node in the lowest level
        size_t highway_node_size; // size of a node in the upper (filtering) levels

        // should probably rename to entry_node_ID to match semantics
        internal_heap_loc_t entry_node; // entry node for the whole graph search
        size_t entry_node_level; // level (so far) of the entry node

        DISTFUNC<distance_t> distance; // call-by-pointer distance function

        void* distance_param;
        VisitedSet is_visited;
        // (I still want to do this at compile-time not call-by-pointer)
        // there are only a few distance functions, and they're all short so it won't lead to increased code size


        public: // function implementations

HNSW(SpaceInterface<distance_t> *space, std::vector<size_t> &level_populations, size_t _M):
    max_num_data_nodes(0), max_num_highway_nodes(0), cur_num_data_nodes(0), cur_num_highway_nodes(0)
    {
    // M = 2*_M;
    M = 2*_M;
    M_highway = _M;
    // TODO: Check validity of inputseneccccrbifdnblrjdtnerhkgueldubjcirjbiulfggu
    

    // set the maximum number of nodes, and number of "highway" nodes
    // highway nodes are the nodes at the higher levels of the heirarchy
    // we call them highway nodes because they make long-range connections between
    // smaller neighborhoods
    num_levels = level_populations.size();
    for (int i = 0; i < level_populations.size(); i++){
        max_num_data_nodes += level_populations[i];
        // each "highway" node at level i (level 0 is excluded) is also a member of level i-1
        // so a node at level i will be represented i different times
        max_num_highway_nodes += i*level_populations[i];
    }

    is_visited = VisitedSet(std::max(max_num_data_nodes,max_num_highway_nodes)+1);

    // TODO: refactor SpaceInterface method names
    // TODO: Fix this whole SpaceInterface bullshit mess
    distance_param = space->get_dist_func_param(); 
    distance = space->get_dist_func();

    data_size = space->get_data_size(); 
    data_node_size = space->get_data_size() + sizeof(internal_heap_loc_t)*M + sizeof(label_t);
    highway_node_size = sizeof(internal_heap_loc_t)*(M_highway + 2);

    size_t index_memory_size = highway_node_size*max_num_highway_nodes + data_node_size*max_num_data_nodes;
    index_memory = new char[index_memory_size];

    data_partition = index_memory;
    highway_partition = index_memory + data_node_size*max_num_data_nodes;
}

HNSW(SpaceInterface<distance_t> *space, std::string& filename):
    max_num_data_nodes(0), max_num_highway_nodes(0), cur_num_data_nodes(0), cur_num_highway_nodes(0), index_memory(NULL)
    {
		load(filename, space);
}




~HNSW() {
    delete index_memory;
}

inline internal_heap_loc_t* highwayNode(const internal_heap_loc_t highway_node_ID) const {
    void* node_location = highway_partition + highway_node_size*highway_node_ID;
    return reinterpret_cast<internal_heap_loc_t*>(node_location);
}

inline void* highwayNodeData(const internal_heap_loc_t highway_node_ID) const {
    internal_heap_loc_t data_node_ID = *highwayNode(highway_node_ID);
    return dataNodeData(data_node_ID);
}

inline internal_heap_loc_t* highwayNodeChild(const internal_heap_loc_t highway_node_ID) const {
    return highwayNode(highway_node_ID)+1;
}

inline internal_heap_loc_t* highwayNodeLinks(const internal_heap_loc_t highway_node_ID) const {
    return highwayNode(highway_node_ID)+2;
}

inline void* dataNodeData(const internal_heap_loc_t data_node_ID) const {
    return data_partition + data_node_ID*data_node_size;
}

inline internal_heap_loc_t* dataNodeLinks(const internal_heap_loc_t data_node_ID) const {
    return reinterpret_cast<internal_heap_loc_t*>(data_partition + data_node_ID*data_node_size + data_size);
}

label_t* dataNodeLabel(const internal_heap_loc_t data_node_ID) const {
    return reinterpret_cast<label_t*>(data_partition + data_node_ID*data_node_size + data_size + sizeof(internal_heap_loc_t)*M);
}

void printDataNode(const internal_heap_loc_t data_node_ID) const {
    void* node_data = dataNodeData(data_node_ID);
    internal_heap_loc_t* node_links = dataNodeLinks(data_node_ID);
    label_t* node_label = dataNodeLabel(data_node_ID);
    std::cout<<"DataNode "<<data_node_ID<<" @"<<static_cast<void*>(node_data)<<": ";
    std::cout<<"[data] [";
    for (int i = 0; i < M; i++){
        if (node_links[i] == data_node_ID) break;
        std::cout<<node_links[i]; if(i < M-1){std::cout<<",";}
    }
    std::cout<<"] ["<<*node_label<<"]"<<std::endl;
}

void printHighwayNode(const internal_heap_loc_t highway_node_ID) const {
    // schema: [ptr to data] [ptr to child node] [list of edges (links)]
    internal_heap_loc_t* node_location = highwayNode(highway_node_ID);
    std::cout<<"HighwayNode "<<highway_node_ID<<" @"<<static_cast<void*>(node_location)<<": ";
    std::cout<<"["<<node_location[0]<<"] ["<<*highwayNodeChild(highway_node_ID)<<"] [";
    internal_heap_loc_t* highway_node_links = highwayNodeLinks(highway_node_ID);
    for (int m = 0; m < M_highway; m++){
        // std::cout<<highway_node_links[m]; if(m < M_highway-1){std::cout<<",";}
        if (highway_node_links[m] == highway_node_ID) break;
        std::cout<<*dataNodeLabel(*highwayNode(highway_node_links[m])); if(m < M_highway-1){std::cout<<",";}
    }
    std::cout<<"]"<<std::endl;
}



int allocateNode(const void* data_ptr, const label_t& label, const size_t level, 
                       internal_heap_loc_t& data_node_ID, internal_heap_loc_t& highway_node_ID){
    // allocates a node on the data partition of the internal memory heap
    // If level > 0, also allocates a highway node on the highway partition
    // note: this does not call malloc / new or anything. It just uses the pre-allocated memory

    // Input checking: Fail if we haven't allocated enough space for the new node
    if (cur_num_data_nodes >= max_num_data_nodes){ return 1; }
    if (level > 0){ // fail if we haven't allocated enough highway nodes
        if ( (cur_num_highway_nodes + level) >= max_num_highway_nodes){ return 2; }
    }
    // fail if we try to add to a non-existent level that we haven't allocated space for
    if (level < 0 || level >= num_levels){ return 3; }

    data_node_ID = cur_num_data_nodes;
    highway_node_ID = cur_num_highway_nodes;

    cur_num_data_nodes++;
    cur_num_highway_nodes += level;

    // add the DataNode to the data partition
    // This is preallocated memory heap (level 0, data storage partition)
    void* node_location = dataNodeData(data_node_ID);
    std::memcpy(node_location, data_ptr, data_size); // data

    // initialize all the links to be self-loops (i.e. unused).
    internal_heap_loc_t* node_links = dataNodeLinks(data_node_ID);

    for (int i = 0; i < M; i++){
        // The syntax here is a little vague - node_links is a pointer to the links for the node at node_location
        // The links are of type internal_heap_loc_t, since they are locations of other nodes on the internal pre-allocated heap
        node_links[i] = data_node_ID;
    }
    label_t* node_label = dataNodeLabel(data_node_ID); 
    *(node_label) = label;

    // if the node is a highway node, then allocate the higher-level links
    if (level > 0){
        for (int i = 1; i <= level; i++){
            // Build the highway node for each level, starting at level 1
            internal_heap_loc_t* highway_node_links = highwayNode(highway_node_ID + (i-1));
            highway_node_links[0] = data_node_ID; // ref to data storage location
            if (i == 1){ // if we are in level 1, our child in level 0 lives in the data partition 
                highway_node_links[1] = data_node_ID;
            }
            else { // our child is the node we built in the previous trip through this loop
                highway_node_links[1] = highway_node_ID + (i-2); // ref to child location
            }
            // Initialize all links to be unused. We use self-loops to denote this
            for (int m = 0; m < M_highway; m++){
                highway_node_links[2+m] = highway_node_ID + (i-1);
            }
        }
    }
    return 0;
}


int add(const void* data_ptr, const label_t& label, const size_t level, const int ef_construction) {
    // TODO: check if label exists in graph. If so, then update it rather than add it
    internal_heap_loc_t data_node_ID = 0;
    internal_heap_loc_t highway_node_ID = 0;

    int err = allocateNode(data_ptr, label, level, data_node_ID, highway_node_ID);

    if (err != 0){ return err; }

    if (cur_num_data_nodes > 1){ // we already have a valid entry point (stored in entry_node)
        internal_heap_loc_t cur_node = entry_node;
        // This for loop is from Alg 1 in HNSW paper, line: for l_c <- L ... l+1
        for (int i = entry_node_level; i > level; i--){
            // N_DISTANCE_EVALS = 0;
            cur_node = greedySearchHighwayLevel(cur_node, data_ptr);
            cur_node = *(highwayNodeChild(cur_node));
        }
        for (int i = std::min(level, entry_node_level); i > 0; i--) {
            // the (+level-1) part is because highway_node_ID+x contains the HighwayNode for level L(x+1)
            cur_node = connectHighwayNode(cur_node, highway_node_ID + i-1, ef_construction);
            // doing a child link traversal descends a level
            cur_node = *(highwayNodeChild(cur_node));
        }
        // now make the connections in level 0. cur_node is now a DataNode address rather than a HighwayNode address
        // std::clog<<cur_node<<"@"<<distance(dataNodeData(cur_node),data_ptr,distance_param)<<std::endl;

		/* TEST: to see if other code has a bug, set cur_node entry point to random
		int n_initializations = 100;
		int step_size = (cur_num_data_nodes-1) / n_initializations;
		if (step_size <= 0){ step_size = 1; }

		cur_node = 0;
		distance_t min_dist = std::numeric_limits<distance_t>::max();

		for( internal_heap_loc_t node = 0; node < (cur_num_data_nodes-1); node += step_size){
			distance_t dist = distance(data_ptr, dataNodeData(node), distance_param);
			if (dist < min_dist){
				min_dist = dist; 
				cur_node = node;
			}
		}
		// END TEST */ 

		connectDataNode(cur_node, data_node_ID, ef_construction);
        if (level > entry_node_level) {
            if (level == 0){ entry_node = data_node_ID; }
            else { entry_node = highway_node_ID + level-1; }
            entry_node_level = level;
        }
    }
    else { // this is the very first node, we don't have an entry point yet
        if (level == 0){ entry_node = data_node_ID; }
        else { entry_node = highway_node_ID + level-1; }
        entry_node_level = level;
    }
    return 0;
}


internal_heap_loc_t greedySearchHighwayLevel(const internal_heap_loc_t entry_highway_node, const void* query){
    // start_node should be a highway node, otherwise this function is not defined
    // N_DISTANCE_EVALS++;
    distance_t cur_distance = distance(query, highwayNodeData(entry_highway_node), distance_param);
    internal_heap_loc_t cur_highway_node = entry_highway_node;
    // we will probably have to skip self-loops here

    bool cur_node_is_optimal = false;
    while (!cur_node_is_optimal) {
        internal_heap_loc_t* highway_node_links = highwayNodeLinks(cur_highway_node);
        // std::cout<<"Current node ";printHighwayNode(cur_highway_node);

        cur_node_is_optimal = true;
        internal_heap_loc_t self_loop_node = cur_highway_node;
        for (int i = 0; i < M_highway; i++){
            if (highway_node_links[i] != self_loop_node){
                // N_DISTANCE_EVALS++;
                distance_t candidate_distance = distance(query, highwayNodeData(highway_node_links[i]), distance_param);
                if (candidate_distance < cur_distance){
                    cur_node_is_optimal = false;
                    cur_distance = candidate_distance;
                    cur_highway_node = highway_node_links[i];
                }
            }
        }
    }
    return cur_highway_node;
}

internal_heap_loc_t connectHighwayNode(const internal_heap_loc_t entry_highway_node, const internal_heap_loc_t new_highway_node, const int ef_param){
    PriorityQueue neighbors = searchHighwayLevel(entry_highway_node, highwayNodeData(new_highway_node), ef_param);
    selectHighwayNeighbors(neighbors, M_highway);

    std::vector<internal_heap_loc_t> selected_neighbors; 
    selected_neighbors.reserve(M_highway);
    while(neighbors.size() > 0){
        selected_neighbors.push_back(neighbors.top().second);
        neighbors.pop();
    }

    internal_heap_loc_t next_level_entry_node = entry_highway_node;
    // this was a bug in the original hnswlib implementation: it returned 
    // the furthest element rather than the nearest one
    // comment out to get the version from the actual HNSW code
    if (selected_neighbors.size() > 0){next_level_entry_node = selected_neighbors.back();}

    internal_heap_loc_t* new_node_links = highwayNodeLinks(new_highway_node);
    for (int i = 0; i < selected_neighbors.size(); i++){
        internal_heap_loc_t neighbor_node = selected_neighbors[i];
        // add the link to the current new node
        new_node_links[i] = neighbor_node;
        // now for the tricky part: doing the back-connections
        internal_heap_loc_t* neighbor_node_links = highwayNodeLinks(neighbor_node);
        
        bool is_inserted = false;
        for (int j = 0; j < M_highway; j++){
            if (neighbor_node_links[j] == neighbor_node){
                // if there is a self-loop, we replace the self-loop with the desired link
                neighbor_node_links[j] = new_highway_node;
                is_inserted = true;
                break;
            }
        }
        if (!is_inserted){
            // we have to replace one of the links - this will disconnect the old neighbor and introduce directional links
            // therefore, we must do this very carefully:
            // N_DISTANCE_EVALS++;
            distance_t max_dist = distance(highwayNodeData(new_highway_node), highwayNodeData(neighbor_node), distance_param);
            // std::cout<<"max distance = "<<max_dist<<std::endl;
            PriorityQueue candidates;
            candidates.emplace(max_dist, new_highway_node);

            for (int j = 0; j < M_highway; j++){
                // iterate over the destination node's links
                // OLD: distance(highwayNodeData(new_highway_node), highwayNodeData(neighbor_node_links[j]), distance_param),
                // std::cout<<"Checking node "<<neighbor_node_links[j]<<std::endl; 
                if (neighbor_node_links[j] != neighbor_node){
                    // N_DISTANCE_EVALS++;
                    candidates.emplace(
                        distance(highwayNodeData(neighbor_node), highwayNodeData(neighbor_node_links[j]), distance_param),
                        neighbor_node_links[j]);
                }
            }
            // std::cout<<"Selecting neighbors from"<<candidates.size()<<" options"<<std::endl;
            selectHighwayNeighbors(candidates, M_highway);
            // std::cout<<candidates.size()<<"neighbors selected"<<std::endl;

            int j = 0; 
            while( candidates.size() > 0){
                neighbor_node_links[j] = candidates.top().second;
                candidates.pop();
                j++;
            }
            while(j < M_highway){
                neighbor_node_links[j] = neighbor_node;
                j++;
            }
        }
    }
    return next_level_entry_node; 
}


void connectDataNode(const internal_heap_loc_t entry_data_node, const internal_heap_loc_t new_data_node, const int ef_param){
    PriorityQueue neighbors = searchDataLevel(entry_data_node, dataNodeData(new_data_node), ef_param);
	
	// PriorityQueue n2 = neighbors;
	// std::clog<<"Search Results: "<<std::endl;
	// while (n2.size() > 0){
	// 	std::clog<<"\t"<<n2.top().second<<" @ "<<n2.top().first<<std::endl;
	// 	n2.pop();
	// }
	
	selectDataNeighbors(neighbors, M_highway);
    std::vector<internal_heap_loc_t> selected_neighbors; 
    selected_neighbors.reserve(M);

	// std::clog<<"Connecting: "<<new_data_node<<std::endl;
	// std::clog<<"Connecting: "<<new_data_node<<"-> [";
    while(neighbors.size() > 0){
        selected_neighbors.push_back(neighbors.top().second);
		// std::clog<<"\t"<<neighbors.top().second<<" @ "<<neighbors.top().first<<std::endl;
		// std::clog<<neighbors.top().second<<" ";
        neighbors.pop();
    }
	// std::clog<<"]"<<std::endl;

    internal_heap_loc_t* new_node_links = dataNodeLinks(new_data_node);
    for (int i = 0; i < selected_neighbors.size(); i++){
        internal_heap_loc_t neighbor_node = selected_neighbors[i];
        // add the link to the current new node
        new_node_links[i] = neighbor_node;
        // now for the tricky part: doing the back-connections
        internal_heap_loc_t* neighbor_node_links = dataNodeLinks(neighbor_node);
        bool is_inserted = false;

        for (int j = 0; j < M; j++){
            if (neighbor_node_links[j] == neighbor_node){
                // if there is a self-loop, we replace the self-loop with the desired link
                neighbor_node_links[j] = new_data_node;
                is_inserted = true;
                break;
            }
        }
        if (!is_inserted){
            // we have to replace one of the links - this will disconnect the old neighbor and introduce directional links
            // therefore, we must do this very carefully:
            // N_DISTANCE_EVALS++;
            distance_t max_dist = distance(dataNodeData(new_data_node), dataNodeData(neighbor_node), distance_param);
            PriorityQueue candidates;
            candidates.emplace(max_dist, new_data_node);

            for (int j = 0; j < M; j++){
                // iterate over the destination node's links
                if (neighbor_node_links[j] != neighbor_node){
                    // if not a self-loop
                    // N_DISTANCE_EVALS++;
                    candidates.emplace(
                        distance(dataNodeData(neighbor_node), dataNodeData(neighbor_node_links[j]), distance_param),
                        neighbor_node_links[j]);
                }
            }
            selectDataNeighbors(candidates, M);

            int j = 0; 
			// std::clog<<"Connecting: "<<neighbor_node<<"-> [";
            while( candidates.size() > 0){
                neighbor_node_links[j] = candidates.top().second;
				// std::clog<<candidates.top().second<<" ";
                candidates.pop();
                j++;
            }
			// std::clog<<"]"<<std::endl;
            while(j < M){
                neighbor_node_links[j] = neighbor_node;
                j++;
            }
        }
    }
}


void selectHighwayNeighbors(PriorityQueue& neighbors, const int M){
    // uses the heuristic from getNeighborsByHeuristic2 (from original HNSW code)
    // std::cout<<"Selecting highway neighbors from "<<neighbors.size()<<" candidates: [";
    if (neighbors.size() < M){ return; }

    PriorityQueue candidates;
    std::vector<d_node_t> saved_candidates;
    while(neighbors.size() > 0){
        // std::cout<<*dataNodeLabel(*highwayNode(neighbors.top().second))<<"("<<neighbors.top().first<<"),";
        candidates.emplace(-neighbors.top().first, neighbors.top().second);
        neighbors.pop();
    }
    // std::cout<<std::endl;
    while(candidates.size() > 0){
        if (saved_candidates.size() >= M){ break; }
        d_node_t current_pair = candidates.top();
        candidates.pop();

        bool should_keep_candidate = true; 
        for (d_node_t second_pair : saved_candidates) {
            // N_DISTANCE_EVALS++;
            distance_t cur_dist = distance(highwayNodeData(second_pair.second), highwayNodeData(current_pair.second), distance_param); 
            if (cur_dist < (-current_pair.first) ) {
                should_keep_candidate = false; 
                break; 
            }
        }
        if (should_keep_candidate){
            saved_candidates.push_back(current_pair);
        }
    }
    // implement my own priority queue, so we don't have to have this stupid vector thing
    // and can just iterate over / add to the neighbors PriorityQueue
    for (d_node_t current_pair : saved_candidates){
        neighbors.emplace(-current_pair.first, current_pair.second);
    }
}


void selectDataNeighbors(PriorityQueue& neighbors, const int M){
    // uses the heuristic from getNeighborsByHeuristic2 (from original HNSW code)
    // std::cout<<"Considering "<<neighbors.size()<<" different neighbors (>? "<<M<<")"<<std::endl;
    // consider using a VisitedList in here
    if (neighbors.size() < M){ return; }

    PriorityQueue candidates;
    std::vector<d_node_t> saved_candidates;
	// std::clog<<"Candidates:"<<std::endl; 
    while(neighbors.size() > 0){
        candidates.emplace(-neighbors.top().first, neighbors.top().second);
		// std::clog<<"\t"<<neighbors.top().second<<"@"<<neighbors.top().first<<std::endl;
        neighbors.pop();
    }

    while(candidates.size() > 0){
        if (saved_candidates.size() >= M){ break; }
        d_node_t current_pair = candidates.top();
        candidates.pop();

        bool should_keep_candidate = true; 
        for (d_node_t second_pair : saved_candidates) {
            // N_DISTANCE_EVALS++;
            distance_t cur_dist = distance(dataNodeData(second_pair.second), dataNodeData(current_pair.second), distance_param); 
            if (cur_dist < (-current_pair.first) ) {
                should_keep_candidate = false; 
                break; 
            }
        }
        if (should_keep_candidate){
            saved_candidates.push_back(current_pair);
        }
    }
    // implement my own priority queue, so we don't have to have this stupid vector thing
    // and can just iterate over / add to the neighbors PriorityQueue
	// std::clog<<"Selection:"<<std::endl;
    for (d_node_t current_pair : saved_candidates){
		// std::clog<<"\t"<<current_pair.second<<"@"<<-current_pair.first<<std::endl;
        neighbors.emplace(-current_pair.first, current_pair.second);
    }
}


PriorityQueue searchHighwayLevel(const internal_heap_loc_t entry_highway_node, const void* query, const int ef_param){

    PriorityQueue neighbors; // W, in the paper
    PriorityQueue candidates; // C, in the paper
    // VisitedSet is_visited(cur_num_highway_nodes+1); // v, in the paper
    // test: move malloc outside
    // result: it doesn't change much
    is_visited.clear();

    // the +1 is because VisitedSet is_visited[0] will ALWAYS return true 
    // (this mind-bending behavior took me like 2 days to figure out - amazing)

    // N_DISTANCE_EVALS++;
    distance_t dist = distance(query, highwayNodeData(entry_highway_node), distance_param);
    distance_t max_dist = dist;


    candidates.emplace(-dist, entry_highway_node); // the minus sign is because we want fast access to nearest (lowest distance)
    neighbors.emplace(dist, entry_highway_node); // no minus sign because we want fast access to furthest neighbor

    is_visited.insert(entry_highway_node+1);

    while (!candidates.empty()) {
        // get nearest element from candidates
        d_node_t highway_d_node = candidates.top();
        if ((-highway_d_node.first) > max_dist){
            break;
        }
        candidates.pop();
        internal_heap_loc_t* highway_node_links = highwayNodeLinks(highway_d_node.second);
        for (int i = 0; i < M_highway; i++){
            if (!is_visited[highway_node_links[i]+1]){
                // TODO? Break this loop body into a function:
                // decideToKeep(data pointer, data ID, etc) (use better names)
                is_visited.insert(highway_node_links[i]+1);
                // d_node_t furthest_neighbor = neighbors.top();
                // #ifdef USE_SSE
                //     _mm_prefetch((char*)highwayNodeData(highway_node_links[i]), _MM_HINT_T0);
                // #endif
                // N_DISTANCE_EVALS++;
                dist = distance(query, highwayNodeData(highway_node_links[i]), distance_param);
                if (neighbors.size() < ef_param || dist < max_dist) {
                    candidates.emplace(-dist, highway_node_links[i]);
                    neighbors.emplace(dist, highway_node_links[i]);
                    if (neighbors.size() > ef_param){
                        neighbors.pop();
                    }
                    if (!neighbors.empty()){
                        max_dist = neighbors.top().first;
                    }
                }
            }
        }
    }
    return neighbors;
}

// I decided to have two separate versions of searchLevel, even though this results in code duplication, because 
// if we were to have a generic searchLevel, it would have to accept a level parameter. And this would
// be wrong, because if we called searchLevel(hn, query, 3) for a highwayNode hn in level 1, it will actually
// search level 1 and not level 3. This would cause bugs later. Plus, the searchDataLevel function can now 
// be optimized separately with prefetch hints and all that stuff, since it's the only one that gets called at query time

PriorityQueue searchDataLevel(const internal_heap_loc_t entry_data_node, const void* query, const int ef_param){

    PriorityQueue neighbors; // W, in the paper
    PriorityQueue candidates; // C, in the paper

    is_visited.clear();

    // N_DISTANCE_EVALS++;
    distance_t dist = distance(query, dataNodeData(entry_data_node), distance_param);

    distance_t max_dist = dist;


    candidates.emplace(-dist, entry_data_node); // the minus sign is because we want fast access to nearest (lowest distance)
    neighbors.emplace(dist, entry_data_node); // no minus sign because we want fast access to furthest neighbor
    is_visited.insert(entry_data_node+1);

    while (!candidates.empty()) {
        // get nearest element from candidates
        d_node_t d_node = candidates.top();
        if ((-d_node.first) > max_dist){
            break;
        }
        candidates.pop();
        internal_heap_loc_t* d_node_links = dataNodeLinks(d_node.second);

        for (int i = 0; i < M; i++){
            if (!is_visited[d_node_links[i]+1]){
                is_visited.insert(d_node_links[i]+1);

                #ifdef USE_SSE
                    _mm_prefetch((char*)dataNodeData(d_node_links[i]), _MM_HINT_T0);
                    // is_visited.prefetch(d_node_links[i]); // turns out that this does not help much
                #endif
                dist = distance(query, dataNodeData(d_node_links[i]), distance_param);

                if (neighbors.size() < ef_param || dist < max_dist) {
                    candidates.emplace(-dist, d_node_links[i]);
                    neighbors.emplace(dist, d_node_links[i]);
                    if (neighbors.size() > ef_param){
                        neighbors.pop();
                    }
                    if (!neighbors.empty()){
                        max_dist = neighbors.top().first;
                    }
                }
            }
        }
    }
    return neighbors;
}




std::vector< d_label_t > search(const void* query, const int k, const int ef_search){

    std::vector< d_label_t > results;
    if (cur_num_data_nodes <= 0) return results;

	/* Option 1: Use hierarchy
    internal_heap_loc_t cur_node = entry_node;
    for (int l = entry_node_level; l > 0; l--){
        cur_node = greedySearchHighwayLevel(cur_node, query);
        cur_node = *(highwayNodeChild(cur_node)); // this was a bug
        // cur_node = *(highwayNodeLinks(cur_node)); // this was the bug
    }
	// */

	/* Option 2: Random entry point
    // picking a selection of random entry points works fine, honestly
	std::random_device rd;     // only used once to initialise (seed) engine
	std::mt19937 rng(rd());    // random-number engine used (Mersenne-Twister in this case)
	std::uniform_int_distribution<int> uni(0,cur_num_data_nodes); // guaranteed unbiased
	internal_heap_loc_t cur_node = uni(rng);
	// */

	// /* Option 3: Assumes reordered graph
	const int n_probes = 100;
	int step_size = cur_num_data_nodes / n_probes;
	internal_heap_loc_t cur_node = 0;
    distance_t dist = distance(query, dataNodeData(cur_node), distance_param);
    distance_t min_dist = dist;
	// std::clog<<min_dist<<"->";

	for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node += step_size){
		dist = distance(query, dataNodeData(node), distance_param); 
		if (dist < min_dist){
			min_dist = dist; 
			cur_node = node;
		}
	}
	// std::clog<<cur_node<<"@"<<min_dist<<std::endl;
	// */
    PriorityQueue neighbors = searchDataLevel(cur_node, query, ef_search);

    while (neighbors.size() > k){
		// std::clog<<"\t [x] "<<neighbors.top().second<<" @ "<<neighbors.top().first<<std::endl;
        neighbors.pop();
    }
    while (neighbors.size() > 0) {
        d_node_t res = neighbors.top();
        results.push_back(d_label_t(res.first, *dataNodeLabel(res.second)));
		// std::clog<<"\t "<<neighbors.top().second<<" @ "<<neighbors.top().first<<std::endl;
        neighbors.pop();
    }
    std::sort(results.begin(), results.end(), [](const d_label_t &left, const d_label_t &right) 
            { return left.second < right.second; });
    
    // std::cout<<std::endl;
    return results;
}

int gorder_objective(int w){

    // Create table of in-degrees
    std::unordered_map<internal_heap_loc_t,std::vector<internal_heap_loc_t> > indegree_table;
    for (int node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                indegree_table[node_edges[m]].push_back(node);
            }
        }
    }
    // compute the objective function score
    int F = 0;
    for (int node = 0; node < cur_num_data_nodes; node++){
        for (int j = node - w; j < node; j++){
            if (j < 0) continue;
            // compute S_s(u,v)
            for (auto& x : indegree_table[node]){
                for (auto& y : indegree_table[j]){
                    if (x == y){
                        F++;
                    }
                }
            }
            // compute S_n(u,v)
            internal_heap_loc_t* node_edges = dataNodeLinks(node);
            for (int m = 0; m < M; m++){
                if (node_edges[m] == j){
                    F++;
                    break;
                }
            }
            internal_heap_loc_t* neighbor_edges = dataNodeLinks(node);
            for (int m = 0; m < M; m++){
                if (neighbor_edges[m] == node){
                    F++;
                    break;
                }
            }
        }
    }
    return F;
}

std::vector<internal_heap_loc_t> gorder(int w){

    // Find entry node for gorder
    internal_heap_loc_t seed_node = entry_node;
    for (int l = entry_node_level; l > 0; l--){
        seed_node = *(highwayNodeChild(seed_node));
    }

    // Create table of in-degrees
    std::unordered_map<internal_heap_loc_t,std::vector<internal_heap_loc_t> > indegree_table;
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                // this was a bug (i think):
                // indegree_table[node].push_back(node_edges[m]);
                indegree_table[node_edges[m]].push_back(node);
            }
        }
    }

    // do the actual gorder
    GorderPriorityQueue<internal_heap_loc_t> Q(cur_num_data_nodes);
    std::vector<internal_heap_loc_t> P(cur_num_data_nodes, 0);
    Q.increment(seed_node);
    P[0] = Q.pop();

    // for i = 1 to N:
    for (int i = 1; i < cur_num_data_nodes; i++){
        internal_heap_loc_t v_e = P[i-1];
        // ve = P[i-1] # the newest node in window
        // for each node u in out-edges of ve:
        internal_heap_loc_t* v_e_edges = dataNodeLinks(v_e);
        for (int m = 0; m < M; m++){
            if (v_e_edges[m] != v_e){
                // if u in Q, increment priority of u
                Q.increment(v_e_edges[m]);
            }
        }

        // for each node u in in-edges of ve: 
        for (internal_heap_loc_t& u : indegree_table[v_e]){
            // if u in Q, increment priority of u
            Q.increment(u);
            // for each node v in out-edges of u:
            internal_heap_loc_t* u_edges = dataNodeLinks(u);
            for (int m = 0; m < M; m++){
                if (u_edges[m] != u){
                    // if v in Q, increment priority of v
                    Q.increment(u_edges[m]);
                }
            }
        }

        if (i > w+1){
            internal_heap_loc_t v_b = P[i-w-1];
            // for each node u in out-edges of vb:
            internal_heap_loc_t* v_b_edges = dataNodeLinks(v_b);
            for (int m = 0; m < M; m++){
                if (v_b_edges[m] != v_b){
                    // if u in Q, decrement priority of u
                    Q.decrement(v_b_edges[m]);
                }
            }
            // for each node u in in-edges of vb:
            for (internal_heap_loc_t& u : indegree_table[v_b]){
                // if u in Q, increment priority of u
                Q.increment(u); // this could be a bug?
                // for each node v in out-edges of u:
                internal_heap_loc_t* u_edges = dataNodeLinks(u);
                for (int m = 0; m < M; m++){
                    if (u_edges[m] != u){
                        // if v in Q, decrement priority of v
                        Q.decrement(u_edges[m]);
                    }
                }
            }
        }
        P[i] = Q.pop();
    }

    // for(int n = 0; n < cur_num_data_nodes; n++){
    //     // void* node_data = dataNodeData(data_node_ID);
    //     internal_heap_loc_t* node_links = dataNodeLinks(n);
    //     label_t* node_label = dataNodeLabel(n);
    //     std::cout<<"DataNode "<<*node_label<<"@{"<<n<<"}->("<<P[n]<<"): ";
    //     std::cout<<"[";
    //     for (int i = 0; i < M; i++){
    //         if (node_links[i] == n) continue;
    //         label_t* neighbor_label = dataNodeLabel(node_links[i]);

    //         std::cout<<*neighbor_label<< "@{"<<node_links[i]<<"}->("<<P[node_links[i]]<<") ";
    //     }
    //     std::cout<<"]"<<std::endl;
    // }
    // now we have a mapping P[i] -> node whose new label is i
    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    // now we have a mapping Pinv[i] -> new label of node i
    return Pinv;
}


std::vector<internal_heap_loc_t> indegree_order(){

    std::map< internal_heap_loc_t, int, std::greater<int> > indegrees;
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                auto it = indegrees.find(node_edges[m]);
                if (it == indegrees.end()){
                    indegrees[node_edges[m]] = 1;
                }
                else {
                    indegrees[node_edges[m]]++;
                }
            }
        }
    }

    std::vector<internal_heap_loc_t> P(cur_num_data_nodes, 0);

    int idx = 0; 
    for (const auto& [key,value] : indegrees){
        P[idx] = key;
        idx++;
    }

    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    return Pinv;
}

std::vector<internal_heap_loc_t> outdegree_order(){

    std::map< internal_heap_loc_t, int, std::greater<int> > outdegrees;
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        outdegrees[node] = 0;
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                outdegrees[node]++;
            }
        }
    }

    std::vector<internal_heap_loc_t> P(cur_num_data_nodes, 0);

    int idx = 0;
    for (const auto& [key,value] : outdegrees){
        P[idx] = key;
        idx++;
    }

    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    return Pinv;
}

std::vector<internal_heap_loc_t> hubsort_order(){
    // sorted by in-degree, since kNN is a push implementation
    std::vector< int > indegrees(cur_num_data_nodes, 0);
    double mean_deg = 0; 
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                indegrees[node_edges[m]] += 1;
                mean_deg += 1;
            }
        }
    }

    mean_deg = mean_deg / cur_num_data_nodes;

    std::vector< std::pair<internal_heap_loc_t,int> > sorted_hubs;
    std::vector<internal_heap_loc_t> non_hubs;
    std::vector<internal_heap_loc_t> P;

    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        if (indegrees[node] >= mean_deg){
            sorted_hubs.push_back({node, indegrees[node]});
        }
        else{
            non_hubs.push_back(node);
        }
    }

    std::sort(sorted_hubs.begin(), sorted_hubs.end(), [](const std::pair<internal_heap_loc_t,int> &left, const std::pair<internal_heap_loc_t,int> &right)
        { return left.second > right.second; });

    for (int i = 0; i < sorted_hubs.size(); i++){
        P.push_back( sorted_hubs[i].first );
    }
    for (int i = 0; i < non_hubs.size(); i++){
        P.push_back( non_hubs[i] );
    }

    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    return Pinv;
}

std::vector<internal_heap_loc_t> hubcluster_order(){
    // sorted by in-degree, since kNN is a push implementation
    std::vector< int > indegrees(cur_num_data_nodes, 0);
    double mean_deg = 0; 
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                indegrees[node_edges[m]] += 1;
                mean_deg += 1;
            }
        }
    }

    mean_deg = mean_deg / cur_num_data_nodes;

    std::vector<internal_heap_loc_t> hubs;
    std::vector<internal_heap_loc_t> non_hubs;
    std::vector<internal_heap_loc_t> P;

    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        if (indegrees[node] >= mean_deg){
            hubs.push_back(node);
        }
        else{
            non_hubs.push_back(node);
        }
    }

    for (int i = 0; i < hubs.size(); i++){
        P.push_back( hubs[i] );
    }
    for (int i = 0; i < non_hubs.size(); i++){
        P.push_back( non_hubs[i] );
    }

    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    return Pinv;
}


std::vector<internal_heap_loc_t> dbg_order(int w){
    // partitions the degrees into W quantiles
    std::vector< int > indegrees(cur_num_data_nodes, 0);
    double mean_deg = 0;
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                indegrees[node_edges[m]] += 1;
                mean_deg += 1;
            }
        }
    }

    std::vector< int > quantiles(indegrees);
    std::sort(quantiles.begin(), quantiles.end());

    std::vector< int > thresholds(w,0);
    for (int i = 0; i < w; i++){
        int idx = ( (i*cur_num_data_nodes) / w);
        // std::cout<<"T"<<idx<<":"<<quantiles[idx]<<std::endl;
        thresholds[i] = quantiles[idx];
    }

    std::vector< std::vector<internal_heap_loc_t> > assignments(w);
    std::vector<internal_heap_loc_t> P;

    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        int deg = indegrees[node];
        int group_number = 0;
        // std::clog<<deg<<": ";
        for (int i = 0; i < w; i++){
            // std::clog<<thresholds[i]<<" ";
            if (deg > thresholds[i]){
                group_number = i;
            }
        }
        // std::clog<<group_number<<std::endl; // stupid! STUPID STUPID STUPID
        assignments[group_number].push_back(node);
    }

    for (int i = 0; i < assignments.size(); i++){
        for (int j = 0; j < assignments[i].size(); j++){
            P.push_back(assignments[i][j]);
        }
    }

    std::reverse(P.begin(),P.end());
    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    return Pinv;
}

std::vector<internal_heap_loc_t> reverse_cuthill_mckee_order(){

    std::vector< std::pair<internal_heap_loc_t,int> > sorted_nodes;
    std::vector< int > degrees;
    
    for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        int deg = 0;
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
                deg++;
            }
        }
        sorted_nodes.push_back({node, deg});
        degrees.push_back(deg);
    }

    std::sort(sorted_nodes.begin(), sorted_nodes.end(), [](const std::pair<internal_heap_loc_t,int> &left, const std::pair<internal_heap_loc_t,int> &right)
            { return left.second < right.second; });

    std::vector<internal_heap_loc_t> P;
    VisitedSet is_listed = VisitedSet(cur_num_data_nodes+1);
    is_listed.clear();

    for (int i = 0; i < sorted_nodes.size(); i++){
        internal_heap_loc_t node = sorted_nodes[i].first;
        std::queue<internal_heap_loc_t> Q;

        if (!is_listed[node+1]){
            // add node to permutation
            P.push_back(node);
            is_listed.insert(node+1);

            // get list of neighbors
            std::vector< std::pair<internal_heap_loc_t,int> > neighbors;
            internal_heap_loc_t* node_edges = dataNodeLinks(node);
            int deg = 0;
            for (int m = 0; m < M; m++){
                if (node_edges[m] != node){
                    neighbors.push_back( {node_edges[m], degrees[ node_edges[m] ] } );
                }
            }
            // sort neighbors by degree (min degree first)
            std::sort(neighbors.begin(), neighbors.end(), [](const std::pair<internal_heap_loc_t,int> &left, const std::pair<internal_heap_loc_t,int> &right) 
                { return left.second < right.second; });

            // add neighbors to queue
            for (int j = 0; j < neighbors.size(); j++){
                Q.push(neighbors[j].first);
                // std::cout<<"\t +"<<neighbors[j].first<<std::endl;
            }
            // std::cout<<"(F) QS: "<<Q.size()<<std::endl;

            while (Q.size() > 0){
                // exhaust all the neighbors
                // std::cout<<"(G) QS: "<<Q.size()<<std::endl;
                internal_heap_loc_t candidate = Q.front();
                // std::cout<<"\t cand = "<<candidate<<std::endl;
                Q.pop();

                if (!is_listed[candidate+1]){

                    // std::cout<<"(0) QS: "<<Q.size()<<std::endl;
                    P.push_back(candidate);
                    is_listed.insert(candidate+1);

                    // get list of neighbors of candidate
                    std::vector< std::pair<internal_heap_loc_t,int> > candidate_neighbors;
                    internal_heap_loc_t* candidate_edges = dataNodeLinks(candidate);
                    int candidate_deg = 0;
                    for (int m = 0; m < M; m++){
                        if (candidate_edges[m] != candidate){
                            candidate_neighbors.push_back( { candidate_edges[m], degrees[ candidate_edges[m] ] } );
                        }
                    }
                    // sort neighbors by degree (min degree first)
                    std::sort(candidate_neighbors.begin(), candidate_neighbors.end(), [](const std::pair<internal_heap_loc_t,int> &left, const std::pair<internal_heap_loc_t,int> &right) 
                        { return left.second < right.second; });
                    // add neighbors to queue
                    for (int j = 0; j < candidate_neighbors.size(); j++){
                        Q.push(candidate_neighbors[j].first);
                        // std::cout<<"\t +"<<candidate_neighbors[j].first<<std::endl;
                    }
                    // std::cout<<"(1) QS: "<<Q.size()<<std::endl;
                }
            }
        }
    }

    std::reverse(P.begin(),P.end());
    std::clog<<P.size()<<" "<<cur_num_data_nodes<<std::endl;
    std::vector<internal_heap_loc_t> Pinv(cur_num_data_nodes, 0);
    for (int n = 0; n < cur_num_data_nodes; n++){
        Pinv[P[n]] = n;
    }
    return Pinv;
}

void connectedHighwayComponent(std::vector<internal_heap_loc_t> &nodes){

    std::vector<internal_heap_loc_t> candidates;
    VisitedSet is_listed = VisitedSet(max_num_highway_nodes+1);
    is_listed.clear();

    for (auto& n : nodes){
        candidates.push_back(n);
        // nodes.push_back(candidates.back());
        is_listed.insert(candidates.back()+1);
    }

    while (candidates.size() > 0){
        if (!is_listed[candidates.back()+1]){
            nodes.push_back(candidates.back());
            is_listed.insert(candidates.back()+1);
        }

        internal_heap_loc_t* highway_node_links = highwayNodeLinks(candidates.back());
        candidates.pop_back();

        for (int m = 0; m < M_highway; m++){
            if (!is_listed[highway_node_links[m]+1]){
                candidates.push_back(highway_node_links[m]);
            }
        }
    }
}


// /*
void relabel(const std::vector<internal_heap_loc_t>& P){

    // 1: rewire all the highway nodes
    std::vector<internal_heap_loc_t> highway_nodes;
    highway_nodes.push_back(entry_node);
    for (int l = entry_node_level; l>0; l--){
        connectedHighwayComponent(highway_nodes);
        // do the reconnections:
        for (int i = 0; i < highway_nodes.size(); i++){
            // WARN: potential bug: if data layout changes, we have to change this part
            // update the data location for this highway node, to point to the new data location
            internal_heap_loc_t data_node_ID = *highwayNode(highway_nodes[i]);
            internal_heap_loc_t highway_node_ID = highway_nodes[i];
            *highwayNode(highway_node_ID) = P[data_node_ID];
            if (l == 1){
                // then update the child location too, since child is in data-space
                internal_heap_loc_t child_node = *( highwayNodeChild(highway_node_ID) );
                *( highwayNodeChild(highway_node_ID) ) = P[child_node];
            }
            // drop down a level
            highway_nodes[i] = *highwayNodeChild(highway_nodes[i]);
        }
    }

    // 2: re-layout the datanodes
    // 2a: allocate a buffer for the new data node space
    char* new_partition = new char[data_node_size*max_num_data_nodes];
    char* old_partition = data_partition;

    // 2b: iterate through the existing datanodes, relabeling and remapping everything
    for (int old_node = 0; old_node < cur_num_data_nodes; old_node++){
        // load pointers for the old data (access functions like dataNodeData use an offset from
        // "data_partition" so we assign data_partition = old_partition to load from old)
        data_partition = old_partition;
        void* old_node_data_p = dataNodeData(old_node);
        internal_heap_loc_t* old_node_links = dataNodeLinks(old_node);
        label_t* old_node_label = dataNodeLabel(old_node);

        data_partition = new_partition;
        // copy the data from the old to the new partition
        void* new_node_data_p = dataNodeData(P[old_node]);
        std::memcpy(new_node_data_p, old_node_data_p, data_size);

        // copy the links from the old to the new partition, remapping links as we go
        internal_heap_loc_t* new_node_links = dataNodeLinks(P[old_node]);
        for (int i = 0; i < M; i++){
            new_node_links[i] = P[old_node_links[i]];
        }
        // copy over the label
        label_t* new_node_label = dataNodeLabel(P[old_node]);
        *(new_node_label) = *(old_node_label);
    }

    // copy old address into new one
    std::memcpy(old_partition, new_partition, data_node_size*max_num_data_nodes);

    // reassign data_partition to be correct
    data_partition = old_partition;

    // free the temporary partition
    delete[] new_partition;
}

void reorder_gorder(){
    std::vector<internal_heap_loc_t> P = gorder(5);
    relabel(P);
}

void reorder_indegree_sort(){
    std::vector<internal_heap_loc_t> P = indegree_order();
    relabel(P);
}

void reorder_outdegree_sort(){
    std::vector<internal_heap_loc_t> P = outdegree_order();
    relabel(P);
}
void reorder_RCM(){
    std::vector<internal_heap_loc_t> P = reverse_cuthill_mckee_order();
    relabel(P);
}

void reorder_hubsort(){
    std::vector<internal_heap_loc_t> P = hubsort_order();
    relabel(P);
}

void reorder_hubcluster(){
    std::vector<internal_heap_loc_t> P = hubcluster_order();
    relabel(P);
}

void reorder_dbg(){
    std::vector<internal_heap_loc_t> P = dbg_order(9);
    relabel(P);
}

/* Notes:

Gorder Algorithm: 

insert all v into Q each with priority 0
select a start node into P

for i = 1 to N:
    ve = P[i-1] # new node in window
    for each node u in out-edges of ve: 
        if u in Q, increment priority of u
    for each node u in in-edges of ve: 
        if u in Q, increment priority of u
        for each node v in out-edges of u:
            if v in Q, increment priority of v
    if i > w+1: 
        // remove the tailing node
        vb = P[i - w - 1]
        for each node u in out-edges of vb: 
            if u in Q, decrement priority of u
        for each node u in in-edges of fb:
            if u in Q, decrement priority of u
            for each node v in out-edges of u:
                if v in Q, decrement priority of v
    vmax = Q.pop()
    P[i] = vmax
    i++
*/






void save(const std::string& location){
	std::ofstream out(location, std::ios::binary);

	// I would normally do all the endian-ness checks but you know what 
	// none of this is safe across machines or even across compilers, probably. Sorry about that.
	// I had to do experiments for a deadline and haven't gotten around to doing it the right way 

	out.write(reinterpret_cast< char *>(&M), sizeof(size_t));
	out.write(reinterpret_cast< char *>(&M_highway), sizeof(size_t));

	out.write(reinterpret_cast< char *>(&num_levels), sizeof(size_t));
	out.write(reinterpret_cast< char *>(&entry_node_level), sizeof(size_t));

	out.write(reinterpret_cast< char *>(&max_num_data_nodes), sizeof(size_t));
	out.write(reinterpret_cast< char *>(&cur_num_data_nodes), sizeof(size_t));

	out.write(reinterpret_cast< char *>(&max_num_highway_nodes), sizeof(size_t));
	out.write(reinterpret_cast< char *>(&cur_num_highway_nodes), sizeof(size_t));

	out.write(reinterpret_cast< char *>(&data_size), sizeof(size_t));
	out.write(reinterpret_cast< char *>(&data_node_size), sizeof(size_t));
	out.write(reinterpret_cast< char *>(&highway_node_size), sizeof(size_t));

	out.write(reinterpret_cast< char *>(&entry_node), sizeof(internal_heap_loc_t));

	// write the index partition
    size_t index_memory_size = highway_node_size*max_num_highway_nodes + data_node_size*max_num_data_nodes;
	out.write(reinterpret_cast< char *>(index_memory), index_memory_size);
	out.close();
}

void load(const std::string& location, SpaceInterface<distance_t> *space){

	std::ifstream in(location, std::ios::binary);
	in.read(reinterpret_cast< char *>(&M), sizeof(size_t));
	in.read(reinterpret_cast< char *>(&M_highway), sizeof(size_t));
	
	in.read(reinterpret_cast< char *>(&num_levels), sizeof(size_t));
	in.read(reinterpret_cast< char *>(&entry_node_level), sizeof(size_t));

	in.read(reinterpret_cast< char *>(&max_num_data_nodes), sizeof(size_t));
	in.read(reinterpret_cast< char *>(&cur_num_data_nodes), sizeof(size_t));

	in.read(reinterpret_cast< char *>(&max_num_highway_nodes), sizeof(size_t));
	in.read(reinterpret_cast< char *>(&cur_num_highway_nodes), sizeof(size_t));

	in.read(reinterpret_cast< char *>(&data_size), sizeof(size_t));
	in.read(reinterpret_cast< char *>(&data_node_size), sizeof(size_t));
	in.read(reinterpret_cast< char *>(&highway_node_size), sizeof(size_t));

	in.read(reinterpret_cast< char *>(&entry_node), sizeof(internal_heap_loc_t));
	
	if (index_memory != NULL){
		delete[] index_memory;
		index_memory = NULL;
	}

    size_t index_memory_size = highway_node_size*max_num_highway_nodes + data_node_size*max_num_data_nodes;
    index_memory = new char[index_memory_size];
	in.read(reinterpret_cast< char *>(index_memory), index_memory_size);
    data_partition = index_memory;
    highway_partition = index_memory + data_node_size*max_num_data_nodes;

    distance_param = space->get_dist_func_param(); 
    distance = space->get_dist_func();
    is_visited = VisitedSet(std::max(max_num_data_nodes,max_num_highway_nodes)+1);
}

void dump(std::ostream& out){
	
	for (internal_heap_loc_t node = 0; node < cur_num_data_nodes; node++){
        internal_heap_loc_t* node_edges = dataNodeLinks(node);
        for (int m = 0; m < M; m++){
            if (node_edges[m] != node){
				distance_t dist = distance(dataNodeData(node), dataNodeData(node_edges[m]), distance_param);
				out<<node<<" "<<node_edges[m]<<" "<<dist<<std::endl;
            }
        }
    }
}

};
