Optimize Tree

#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include "gtmp.h"

/*
From the MCS Paper: A sense-reversing centralized barrier
shared count: integer := P
shared sense : Boolean := true
processor private local_sense : Boolean := true
	local_sense := not local_sense // each processor
if fetch_and_decrement(&count) = 1
	count := P
	sense := local_sense
	else
		repeat until sense = local_sense
*/
typedef struct _node_t{
	int k;
	int count;
	int locksense;
	struct _node_t* parent;
} node_t;

static int num_leaves;
static node_t* nodes;

void gtmp_barrier_aux(node_t* node, int sense);

node_t* _gtmp_get_node(int i){
	return &nodes[i];
}

void gtmp_init(int num_threads){
	int i, v, num_nodes;
	node_t* curnode;

	/*Setting constants */
	v = 1;
	while( v < num_threads)
		v *= 2;

	num_nodes = v - 1;
	num_leaves = v/2;

	/* Setting up the tree */
	nodes = (node_t*) malloc(num_nodes * sizeof(node_t));

	for(i = 0; i < num_nodes; i++){
		curnode = _gtmp_get_node(i);
		curnode->k = i < num_threads - 1 ? 1 : 1;
		curnode->count = curnode->k;
		curnode->locksense = 0;
		curnode->parent = _gtmp_get_node((i-1)/2);
	}

	curnode = _gtmp_get_node(0);
	curnode->parent = NULL;
}

void gtmp_barrier(){
	node_t* mynode;
	int sense;

	mynode = _gtmp_get_node(num_leaves - 1 + (omp_get_thread_num() % num_leaves));
	sense = !mynode->locksense;
	gtmp_barrier_aux(mynode, sense);		
}

void gtmp_barrier_aux(node_t* node, int sense){
	int test;

#pragma omp critical
{
	test = node->count;
	node->count--;
}

	if( 1 == test ){
		if(node->parent != NULL)
			gtmp_barrier_aux(node->parent, sense);
		node->count = node->k;
		node->locksense = !node->locksense;
	}
	while(node->locksense != sense);
}
void gtmp_finalize(){
	free(nodes);
}