Binary Tree (add-only, unbalanced)

From liblfds.org
Jump to navigation Jump to search

Source Files

└───liblfds700
    ├───inc
    │   └───liblfds700
    │           lfds700_btree_addonly_unbalanced.h
    └───src
        └───lfds700_btree_addonly_unbalanced
                lfds700_btree_addonly_unbalanced_cleanup.c
                lfds700_btree_addonly_unbalanced_get.c
                lfds700_btree_addonly_unbalanced_init.c
                lfds700_btree_addonly_unbalanced_insert.c
                lfds700_btree_addonly_unbalanced_internal.h
                lfds700_btree_addonly_unbalanced_query.c

Enums

enum lfds700_btree_au_absolute_position
{
  LFDS700_BTREE_AU_ABSOLUTE_POSITION_ROOT,
  LFDS700_BTREE_AU_ABSOLUTE_POSITION_SMALLEST_IN_TREE,
  LFDS700_BTREE_AU_ABSOLUTE_POSITION_LARGEST_IN_TREE
};

enum lfds700_btree_au_existing_key
{
  LFDS700_BTREE_AU_EXISTING_KEY_OVERWRITE,
  LFDS700_BTREE_AU_EXISTING_KEY_FAIL
};

enum lfds700_btree_au_insert_result
{
  LFDS700_BTREE_AU_INSERT_RESULT_FAILURE_EXISTING_KEY,
  LFDS700_BTREE_AU_INSERT_RESULT_SUCCESS_OVERWRITE,
  LFDS700_BTREE_AU_INSERT_RESULT_SUCCESS
};

enum lfds700_btree_au_query
{
  LFDS700_BTREE_AU_QUERY_GET_POTENTIALLY_INACCURATE_COUNT,
  LFDS700_BTREE_AU_QUERY_SINGLETHREADED_VALIDATE
};

enum lfds700_btree_au_relative_position
{
  LFDS700_BTREE_AU_RELATIVE_POSITION_UP,
  LFDS700_BTREE_AU_RELATIVE_POSITION_LEFT,
  LFDS700_BTREE_AU_RELATIVE_POSITION_RIGHT,
  LFDS700_BTREE_AU_RELATIVE_POSITION_SMALLEST_ELEMENT_BELOW_CURRENT_ELEMENT,
  LFDS700_BTREE_AU_RELATIVE_POSITION_LARGEST_ELEMENT_BELOW_CURRENT_ELEMENT,
  LFDS700_BTREE_AU_RELATIVE_POSITION_NEXT_SMALLER_ELEMENT_IN_ENTIRE_TREE,
  LFDS700_BTREE_AU_RELATIVE_POSITION_NEXT_LARGER_ELEMENT_IN_ENTIRE_TREE
};

Opaque Structures

struct lfds700_btree_au_element;
struct lfds700_btree_au_state;
struct lfds700_misc_prng_state;

Macros

#define LFDS700_BTREE_AU_GET_KEY_FROM_ELEMENT( btree_au_element )
#define LFDS700_BTREE_AU_SET_KEY_IN_ELEMENT( btree_au_element, new_key )

#define LFDS700_BTREE_AU_GET_VALUE_FROM_ELEMENT( btree_au_element )
#define LFDS700_BTREE_AU_SET_VALUE_IN_ELEMENT( btree_au_element, new_value )

#define LFDS700_BTREE_AU_GET_USER_STATE_FROM_STATE( btree_au_state )

Prototypes

void lfds700_btree_au_init_valid_on_current_logical_core( struct lfds700_btree_au_state *baus,
                                                          int (*key_compare_function)(void const *new_key, void const *existing_key),
                                                          enum lfds700_btree_au_existing_key existing_key,
                                                          void *user_state );

void lfds700_btree_au_cleanup( struct lfds700_btree_au_state *baus,
                               void (*element_cleanup_callback)(struct lfds700_btree_au_state *baus, struct lfds700_btree_au_element *baue) );

enum lfds700_btree_au_insert_result lfds700_btree_au_insert( struct lfds700_btree_au_state *baus,
                                                             struct lfds700_btree_au_element *baue,
                                                             struct lfds700_btree_au_element **existing_baue,
                                                             struct lfds700_misc_prng_state *ps );

int lfds700_btree_au_get_by_key( struct lfds700_btree_au_state *baus, 
                                 void *key,
                                 struct lfds700_btree_au_element **baue );

int lfds700_btree_au_get_by_absolute_position_and_then_by_relative_position( struct lfds700_btree_au_state *baus,
                                                                             struct lfds700_btree_au_element **baue,
                                                                             enum lfds700_btree_au_absolute_position absolute_position,
                                                                             enum lfds700_btree_au_relative_position relative_position );

int lfds700_btree_au_get_by_absolute_position( struct lfds700_btree_au_state *baus,
                                               struct lfds700_btree_au_element **baue,
                                               enum lfds700_btree_au_absolute_position absolute_position );

int lfds700_btree_au_get_by_relative_position( struct lfds700_btree_au_element **baue,
                                               enum lfds700_btree_au_relative_position relative_position );
 
void lfds700_btree_au_query( struct lfds700_btree_au_state *baus,
                             enum lfds700_btree_au_query query_type,
                             void *query_input,
                             void *query_output );

Overview

This data structure implements an add-only, unbalanced btree. It supports any number of concurrent users, and internally implements exponential backoff to help deal with high load and so improve scalability (although being a btree it naturally acts to distribute memory access behaviour which helps scalability).

The implementation performs no allocations. The user is responsible for all allocations (and deallocations), where these allocations are passed into the API functions, which then use them. As such, allocations can be on the stack, on the heap, or as can sometimes be the the case in embedded systems, allocated with fixed addresses at compile time from a fixed global store. Allocations can also be shared memory, but in this case, the virtual addresses used must be the same in all processes.

General usage is that the user calls lfds700_btree_au_init_valid_on_current_logical_core to initialize a struct lfds700_btree_au_state, and then calls lfds700_btree_au_link to add elements. A btree element provides the ability to store a key (used to place elements in the btree) and a value, both of which are of type void * and can either point to data, or can be used directly, as key comparason is performed by a user-provided callback and the value is not touched by the btree code.

(See the section below, on lock-free specific behaviour, for an explanation of the unusual init function name.)

The key and value are get and set in elements by macros, such as LFDS700_BTREE_AU_SET_VALUE_IN_ELEMENT. The key can only be set in elements before they are added to a tree. The value can be set at any time, in elements both inside and outside of the tree.

The state and element structures are both public, present in the lfds700_btree_au.h header file, so that users can embed them in their own structures (and where necessary pass them to sizeof). Expected use is that user structures which are to enter btrees contain within themselves a struct lfds700_btree_au_element, where the user sets the key as necessary for the btree and the value to point to the user structure entering the btree. This approach permits zero run-time allocation of store and also ensures the btree element is normally in the same memory page as the user data it refers to.

When initializing the btree, the caller specifies the behaviour of the btree when the attempt is mde to add a new element which has a key already present in the btree. The btree can be configured to either fail to add the element, or it can be configured to overwrite the value in the existing element with the value of the new element.

Finally, when all is said and done, use lfds700_btree_au_cleanup to cleanup the tree. Once this function has returned, the user is then safe to deallocate all allocations.

Lock-free Specific Behaviour

The state initialization function, lfds700_btree_au_init_valid_on_current_logical_core, as the same suggests, initializes the state structure but that initialization is only valid on the current logical core. For the initialization to be valid on other logical cores (i.e. other threads where they are running on other logical cores) those other threads need to call the long-windedly named macro LFDS700_MISC_MAKE_VALID_ON_CURRENT_LOGICAL_CORE_INITS_COMPLETED_BEFORE_NOW_ON_ANY_OTHER_LOGICAL_CORE, which will do that which its name suggests.

Once a btree element structure has been linked to the btree, it cannot be deallocated (free, or stack allocation lifetimes ending due to say a thread ending, etc) until lfds700_btree_au_cleanup has returned. Elements cannot be removed from the tree, or moved within the tree. Their value however can be changed at any time.

The struct lfds700_misc_prng_state argument is the state for a single-threaded, fast, high quality random number generator, required by the exponential backoff code. Each thread should allocate and initialize one of these structures, and then it can be used for all API calls which take this argument.

The SET macro for the key in an element can only be correctly used on elements which are outside of a btree. The SET macro for the value in an element can be used at any time, on any element. By correctly is it meant to say that the GET macros will actually read the data written by the SET macros, and not some other data. I don't have to tell you how much chaos will ensure if different logical cores are reading different keys for the same element...

If shared memory is used for allocations, the virtual addresses must be the same across different processes.

White Paper

There is no white paper for this data structure; it is native to liblfds. I have read that Valois (a long time ago, I'd imagine) wrote a white paper implementing this data structure, but I've not seen it.

Example

#include <stdio.h>
#include "liblfds700.h"

struct test_data
{
  int long long unsigned
    unique_id;

  char
    payload[64];

  struct lfds700_btree_au_element
    buae;
};

int key_compare_function( void const *new_key, void const *existing_key )
{
  int
    cr = 0;

  int long long unsigned
    *new_key = (int long long unsigned *) new_key,
    *existing_key  = (int long long unsigned *) existing_key;

  if( *new_key > *existing_key )
    cr = 1;

  if( *new_key < *existing_key )
    cr = -1;

  return( cr );
}

int main()
{
  enum lfds700_btree_au_insert_result
    bauir;

  int long long unsigned
    loop;

  struct lfds700_btree_au_element
    *buae = NULL;

  struct lfds700_btree_au_state
    baus;

  struct lfds700_misc_prng_state
    ps;

  struct test_data
    *td,
    *temp_td;

  lfds700_misc_library_init_valid_on_current_logical_core();

  lfds700_misc_prng_init( &ps );

  lfds700_btree_au_init_valid_on_current_logical_core( &baus, key_compare_function, LFDS700_BTREE_AU_EXISTING_KEY_FAIL, NULL );

  // TRD : allocate ten test elements, populate with dummy data and link to tree
  td = malloc( sizeof(struct test_data) * 10 );

  for( loop = 0 ; loop < 10 ; loop++ )
  {
    td[loop].unique_id = loop;
    sprintf( td[loop].payload, "the unique id is %llu", loop );

    LFDS700_BTREE_AU_SET_KEY_IN_ELEMENT( td[loop].baue, &td[loop].unique_id );
    LFDS700_BTREE_AU_SET_VALUE_IN_ELEMENT( td[loop].baue, &td[loop] );
    bauir = lfds700_btree_au_insert( baus, &td[loop].baue, NULL, &ps );
    if( bauir != LFDS700_BTREE_AU_INSERT_RESULT_SUCCESS )
      printf( "Well, bugger!  so much for quality control\n" );
  }

  // TRD : now in-order walk the tree
  while( lfds700_btree_au_get_by_position_and_then_by_direction(baus, &baue, LFDS700_BTREE_AU_SMALLEST_IN_TREE, LFDS700_BTREE_AU_INORDER_WALK_FROM_SMALLEST_TO_LARGEST) )
  {
    temp_td = LFDS700_BTREE_AU_GET_VALUE_FROM_ELEMENT( *baue );
    printf( "element %llu has value \"%s\"\n", temp_td->unique_id, temp_id->payload );
  }

  lfds700_btree_au_cleanup( &baus );

  free( td );

  lfds700_misc_library_cleanup();

  return( EXIT_SUCCESS );
}

See Also