Hash (add-only)

From liblfds.org
Jump to navigation Jump to search

Source Files

└───liblfds700
    ├───inc
    │   └───liblfds700
    │           lfds700_hash_addonly.h
    └───src
        └───lfds700_hash_addonly
                lfds700_hash_addonly_cleanup.c
                lfds700_hash_addonly_get.c
                lfds700_hash_addonly_init.c
                lfds700_hash_addonly_insert.c
                lfds700_hash_addonly_internal.h
                lfds700_hash_addonly_iterate.c
                lfds700_hash_addonly_query.c

Enums

enum lfds700_hash_a_existing_key
{
  LFDS700_HASH_A_EXISTING_KEY_OVERWRITE,
  LFDS700_HASH_A_EXISTING_KEY_FAIL
};

enum lfds700_hash_a_insert_result
{
  LFDS700_HASH_A_INSERT_RESULT_FAILURE_EXISTING_KEY,
  LFDS700_HASH_A_INSERT_RESULT_SUCCESS_OVERWRITE,
  LFDS700_HASH_A_INSERT_RESULT_SUCCESS
};

enum lfds700_hash_a_query
{
  LFDS700_HASH_A_QUERY_GET_POTENTIALLY_INACCURATE_COUNT,
  LFDS700_HASH_A_QUERY_SINGLETHREADED_VALIDATE
};

Opaque Structures

struct lfds700_hash_a_element;
struct lfds700_hash_a_iterate;
struct lfds700_hash_a_state;
struct lfds700_misc_prng_state;

Macros

#define LFDS700_HASH_A_GET_KEY_FROM_ELEMENT( hash_a_element )
#define LFDS700_HASH_A_SET_KEY_IN_ELEMENT( hash_a_element, new_key )

#define LFDS700_HASH_A_GET_VALUE_FROM_ELEMENT( hash_a_element )
#define LFDS700_HASH_A_SET_VALUE_IN_ELEMENT( hash_a_element, new_value )

#define LFDS700_HASH_A_GET_USER_STATE_FROM_STATE( hash_a_state )

#define LFDS700_HASH_A_32BIT_HASH_FUNCTION( data, data_length_in_bytes, hash )

Prototypes

void lfds700_hash_a_init_valid_on_current_logical_core( struct lfds700_hash_a_state *has,
                                                        struct lfds700_btree_au_state *baus_array,
                                                        lfds700_uint_t array_size,
                                                        int (*key_compare_function)(void const *new_key, void const *existing_key),
                                                        void (*key_hash_function)(void const *key, lfds700_uint_t *hash),
                                                        enum lfds700_hash_a_existing_key existing_key,
                                                        void *user_state );

void lfds700_hash_a_cleanup( struct lfds700_hash_a_state *has,
                             void (*element_cleanup_function)(struct lfds700_hash_a_state *has, struct lfds700_hash_a_element *hae) );

enum lfds700_hash_a_insert_result lfds700_hash_a_insert( struct lfds700_hash_a_state *has,
                                                         struct lfds700_hash_a_element *hae,
                                                         struct lfds700_hash_a_element **existing_hae,
                                                         struct lfds700_misc_prng_state *ps );

int lfds700_hash_a_get_by_key( struct lfds700_hash_a_state *has,
                               void *key,
                               struct lfds700_hash_a_element **hae );

void lfds700_hash_a_iterate_init( struct lfds700_hash_a_state *has, struct lfds700_hash_a_iterate *hai );
int lfds700_hash_a_iterate( struct lfds700_hash_a_iterate *hai, struct lfds700_hash_a_element **hae );

void lfds700_hash_a_query( struct lfds700_hash_a_state *has,
                           enum lfds700_hash_a_query query_type,
                           void *query_input,
                           void *query_output );

Overview

This data structure implements an add-only hash. It is not a real hash, not a direct implementation - it is something knocked together quickly, for now, to provide this type of functionality; internally, the hash is actually an array of btrees and the hash code itself needs to perform no atomic work - it merely wraps the array of btrees with a hash-type API.

The implementation performs no allocations. The user is responsible for all allocations (and deallocations), where these allocations are passed into the API functions, which then use them. As such, allocations can be on the stack, on the heap, or as can sometimes be the the case in embedded systems, allocated with fixed addresses at compile time from a fixed global store. Allocations can also be shared memory, but in this case, the virtual addresses used must be the same in all processes.

General usage is that the user calls lfds700_hash_a_init_valid_on_current_logical_core to initialize a struct lfds700_hash_a_state, and then calls lfds700_hash_a_insert_element and lfds700_hash_a_get_by_key to put and get struct lfds700_hash_a_elements. A hash element provides the ability to store a key (used to place elements in the hash) and a value, both of which are of type void * and can either point to data, or can be used directly, as key comparason is performed by a user-provided callback and the value is not touched by the hash code.

(See the section below, on lock-free specific behaviour, for an explanation of the unusual init function name.)

The state and element structures are both public, present in the lfds700_hash_addonly.h header file, so that users can embed them in their own structures (and where necessary pass them to sizeof). Expected use is that user structures which are to enter stacks contain within themselves a struct lfds700_hash_a_element, and this is used when calling lfds700_hash_a_put_element, and the value set in the hash element is a pointer to the user structure entering the hash. This approach permits zero run-time allocation of store and also ensures the hash element is normally in the same memory page as the user data it refers to.

The hash is configured by the user as to whether it should fail or overwrite, when lfds700_hash_a_insert is called with an existing key. In the event of putting an existing key into a hash configured for failure on existing key, a pointer can be returned to the existing element, saving a second lookup.

Iteration is supported. An iteration structure is initialized by lfds700_hash_a_iterate_init, and then passed repeatedly to lfds700_hash_a_iterate, where it returns the next key and its value.

Lock-free Specific Behaviour

The state initialization function, lfds700_hash_a_init_valid_on_current_logical_core, as the same suggests, initializes the state structure but that initialization is only valid on the current logical core. For the initialization to be valid on other logical cores (i.e. other threads where they are running on other logical cores) those other threads need to call the long-windedly named macro LFDS700_MISC_MAKE_VALID_ON_CURRENT_LOGICAL_CORE_INITS_COMPLETED_BEFORE_NOW_ON_ANY_OTHER_LOGICAL_CORE, which will do that which its name suggests.

Once a hash element structure has been put into the hash, it cannot be deallocated (free, or stack allocation lifetimes ending due to say a thread ending, etc) until lfds700_hash_a_cleanup has returned. Elements cannot be removed from the hash, or moved within the hash. Their value however can be changed at any time.

The struct lfds700_misc_prng_state argument is the state for a single-threaded, fast, high quality random number generator, required by the exponential backoff code. Each thread should allocate and initialize one of these structures, and then it can be used for all API calls which take this argument.

The SET macro for the key in an element can only be correctly used on elements which are outside of a hash. The SET macro for the value in an element can be used at any time, on any element. By correctly is it meant to say that the GET macros will actually read the data written by the SET macros, and not some other data. I don't have to tell you how much chaos will ensure if different logical cores are reading different keys for the same element...

If shared memory is used for allocations, the virtual addresses must be the same across different processes.

White Paper

There is no white paper for this data structure; it is native to liblfds.

Example

#include <stdio.h>
#include <string.h>
#include "liblfds700.h"

struct test_data
{
  int long long unsigned
    unique_id;

  char
    payload[64];

  struct lfds700_hash_a_element
    hae;
};

int key_compare_function( void const *new_key, void const *existing_key )
{
  int
    cr = 0;

  int long long unsigned
    *new_key = (int long long unsigned *) new_key,
    *existing_key  = (int long long unsigned *) existing_key;

  if( *new_key > *existing_key )
    cr = 1;

  if( *new_key < *existing_key )
    cr = -1;

  return( cr );
}

void key_hash_function( void const *key, lfds700_pal_uint_t *hash )
{
  int unsigned
    temp_hash = 0;

  LFDS700_HASH_A_32BIT_HASH_FUNCTION( key, sizeof(int long long unsigned), temp_hash );

  *hash = temp_hash;

  return;
}

int main()
{
  int long long unsigned
    key,
    *key_temp;

  struct lfds700_btree_au_state
    baus_array[10];

  struct lfds700_hash_a_element
    *hae;

  struct lfds700_hash_a_state
    has;

  struct lfds700_misc_prng_state
    ps;

  struct test_data
    td,
    *temp_td;

  lfds700_misc_library_init_valid_on_current_logical_core();

  lfds700_misc_prng_init( &ps );

  lfds700_hash_a_init_valid_on_current_logical_core( &has, baus_array, 10, key_compare_function, key_hash_function, LFDS700_HASH_A_EXISTING_KEY_FAIL, NULL );

  td.unique_id = 15;
  strcpy( td.payload, "Linear A" );

  LFDS700_HASH_A_SET_KEY_IN_ELEMENT( td.hae, &td.unique_id );
  LFDS700_HASH_A_SET_VALUE_IN_ELEMENT( td.hae, &td );
  lfds700_hash_a_insert_element( &has, &hae, NULL, &ps );

  key = 15;
  lfds700_hash_a_get_by_key( &has, (void *) &key, &hae );
  key_temp = LFDS700_HASH_A_GET_KEY_FROM_ELEMENT( *hae );
  temp_td = LFDS700_HASH_A_GET_VALUE_FROM_ELEMENT( *hae );

  printf( "key in element is %llu\n"
          "payload in element is \"%s\"\n", *key_temp, temp_td->payload );

  lfds700_hash_a_cleanup( &has, NULL );

  lfds700_misc_library_cleanup();

  return( EXIT_SUCCESS );
}

See Also