Hash (add-only)

From liblfds.org
Jump to navigation Jump to search

Source Files

└───liblfds710
    ├───inc
    │   └───liblfds710
    │           lfds710_hash_addonly.h
    └───src
        └───lfds710_hash_addonly
                lfds710_hash_addonly_cleanup.c
                lfds710_hash_addonly_get.c
                lfds710_hash_addonly_init.c
                lfds710_hash_addonly_insert.c
                lfds710_hash_addonly_internal.h
                lfds710_hash_addonly_iterate.c
                lfds710_hash_addonly_query.c

Enums

enum lfds710_hash_a_existing_key;
enum lfds710_hash_a_insert_result;
enum lfds710_hash_a_query;

Opaque Structures

struct lfds710_hash_a_element;
struct lfds710_hash_a_iterate;
struct lfds710_hash_a_state;

Macros

#define LFDS710_HASH_A_GET_KEY_FROM_ELEMENT( hash_a_element )
#define LFDS710_HASH_A_SET_KEY_IN_ELEMENT( hash_a_element, new_key )

#define LFDS710_HASH_A_GET_VALUE_FROM_ELEMENT( hash_a_element )
#define LFDS710_HASH_A_SET_VALUE_IN_ELEMENT( hash_a_element, new_value )

#define LFDS710_HASH_A_GET_USER_STATE_FROM_STATE( hash_a_state )

#define LFDS710_HASH_A_HASH_FUNCTION( data, data_length_in_bytes, hash )

Prototypes

void lfds710_hash_a_init_valid_on_current_logical_core( struct lfds710_hash_a_state *has,
                                                        struct lfds710_btree_au_state *baus_array,
                                                        lfds710_uint_t array_size,
                                                        int (*key_compare_function)(void const *new_key, void const *existing_key),
                                                        void (*key_hash_function)(void const *key, lfds710_uint_t *hash),
                                                        enum lfds710_hash_a_existing_key existing_key,
                                                        void *user_state );

void lfds710_hash_a_cleanup( struct lfds710_hash_a_state *has,
                             void (*element_cleanup_function)(struct lfds710_hash_a_state *has, struct lfds710_hash_a_element *hae) );

enum lfds710_hash_a_insert_result lfds710_hash_a_insert( struct lfds710_hash_a_state *has,
                                                         struct lfds710_hash_a_element *hae,
                                                         struct lfds710_hash_a_element **existing_hae );

int lfds710_hash_a_get_by_key( struct lfds710_hash_a_state *has,
                               int (*key_compare_function)(void const *new_key, void const *existing_key),
                               void (*key_hash_function)(void const *key, lfds710_pal_uint_t *hash),
                               void *key,
                               struct lfds710_hash_a_element **hae );

void lfds710_hash_a_iterate_init( struct lfds710_hash_a_state *has, struct lfds710_hash_a_iterate *hai );
int lfds710_hash_a_iterate( struct lfds710_hash_a_iterate *hai, struct lfds710_hash_a_element **hae );

void lfds710_hash_a_query( struct lfds710_hash_a_state *has,
                           enum lfds710_hash_a_query query_type,
                           void *query_input,
                           void *query_output );

Overview

This data structure implements an add-only hash. It is not a real hash, not a direct implementation - it is something knocked together quickly, for now, to provide this type of functionality; internally, the hash is actually an array of btrees and the hash code itself needs to perform no atomic work - it merely wraps the array of btrees with a hash-type API.

The implementation performs no allocations. The user is responsible for all allocations (and deallocations), where these allocations are passed into the API functions, which then use them. As such, allocations can be on the stack, on the heap, or as can sometimes be the the case in embedded systems, allocated with fixed addresses at compile time from a fixed global store. Allocations can also be shared memory, but in this case, the virtual addresses used must be the same in all processes.

General usage is that the user calls lfds710_hash_a_init_valid_on_current_logical_core to initialize a struct lfds710_hash_a_state, and then calls lfds710_hash_a_insert_element and lfds710_hash_a_get_by_key to put and get struct lfds710_hash_a_elements. A hash element provides the ability to store a key (used to place elements in the hash) and a value, both of which are of type void * and can either point to data, or can be used directly, as key comparason is performed by a user-provided callback and the value is not touched by the hash code.

(See the section below, on lock-free specific behaviour, for an explanation of the unusual init function name.)

The state and element structures are both public, present in the lfds710_hash_addonly.h header file, so that users can embed them in their own structures (and where necessary pass them to sizeof). Expected use is that user structures which are to enter stacks contain within themselves a struct lfds710_hash_a_element, and this is used when calling lfds710_hash_a_put_element, and the value set in the hash element is a pointer to the user structure entering the hash. This approach permits zero run-time allocation of store and also ensures the hash element is normally in the same memory page as the user data it refers to.

The hash is configured by the user as to whether it should fail or overwrite, when lfds710_hash_a_insert is called with an existing key. In the event of putting an existing key into a hash configured for failure on existing key, a pointer can be returned to the existing element, saving a second lookup.

Iteration is supported. An iteration structure is initialized by lfds710_hash_a_iterate_init, and then passed repeatedly to lfds710_hash_a_iterate, where it returns the next key and its value.

Lock-free Specific Behaviour

The state initialization function, lfds710_hash_a_init_valid_on_current_logical_core, as the same suggests, initializes the state structure but that initialization is only valid on the current logical core. For the initialization to be valid on other logical cores (i.e. other threads where they are running on other logical cores) those other threads need to call the long-windedly named macro LFDS710_MISC_MAKE_VALID_ON_CURRENT_LOGICAL_CORE_INITS_COMPLETED_BEFORE_NOW_ON_ANY_OTHER_LOGICAL_CORE, which will do that which its name suggests.

Once a hash element structure has been put into the hash, it cannot be deallocated (free, or stack allocation lifetimes ending due to say a thread ending, etc) until lfds710_hash_a_cleanup has returned. Elements cannot be removed from the hash, or moved within the hash. Their value however can be changed at any time.

The SET macro for the key in an element can only be correctly used on elements which are outside of a hash. The SET macro for the value in an element can be used at any time, on any element. By correctly is it meant to say that the GET macros will actually read the data written by the SET macros, and not some other data. I don't have to tell you how much chaos will ensure if different logical cores are reading different keys for the same element...

If shared memory is used for allocations, the virtual addresses must be the same across different processes.

White Paper

There is no white paper for this data structure; it is native to liblfds.

License

Standard liblfds license - there is no license. You are free to use this code in any way. Go forth and create wealth!

If however for legal reasons a licence is required, the license of your choice will be granted, and license for convenience is hereby granted up front for a range of popular licenses : the MIT license, the BSD license, the Apache license, the GPL and LPGL (all versions thereof) and the Creative Commons licenses (all of them). Additionally, this library (which is to say, the source code, build files, documentation, everything) is placed in the public domain.

Example

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "liblfds710.h"

struct test_data
{
  char
    payload[64];

  int long long unsigned
    unique_id;

  struct lfds710_hash_a_element
    hae;
};

int key_compare_function( void const *new_key, void const *existing_key )
{
  int long long unsigned
    *new_key = (int long long unsigned *) new_key,
    *existing_key  = (int long long unsigned *) existing_key;

  if( *new_key > *existing_key )
    return( 1 );

  if( *new_key < *existing_key )
    return( -1 );

  return( 0 );
}

void key_hash_function( void const *key, lfds710_pal_uint_t *hash )
{
  *hash = 0;

  LFDS710_HASH_A_HASH_FUNCTION( key, sizeof(int long long unsigned), *hash );

  return;
}

int main()
{
  int long long unsigned
    key,
    *key_temp;

  struct lfds710_btree_au_state
    baus_array[10];

  struct lfds710_hash_a_element
    *hae;

  struct lfds710_hash_a_state
    has;

  struct test_data
    *td,
    *temp_td;

  lfds710_hash_a_init_valid_on_current_logical_core( &has, baus_array, 10, key_compare_function, key_hash_function, LFDS710_HASH_A_EXISTING_KEY_FAIL, NULL );

  // TRD : insert some keys
  td = malloc( sizeof(struct test_data) * 10 );

  for( loop = 0 ; loop < 10 ; loop++ )
  {
    td[loop].unique_id = loop;
    sprintf( td.payload, "Letter %c", 'A' + loop );

    LFDS710_HASH_A_SET_KEY_IN_ELEMENT( td.hae, &td.unique_id );
    LFDS710_HASH_A_SET_VALUE_IN_ELEMENT( td.hae, &td );

    lfds710_hash_a_insert_element( &has, &hae, NULL );
  }

  // TRD : query a key
  key = 3;
  lfds710_hash_a_get_by_key( &has, NULL, NULL, (void *) &key, &hae );

  key_temp = LFDS710_HASH_A_GET_KEY_FROM_ELEMENT( *hae );
  temp_td = LFDS710_HASH_A_GET_VALUE_FROM_ELEMENT( *hae );

  printf( "key in element is %llu\n"
          "payload in element is \"%s\"\n", *key_temp, temp_td->payload );

  lfds710_hash_a_cleanup( &has, NULL );

  free( td );

  return( EXIT_SUCCESS );
}

See Also