Dr. Dobb's is part of the Informa Tech Division of Informa PLC

This site is operated by a business or businesses owned by Informa PLC and all copyright resides with them. Informa PLC's registered office is 5 Howick Place, London SW1P 1WG. Registered in England and Wales. Number 8860726.


Channels ▼
RSS

The Postman's Sort


August 1992/The Postman's Sort/Listing 1

Listing 1 (sort.c)

/*
Postman's Sort (R) Version 1.0
Copyright (c) Robert Ramey 1991. All Rights Reserved
*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
#include <links.h>
#include "psort.h"
#include "sublist.h"
#include "key.h"
#include "io.h"
#include "os.h"

#define FILE_GUESS 50000
#define RECORD_SIZE_GUESS 80
#define NSIZE 31
                /* default maximum number of digits left of radix point */
                                   /* don't increase this beyond 127 */
#define MAX_BYTES_PER_PASS 8

/*********************************************************************
The following structure contains one member for each byte on which a
record may be distributed. That is, if a sublist is to be distributed
on the first two letters of an alphabetic key, the first two members of
the table will be used. In this example byte_count will equal 2.
***********************************************************************/
typedef struct {
   unsigned int *value;                  /* points to sequence value array */
   unsigned int order; /* number of possible sequence values for this byte */
   unsigned int count;    /* number of sublists to skip to get to next one */
   unsigned int key_index;        /* index of key where this byte is found */
   unsigned int field_index;                              /* key_index + 1 */
   unsigned int depth?      /* displacement within key where byte is found */
} BYTE_TABLE;

/***********************~*********************************************
The following data stores the state of sublist arrays and sort keys.
**********************************************************************/
typedef struct {
   SUBLIST *sublist;
   unsigned int
      sublist_count,                  /* number of sublists at this level */
      byte_count;              /* number of bytes to test on current pass */
   struct {
      unsigned int
         icount,                     /* number of frames in environment */
         count;               /* number of data frames so far in memory */
      FILE_SIZE
         limit,              /* amount of memory to be cleared to disk */
                                                    /* at a time */
         size;                       /* amount filled in current frame */
   } frame;
   BYTE_TABLE byte_table[MAX_BYTES_PER_PASS];
                         /* see above for explanation of BYTE TABLE */
} ENVIRONMENT;

/******************************************************************
global data
*******************************************************************/
BOOLEAN uflag : FALSE;                                 /* unique flag value */
RECORD *(*infunc)();          /* pointer to function which gets next record */
MEM_SIZE seg_length = 16;                  /* default size of stack segment */
unsigned int max_sublists = 0;
                              /* maximum number of sublists / segment */
ENVIRONMENT *env_ptr : (ENVIRONMENT *)NULL;
                                   /* pointer to current environment */
void
sort();
private void
psort(unsigned int, unsigned int, SUBLIST *);
private unsigned int
plan(ENVIRONMENT *, unsigned int, unsigned int, unsigned long, FILE_SIZE);
private int
dist(SUBLIST *, SUBLIST*);
private void
dsort(SUBlIST *, unsigned int);
private void
nsort(SUBLIST *, unsigned int);
private RECORD *
list_input(SUBLIST *);
private void
fill_fields(RECORD *);
private BOOLEAN
fits(FILE_SIZE, unsigned int);
void *
need(STACK *, size_t);
void *
get_space(STACK *, size_t);
private void
display_out(clock_t);
private clock_t
display_in(unsigned int, unsigned int, long, ENVIRONMENT *);
private void
free_memory(FILE_SIZE);
private void
free_frames(unsigned int);
/*********************************************************************
sort - top level driver for sort engine
**********************************************************************/
void
sort(){
   ENVIRONMENT env;                           /* dummy initial environment */
   unsigned int n;
   unsigned long rc;
   FILE_SIZE fsize;
   clock_t time;

                                      /* miniature version of sort */
                                /* for a full explanation see below */

                                        /* try to guess file size */
   fsize = os_flength(stdin);
   if(fsize <= 0L)
      fsize = FILE_GUESS;

   rc = fsize / (record_size ? record_size : RECORD_SIZE_GUESS);
   n = plan(&env, 0, 0, rc, fsize);
   env.sublist = sublist_allocate(n);
   env.sublist_count = n;

                                              /* initialize stacks */
   assert(stk_push(d_stack));

   env.frame.limit = (FILE_SIZE) rb_size * n * K;
   env.frame.size = 0;
   env.frame.icount =
   env.frame.count = 1;
   env_ptr = &env;

   sublist_fclose();

   time = display_in(0, 0, 0L, &env);

   dist((SUBLIST *)NULL, env.sublist);

   display_out(time);

   infunc = sublist_input;

   dsort(env.sublist, 0);

   stk_pop(d_stack);
   stk_free(d_stack);

   return;
}

/*************************************************************
psort - distribution sort
**************************************************************/
private
void
psort(key_index, depth, sublist)
unsigned int
   key_index,
   depth;
SUBLIST *sublist;
{
   ENVIRONMENT
      *prev_env,                    /* pointer to previous environment */
      env;                     /* current set of environment variables */
   clock_t time;                /* used to record time function started */

      /* use statics to save stack space for 2nd order recursive function */
   static unsigned int n;
                         /* number of sublists needed by on this pass */
   static long record_count;
                            /* number of records in the input sublist */

   record_count = sublist->pcount + sublist->count;

                                        /* take care of end points */
   if(record_count == 0)
      return;
   if(record count == 1){
      sublist_output(sublist, uflag);
      return;
   }

                               /* there are no more keys, we're done */
   if(key_index >= key_count){
      sublist_output(sublist, uflag);
      return;
   }

                                     /* if current key is exhausted */
   if(depth >= key[key_index].size){
                                        /* move on to the next one */
      depth = 0;
                              /* if that was the last key, we're done */
      if(++key_index >= key_count){
         sublist_output(sublist, uflag);
         return;
      }
   }

   /* figure out how many key bytes to use in distribution */
   /* and fill them into byte table of new environment */
   n = plan(&env, key_index, depth, record_count, sublist->size);

           /* allocate the calculated number of sublists, creating a new */
                                       /* memory area if necessary */
   env.sublist = sublist_allocate(n);

   env.sublist_count = n;

                                         /* save state of storage */
                    /* try to be sure that memory exists for next pass */
   free_memory(sublist->size);

   assert(stk_push(d_stack));
   env.frame.limit = (FILE_SIZE) rb_size * n * K;
   env.frame.size = 0;
   env.frame.icount =
   env.frame.count = env_ptr->frame.count+1;

                                  /* close switch to other swap file */
   sublist_fclose();

             /* save pointer to previous environment for end of function */
   prev_env = env_ptr;
   env_ptr = &env;

   time = display_in(key_index, depth, record_count, &env);

   /* distribute the input sublist among the new sublists */
   /* according to the key bytes specified in the byte table*/
   n = dist(sublist, env.sublist);

   if(n = 0){
                                /* reuse space used by sublist array */
      *sublist = (env.sublist)[n];
      env.sublist = sublist;
      env.sublist_count = 1;
      sublist_shrink();
      dsort(env.sublist, env.byte_count);
   }
   else{
                             /* sort sublists in the proper sequence */
       dsort(env.sublist, 0);
   }

                                /* and recover environment of caller */
   sublist_free();
   do{
      assert(stk_pop(d_stack));
   }while(env.frame.count-- > env.frame.icount);
   env_ptr = prev_env;
   display_out(time);
   return;
}
/*********************************************************************
plan - Figure how many sublists should be created for distributing the
current sublist. Figure how many bytes should be used to determine
where a record will be distributed.
**********************************************************************/
private
unsigned int
plan(env_ptr, key_index, depth, record_count, fsize)
ENVIRONMENT *env_ptr;      /* address of environment where new byte table is*/
                                                   /* to be loaded */
unsigned int
   key_index,                                 /* where the next key starts */
   depth;
unsigned long record_count;
                    /* number of records in sublists to be distributed */
FILE_SIZE fsize;                 /* total size of sublist records in bytes */
{
   unsigned int i, j, sublist_count;
   BYTE_TABLE *bptr;

                                        /* initialize accumulators */
   env_ptr->byte_count = 0;
   bptr = env_ptr->byte_table;
   sublist_count = 1;

                      /* figure how many bytes we can handle at a time */
   for(;;){

                                          /* fill byte table entry */
      bptr->value = key[key_index].seq->value;
      bptr->order = key[key_index].seq->order;
      bptr->key_index = key_index;
      bptr->field_index = key_index + 1;
      bptr->depth = depth;

                        /* figure how many sublists will be created by */
                                   /* continuing one more level deep */
      i = sublist_count * bptr->order + 1;

      if(sublist_count > 1){

      /* There is no point in spreading records among too many */
      /* sublists most of which will be empty */
         if(sublist_count > record_count)
            break;

                              /* if sublist won't fit into a segment */
         if(i > max_sublists)
            break;

                     /* if more sublists won't fit in memory with data */
         if( !fits(fsize, i)){

                 /* if blocks written to disk are going to be too small */
                 /* we can quit */
                 if( !fits((FILE_SIZE)i * rb_size * K, i))
                     break;
            }
         }
         sublist_count = i;
                                   /* increment pointer and counter */
         ++bptr;
         ++(env_ptr->byte_count);

                              /* if this key has been totally checked */
         if(++depth >= key[key_index].size){
                                         /* if this is the last key */
           if(++key_index == key_count)
                                                      /* we're done */
              break;
                         /* there are more keys, go on to the next one */
           depth = 0;
      }
   }

                    /* figure increments at each level of distribution */
   i = 1;
   for(j = env_ptr->byte_count; j-- > 0 ;){
      env_ptr->byte_table[j].count = i;
      i = 1 + i * env_ptr->byte_table[j].order;
   }
   return sublist_count;
}
/*********************************************************************
fits - determine whether or not memory exists for the specified areas
**********************************************************************/
private
BOOLEAN
fits(fsize, scount)
FILE_SIZE fsize;
unsigned int scount;
{
   unsigned int blk_count;

   blk_count = stk_unused() + stk_blks(d_stack);

   if(scount * sizeof(SUBLIST) > stk_avl(s_stack))
      if(blk_count < 2)
         return FALSE;
      else
         --blk_count;

   if((FILE_SIZE)blk_count * seg_length * k + stk_av1(d_stack) > fsize)
      return TRUE;
   else
      return FALSE;
}
/*********************************************************************
dist - Distribute input list among sublists created for this level
of key. This is the heart of the sort.
**********************************************************************/
private
int
dist(input_sublist, sublist)
SUBLIST *input_sublist, *sublist;
{
   register_BYTE_TABLE *bptr;
   RECORD *record_address;
   unsigned int i, j, disp, dcount;
   BOOLEAN hflag;          /* flag to hold output for unique output record */
   BOOLEAN oflag;            /* flag to output directly since no more keys */
   int pdisp;

   oflag =
      env_ptr->byte_table[0].key_index == key_count - 1
      ? TRUE : FALSE;

   hflag = FALSE;
   dcount =
   disp = 0;
   pdisp = -1;

                                   /* for each record in input list */
   while(record_address = (*infunc)(input_sublist)){

      disp = 0;
      bptr = env_ptr->byte_table;

                                         /* check each byte in key */
      i = env_ptr->byte_count;
      do{
         j =bptr->value[                                    /* key table */
                (record_address->data)[              /* record address */
                    record_address->field[          /* field pointers */
                       bptr->field_index           /* key field index */
                    ] + bptr->depth          /* displacement in field */
                ]
            ];
                                 /* if key is terminated prematurely */
         if(j == 0)
                              /* add to sublist in appropriate array */
            break;

                    /* accumulate displacement within array of sublists */
         disp += 1 + (j - 1) * (bptr++)->count;
                                   /* and go on to next byte in key */

                    /* continue as long as we can check more of the key */
      }while(--i);

                   /* if last key and key value is the minimum possible */
      if(disp == 0 && oflag){
                                    /* write record out immediatly  */
         if(!hflag)
            rec_output(record_address);
                     /* if unique flag is set, make sure that was the */
                                                    /* record out */
         hflag = uflag;
                                   /* free up space used by record */
         if(!rec_memflag(record_address))
            stk_drop(d_stack);
      }
      else{
         if(!rec_memflag(record_address))
            rec_frame(record_address) = env_ptr->frame.count;
                             /* link record into appropriate sublist */
         assert(record_address);
         link(record_address, sublist[disp].memory, 0);
         assert(sublist);
         sublist[disp].memory = record_address;
         ++(sublist[disp].count);
      }

      if(disp != pdisp){
         ++dcount;
         pdisp = disp;
      }
   }
   if(dcount > 1)
      return 0;
   else
      return disp;
}
/**********************************************************
dsort - small function to sort sublists in proper sequence.
************************************************************/
private
void
dsort(sublist, level)
SUBLIST *sublist;
unsigned level;
{
   unsigned int j;
   BYTE_TABLE *bptr;
   if(level == env_ptr->byte_count){
      bptr = &env_ptr->byte_table[level-1];
      psort(bptr->key_index, bptr->depth+l, sublist);
      return;
   }
   bptr = &env_ptr->byte_table[level];
   if(key[bptr->key_index].key_type == SIGN){
      nsort(sublist, level);
      return;
   }
   if(!key[bptr->key_index].inverted){
      psort(bptr->key_index+1, 0, sublist++);
      for(j = 0; j < bptr->order ; ++j){
         dsort(sublist, level+1);
         sublist += bptr->count;
      }
   }
   else{
      sublist += bptr->count * bptr->order + 1;
      for(j = bptr->order; j > 0; --j){
         sublist -= bptr->count;
         dsort(sublist, level+1);
      }
      psort(bptr->key_index+1, 0, --sublist);
   }
}
/**********************************************************
nsort - small function to sort sublists in proper sequence.
special version of dsort for numeric fields.
***********************************************************/
private
void
nsort(sublist, level)
SUBLIST *sublist;
unsigned int level;
{
   unsigned int key_index;
   int k, j;
   BYTE_TABLE *bptr;

   bptr = &env_ptr->byte_table[level];
   key_index = bptr->key_index;

   k = bptr->count;
   ++sublist;
   if(key[key_index].inverted){
      sublist += k * bptr->order;
      k *= -1;
   }
   key[key_index+1].inverted = TRUE;
   key[key_index+2].inverted = TRUE;
   for(j = NSIZE; j > 0; --j){
      dsort(sublist, level + 1);
      sublist += k;
   }
   key[key_index+1].inverted = FALSE;
   key[key_index+2].inverted = FALSE;
   for(; j < NSIZE; ++j){
      dsort(sublist, level + 1);
      sublist += k;
   }
   return;
}
/* End of File */

Related Reading


More Insights






Currently we allow the following HTML tags in comments:

Single tags

These tags can be used alone and don't need an ending tag.

<br> Defines a single line break

<hr> Defines a horizontal line

Matching tags

These require an ending tag - e.g. <i>italic text</i>

<a> Defines an anchor

<b> Defines bold text

<big> Defines big text

<blockquote> Defines a long quotation

<caption> Defines a table caption

<cite> Defines a citation

<code> Defines computer code text

<em> Defines emphasized text

<fieldset> Defines a border around elements in a form

<h1> This is heading 1

<h2> This is heading 2

<h3> This is heading 3

<h4> This is heading 4

<h5> This is heading 5

<h6> This is heading 6

<i> Defines italic text

<p> Defines a paragraph

<pre> Defines preformatted text

<q> Defines a short quotation

<samp> Defines sample computer code text

<small> Defines small text

<span> Defines a section in a document

<s> Defines strikethrough text

<strike> Defines strikethrough text

<strong> Defines strong text

<sub> Defines subscripted text

<sup> Defines superscripted text

<u> Defines underlined text

Dr. Dobb's encourages readers to engage in spirited, healthy debate, including taking us to task. However, Dr. Dobb's moderates all comments posted to our site, and reserves the right to modify or remove any content that it determines to be derogatory, offensive, inflammatory, vulgar, irrelevant/off-topic, racist or obvious marketing or spam. Dr. Dobb's further reserves the right to disable the profile of any commenter participating in said activities.

 
Disqus Tips To upload an avatar photo, first complete your Disqus profile. | View the list of supported HTML tags you can use to style comments. | Please read our commenting policy.