Listing 1 (sort.c)
/* Postman's Sort (R) Version 1.0 Copyright (c) Robert Ramey 1991. All Rights Reserved */ #include <stdio.h> #include <stdlib.h> #include <time.h> #include <assert.h> #include <links.h> #include "psort.h" #include "sublist.h" #include "key.h" #include "io.h" #include "os.h" #define FILE_GUESS 50000 #define RECORD_SIZE_GUESS 80 #define NSIZE 31 /* default maximum number of digits left of radix point */ /* don't increase this beyond 127 */ #define MAX_BYTES_PER_PASS 8 /********************************************************************* The following structure contains one member for each byte on which a record may be distributed. That is, if a sublist is to be distributed on the first two letters of an alphabetic key, the first two members of the table will be used. In this example byte_count will equal 2. ***********************************************************************/ typedef struct { unsigned int *value; /* points to sequence value array */ unsigned int order; /* number of possible sequence values for this byte */ unsigned int count; /* number of sublists to skip to get to next one */ unsigned int key_index; /* index of key where this byte is found */ unsigned int field_index; /* key_index + 1 */ unsigned int depth? /* displacement within key where byte is found */ } BYTE_TABLE; /***********************~********************************************* The following data stores the state of sublist arrays and sort keys. **********************************************************************/ typedef struct { SUBLIST *sublist; unsigned int sublist_count, /* number of sublists at this level */ byte_count; /* number of bytes to test on current pass */ struct { unsigned int icount, /* number of frames in environment */ count; /* number of data frames so far in memory */ FILE_SIZE limit, /* amount of memory to be cleared to disk */ /* at a time */ size; /* amount filled in current frame */ } frame; BYTE_TABLE byte_table[MAX_BYTES_PER_PASS]; /* see above for explanation of BYTE TABLE */ } ENVIRONMENT; /****************************************************************** global data *******************************************************************/ BOOLEAN uflag : FALSE; /* unique flag value */ RECORD *(*infunc)(); /* pointer to function which gets next record */ MEM_SIZE seg_length = 16; /* default size of stack segment */ unsigned int max_sublists = 0; /* maximum number of sublists / segment */ ENVIRONMENT *env_ptr : (ENVIRONMENT *)NULL; /* pointer to current environment */ void sort(); private void psort(unsigned int, unsigned int, SUBLIST *); private unsigned int plan(ENVIRONMENT *, unsigned int, unsigned int, unsigned long, FILE_SIZE); private int dist(SUBLIST *, SUBLIST*); private void dsort(SUBlIST *, unsigned int); private void nsort(SUBLIST *, unsigned int); private RECORD * list_input(SUBLIST *); private void fill_fields(RECORD *); private BOOLEAN fits(FILE_SIZE, unsigned int); void * need(STACK *, size_t); void * get_space(STACK *, size_t); private void display_out(clock_t); private clock_t display_in(unsigned int, unsigned int, long, ENVIRONMENT *); private void free_memory(FILE_SIZE); private void free_frames(unsigned int); /********************************************************************* sort - top level driver for sort engine **********************************************************************/ void sort(){ ENVIRONMENT env; /* dummy initial environment */ unsigned int n; unsigned long rc; FILE_SIZE fsize; clock_t time; /* miniature version of sort */ /* for a full explanation see below */ /* try to guess file size */ fsize = os_flength(stdin); if(fsize <= 0L) fsize = FILE_GUESS; rc = fsize / (record_size ? record_size : RECORD_SIZE_GUESS); n = plan(&env, 0, 0, rc, fsize); env.sublist = sublist_allocate(n); env.sublist_count = n; /* initialize stacks */ assert(stk_push(d_stack)); env.frame.limit = (FILE_SIZE) rb_size * n * K; env.frame.size = 0; env.frame.icount = env.frame.count = 1; env_ptr = &env; sublist_fclose(); time = display_in(0, 0, 0L, &env); dist((SUBLIST *)NULL, env.sublist); display_out(time); infunc = sublist_input; dsort(env.sublist, 0); stk_pop(d_stack); stk_free(d_stack); return; } /************************************************************* psort - distribution sort **************************************************************/ private void psort(key_index, depth, sublist) unsigned int key_index, depth; SUBLIST *sublist; { ENVIRONMENT *prev_env, /* pointer to previous environment */ env; /* current set of environment variables */ clock_t time; /* used to record time function started */ /* use statics to save stack space for 2nd order recursive function */ static unsigned int n; /* number of sublists needed by on this pass */ static long record_count; /* number of records in the input sublist */ record_count = sublist->pcount + sublist->count; /* take care of end points */ if(record_count == 0) return; if(record count == 1){ sublist_output(sublist, uflag); return; } /* there are no more keys, we're done */ if(key_index >= key_count){ sublist_output(sublist, uflag); return; } /* if current key is exhausted */ if(depth >= key[key_index].size){ /* move on to the next one */ depth = 0; /* if that was the last key, we're done */ if(++key_index >= key_count){ sublist_output(sublist, uflag); return; } } /* figure out how many key bytes to use in distribution */ /* and fill them into byte table of new environment */ n = plan(&env, key_index, depth, record_count, sublist->size); /* allocate the calculated number of sublists, creating a new */ /* memory area if necessary */ env.sublist = sublist_allocate(n); env.sublist_count = n; /* save state of storage */ /* try to be sure that memory exists for next pass */ free_memory(sublist->size); assert(stk_push(d_stack)); env.frame.limit = (FILE_SIZE) rb_size * n * K; env.frame.size = 0; env.frame.icount = env.frame.count = env_ptr->frame.count+1; /* close switch to other swap file */ sublist_fclose(); /* save pointer to previous environment for end of function */ prev_env = env_ptr; env_ptr = &env; time = display_in(key_index, depth, record_count, &env); /* distribute the input sublist among the new sublists */ /* according to the key bytes specified in the byte table*/ n = dist(sublist, env.sublist); if(n = 0){ /* reuse space used by sublist array */ *sublist = (env.sublist)[n]; env.sublist = sublist; env.sublist_count = 1; sublist_shrink(); dsort(env.sublist, env.byte_count); } else{ /* sort sublists in the proper sequence */ dsort(env.sublist, 0); } /* and recover environment of caller */ sublist_free(); do{ assert(stk_pop(d_stack)); }while(env.frame.count-- > env.frame.icount); env_ptr = prev_env; display_out(time); return; } /********************************************************************* plan - Figure how many sublists should be created for distributing the current sublist. Figure how many bytes should be used to determine where a record will be distributed. **********************************************************************/ private unsigned int plan(env_ptr, key_index, depth, record_count, fsize) ENVIRONMENT *env_ptr; /* address of environment where new byte table is*/ /* to be loaded */ unsigned int key_index, /* where the next key starts */ depth; unsigned long record_count; /* number of records in sublists to be distributed */ FILE_SIZE fsize; /* total size of sublist records in bytes */ { unsigned int i, j, sublist_count; BYTE_TABLE *bptr; /* initialize accumulators */ env_ptr->byte_count = 0; bptr = env_ptr->byte_table; sublist_count = 1; /* figure how many bytes we can handle at a time */ for(;;){ /* fill byte table entry */ bptr->value = key[key_index].seq->value; bptr->order = key[key_index].seq->order; bptr->key_index = key_index; bptr->field_index = key_index + 1; bptr->depth = depth; /* figure how many sublists will be created by */ /* continuing one more level deep */ i = sublist_count * bptr->order + 1; if(sublist_count > 1){ /* There is no point in spreading records among too many */ /* sublists most of which will be empty */ if(sublist_count > record_count) break; /* if sublist won't fit into a segment */ if(i > max_sublists) break; /* if more sublists won't fit in memory with data */ if( !fits(fsize, i)){ /* if blocks written to disk are going to be too small */ /* we can quit */ if( !fits((FILE_SIZE)i * rb_size * K, i)) break; } } sublist_count = i; /* increment pointer and counter */ ++bptr; ++(env_ptr->byte_count); /* if this key has been totally checked */ if(++depth >= key[key_index].size){ /* if this is the last key */ if(++key_index == key_count) /* we're done */ break; /* there are more keys, go on to the next one */ depth = 0; } } /* figure increments at each level of distribution */ i = 1; for(j = env_ptr->byte_count; j-- > 0 ;){ env_ptr->byte_table[j].count = i; i = 1 + i * env_ptr->byte_table[j].order; } return sublist_count; } /********************************************************************* fits - determine whether or not memory exists for the specified areas **********************************************************************/ private BOOLEAN fits(fsize, scount) FILE_SIZE fsize; unsigned int scount; { unsigned int blk_count; blk_count = stk_unused() + stk_blks(d_stack); if(scount * sizeof(SUBLIST) > stk_avl(s_stack)) if(blk_count < 2) return FALSE; else --blk_count; if((FILE_SIZE)blk_count * seg_length * k + stk_av1(d_stack) > fsize) return TRUE; else return FALSE; } /********************************************************************* dist - Distribute input list among sublists created for this level of key. This is the heart of the sort. **********************************************************************/ private int dist(input_sublist, sublist) SUBLIST *input_sublist, *sublist; { register_BYTE_TABLE *bptr; RECORD *record_address; unsigned int i, j, disp, dcount; BOOLEAN hflag; /* flag to hold output for unique output record */ BOOLEAN oflag; /* flag to output directly since no more keys */ int pdisp; oflag = env_ptr->byte_table[0].key_index == key_count - 1 ? TRUE : FALSE; hflag = FALSE; dcount = disp = 0; pdisp = -1; /* for each record in input list */ while(record_address = (*infunc)(input_sublist)){ disp = 0; bptr = env_ptr->byte_table; /* check each byte in key */ i = env_ptr->byte_count; do{ j =bptr->value[ /* key table */ (record_address->data)[ /* record address */ record_address->field[ /* field pointers */ bptr->field_index /* key field index */ ] + bptr->depth /* displacement in field */ ] ]; /* if key is terminated prematurely */ if(j == 0) /* add to sublist in appropriate array */ break; /* accumulate displacement within array of sublists */ disp += 1 + (j - 1) * (bptr++)->count; /* and go on to next byte in key */ /* continue as long as we can check more of the key */ }while(--i); /* if last key and key value is the minimum possible */ if(disp == 0 && oflag){ /* write record out immediatly */ if(!hflag) rec_output(record_address); /* if unique flag is set, make sure that was the */ /* record out */ hflag = uflag; /* free up space used by record */ if(!rec_memflag(record_address)) stk_drop(d_stack); } else{ if(!rec_memflag(record_address)) rec_frame(record_address) = env_ptr->frame.count; /* link record into appropriate sublist */ assert(record_address); link(record_address, sublist[disp].memory, 0); assert(sublist); sublist[disp].memory = record_address; ++(sublist[disp].count); } if(disp != pdisp){ ++dcount; pdisp = disp; } } if(dcount > 1) return 0; else return disp; } /********************************************************** dsort - small function to sort sublists in proper sequence. ************************************************************/ private void dsort(sublist, level) SUBLIST *sublist; unsigned level; { unsigned int j; BYTE_TABLE *bptr; if(level == env_ptr->byte_count){ bptr = &env_ptr->byte_table[level-1]; psort(bptr->key_index, bptr->depth+l, sublist); return; } bptr = &env_ptr->byte_table[level]; if(key[bptr->key_index].key_type == SIGN){ nsort(sublist, level); return; } if(!key[bptr->key_index].inverted){ psort(bptr->key_index+1, 0, sublist++); for(j = 0; j < bptr->order ; ++j){ dsort(sublist, level+1); sublist += bptr->count; } } else{ sublist += bptr->count * bptr->order + 1; for(j = bptr->order; j > 0; --j){ sublist -= bptr->count; dsort(sublist, level+1); } psort(bptr->key_index+1, 0, --sublist); } } /********************************************************** nsort - small function to sort sublists in proper sequence. special version of dsort for numeric fields. ***********************************************************/ private void nsort(sublist, level) SUBLIST *sublist; unsigned int level; { unsigned int key_index; int k, j; BYTE_TABLE *bptr; bptr = &env_ptr->byte_table[level]; key_index = bptr->key_index; k = bptr->count; ++sublist; if(key[key_index].inverted){ sublist += k * bptr->order; k *= -1; } key[key_index+1].inverted = TRUE; key[key_index+2].inverted = TRUE; for(j = NSIZE; j > 0; --j){ dsort(sublist, level + 1); sublist += k; } key[key_index+1].inverted = FALSE; key[key_index+2].inverted = FALSE; for(; j < NSIZE; ++j){ dsort(sublist, level + 1); sublist += k; } return; } /* End of File */