diff --git a/contrib/gperf/COPYING b/contrib/gperf/COPYING new file mode 100644 index 000000000000..9a1703758111 --- /dev/null +++ b/contrib/gperf/COPYING @@ -0,0 +1,249 @@ + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/contrib/gperf/ChangeLog b/contrib/gperf/ChangeLog new file mode 100644 index 000000000000..92e1c39c30ec --- /dev/null +++ b/contrib/gperf/ChangeLog @@ -0,0 +1,375 @@ +Sat Nov 11 13:43:17 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Modified options by changing macro TOTAL_POSITIONS to GET_CHARSET_SIZE + and SET_CHARSET_SIZE. These two routines now either return + the total charset size *or* the length of the largest keyword + if the user specifies the -k'*' (ALLCHARS) option. This change + cleans up client code. + +Fri Nov 10 15:21:19 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Made sure to explicitly initialize perfect.fewest_collisions to + 0. + +Wed Nov 1 21:39:54 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Upgraded the version number to 2.0, reflecting all the + major recent changes! + + * Rearranged code so that fewer source lines are greater than 80 columns. + + * Cleaned up some loose ends noticed by Nels Olson. + 1. Removed `if (collisions <= perfect.fewest_collisions)' + from affects_prev () since it was superfluous. + 2. Removed the fields best_char_value and best_asso_value + from PERFECT. There were also unnecessary. + 3. Fixed a braino in boolarray.c's bool_array_reset () + function. Since iteration numbers can never be zero + the `if (bool_array.iteration_number++ == 0)' must be + `if (++bool_array.iteration_number == 0).' + 4. Broke the -h help options string into 3 pieces. This + should silence broken C compilers for a while! + 5. Modified `report_error ()' so that it correctly handled + "%%". + +Tue Oct 31 23:01:11 1989 Doug Schmidt (schmidt at siam.ics.uci.edu) + + * It is important to note that -D no longer enables -S. + There is a good reason for this change, which will become + manifested in the next release... (suspense!). + + * Made some subtle changes to print_switch so that if finally + seems to work correctly. Needs more stress testing, however... + +Mon Oct 30 15:06:59 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Made a major change to the keylist.c's print_switch () function. + The user can now specify the number of switch statements to generate + via an argument to the -S option, i.e., -S1 means `generate 1 + switch statement with all keywords in it,' -S2 means generate + 2 switch statements with 1/2 the elements in each one, etc. + Hopefully this will fix the problem with C compilers not being + able to generate code for giant switch statements (but don't + hold your breath!) + + * Changed keylist.c's length () function to keyword_list_length (). + Also put an extern decl for this function and max_key_length () + into the keylist.h file (don't know why it wasn't already there...). + +Sun Oct 29 08:55:29 1989 Doug Schmidt (schmidt at siam.ics.uci.edu) + + * Added a feature to main.c that prints out the starting wall-clock + time before the program begins and prints out the ending wall-clock + time when the program is finished. + + * Added the GATHER_STATISTICS code in hashtable.c so we can + keep track of how well double hashing is doing. Eventually, + GATHER_STATISTICS will be added so that all instrumentation + code can be conditionally compiled in. + + * Modified xmalloc.c's buffered_malloc () routine so that it + rounded the SIZE byte requests up to the correct alignment + size for the target machine. + +Sat Oct 28 11:17:36 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Fixed a stupid bug in keylist.c's print_switch () routine. This + was necessary to make sure the generated switch statement worked + correctly when *both* `natural,' i.e., static links and dynamic + links, i.e., unresolved duplicates, hash to the same value. + + * Modified LIST_NODE so that the char *char_set field is redeclared + as char char_set[1] and placed at the bottom of the struct. This + enables us to play the old `variable-length string in the struct + trick' (third time I fell for it this week, chief!). The + main purpose of this is to remove n calls to buffered_malloc, + where n is the number of keywords. + + * Added a new function to xmalloc.c called buffered_malloc. This + reduces the number of calls to malloc by grabbing large chunks + of memory and doling them out in small pieces. Almost all uses + of xmalloc were replaced with calls to buffered_malloc (). + + * Modified boolarray.c's bool_array_destroy () function so that + it now frees the bool_array.storage_array when it is no longer + needed. Since this array is generally very large it makes sense + to return the memory to the freelist when it is no longer in use. + + * Changed the interface to hash_table_init. This function is + now passed a pointer to a power-of-two sized buffer that serves + as storage for the hash table. Although this weakens information + hiding a little bit it greatly reduces dynamic memory fragmentation, + since we can now obtain the memory via a call to alloca, rather + than malloc. This change modified keylist.c's read_keys () calling + interface. + + * Since alloca is now being used more aggressively a conditional + compilation section was added to the init_all () routine in main.c. + Taken from GNU GCC, this code gets rid of any avoidable limit + on stack size so that alloca does not fail. It is only used + if the -DRLIMIT_STACK symbol is defined when gperf is compiled. + + * Moved the `destructor' for bool_array from destroy_all () in + main.c into perfect_generate () in perfect.c. This enables + us to get free up the large dynamic memory as soon as it is no + longer necessary. Also moved the hash_table destructor from + destroy_all into read_keys () from keylist.c. This accomplishes + the same purpose, i.e., we can free up the space immediately. + + * Added warnings in option.c so that user's would be informed + that -r superceeds -i on the command-line. + + * Rewrote affects_prev () from perfect.c. First, the code structure + was cleaned up considerably (removing the need for a dreaded + goto!). Secondly, a major change occurred so that affects_prev () + returns FALSE (success) when fewest_hits gets down to whatever + it was after inserting the previous key (instead of waiting for + it to reach 0). In other words, it stops trying if it can + resolve the new collisions added by a key, even if there are + still other old, unresolved collisions. This modification was + suggested by Nels Olson and seems to *greatly* increase the + speed of gperf for large keyfiles. Thanks Nels! + + * In a similar vein, inside the change () routine from perfect.c + the variable `perfect.fewest_collisions is no longer initialized + with the length of the keyword list. Instead it starts out at + 0 and is incremented by 1 every time change () is called. + The rationale for this behavior is that there are times when a + collision causes the number of duplicates (collisions) to + increase by a large amount when it would presumably just have + gone up by 1 if none of the asso_values were changed. That is, + at the beginning of change(), you could initialize fewest_hits + to 1+(previous value of fewest_hits) instead of to the number of + keys. Thanks again, Nels. + + * Replaced alloca with xmalloc in perfect.c's change () function. + This should eliminate some overhead at the expense of a little + extra memory that is never reclaimed. + + * Renamed perfect.c's merge_sets () to compute_disjoint_union () + to reflect the change in behavior. + +Fri Oct 27 10:12:27 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu) + + * Added the -e option so users can supply a string containing + the characters used to separate keywords from their attributes. + The default behavior is ",\n". + + * Removed the char *uniq_set field from LIST_NODE and modified + uses of uniq_set in perfect.c and keylist.c. Due to changes + to perfect.c's merge_set () described below this field was + no longer necessary, and its removal makes the program + smaller and potentially faster. + + * Added lots of changes/fixes suggested by Nels Olson + (umls.UUCP!olson@mis.ucsf.edu). In particular: + 1. Changed BOOL_ARRAY so that it would dynamically create + an array of unsigned shorts rather than ints if the + LO_CAL symbol was defined during program compilation. + This cuts the amount of dynamic memory usage in half, + which is important for large keyfile input. + 2. Added some additional debugging statements that print extra + info to stderr when the -d option is enabled. + 3. Fixed a really stupid bug in the print_switch () from keylist.c. + A right paren was placed at the wrong location. + 4. Fixed a subtle problem with printing case values when keylinks + appear. The logic failed to account for the fact that there + can be keylinks *and* regular node info also! + 5. Finally split the huge help string into two parts. This keeps + breaking compilers with static limits on the length of tokens... + 6. Modified the -j option so that -j 0 means `try random values + when searching for a way to resolve collisions.' + 7. Added a field `num_done' to the PERFECT struct. This is used + to report information collected when trying to resolve + hash collisions. + 8. Modified the merge_sets algorithm to perform a disjoint + union of two multisets. This ensures that subsequent + processing in perfect.c function affect_prev () doesn't + waste time trying to change an associated value that is + shared between two conflicting keywords. + 9. Modified affects_prev so that it doesn't try random jump + values unless the -j 0 option is enabled. + 10. Fixed a silly bug in perfect.c change (). + This problem caused gperf to seg fault when + the -k* option was given and the keyfile file had long + keywords. + 11. Changed the behavior of keylist.c's read_keys () routine + so that it would honor -D unequivocally, i.e., it doesn't + try to turn off dup handling if the user requests it, even + if there are no immediate links in the keyfile input. + +Mon Oct 16 19:58:08 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Fixed a number of small bugs kindly brought to my attention by + Adam de Boor (bsw!adam@uunet.UU.NET). Thanks Adam! In particular, + changed the behavior for the -a (ANSI) option so that the + generated prototypes use int rather than size_t for the LEN + parameter. It was too ugly having to #include all + over the place... + + * Added a majorly neat hack to Bool_Array, suggested by rfg. + The basic idea was to throw away the Ullman array technique. + The Ullman array was used to remove the need to reinitialize all + the Bool_Array elements to zero everytime we needed to determine + whether there were duplicate hash values in the keyword list. + The current trick uses an `iteration number' scheme, which takes + about 1/3 the space and reduces the overall program running a + time by about 20 percent for large input! The hack works as + follows: + + 1. Dynamically allocate 1 boolean array of size k. + 2. Initialize the boolean array to zeros, and consider the first + iteration to be iteration 1. + 2. Then on all subsequent iterations we `reset' the bool array by + kicking the iteration count by 1. + 3. When it comes time to check whether a hash value is currently + in the boolean array we simply check its index location. If + the value stored there is *not* equal to the current iteration + number then the item is clearly *not* in the set. In that + case we assign the iteration number to that array's index + location for future reference. Otherwise, if the item at + the index location *is* equal to the iteration number we've + found a duplicate. No muss, no fuss! + +Thu Oct 12 18:08:43 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Updated the version number to 1.9. + + * Added support for the -C option. This makes the contents of + all generated tables `readonly'. + + * Changed the handling of generated switches so that there is + only one call to str[n]?cmp. This *greatly* reduces the size of + the generated assembly code on all compilers I've seen. + + * Fixed a subtle bug that occurred when the -l and -S option + was given. Code produced looked something like: + + if (len != key_len || !strcmp (s1, resword->name)) return resword; + + which doesn't make any sense. Clearly, this should be: + + if (len == key_len && !strcmp (s1, resword->name)) return resword; + +Sat Sep 30 12:55:24 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Fixed a stupid bug in Key_List::print_hash_function that manifested + itself if the `-k$' option was given (i.e., only use the key[length] + character in the hash function). + +Mon Jul 24 17:09:46 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Fixed a bug in PRINT_MIN_MAX that resulted in MAX_INT being printed + for the MIN_KEY_LEN if there was only 1 keyword in the input file + (yeah, that's a pretty unlikely occurrence, I agree!). + + * Fixed PRINT_HASH_FUNCTION and PRINT_LOOKUP_FUNCTION in keylist.c + so that the generated functions take an unsigned int length argument. + If -a is enabled the prototype is (const char *str, size_t len). + +Fri Jul 21 13:06:15 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Fixed a horrible typo in PRINT_KEYWORD_TABLE in keylist.cc + that prevented links from being printed correctly. + +Sun Jul 9 17:53:28 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Changed the ./tests subdirectory Makefile so that it + uses $(CC) instead of gcc. + +Sun Jul 2 12:14:04 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Moved comment handling from keylist.c to readline.c. This + simplifies the code and reduces the number of malloc calls. + + * Fixed a number of subtle bugs that occurred when -S was + combined with various and sundry options. + + * Added the -G option, that makes the generated keyword table + a global static variable, rather than hiding it inside + the lookup function. This allows other functions to directly + access the contents in this table. + +Sat Jul 1 10:12:21 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu) + + * Added the "#" feature, that allows comments inside the keyword + list from the input file. + + * Also added the -H option (user can give the name of the hash + function) and the -T option (prevents the transfer of the type decl + to the output file, which is useful if the type is already defined + elsewhere). + +Fri Jun 30 18:22:35 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu) + + * Added Adam de Boor's changes. Created an UNSET_OPTION macro. + +Sat Jun 17 10:56:00 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu) + + * Modified option.h and option.c so that all mixed operations + between integers and enumerals are cast correctly to int. + This prevents errors in some brain-damaged C compilers. + +Fri Jun 16 14:13:15 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu) + + * Modified the -f (FAST) option. This now takes an argument. + The argument corresponds to the number of iterations used + to resolve collisions. -f 0 uses the length of the + keyword list (which is what -f did before). This makes + life much easier when dealing with large keyword files. + +Wed Jun 7 23:07:13 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Updated to version 1.8 in preparation to release to Doug Lea + and FSF. + + * Added the -c (comparison) option. Enabling this + will use the strncmp function for string comparisons. + The default is to use strcmp. + +Tue Jun 6 16:32:09 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Fixed another stupid typo in xmalloc.c (XMALLOC). I accidentally + left the ANSI-fied prototype in place. This obviously + fails on old-style C compilers. + + * Fixed stupid typos in PRINT_SWITCH from the keylist.c. This + caused the -D option to produce incorrect output when used + in conjunction with -p and -t. + + * Replaced the use of STRCMP with STRNCMP for the generated + C output code. + +Fri Jun 2 23:16:01 1989 Doug Schmidt (schmidt at trinite.ics.uci.edu) + + * Added a new function (XMALLOC) and file (xmalloc.c). All + calls to MALLOC were replaced by calls to XMALLOC. This + will complain when virtual memory runs out (don't laugh, + this has happened!) + +Thu Jun 1 21:10:10 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Fixed a typo in options.c that prevented the -f option + from being given on the command-line. + +Wed May 3 17:48:02 1989 Doug Schmidt (schmidt at zola.ics.uci.edu) + + * Updated to version 1.7. This reflects the recent major changes + and the new C port. + + * Fixed a typo in perfect.c perfect_destroy that prevented the actual + maximum hash table size from being printed. + + * Added support for the -f option. This generates the perfect + hash function ``fast.'' It reduces the execution time of + gperf, at the cost of minimizing the range of hash values. + +Tue May 2 16:23:29 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu) + + * Enabled the diagnostics dump if the debugging option is enabled. + + * Removed all calls to FREE (silly to do this at program termination). + + * Ported gperf to C. From now on both K&R C and GNU G++ versions + will be supported. + diff --git a/contrib/gperf/Makefile b/contrib/gperf/Makefile new file mode 100644 index 000000000000..9f728473ca6d --- /dev/null +++ b/contrib/gperf/Makefile @@ -0,0 +1,41 @@ +# Copyright (C) 1989 Free Software Foundation, Inc. +# written by Douglas C. Schmidt (schmidt@ics.uci.edu) +# +# This file is part of GNU GPERF. +# +# GNU GPERF is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 1, or (at your option) +# any later version. +# +# GNU GPERF is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU GPERF; see the file COPYING. If not, write to +# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +GPERF = ../src/gperf + +all: gperf tests + +gperf: + (cd src; $(MAKE)) + +tests: gperf + (cd tests; $(MAKE) GPERF=$(GPERF)) + +distrib: + (cd ..; rm -f cperf.tar.Z; tar cvf cperf.tar cperf; compress cperf.tar; uuencode cperf.tar.Z < cperf.tar.Z > CSHAR) + +clean: + (cd src; $(MAKE) clean) + (cd tests; $(MAKE) clean) + +realclean: + (cd src; $(MAKE) realclean) + (cd tests; $(MAKE) clean) + -rm -f gperf.info* gperf.?? gperf.??s gperf.log gperf.toc \ + gperf.*aux *inset.c *out gperf diff --git a/contrib/gperf/README b/contrib/gperf/README new file mode 100644 index 000000000000..26ace32d65d2 --- /dev/null +++ b/contrib/gperf/README @@ -0,0 +1,28 @@ +While teaching a data structures course at University of California, +Irvine, I developed a program called GPERF that generates perfect hash +functions for sets of key words. A perfect hash function is simply: + + A hash function and a data structure that allows + recognition of a key word in a set of words using + exactly 1 probe into the data structure. + +The gperf.texinfo file explains how the program works, the form of the +input, what options are available, and hints on choosing the best +options for particular key word sets. The texinfo file is readable +both via the GNU emacs `info' command, and is also suitable for +typesetting with TeX. The texinfo.tex macros needed to run +gperf.texinfo through TeX are available in the GNU GCC release. If +you don't have access to these please email me and I'll send them to +you (about 75k). + +The enclosed Makefile creates the executable program ``gperf'' and +also runs some tests. + +Output from the GPERF program is used to recognize reserved words in +the GNU C, GNU C++, and GNU Pascal compilers, as well as with the GNU +indent program. + +Happy hacking! + +Douglas C. Schmidt +schmidt@ics.uci.edu diff --git a/contrib/gperf/README-FIRST b/contrib/gperf/README-FIRST new file mode 100644 index 000000000000..681f2ff213bb --- /dev/null +++ b/contrib/gperf/README-FIRST @@ -0,0 +1,8 @@ + Note: Just for clarification, this version of `gperf' is written in +portable K&R C. The `gperf' supplied in the top-level libg++ directory on +this distribution tape is written in C++. Either will generate output for +C or C++, so which one to compile is up to you. + +--Noah Friedman, GNU tape distribution maintainer. + friedman@prep.ai.mit.edu + December 24, 1991 diff --git a/contrib/gperf/gperf.1 b/contrib/gperf/gperf.1 new file mode 100644 index 000000000000..5673c80062af --- /dev/null +++ b/contrib/gperf/gperf.1 @@ -0,0 +1,23 @@ +.TH GPERF 1 "December 16, 1988 +.UC 4 +.SH NAME +gperf \- generate a perfect hash function from a key set +.SH SYNOPSIS +.B gperf +[ +.B \-adghijklnoprsStv +] [ +.I keyfile +] +.SH DESCRIPTION + +\fIgperf\fP reads a set of ``keys'' from \fIkeyfile\fP (or, by +default, from the standard input) and attempts to find a non-minimal +perfect hashing function that recognizes a member of the key set in +constant, i.e., O(1), time. If such a function is found the program +generates a pair of \fIC\fP source code routines that perform the +hashing and table lookup. All generated code is directed to the +standard output. + +Please refer to the \fIgperf.texinfo\fP file for more information. +This file is distributed with \fIgperf\fP release. diff --git a/contrib/gperf/gperf.texinfo b/contrib/gperf/gperf.texinfo new file mode 100644 index 000000000000..c957269dccf4 --- /dev/null +++ b/contrib/gperf/gperf.texinfo @@ -0,0 +1,1129 @@ +\input texinfo @c -*-texinfo-*- +@settitle User's Guide to @code{gperf} +@setfilename gperf.info + +@ifinfo +This file documents the features of the GNU Perfect Hash Function Generator + +Copyright (C) 1989 Free Software Foundation, Inc. + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +@ignore +Permission is granted to process this file through @TeX{} and print the +results, provided the printed document carries copying permission +notice identical to this one except for the removal of this paragraph +(this paragraph not being relevant to the printed manual). + +@end ignore + +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided also that the +section entitled ``GNU General Public License'' is included exactly as +in the original, and provided that the entire resulting derived work is +distributed under the terms of a permission notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that the section entitled ``GNU @code{gperf} General Public License'' and +this permission notice may be included in translations approved by the +Free Software Foundation instead of in the original English. +@end ifinfo + +@setchapternewpage odd + +@titlepage +@center @titlefont{User's Guide} +@sp 2 +@center @titlefont{for the} +@sp 2 +@center @titlefont{GNU @code{gperf} Utility} +@sp 4 +@center Douglas C. Schmidt +@sp 3 +@center last updated 1 November 1989 +@sp 1 +@center for version 2.0 +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 1989 Free Software Foundation, Inc. + + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided also that the +section entitled ``GNU @code{gperf} General Public License'' is included exactly as +in the original, and provided that the entire resulting derived work is +distributed under the terms of a permission notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that the section entitled ``GNU @code{gperf} General Public License'' may be +included in a translation approved by the author instead of in the original +English. +@end titlepage + +@ifinfo +@node Top, Copying, , (DIR) +@ichapter Introduction + +This manual documents the GNU @code{gperf} perfect hash function generator +utility, focusing on its features and how to use them, and how to report +bugs. + +@end ifinfo +@menu +* Copying:: GNU @code{gperf} General Public License says + how you can copy and share @code{gperf}. +* Contributors:: People who have contributed to @code{gperf}. +* Motivation:: Introduction to @code{gperf}. +* Search Structures:: Static search structures and GNU GPERF. +* Description:: High-level discussion of how GPERF functions. +* Options:: A description of options to the program. +* Bugs:: Known bugs and limitations with GPERF. +* Projects:: Things still left to do. +* Implementation:: Implementation Details for GNU GPERF. +* Bibliography:: Material Referenced in this Report. +@end menu + +@node Copying, Contributors, Top, Top +@unnumbered GNU GENERAL PUBLIC LICENSE +@center Version 1, February 1989 + +@display +Copyright @copyright{} 1989 Free Software Foundation, Inc. +675 Mass Ave, Cambridge, MA 02139, USA + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +@end display + +@unnumberedsec Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software---to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + +@iftex +@unnumberedsec TERMS AND CONDITIONS +@end iftex +@ifinfo +@center TERMS AND CONDITIONS +@end ifinfo + +@enumerate +@item +This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +``Program'', below, refers to any such program or work, and a ``work based +on the Program'' means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as ``you''. + +@item +You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + +@item +You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + +@itemize @bullet +@item +cause the modified files to carry prominent notices stating that +you changed the files and the date of any change; and + +@item +cause the whole of any work that you distribute or publish, that +in whole or in part contains the Program or any part thereof, either +with or without modifications, to be licensed at no charge to all +third parties under the terms of this General Public License (except +that you may choose to grant warranty protection to some or all +third parties, at your option). + +@item +If the modified program normally reads commands interactively when +run, you must cause it, when started running for such interactive use +in the simplest and most usual way, to print or display an +announcement including an appropriate copyright notice and a notice +that there is no warranty (or else, saying that you provide a +warranty) and that users may redistribute the program under these +conditions, and telling the user how to view a copy of this General +Public License. + +@item +You may charge a fee for the physical act of transferring a +copy, and you may at your option offer warranty protection in +exchange for a fee. +@end itemize + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + +@item +You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + +@itemize @bullet +@item +accompany it with the complete corresponding machine-readable +source code, which must be distributed under the terms of +Paragraphs 1 and 2 above; or, + +@item +accompany it with a written offer, valid for at least three +years, to give any third party free (except for a nominal charge +for the cost of distribution) a complete machine-readable copy of the +corresponding source code, to be distributed under the terms of +Paragraphs 1 and 2 above; or, + +@item +accompany it with the information you received as to where the +corresponding source code may be obtained. (This alternative is +allowed only for noncommercial distribution and only if you +received the program in object code or executable form alone.) +@end itemize + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + +@item +You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + +@item +By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + +@item +Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + +@item +The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and ``any +later version'', you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + +@item +If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + +@iftex +@heading NO WARRANTY +@end iftex +@ifinfo +@center NO WARRANTY +@end ifinfo + +@item +BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + +@item +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT +LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES +SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE +WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN +ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +@end enumerate + +@iftex +@heading END OF TERMS AND CONDITIONS +@end iftex +@ifinfo +@center END OF TERMS AND CONDITIONS +@end ifinfo + +@page +@unnumberedsec Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +``copyright'' line and a pointer to where the full notice is found. + +@smallexample +@var{one line to give the program's name and a brief idea of what it does.} +Copyright (C) 19@var{yy} @var{name of author} + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +@end smallexample + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + +@smallexample +Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} +Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. +This is free software, and you are welcome to redistribute it +under certain conditions; type `show c' for details. +@end smallexample + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items---whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a ``copyright disclaimer'' for the program, if +necessary. Here a sample; alter the names: + +@example +Yoyodyne, Inc., hereby disclaims all copyright interest in the +program `Gnomovision' (a program to direct compilers to make passes +at assemblers) written by James Hacker. + +@var{signature of Ty Coon}, 1 April 1989 +Ty Coon, President of Vice +@end example + +That's all there is to it! + +@node Contributors, Motivation, Copying, Top +@unnumbered Contributors to GNU @code{gperf} Utility + +@itemize @bullet +@item +The GNU @code{gperf} perfect hash function generator utility was +originally written in GNU C++ by Douglas C. Schmidt. It is now also +available in a highly-portable ``old-style'' C version. The general +idea for the perfect hash function generator was inspired by Keith +Bostic's algorithm written in C, and distributed to @code{net.sources} around +1984. The current program is a heavily modified, enhanced, and extended +implementation of Keith's basic idea, created at the University of +California, Irvine. Bugs, patches, and suggestions should be reported +to @code{schmidt@@ics.uci.edu}. + +@item +Special thanks is extended to Michael Tiemann and Doug Lea, for +providing a useful compiler, and for giving me a forum to exhibit my +creation. + +In addition, Adam de Boor and Nels Olson provided many tips and insights +that greatly helped improve the quality and functionality of @code{gperf}. +@end itemize + +@node Motivation, Search Structures, Contributors, Top +@chapter Introduction + +@code{gperf} is a perfect hash function generator written in C++. It +transforms an @emph{n} element user-specified keyword set @emph{W} into +a perfect hash function @emph{F}. @emph{F} uniquely maps keywords in +@emph{W} onto the range 0..@emph{k}, where @emph{k} >= @emph{n}. If +@emph{k = n} then @emph{F} is a @emph{minimal} perfect hash function. +@code{gperf} generates a 0..@emph{k} element static lookup table and a +pair of C functions. These functions determine whether a given +character string @emph{s} occurs in @emph{W}, using at most one probe +into the lookup table. + +@code{gperf} currently generates the reserved keyword recognizer for +lexical analyzers in several production and research compilers and +language processing tools, including GNU C, GNU C++, GNU Pascal, GNU +Modula 3, and GNU indent. Complete C++ source code for @code{gperf} is +available via anonymous ftp from @code{ics.uci.edu}. @code{gperf} also is +distributed along with the GNU libg++ library. Finally, a highly +portable, functionally equivalent K&R C version of @code{gperf} is +archived in @code{comp.sources.unix}, volume 20. + +@node Search Structures, Description, Motivation, Top +@chapter Static search structures and GNU @code{gperf} + +A @dfn{static search structure} is an Abstract Data Type with certain +fundamental operations, @emph{e.g.}, @emph{initialize}, @emph{insert}, +and @emph{retrieve}. Conceptually, all insertions occur before any +retrievals.@footnote{In practice, @code{gperf} generates a @code{static} +array containing search set keywords and any associated attributes +specified by the user. Thus, there is essentially no execution-time +cost for the insertions.} It is a useful data structure for +representing @emph{static search sets}. Static search sets occur +frequently in software system applications. Typical static search +sets include compiler reserved words, assembler instruction opcodes, +and built-in shell interpreter commands. Search set members, called +@dfn{keywords}, are inserted into the structure only once, usually +during program initialization, and are not generally modified at +run-time. + +Numerous static search structure implementations exist, @emph{e.g.}, +arrays, linked lists, binary search trees, digital search tries, and +hash tables. Different approaches offer trade-offs between space +utilization and search time efficiency. For example, an $n$ element +sorted array is space efficient, though the average-case time +complexity for retrieval operations using binary search is +proportional to $\log n$. Conversely, hash table implementations +often locate a table entry in constant time, but typically impose +additional memory overhead and exhibit poor worst case performance +@cite{aho, etc.}. + +@emph{Minimal perfect hash functions} provide an optimal solution for a +particular class of static search sets. A minimal perfect hash +function is defined by two properties: + +@itemize @bullet +@item It allows keyword recognition in a static search set using +at most @emph{one} probe into the hash table. This represents the +``perfect'' property. +@item The actual memory allocated to store the keywords is precisely +large enough for the keyword set, and @emph{no larger}. This is the +``minimal'' property. +@end itemize + +For most applications it is far easier to generate @emph{perfect} hash +functions than @emph{minimal perfect} hash functions @cite{many bozos}. +Moreover, non-minimal perfect hash functions frequently execute faster +than minimal ones in practice @cite{cichelli}. This phenomena occurs +since searching a sparse keyword table increases the probability of +locating a ``null'' entry, thereby reducing string comparisons. +@code{gperf}'s default behavior generates @emph{near-minimal} perfect hash +functions for keyword sets. However, @code{gperf} provides many +options that permit user control over the degree of minimality and +perfection. + +Static search sets often exhibit relative stability over time. For +example, Ada's 63 reserved words have remained constant for nearly a +decade. It is therefore frequently worthwhile to expend concerted +effort building an optimal search structure @emph{once}, if it +subsequently receives heavy use multiple times. @code{gperf} removes +the drudgery associated with constructing time- and space-efficient +search structures by hand. It has proven a useful and practical tool +for serious programming projects. Output from @code{gperf} is +currently used in several production and research compilers, including +GNU C, GNU C++, GNU Pascal, and GNU Modula 3. @footnote{The latter two +compilers are not yet part of the official GNU distribution.} Each +compiler utilizes @code{gperf} to automatically generate static search +structures that efficiently identify their respective reserved +keywords. + +@node Description, Options, Search Structures, Top +@chapter High-Level Description of GNU @code{gperf} + +@menu +* Input Format:: Input Format to @code{gperf} +* Output Format:: Output Format for Generated C Code with @code{gperf} +@end menu + +The perfect hash function generator @code{gperf} reads a set of +``keywords'' from a @dfn{keyfile} (or from the standard input by +default). It attempts to derive a perfect hashing function that +recognizes a member of the @dfn{static keyword set} with at most a +single probe into the lookup table. If @code{gperf} succeeds in +generating such a function it produces a pair of C source code routines +that perform hashing and table lookup recognition. All generated C code +is directed to the standard output. Command-line options described +below allow you to modify the input and output format to @code{gperf}. + +By default, @code{gperf} attempts to produce time-efficient code, with +less emphasis on efficient space utilization. However, several options +exist that permit trading-off execution time for storage space and vice +versa. In particular, expanding the generated table size produces a +sparse search structure, generally yielding faster searches. +Conversely, you can direct @code{gperf} to utilize a C @code{switch} +statement scheme that minimizes data space storage size. Furthermore, +using a C @code{switch} may actually speed up the keyword retrieval time +somewhat. Actual results depend on your C compiler, of course. + +In general, @code{gperf} assigns values to the characters it is using +for hashing until some set of values gives each keyword a unique value. +A helpful heuristic is that the larger the hash value range, the easier +it is for @code{gperf} to find and generate a perfect hash function. +Experimentation is the key to getting the most from @code{gperf}. + +@node Input Format, Declarations, Description, Description +@section Input Format to @code{gperf} + +You can control the input keyfile format by varying certain command-line +arguments, in particular the @samp{-t} option. The input's appearance +is similar to GNU utilities @code{flex} and @code{bison} (or UNIX +utilities @code{lex} and @code{yacc}). Here's an outline of the general +format: + +@group +@example +declarations +%% +keywords +%% +functions +@end example +@end group + +@emph{Unlike} @code{flex} or @code{bison}, all sections of @code{gperf}'s input +are optional. The following sections describe the input format for each +section. + +@menu +* Declarations:: @code{struct} Declarations and C Code Inclusion. +* Keywords:: Format for Keyword Entries. +* Functions:: Including Additional C Functions. +@end menu + +@node Declarations, Keywords, Input Format, Input Format +@subsection @code{struct} Declarations and C Code Inclusion + +The keyword input file optionally contains a section for including +arbitrary C declarations and definitions, as well as provisions for +providing a user-supplied @code{struct}. If the @samp{-t} option +@emph{is} enabled, you @emph{must} provide a C @code{struct} as the last +component in the declaration section from the keyfile file. The first +field in this struct must be a @code{char *} identifier called ``name,'' +although it is possible to modify this field's name with the @samp{-K} +option described below. + +Here is simple example, using months of the year and their attributes as +input: + +@group +@example +struct months @{ char *name; int number; int days; int leap_days; @}; +%% +january, 1, 31, 31 +february, 2, 28, 29 +march, 3, 31, 31 +april, 4, 30, 30 +may, 5, 31, 31 +june, 6, 30, 30 +july, 7, 31, 31 +august, 8, 31, 31 +september, 9, 30, 30 +october, 10, 31, 31 +november, 11, 30, 30 +december, 12, 31, 31 +@end example +@end group + +Separating the @code{struct} declaration from the list of key words and +other fields are a pair of consecutive percent signs, @code{%%}, +appearing left justified in the first column, as in the UNIX utility +@code{lex}. + +Using a syntax similar to GNU utilities @code{flex} and @code{bison}, it +is possible to directly include C source text and comments verbatim into +the generated output file. This is accomplished by enclosing the region +inside left-justified surrounding @code{%@{}, @code{%@}} pairs. Here is +an input fragment based on the previous example that illustrates this +feature: + +@group +@example +%@{ +#include +/* This section of code is inserted directly into the output. */ +int return_month_days (struct months *months, int is_leap_year); +%@} +struct months @{ char *name; int number; int days; int leap_days; @}; +%% +january, 1, 31, 31 +february, 2, 28, 29 +march, 3, 31, 31 +... +@end example +@end group + +It is possible to omit the declaration section entirely. In this case +the keyfile begins directly with the first keyword line, @emph{e.g.}: + +@group +@example +january, 1, 31, 31 +february, 2, 28, 29 +march, 3, 31, 31 +april, 4, 30, 30 +... +@end example +@end group + +@node Keywords, Functions, Declarations, Input Format +@subsection Format for Keyword Entries + +The second keyfile format section contains lines of keywords and any +associated attributes you might supply. A line beginning with @samp{#} +in the first column is considered a comment. Everything following the +@samp{#} is ignored, up to and including the following newline. + +The first field of each non-comment line is always the key itself. It +should be given as a simple name, @emph{i.e.}, without surrounding +string quotation marks, and be left-justified flush against the first +column. In this context, a ``field'' is considered to extend up to, but +not include, the first blank, comma, or newline. Here is a simple +example taken from a partial list of C reserved words: + +@group +@example +# These are a few C reserved words, see the c.@code{gperf} file +# for a complete list of ANSI C reserved words. +unsigned +sizeof +switch +signed +if +default +for +while +return +@end example +@end group + +Note that unlike @code{flex} or @code{bison} the first @code{%%} marker +may be elided if the declaration section is empty. + +Additional fields may optionally follow the leading keyword. Fields +should be separated by commas, and terminate at the end of line. What +these fields mean is entirely up to you; they are used to initialize the +elements of the user-defined @code{struct} provided by you in the +declaration section. If the @samp{-t} option is @emph{not} enabled +these fields are simply ignored. All previous examples except the last +one contain keyword attributes. + +@node Functions, Output Format, Keywords, Input Format +@subsection Including Additional C Functions + +The optional third section also corresponds closely with conventions +found in @code{flex} and @code{bison}. All text in this section, +starting at the final @code{%%} and extending to the end of the input +file, is included verbatim into the generated output file. Naturally, +it is your responsibility to ensure that the code contained in this +section is valid C. + +@node Output Format, , Functions, Description +@section Output Format for Generated C Code with @code{gperf} + +Several options control how the generated C code appears on the standard +output. Two C function are generated. They are called @code{hash} and +@code{in_word_set}, although you may modify the name for +@code{in_word_set} with a command-line option. Both functions require +two arguments, a string, @code{char *} @var{str}, and a length +parameter, @code{int} @var{len}. Their default function prototypes are +as follows: + +@group +@example +static int hash (char *str, int len); +int in_word_set (char *str, int len); +@end example +@end group + +By default, the generated @code{hash} function returns an integer value +created by adding @var{len} to several user-specified @var{str} key +positions indexed into an @dfn{associated values} table stored in a +local static array. The associated values table is constructed +internally by @code{gperf} and later output as a static local C array called +@var{hash_table}; its meaning and properties are described below. +@xref{Implementation}. The relevant key positions are specified via the +@samp{-k} option when running @code{gperf}, as detailed in the @emph{Options} +section below. @xref{Options}. + +Two options, @samp{-g} (assume you are compiling with GNU C and its +@code{inline} feature) and @samp{-a} (assume ANSI C-style function +prototypes), alter the content of both the generated @code{hash} and +@code{in_word_set} routines. However, function @code{in_word_set} may +be modified more extensively, in response to your option settings. The +options that affect the @code{in_word_set} structure are: + +@itemize @bullet +@table @samp +@item -p +Have function @code{in_word_set} return a pointer rather than a boolean. + +@item -t +Make use of the user-defined @code{struct}. + +@item -S @var{total switch statements} +Generate 1 or more C @code{switch} statement rather than use a large, +(and potentially sparse) static array. Although the exact time and +space savings of this approach vary according to your C compiler's +degree of optimization, this method often results in smaller and faster +code. +@end table +@end itemize + +If the @samp{-t}, @samp{-S}, and @samp{-p} options are omitted the +default action is to generate a @code{char *} array containing the keys, +together with additional null strings used for padding the array. By +experimenting with the various input and output options, and timing the +resulting C code, you can determine the best option choices for +different keyword set characteristics. + +@node Options, Bugs, Description, Top +@chapter Options to the @code{gperf} Utility + +There are @emph{many} options to @code{gperf}. They were added to make +the program more convenient for use with real applications. ``On-line'' +help is readily available via the @samp{-h} option. Other options +include: + +@itemize @bullet +@table @samp +@item -a +Generate ANSI Standard C code using function prototypes. The default is +to use ``classic'' K&R C function declaration syntax. + +@item -c +Generates C code that uses the @code{strncmp} function to perform +string comparisons. The default action is to use @code{strcmp}. + +@item -C +Makes the contents of all generated lookup tables constant, @emph{i.e.}, +``readonly.'' Many compilers can generate more efficient code for this +by putting the tables in readonly memory. + +@item -d +Enables the debugging option. This produces verbose diagnostics to +``standard error'' when @code{gperf} is executing. It is useful both for +maintaining the program and for determining whether a given set of +options is actually speeding up the search for a solution. Some useful +information is dumped at the end of the program when the @samp{-d} +option is enabled. + +@item -D +Handle keywords whose key position sets hash to duplicate values. +Duplicate hash values occur for two reasons: + +@itemize @bullet +@item +Since @code{gperf} does not backtrack it is possible for it to process +all your input keywords without finding a unique mapping for each word. +However, frequently only a very small number of duplicates occur, and +the majority of keys still require one probe into the table. +@item +Sometimes a set of keys may have the same names, but possess different +attributes. With the -D option @code{gperf} treats all these keys as part of +an equivalence class and generates a perfect hash function with multiple +comparisons for duplicate keys. It is up to you to completely +disambiguate the keywords by modifying the generated C code. However, +@code{gperf} helps you out by organizing the output. +@end itemize + +Option @samp{-D} is extremely useful for certain large or highly +redundant keyword sets, @emph{i.e.}, assembler instruction opcodes. +Using this option usually means that the generated hash function is no +longer perfect. On the other hand, it permits @code{gperf} to work on keyword +sets that it otherwise could not handle. + +@item -e @var{keyword delimiter list} +Allows the user to provide a string containing delimiters used to +separate keywords from their attributes. The default is ",\n". This +option is essential if you want to use keywords that have embedded +commas or newlines. One useful trick is to use -e'TAB', where TAB is +the literal tab character. + +@item -f @var{iteration amount} +Generate the perfect hash function ``fast.'' This decreases @code{gperf}'s +running time at the cost of minimizing generated table-size. The +iteration amount represents the number of times to iterate when +resolving a collision. `0' means `iterate by the number of keywords. +This option is probably most useful when used in conjunction with options +@samp{-D} and/or @samp{-S} for @emph{large} keyword sets. + +@item -g +Assume a GNU compiler, @emph{e.g.}, @code{g++} or @code{gcc}. This +makes all generated routines use the ``inline'' keyword to remove the +cost of function calls. Note that @samp{-g} does @emph{not} imply +@samp{-a}, since other non-ANSI C compilers may have provisions for a +function @code{inline} feature. + +@item -G +Generate the static table of keywords as a static global variable, +rather than hiding it inside of the lookup function (which is the +default behavior). + +@item -h +Prints a short summary on the meaning of each program option. Aborts +further program execution. + +@item -H @var{hash function name} +Allows you to specify the name for the generated hash function. Default +name is `hash.' This option permits the use of two hash tables in the +same file. + +@item -i @var{initial value} +Provides an initial @var{value} for the associate values array. Default +is 0. Increasing the initial value helps inflate the final table size, +possibly leading to more time efficient keyword lookups. Note that this +option is not particularly useful when @samp{-S} is used. Also, +@samp{-i} is overriden when the @samp{-r} option is used. + +@item -j @var{jump value} +Affects the ``jump value,'' @emph{i.e.}, how far to advance the +associated character value upon collisions. @var{Jump value} is rounded +up to an odd number, the default is 5. If the @var{jump value} is 0 @code{gperf} +jumps by random amounts. + +@item -k @var{keys} +Allows selection of the character key positions used in the keywords' +hash function. The allowable choices range between 1-126, inclusive. +The positions are separated by commas, @emph{e.g.}, @samp{-k 9,4,13,14}; +ranges may be used, @emph{e.g.}, @samp{-k 2-7}; and positions may occur +in any order. Furthermore, the meta-character '*' causes the generated +hash function to consider @strong{all} character positions in each key, +whereas '$' instructs the hash function to use the ``final character'' +of a key (this is the only way to use a character position greater than +126, incidentally). + +For instance, the option @samp{-k 1,2,4,6-10,'$'} generates a hash +function that considers positions 1,2,4,6,7,8,9,10, plus the last +character in each key (which may differ for each key, obviously). Keys +with length less than the indicated key positions work properly, since +selected key positions exceeding the key length are simply not +referenced in the hash function. + +@item -K @var{key name} +By default, the program assumes the structure component identifier for +the keyword is ``name.'' This option allows an arbitrary choice of +identifier for this component, although it still must occur as the first +field in your supplied @code{struct}. + +@item -l +Compare key lengths before trying a string comparison. This might cut +down on the number of string comparisons made during the lookup, since +keys with different lengths are never compared via @code{strcmp}. +However, using @samp{-l} might greatly increase the size of the +generated C code if the lookup table range is large (which implies that +the switch option @samp{-S} is not enabled), since the length table +contains as many elements as there are entries in the lookup table. + +@item -n +Instructs the generator not to include the length of a keyword when +computing its hash value. This may save a few assembly instructions in +the generated lookup table. + +@item -N @var{lookup function name} +Allows you to specify the name for the generated lookup function. +Default name is `in_word_set.' This option permits completely automatic +generation of perfect hash functions, especially when multiple generated +hash functions are used in the same application. + +@item -o +Reorders the keywords by sorting the keywords so that frequently +occuring key position set components appear first. A second reordering +pass follows so that keys with ``already determined values'' are placed +towards the front of the keylist. This may decrease the time required +to generate a perfect hash function for many keyword sets, and also +produce more minimal perfect hash functions. The reason for this is +that the reordering helps prune the search time by handling inevitable +collisions early in the search process. On the other hand, if the +number of keywords is @emph{very} large using @samp{-o} may +@emph{increase} @code{gperf}'s execution time, since collisions will begin +earlier and continue throughout the remainder of keyword processing. +See Cichelli's paper from the January 1980 Communications of the ACM for +details. + +@item -p +Changes the return value of the generated function @code{in_word_set} +from boolean (@emph{i.e.}, 0 or 1), to either type ``pointer to +user-defined struct,'' (if the @samp{-t} option is enabled), or simply +to @code{char *}, if @samp{-t} is not enabled. This option is most +useful when the @samp{-t} option (allowing user-defined structs) is +used. For example, it is possible to automatically generate the GNU C +reserved word lookup routine with the options @samp{-p} and @samp{-t}. + +@item -r +Utilizes randomness to initialize the associated values table. This +frequently generates solutions faster than using deterministic +initialization (which starts all associated values at 0). Furthermore, +using the randomization option generally increases the size of the +table. If @code{gperf} has difficultly with a certain keyword set try using +@samp{-r} or @samp{-D}. + +@item -s @var{size-multiple} +Affects the size of the generated hash table. The numeric argument for +this option indicates ``how many times larger'' the maximum associated +value range should be, in relationship to the number of keys. For +example, a value of 3 means ``allow the maximum associated value to be +about 3 times larger than the number of input keys.'' If option +@samp{-S} is @emph{not} enabled, the maximum associated value influences +the static array table size, and a larger table should decrease the time +required for an unsuccessful search, at the expense of extra table +space. + +The default value is 1, thus the default maximum associated value about +the same size as the number of keys ( for efficiency, the maximum +associated value is always rounded up to a power of 2). The actual +table size may vary somewhat, since this technique is essentially a +heuristic. In particular, setting this value too high slows down +@code{gperf}'s runtime, since it must search through a much larger range of +values. Judicious use of the @samp{-f} option helps alleviate this +overhead, however. + +@item -S @var{total switch statements} +Causes the generated C code to use a @code{switch} statement scheme, +rather than an array lookup table. This can lead to a reduction in both +time and space requirements for some keyfiles. The argument to this +option determines how many @code{switch} statements are generated. A +value of 1 generates 1 @code{switch} containing all the elements, a +value of 2 generates 2 tables with 1/2 the elements in each +@code{switch}, etc. This is useful since many C compilers cannot +correctly generate code for large @code{switch} statements. This option +was inspired in part by Keith Bostic's original C program. + +@item -t +Allows you to include a @code{struct} type declaration for generated +code. Any text before a pair of consecutive %% is consider part of the +type declaration. Key words and additional fields may follow this, one +group of fields per line. A set of examples for generating perfect hash +tables and functions for Ada, C, and G++, Pascal, and Modula 2 and 3 +reserved words are distributed with this release. + +@item -T +Prevents the transfer of the type declaration to the output file. Use +this option if the type is already defined elsewhere. + +@item -v +Prints out the current version number. +@end table +@end itemize + +@node Bugs, Projects, Options, Top +@chapter Known Bugs and Limitations with @code{gperf} + +The following are some limitations with the current release of +@code{gperf}: + +@itemize @bullet +@item +The @code{gperf} utility is tuned to execute quickly, and works quickly +for small to medium size data sets (around 1000 keywords). It is +extremely useful for maintaining perfect hash functions for compiler +keyword sets. Several recent enhancements now enable @code{gperf} to +work efficiently on much larger keyword sets (over 15,000 keywords). +When processing large keyword sets it helps greatly to have over 8 megs +of RAM. + +However, since @code{gperf} does not backtrack no guaranteed solution +occurs on every run. On the other hand, it is usually easy to obtain a +solution by varying the option parameters. In particular, try the +@samp{-r} option, and also try changing the default arguments to the +@samp{-s} and @samp{-j} options. To @emph{guarantee} a solution, use +the @samp{-D} and @samp{-S} options, although the final results are not +likely to be a @emph{perfect} hash function anymore! Finally, use the +@samp{-f} option if you want @code{gperf} to generate the perfect hash +function @emph{fast}, with less emphasis on making it minimal. + +@item +The size of the generate static keyword array can get @emph{extremely} +large if the input keyword file is large or if the keywords are quite +similar. This tends to slow down the compilation of the generated C +code, and @emph{greatly} inflates the object code size. If this +situation occurs, consider using the @samp{-S} option to reduce data +size, potentially increasing keyword recognition time a negligible +amount. Since many C compilers cannot correctly generated code for +large switch statements it is important to qualify the @var{-S} option +with an appropriate numerical argument that controls the number of +switch statements generated. + +@item +The maximum number of key positions selected for a given key has an +arbitrary limit of 126. This restriction should be removed, and if +anyone considers this a problem write me and let me know so I can remove +the constraint. + +@item +The C++ source code only compiles correctly with GNU G++, version 1.36 +(and hopefully later versions). Porting to AT&T cfront would be +tedious, but possible (and desirable). There is also a K&R C version +available now. This should compile without change on most BSD systems, +but may require a bit of work to run on SYSV, since @code{gperf} uses +@var{alloca} in several places. Send mail to @code{schmidt@@ics.uci.edu} for +information. +@end itemize + +@node Projects, Implementation, Bugs, Top +@chapter Things Still Left to Do + +It should be ``relatively'' easy to replace the current perfect hash +function algorithm with a more exhaustive approach; the perfect hash +module is essential independent from other program modules. Additional +worthwhile improvements include: + +@itemize @bullet +@item +Make the algorithm more robust. At present, the program halts with an +error diagnostic if it can't find a direct solution and the @samp{-D} +option is not enabled. A more comprehensive, albeit computationally +expensive, approach would employ backtracking or enable alternative +options and retry. It's not clear how helpful this would be, in +general, since most search sets are rather small in practice. + +@item +Another useful extension involves modifying the program to generate +``minimal'' perfect hash functions (under certain circumstances, the +current version can be rather extravagant in the generated table size). +Again, this is mostly of theoretical interest, since a sparse table +often produces faster lookups, and use of the @samp{-S} @code{switch} +option can minimize the data size, at the expense of slightly longer +lookups (note that the gcc compiler generally produces good code for +@code{switch} statements, reducing the need for more complex schemes). + +@item +In addition to improving the algorithm, it would also be useful to +generate a C++ class or Ada package as the code output, in addition to +the current C routines. +@end itemize + +@node Implementation, Bibliography, Projects, Top +@chapter Implementation Details of GNU @code{gperf} + +A paper describing the high-level description of the data structures and +algorithms used to implement @code{gperf} will soon be available. This +paper is useful not only from a maintenance and enhancement perspective, +but also because they demonstrate several clever and useful programming +techniques, @emph{e.g.}, `Iteration Number' boolean arrays, double +hashing, a ``safe'' and efficient method for reading arbitrarily long +input from a file, and a provably optimal algorithm for simultaneously +determining both the minimum and maximum elements in a list. + +@page + +@node Bibliography, , Implementation, Top +@chapter Bibliography + +[1] Chang, C.C.: @i{A Scheme for Constructing Ordered Minimal Perfect +Hashing Functions} Information Sciences 39(1986), 187-195. + +[2] Cichelli, Richard J. @i{Author's Response to ``On Cichelli's Minimal Perfect Hash +Functions Method''} Communications of the ACM, 23, 12(December 1980), 729. + +[3] Cichelli, Richard J. @i{Minimal Perfect Hash Functions Made Simple} +Communications of the ACM, 23, 1(January 1980), 17-19. + +[4] Cook, C. R. and Oldehoeft, R.R. @i{A Letter Oriented Minimal +Perfect Hashing Function} SIGPLAN Notices, 17, 9(September 1982), 18-27. + +[5] Cormack, G. V. and Horspool, R. N. S. and Kaiserwerth, M. +@i{Practical Perfect Hashing} Computer Journal, 28, 1(January 1985), 54-58. + +[6] Jaeschke, G. @i{Reciprocal Hashing: A Method for Generating Minimal +Perfect Hashing Functions} Communications of the ACM, 24, 12(December +1981), 829-833. + +[7] Jaeschke, G. and Osterburg, G. @i{On Cichelli's Minimal Perfect +Hash Functions Method} Communications of the ACM, 23, 12(December 1980), +728-729. + +[8] Sager, Thomas J. @i{A Polynomial Time Generator for Minimal Perfect +Hash Functions} Communications of the ACM, 28, 5(December 1985), 523-532 + +[9] Sebesta, R.W. and Taylor, M.A. @i{Minimal Perfect Hash Functions +for Reserved Word Lists} SIGPLAN Notices, 20, 12(September 1985), 47-53. +@contents + +[10] Sprugnoli, R. @i{Perfect Hashing Functions: A Single Probe +Retrieving Method for Static Sets} Communications of the ACM, 20 +11(November 1977), 841-850. + +[11] Stallman, Richard M. @i{Using and Porting GNU CC} Free Software Foundation, +1988. + +[12] Stroustrup, Bjarne @i{The C++ Programming Language.} Addison-Wesley, 1986. + +[13] Tiemann, Michael D. @i{User's Guide to GNU C++} Free Software +Foundation, 1989. +@bye diff --git a/contrib/gperf/src/Makefile b/contrib/gperf/src/Makefile new file mode 100644 index 000000000000..05f59a4bdabb --- /dev/null +++ b/contrib/gperf/src/Makefile @@ -0,0 +1,77 @@ +# Copyright (C) 1989 Free Software Foundation, Inc. +# written by Douglas C. Schmidt (schmidt@ics.uci.edu) +# +# This file is part of GNU GPERF. +# +# GNU GPERF is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 1, or (at your option) +# any later version. +# +# GNU GPERF is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU GPERF; see the file COPYING. If not, write to +# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = gcc +DFLAGS= -DLO_CAL -DGATHER_STATISTICS #-DRLIMIT_STACK +OFLAGS= -O -p -g -fstrength-reduce -fomit-frame-pointer -fdelayed-branch -finline-functions # gcc options +CFLAGS= $(DFLAGS) $(OFLAGS) +OBJS = options.o iterator.o main.o perfect.o keylist.o listnode.o xmalloc.o \ + hashtable.o boolarray.o readline.o stderr.o version.o getopt.o +SOURCES = options.c iterator.c main.c perfect.c keylist.c listnode.c xmalloc.c \ + hashtable.c boolarray.c readline.c stderr.c version.c getopt.c + +all: gperf + +gperf: $(OBJS) + $(CC) $(CFLAGS) -o $@ $(OBJS) $(LIBS) + +clean: + -rm -f *.o core *~ #*# + +realclean: clean + -rm -f gperf + +# dependencies +# DO NOT DELETE THIS LINE -- mkdep uses it. +# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY. + +boolarray.o: boolarray.c /usr/include/stdio.h boolarray.h prototype.h options.h +boolarray.o: /usr/include/stdio.h prototype.h +getopt.o: getopt.c /usr/include/stdio.h +hashtable.o: hashtable.c /usr/include/stdio.h hashtable.h keylist.h +hashtable.o: /usr/include/stdio.h listnode.h prototype.h prototype.h options.h +hashtable.o: /usr/include/stdio.h prototype.h +iterator.o: iterator.c /usr/include/stdio.h /usr/include/ctype.h iterator.h +iterator.o: prototype.h +keylist.o: keylist.c /usr/include/assert.h /usr/include/stdio.h options.h +keylist.o: /usr/include/stdio.h prototype.h readline.h prototype.h keylist.h +keylist.o: /usr/include/stdio.h listnode.h prototype.h hashtable.h keylist.h +keylist.o: prototype.h stderr.h prototype.h /usr/include/varargs.h +listnode.o: listnode.c /usr/include/stdio.h options.h /usr/include/stdio.h +listnode.o: prototype.h listnode.h prototype.h stderr.h prototype.h +listnode.o: /usr/include/varargs.h +main.o: main.c /usr/include/stdio.h stderr.h prototype.h /usr/include/varargs.h +main.o: options.h /usr/include/stdio.h prototype.h perfect.h prototype.h +main.o: keylist.h /usr/include/stdio.h listnode.h prototype.h boolarray.h +main.o: prototype.h +options.o: options.c /usr/include/stdio.h /usr/include/assert.h options.h +options.o: /usr/include/stdio.h prototype.h iterator.h prototype.h stderr.h +options.o: prototype.h /usr/include/varargs.h +perfect.o: perfect.c /usr/include/stdio.h /usr/include/assert.h +perfect.o: /usr/include/ctype.h options.h /usr/include/stdio.h prototype.h +perfect.o: perfect.h prototype.h keylist.h /usr/include/stdio.h listnode.h +perfect.o: prototype.h boolarray.h prototype.h stderr.h prototype.h +perfect.o: /usr/include/varargs.h +readline.o: readline.c /usr/include/stdio.h readline.h prototype.h +stderr.o: stderr.c /usr/include/stdio.h stderr.h prototype.h +stderr.o: /usr/include/varargs.h +version.o: version.c +xmalloc.o: xmalloc.c /usr/include/stdio.h + +# IF YOU PUT ANYTHING HERE IT WILL GO AWAY diff --git a/contrib/gperf/src/boolarray.c b/contrib/gperf/src/boolarray.c new file mode 100644 index 000000000000..890613499200 --- /dev/null +++ b/contrib/gperf/src/boolarray.c @@ -0,0 +1,90 @@ +/* Fast lookup table abstraction implemented as a Guilmette Array + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include "boolarray.h" +#include "options.h" + +/* Locally visible BOOL_ARRAY object. */ + +static BOOL_ARRAY bool_array; + +/* Prints out debugging diagnostics. */ + +void +bool_array_destroy () +{ + if (OPTION_ENABLED (option, DEBUG)) + fprintf (stderr, "\ndumping boolean array information\niteration number = %d\nend of array dump\n", + bool_array.iteration_number); + free ((char *) bool_array.storage_array); +} + +void +bool_array_init (size) + int size; +{ + STORAGE_TYPE *xmalloc (); + bool_array.iteration_number = 1; + bool_array.size = size; + bool_array.storage_array = xmalloc (size * sizeof *bool_array.storage_array); + bzero (bool_array.storage_array, size * sizeof *bool_array.storage_array); + if (OPTION_ENABLED (option, DEBUG)) + fprintf (stderr, "\nbool array size = %d, total bytes = %d\n", + bool_array.size, bool_array.size * sizeof *bool_array.storage_array); +} + +bool +lookup (index) + int index; +{ + if (bool_array.storage_array[index] == bool_array.iteration_number) + return 1; + else + { + bool_array.storage_array[index] = bool_array.iteration_number; + return 0; + } +} + +/* Simple enough to reset, eh?! */ + +void +bool_array_reset () +{ + /* If we wrap around it's time to zero things out again! */ + + + if (++bool_array.iteration_number == 0) + { + if (OPTION_ENABLED (option, DEBUG)) + { + fprintf (stderr, "(re-initializing bool_array)..."); + fflush (stderr); + } + bool_array.iteration_number = 1; + bzero (bool_array.storage_array, bool_array.size * sizeof *bool_array.storage_array); + if (OPTION_ENABLED (option, DEBUG)) + { + fprintf (stderr, "done\n"); + fflush (stderr); + } + } +} diff --git a/contrib/gperf/src/boolarray.h b/contrib/gperf/src/boolarray.h new file mode 100644 index 000000000000..48339755060a --- /dev/null +++ b/contrib/gperf/src/boolarray.h @@ -0,0 +1,48 @@ +/* Simple lookup table abstraction implemented as a Guilmette Array. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Define and implement a simple boolean array abstraction, + uses a Guilmette array implementation to save on initialization time. */ + +#ifndef _boolarray_h +#define _boolarray_h +#include "prototype.h" + +#ifdef LO_CAL +/* If we are on a memory diet then we'll only make these use a limited + amount of storage space. */ +typedef unsigned short STORAGE_TYPE; +#else +typedef int STORAGE_TYPE; +#endif +typedef struct bool_array +{ + STORAGE_TYPE *storage_array; /* Initialization of the index space. */ + STORAGE_TYPE iteration_number; /* Keep track of the current iteration. */ + int size; /* Size of the entire array (dynamically initialized). */ +} BOOL_ARRAY; + +extern void bool_array_init P ((int size)); +extern void bool_array_destroy P ((void)); +extern bool lookup P ((int hash_value)); +extern void bool_array_reset P ((void)); + +#endif /* _boolarray_h */ diff --git a/contrib/gperf/src/getopt.c b/contrib/gperf/src/getopt.c new file mode 100644 index 000000000000..4eb3c2090887 --- /dev/null +++ b/contrib/gperf/src/getopt.c @@ -0,0 +1,413 @@ +/* Getopt for GNU. + Copyright (C) 1987, 1989 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + + + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of `argv' so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable _POSIX_OPTION_ORDER disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include + +#ifdef sparc +#include +#endif +#ifdef USG +#define bcopy(s, d, l) memcpy((d), (s), (l)) +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = 0; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + UNSPECIFIED means the caller did not specify anything; + the default is then REQUIRE_ORDER if the environment variable + _OPTIONS_FIRST is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options. + Stop option processing when the first non-option is seen. + This is what Unix does. + + PERMUTE is the default. We permute the contents of `argv' as we scan, + so that eventually all the options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code zero. + Using `-' as the first character of the list of option characters + requests this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering; + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (argv) + char **argv; +{ + int nonopts_size + = (last_nonopt - first_nonopt) * sizeof (char *); + char **temp = (char **) alloca (nonopts_size); + + /* Interchange the two blocks of data in argv. */ + + bcopy (&argv[first_nonopt], temp, nonopts_size); + bcopy (&argv[last_nonopt], &argv[first_nonopt], + (optind - last_nonopt) * sizeof (char *)); + bcopy (temp, &argv[first_nonopt + optind - last_nonopt], + nonopts_size); + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of theoption characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + A colon in OPTSTRING means that the previous character is an option + that wants an argument. The argument is taken from the rest of the + current ARGV-element, or from the following ARGV-element, + and returned in `optarg'. + + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg'. + + If OPTSTRING starts with `-', it requests a different method of handling the + non-option ARGV-elements. See the comments about RETURN_IN_ORDER, above. */ + +int +getopt (argc, argv, optstring) + int argc; + char **argv; + char *optstring; +{ + /* Initialize the internal data when the first call is made. + Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + if (optind == 0) + { + first_nonopt = last_nonopt = optind = 1; + + nextchar = 0; + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + ordering = RETURN_IN_ORDER; + else if (getenv ("_POSIX_OPTION_ORDER") != 0) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + } + + if (nextchar == 0 || *nextchar == 0) + { + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange (argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Now skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc + && (argv[optind][0] != '-' + || argv[optind][1] == 0)) + optind++; + last_nonopt = optind; + } + + /* Special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange (argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (argv[optind][0] != '-' || argv[optind][1] == 0) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 0; + } + + /* We have found another option-ARGV-element. + Start decoding its characters. */ + + nextchar = argv[optind] + 1; + } + + /* Look at and handle the next option-character. */ + + { + char c = *nextchar++; + char *temp = (char *) index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == 0) + optind++; + + if (temp == 0 || c == ':') + { + if (opterr != 0) + { + if (c < 040 || c >= 0177) + fprintf (stderr, "%s: unrecognized option, character code 0%o\n", + argv[0], c); + else + fprintf (stderr, "%s: unrecognized option `-%c'\n", + argv[0], c); + } + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != 0) + { + optarg = nextchar; + optind++; + } + else + optarg = 0; + nextchar = 0; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != 0) + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr != 0) + fprintf (stderr, "%s: no argument for `-%c' option\n", + argv[0], c); + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = 0; + } + } + return c; + } +} + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + char c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind; + if ((c = getopt (argc, argv, "abc:d:0123456789")) == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + return 0; +} + +#endif /* TEST */ diff --git a/contrib/gperf/src/gperf-to-do b/contrib/gperf/src/gperf-to-do new file mode 100644 index 000000000000..05caecca9dbd --- /dev/null +++ b/contrib/gperf/src/gperf-to-do @@ -0,0 +1,22 @@ +1. provide output diagnostics that explain how many input keys total, + how many after dealing with static links, and finally, after the + algorithm is complete, how many dynamic duplicates do we now + have. +2. fix up GATHER_STATISTICS for all instrumentation. +3. Useful idea: + + a. Generate the wordlist as a contiguous block of keywords, as before. + This wordlist *must* be sorted by hash value. + + b. generate the lookup_array, which are an array of signed {chars,shorts,ints}, + which ever allows full coverage of the wordlist dimensions. If the + value v, where v = lookup_array[hash(str,len)], is >= 0, then we + simply use this result as a direct access into wordlist to snag + the keyword for comparison. + + c. Otherwise, if v is < 0 this is an indication that we'll need to + search through some number of duplicates hash values. Using a + hash linking scheme we'd then index into a duplicate_address + table that would provide the starting index and total length of + the duplicate entries to consider sequentially. + diff --git a/contrib/gperf/src/hashtable.c b/contrib/gperf/src/hashtable.c new file mode 100644 index 000000000000..c256addd307c --- /dev/null +++ b/contrib/gperf/src/hashtable.c @@ -0,0 +1,132 @@ +/* Hash table for checking keyword links. Implemented using double hashing. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include "hashtable.h" +#include "options.h" + +#ifdef GATHER_STATISTICS +/* Find out how well our double hashing is working! */ +static collisions = 0; +#endif + +/* Locally visible hash table. */ +static HASH_TABLE hash_table; + +/* Basically the algorithm from the Dragon book. */ + +static unsigned +hash_pjw (str) + char *str; +{ + char *temp; + unsigned g, h = 0; + + for (temp = str; *temp; temp++) + { + h = (h << 4) + (*temp * 13); + if (g = h & 0xf0000000) + { + h ^= (g >> 24); + h ^= g; + } + } + + return h; +} + +/* The size of the hash table is always the smallest power of 2 >= the size + indicated by the user. This allows several optimizations, including + the use of double hashing and elimination of the mod instruction. + Note that the size had better be larger than the number of items + in the hash table, else there's trouble!!! Note that the memory + for the hash table is allocated *outside* the intialization routine. + This compromises information hiding somewhat, but greatly reduces + memory fragmentation, since we can now use alloca! */ + +void +hash_table_init (table, s) + LIST_NODE **table; + int s; +{ + hash_table.size = s; + hash_table.table = table; + bzero ((char *) hash_table.table, hash_table.size * sizeof *hash_table.table); +} + +/* Frees the dynamically allocated table. Note that since we don't + really need this space anymore, and since it is potentially quite + big it is best to return it when we are done. */ + +void +hash_table_destroy () +{ + if (OPTION_ENABLED (option, DEBUG)) + { + int i; + + fprintf (stderr, "\ndumping the hash table\ntotal elements = %d, bytes = %d\n", + hash_table.size, hash_table.size * sizeof *hash_table.table); + + for (i = hash_table.size - 1; i >= 0; i--) + if (hash_table.table[i]) + fprintf (stderr, "location[%d] has charset \"%s\" and keyword \"%s\"\n", + i, hash_table.table[i]->char_set, hash_table.table[i]->key); + +#ifdef GATHER_STATISTICS + fprintf (stderr, "\ntotal collisions during hashing = %d\n", collisions); +#endif + fprintf (stderr, "end dumping hash table\n\n"); + } +} + +/* If the ITEM is already in the hash table return the item found + in the table. Otherwise inserts the ITEM, and returns FALSE. + Uses double hashing. */ + +LIST_NODE * +retrieve (item, ignore_length) + LIST_NODE *item; + int ignore_length; +{ + unsigned hash_val = hash_pjw (item->char_set); + int probe = hash_val & hash_table.size - 1; + int increment = (hash_val ^ item->length | 1) & hash_table.size - 1; + + while (hash_table.table[probe] + && (strcmp (hash_table.table[probe]->char_set, item->char_set) + || (!ignore_length && hash_table.table[probe]->length != item->length))) + { +#ifdef GATHER_STATISTICS + collisions++; +#endif + probe = probe + increment & hash_table.size - 1; + } + + if (hash_table.table[probe]) + return hash_table.table[probe]; + else + { + hash_table.table[probe] = item; + return 0; + } +} + + diff --git a/contrib/gperf/src/hashtable.h b/contrib/gperf/src/hashtable.h new file mode 100644 index 000000000000..218e9874a1d1 --- /dev/null +++ b/contrib/gperf/src/hashtable.h @@ -0,0 +1,37 @@ +/* Hash table used to check for duplicate keyword entries. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _hashtable_h +#define _hashtable_h +#include "keylist.h" +#include "prototype.h" + +typedef struct hash_table +{ + LIST_NODE **table; /* Vector of pointers to linked lists of List_Node's. */ + int size; /* Size of the vector. */ +} HASH_TABLE; + +extern void hash_table_init P ((LIST_NODE **table, int size)); +extern void hash_table_destroy P ((void)); +extern LIST_NODE *retrieve P ((LIST_NODE *item, int ignore_length)); + +#endif /* _hashtable_h */ diff --git a/contrib/gperf/src/iterator.c b/contrib/gperf/src/iterator.c new file mode 100644 index 000000000000..b5930f089bb2 --- /dev/null +++ b/contrib/gperf/src/iterator.c @@ -0,0 +1,106 @@ +/* Provides an Iterator for keyword characters. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include "iterator.h" + +/* Locally visible ITERATOR object. */ + +ITERATOR iterator; + +/* Constructor for ITERATOR. */ + +void +iterator_init (s, lo, hi, word_end, bad_val, key_end) + char *s; + int lo; + int hi; + int word_end; + int bad_val; + int key_end; +{ + iterator.end = key_end; + iterator.error_value = bad_val; + iterator.end_word = word_end; + iterator.str = s; + iterator.hi_bound = hi; + iterator.lo_bound = lo; +} + +/* Define several useful macros to clarify subsequent code. */ +#define ISPOSDIGIT(X) ((X)<='9'&&(X)>'0') +#define TODIGIT(X) ((X)-'0') + +/* Provide an Iterator, returning the ``next'' value from + the list of valid values given in the constructor. */ + +int +next () +{ +/* Variables to record the Iterator's status when handling ranges, e.g., 3-12. */ + + static int size; + static int curr_value; + static int upper_bound; + + if (size) + { + if (++curr_value >= upper_bound) + size = 0; + return curr_value; + } + else + { + while (*iterator.str) + { + if (*iterator.str == ',') + iterator.str++; + else if (*iterator.str == '$') + { + iterator.str++; + return iterator.end_word; + } + else if (ISPOSDIGIT (*iterator.str)) + { + + for (curr_value = 0; isdigit (*iterator.str); iterator.str++) + curr_value = curr_value * 10 + *iterator.str - '0'; + + if (*iterator.str == '-') + { + + for (size = 1, upper_bound = 0; + isdigit (*++iterator.str); + upper_bound = upper_bound * 10 + *iterator.str - '0'); + + if (upper_bound <= curr_value || upper_bound > iterator.hi_bound) + return iterator.error_value; + } + return curr_value >= iterator.lo_bound && curr_value <= iterator.hi_bound + ? curr_value : iterator.error_value; + } + else + return iterator.error_value; + } + + return iterator.end; + } +} diff --git a/contrib/gperf/src/iterator.h b/contrib/gperf/src/iterator.h new file mode 100644 index 000000000000..06dcffd0c992 --- /dev/null +++ b/contrib/gperf/src/iterator.h @@ -0,0 +1,47 @@ +/* Provides an Iterator for keyword characters. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Provides an Iterator that expands and decodes a control string containing digits + and ranges, returning an integer every time the generator function is called. + This is used to decode the user's key position requests. For example: + "-k 1,2,5-10,$" will return 1, 2, 5, 6, 7, 8, 9, 10, and 0 ( representing + the abstract ``last character of the key'' on successive calls to the + member function operator (). + No errors are handled in these routines, they are passed back to the + calling routines via a user-supplied Error_Value */ + +#ifndef _iterator_h +#define _iterator_h +#include "prototype.h" + +typedef struct iterator +{ + char *str; /* A pointer to the string provided by the user. */ + int end; /* Value returned after last key is processed. */ + int end_word; /* A value marking the abstract ``end of word'' ( usually '$'). */ + int error_value; /* Error value returned when input is syntactically erroneous. */ + int hi_bound; /* Greatest possible value, inclusive. */ + int lo_bound; /* Smallest possible value, inclusive. */ +} ITERATOR; + +extern void iterator_init P ((char *s, int lo, int hi, int word_end, int bad_val, int key_end)); +extern int next P ((void)); +#endif /* _iterator_h */ diff --git a/contrib/gperf/src/keylist.c b/contrib/gperf/src/keylist.c new file mode 100644 index 000000000000..f92d97549694 --- /dev/null +++ b/contrib/gperf/src/keylist.c @@ -0,0 +1,1033 @@ +/* Routines for building, ordering, and printing the keyword list. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include "options.h" +#include "readline.h" +#include "keylist.h" +#include "hashtable.h" +#include "stderr.h" +#ifdef sparc +#include +#endif + +/* Current release version. */ +extern char *version_string; + +/* See comments in perfect.cc. */ +extern int occurrences[ALPHABET_SIZE]; + +/* Ditto. */ +extern int asso_values[ALPHABET_SIZE]; + +/* Used in function reorder, below. */ +static bool determined[ALPHABET_SIZE]; + +/* Default type for generated code. */ +static char *default_array_type = "char *"; + +/* Generated function ``in_word_set'' default return type. */ +static char *default_return_type = "char *"; + +/* Largest positive integer value. */ +#define MAX_INT ((~(unsigned)0)>>1) + +/* Most negative integer value. */ +#define NEG_MAX_INT ((~(unsigned)0)^((~(unsigned)0)>>1)) + +/* Maximum value an unsigned char can take. */ +#define MAX_UNSIGNED_CHAR 256 + +/* Maximum value an unsigned short can take. */ +#define MAX_UNSIGNED_SHORT 65536 + +/* Make the hash table 5 times larger than the number of keyword entries. */ +#define TABLE_MULTIPLE 5 + +/* Efficiently returns the least power of two greater than or equal to X! */ +#define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X))) + +/* How wide the printed field width must be to contain the maximum hash value. */ +static int field_width = 2; + +/* Globally visible KEY_LIST object. */ + +KEY_LIST key_list; + +/* Gathers the input stream into a buffer until one of two things occur: + + 1. We read a '%' followed by a '%' + 2. We read a '%' followed by a '}' + + The first symbolizes the beginning of the keyword list proper, + The second symbolizes the end of the C source code to be generated + verbatim in the output file. + + I assume that the keys are separated from the optional preceding struct + declaration by a consecutive % followed by either % or } starting in + the first column. The code below uses an expandible buffer to scan off + and return a pointer to all the code (if any) appearing before the delimiter. */ + +static char * +get_special_input (delimiter) + char delimiter; +{ + char *xmalloc (); + int size = 80; + char *buf = xmalloc (size); + int c, i; + + for (i = 0; (c = getchar ()) != EOF; i++) + { + if (c == '%') + { + if ((c = getchar ()) == delimiter) + { + + while ((c = getchar ()) != '\n') + ; /* Discard newline. */ + + if (i == 0) + return ""; + else + { + buf[delimiter == '%' && buf[i - 2] == ';' ? i - 2 : i - 1] = '\0'; + return buf; + } + } + else + ungetc (c, stdin); + } + else if (i >= size) /* Yikes, time to grow the buffer! */ + { + char *temp = xmalloc (size *= 2); + int j; + + for (j = 0; j < i; j++) + temp[j] = buf[j]; + + free (buf); + buf = temp; + } + buf[i] = c; + } + + return NULL; /* Problem here. */ +} + +/* Stores any C text that must be included verbatim into the + generated code output. */ + +static char * +save_include_src () +{ + int c; + + if ((c = getchar ()) != '%') + { + ungetc (c, stdin); + return ""; + } + else if ((c = getchar ()) != '{') + report_error ("internal error, %c != '{' on line %d in file %s%a", c, __LINE__, __FILE__); + /*NOT REACHED*/ + else + return get_special_input ('}'); +} + +/* strcspn - find length of initial segment of s consisting entirely + of characters not from reject (borrowed from Henry Spencer's + ANSI string package). */ + +static int +strcspn (s, reject) + char *s; + char *reject; +{ + char *scan; + char *rej_scan; + int count = 0; + + for (scan = s; *scan; scan++) + { + + for (rej_scan = reject; *rej_scan;) + if (*scan == *rej_scan++) + return count; + + count++; + } + + return count; +} + +/* Determines from the input file whether the user wants to build a table + from a user-defined struct, or whether the user is content to simply + use the default array of keys. */ + +static char * +get_array_type () +{ + return get_special_input ('%'); +} + +/* Sets up the Return_Type, the Struct_Tag type and the Array_Type + based upon various user Options. */ + +static void +set_output_types () +{ + char *xmalloc (); + + if (OPTION_ENABLED (option, TYPE) && !(key_list.array_type = get_array_type ())) + return; /* Something's wrong, bug we'll catch it later on.... */ + else if (OPTION_ENABLED (option, TYPE)) /* Yow, we've got a user-defined type... */ + { + int struct_tag_length = strcspn (key_list.array_type, "{\n\0"); + + if (OPTION_ENABLED (option, POINTER)) /* And it must return a pointer... */ + { + key_list.return_type = xmalloc (struct_tag_length + 2); + strncpy (key_list.return_type, key_list.array_type, struct_tag_length); + key_list.return_type[struct_tag_length] = '\0'; + strcat (key_list.return_type, "*"); + } + + key_list.struct_tag = (char *) xmalloc (struct_tag_length + 1); + strncpy (key_list.struct_tag, key_list.array_type, struct_tag_length); + key_list.struct_tag[struct_tag_length] = '\0'; + } + else if (OPTION_ENABLED (option, POINTER)) /* Return a char *. */ + key_list.return_type = default_array_type; +} + +/* Reads in all keys from standard input and creates a linked list pointed + to by Head. This list is then quickly checked for ``links,'' i.e., + unhashable elements possessing identical key sets and lengths. */ + +void +read_keys () +{ + char *ptr; + + key_list.include_src = save_include_src (); + set_output_types (); + + /* Oops, problem with the input file. */ + if (! (ptr = read_line ())) + report_error ("No words in input file, did you forget\ + to prepend %s or use -t accidentally?\n%a", "%%"); + + /* Read in all the keywords from the input file. */ + else + { + LIST_NODE *temp, *trail; + char *delimiter = GET_DELIMITER (option); + + for (temp = key_list.head = make_list_node (ptr, strcspn (ptr, delimiter)); + (ptr = read_line ()) && strcmp (ptr, "%%"); + key_list.total_keys++, temp = temp->next) + temp->next = make_list_node (ptr, strcspn (ptr, delimiter)); + + /* See if any additional C code is included at end of this file. */ + if (ptr) + key_list.additional_code = TRUE; + { + /* If this becomes TRUE we've got a link. */ + bool link = FALSE; + + /* Make large hash table for efficiency. */ + int table_size = (key_list.list_len = key_list.total_keys) * TABLE_MULTIPLE; + + /* By allocating the memory here we save on dynamic allocation overhead. + Table must be a power of 2 for the hash function scheme to work. */ + LIST_NODE **table = (LIST_NODE **) alloca (POW (table_size) * sizeof (LIST_NODE *)); + + hash_table_init (table, table_size); + + /* Test whether there are any links and also set the maximum length of + an identifier in the keyword list. */ + + for (temp = key_list.head, trail = NULL; temp; temp = temp->next) + { + LIST_NODE *ptr = retrieve (temp, OPTION_ENABLED (option, NOLENGTH)); + + /* Check for links. We deal with these by building an equivalence class + of all duplicate values (i.e., links) so that only 1 keyword is + representative of the entire collection. This *greatly* simplifies + processing during later stages of the program. */ + + if (ptr) + { + key_list.list_len--; + trail->next = temp->next; + temp->link = ptr->link; + ptr->link = temp; + link = TRUE; + + /* Complain if user hasn't enabled the duplicate option. */ + if (!OPTION_ENABLED (option, DUP)) + fprintf (stderr, "Key link: \"%s\" = \"%s\", with key set \"%s\".\n", + temp->key, ptr->key, temp->char_set); + else if (OPTION_ENABLED (option, DEBUG)) + fprintf (stderr, "Key link: \"%s\" = \"%s\", with key set \"%s\".\n", + temp->key, ptr->key, temp->char_set); + } + else + trail = temp; + + /* Update minimum and maximum keyword length, if needed. */ + if (temp->length > key_list.max_key_len) + key_list.max_key_len = temp->length; + if (temp->length < key_list.min_key_len) + key_list.min_key_len = temp->length; + } + + /* Free up the dynamic memory used in the hash table. */ + hash_table_destroy (); + + /* Exit program if links exists and option[DUP] not set, since we can't continue safely. */ + if (link) + report_error (OPTION_ENABLED (option, DUP) + ? "Some input keys have identical hash values, examine output carefully...\n" + : "Some input keys have identical hash values,\ntry different key positions or use option -D.\n%a"); + } + if (OPTION_ENABLED (option, ALLCHARS)) + SET_CHARSET_SIZE (option, key_list.max_key_len); + } +} + +/* Recursively merges two sorted lists together to form one sorted list. The + ordering criteria is by frequency of occurrence of elements in the key set + or by the hash value. This is a kludge, but permits nice sharing of + almost identical code without incurring the overhead of a function + call comparison. */ + +static LIST_NODE * +merge (list1, list2) + LIST_NODE *list1; + LIST_NODE *list2; +{ + if (!list1) + return list2; + else if (!list2) + return list1; + else if (key_list.occurrence_sort && list1->occurrence < list2->occurrence + || key_list.hash_sort && list1->hash_value > list2->hash_value) + { + list2->next = merge (list2->next, list1); + return list2; + } + else + { + list1->next = merge (list1->next, list2); + return list1; + } +} + +/* Applies the merge sort algorithm to recursively sort the key list by + frequency of occurrence of elements in the key set. */ + +static LIST_NODE * +merge_sort (head) + LIST_NODE *head; +{ + if (!head || !head->next) + return head; + else + { + LIST_NODE *middle = head; + LIST_NODE *temp = head->next->next; + + while (temp) + { + temp = temp->next; + middle = middle->next; + if (temp) + temp = temp->next; + } + + temp = middle->next; + middle->next = NULL; + return merge (merge_sort (head), merge_sort (temp)); + } +} + +/* Returns the frequency of occurrence of elements in the key set. */ + +static int +get_occurrence (ptr) + LIST_NODE *ptr; +{ + int value = 0; + char *temp; + + for (temp = ptr->char_set; *temp; temp++) + value += occurrences[*temp]; + + return value; +} + +/* Enables the index location of all key set elements that are now + determined. */ + +static void +set_determined (ptr) + LIST_NODE *ptr; +{ + char *temp; + + for (temp = ptr->char_set; *temp; temp++) + determined[*temp] = TRUE; + +} + +/* Returns TRUE if PTR's key set is already completely determined. */ + +static bool +already_determined (ptr) + LIST_NODE *ptr; +{ + bool is_determined = TRUE; + char *temp; + + for (temp = ptr->char_set; is_determined && *temp; temp++) + is_determined = determined[*temp]; + + return is_determined; +} + +/* Reorders the table by first sorting the list so that frequently occuring + keys appear first, and then the list is reorded so that keys whose values + are already determined will be placed towards the front of the list. This + helps prune the search time by handling inevitable collisions early in the + search process. See Cichelli's paper from Jan 1980 JACM for details.... */ + +void +reorder () +{ + LIST_NODE *ptr; + + for (ptr = key_list.head; ptr; ptr = ptr->next) + ptr->occurrence = get_occurrence (ptr); + + key_list.hash_sort = FALSE; + key_list.occurrence_sort = TRUE; + + for (ptr = key_list.head = merge_sort (key_list.head); ptr->next; ptr = ptr->next) + { + set_determined (ptr); + + if (already_determined (ptr->next)) + continue; + else + { + LIST_NODE *trail_ptr = ptr->next; + LIST_NODE *run_ptr = trail_ptr->next; + + for (; run_ptr; run_ptr = trail_ptr->next) + { + + if (already_determined (run_ptr)) + { + trail_ptr->next = run_ptr->next; + run_ptr->next = ptr->next; + ptr = ptr->next = run_ptr; + } + else + trail_ptr = run_ptr; + } + } + } +} + +/* Determines the maximum and minimum hash values. One notable feature is + Ira Pohl's optimal algorithm to calculate both the maximum and minimum + items in a list in O(3n/2) time (faster than the O (2n) method). + Returns the maximum hash value encountered. */ + +static int +print_min_max () +{ + int min_hash_value; + int max_hash_value; + LIST_NODE *temp; + + if (ODD (key_list.list_len)) /* Pre-process first item, list now has an even length. */ + { + min_hash_value = max_hash_value = key_list.head->hash_value; + temp = key_list.head->next; + } + else /* List is already even length, no extra work necessary. */ + { + min_hash_value = MAX_INT; + max_hash_value = NEG_MAX_INT; + temp = key_list.head; + } + + for ( ; temp; temp = temp->next) /* Find max and min in optimal o(3n/2) time. */ + { + static int i; + int key_2, key_1 = temp->hash_value; + temp = temp->next; + key_2 = temp->hash_value; + i++; + + if (key_1 < key_2) + { + if (key_1 < min_hash_value) + min_hash_value = key_1; + if (key_2 > max_hash_value) + max_hash_value = key_2; + } + else + { + if (key_2 < min_hash_value) + min_hash_value = key_2; + if (key_1 > max_hash_value) + max_hash_value = key_1; + } + } + + printf ("\n#define MIN_WORD_LENGTH %d\n#define MAX_WORD_LENGTH %d\ +\n#define MIN_HASH_VALUE %d\n#define MAX_HASH_VALUE %d\ +\n/*\n%5d keywords\n%5d is the maximum key range\n*/\n\n", + key_list.min_key_len == MAX_INT ? key_list.max_key_len : key_list.min_key_len, + key_list.max_key_len, min_hash_value, max_hash_value, + key_list.total_keys, (max_hash_value - min_hash_value + 1)); + return max_hash_value; +} + +/* Generates the output using a C switch. This trades increased search + time for decreased table space (potentially *much* less space for + sparse tables). It the user has specified their own struct in the + keyword file *and* they enable the POINTER option we have extra work to + do. The solution here is to maintain a local static array of user + defined struct's, as with the Print_Lookup_Function. Then we use for + switch statements to perform a strcmp or strncmp, returning 0 if the str + fails to match, and otherwise returning a pointer to appropriate index + location in the local static array. */ + +static void +print_switch () +{ + char *comp_buffer; + LIST_NODE *curr = key_list.head; + int pointer_and_type_enabled = OPTION_ENABLED (option, POINTER) && OPTION_ENABLED (option, TYPE); + int total_switches = GET_TOTAL_SWITCHES (option); + int switch_size = keyword_list_length () / total_switches; + + if (pointer_and_type_enabled) + { + comp_buffer = (char *) alloca (strlen ("*str == *resword->%s && !strncmp (str + 1, resword->%s + 1, len - 1)") + + 2 * strlen (GET_KEY_NAME (option)) + 1); + sprintf (comp_buffer, OPTION_ENABLED (option, COMP) + ? "*str == *resword->%s && !strncmp (str + 1, resword->%s + 1, len - 1)" + : "*str == *resword->%s && !strcmp (str + 1, resword->%s + 1)", + GET_KEY_NAME (option), GET_KEY_NAME (option)); + } + else + comp_buffer = OPTION_ENABLED (option, COMP) + ? "*str == *resword && !strncmp (str + 1, resword + 1, len - 1)" + : "*str == *resword && !strcmp (str + 1, resword + 1)"; + + printf (" if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)\n {\n\ + register int key = %s (str, len);\n\n\ + if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)\n {\n", GET_HASH_NAME (option)); + + /* Properly deal with user's who request multiple switch statements. */ + + while (curr) + { + LIST_NODE *temp = curr; + int lowest_case_value = curr->hash_value; + int number_of_cases = 0; + + /* Figure out a good cut point to end this switch. */ + + for (; temp && ++number_of_cases < switch_size; temp = temp->next) + if (temp->next && temp->hash_value == temp->next->hash_value) + while (temp->next && temp->hash_value == temp->next->hash_value) + temp = temp->next; + + if (temp) + printf (" if (key <= %d)\n {\n", temp->hash_value); + else + printf (" {\n"); + + /* Output each keyword as part of a switch statement indexed by hash value. */ + + if (OPTION_ENABLED (option, POINTER) || OPTION_ENABLED (option, DUP)) + { + int i = 0; + + printf (" %s%s *resword; %s\n\n", + OPTION_ENABLED (option, CONST) ? "const " : "", + pointer_and_type_enabled ? key_list.struct_tag : "char", + OPTION_ENABLED (option, LENTABLE) && !OPTION_ENABLED (option, DUP) ? "int key_len;" : ""); + printf (" switch (key - %d)\n {\n", lowest_case_value); + + for (temp = curr; temp && ++i <= number_of_cases; temp = temp->next) + { + printf (" case %*d:", field_width, temp->hash_value - lowest_case_value); + if (OPTION_ENABLED (option, DEBUG)) + printf (" /* hash value = %4d, keyword = \"%s\" */", temp->hash_value, temp->key); + putchar ('\n'); + + /* Handle `natural links,' i.e., those that occur statically. */ + + if (temp->link) + { + LIST_NODE *links; + + for (links = temp; links; links = links->link) + { + if (pointer_and_type_enabled) + printf (" resword = &wordlist[%d];\n", links->index); + else + printf (" resword = \"%s\";\n", links->key); + printf (" if (%s) return resword;\n", comp_buffer); + } + } + /* Handle unresolved duplicate hash values. These are guaranteed + to be adjacent since we sorted the keyword list by increasing + hash values. */ + if (temp->next && temp->hash_value == temp->next->hash_value) + { + + for ( ; temp->next && temp->hash_value == temp->next->hash_value; + temp = temp->next) + { + if (pointer_and_type_enabled) + printf (" resword = &wordlist[%d];\n", temp->index); + else + printf (" resword = \"%s\";\n", temp->key); + printf (" if (%s) return resword;\n", comp_buffer); + } + if (pointer_and_type_enabled) + printf (" resword = &wordlist[%d];\n", temp->index); + else + printf (" resword = \"%s\";\n", temp->key); + printf (" return %s ? resword : 0;\n", comp_buffer); + } + else if (temp->link) + printf (" return 0;\n"); + else + { + if (pointer_and_type_enabled) + printf (" resword = &wordlist[%d];", temp->index); + else + printf (" resword = \"%s\";", temp->key); + if (OPTION_ENABLED (option, LENTABLE) && !OPTION_ENABLED (option, DUP)) + printf (" key_len = %d;", temp->length); + printf (" break;\n"); + } + } + printf (" default: return 0;\n }\n"); + printf (OPTION_ENABLED (option, LENTABLE) && !OPTION_ENABLED (option, DUP) + ? " if (len == key_len && %s)\n return resword;\n" + : " if (%s)\n return resword;\n", comp_buffer); + printf (" return 0;\n }\n"); + curr = temp; + } + else /* Nothing special required here. */ + { + int i = 0; + printf (" char *s;\n\n switch (key - %d)\n {\n", + lowest_case_value); + + for (temp = curr; temp && ++i <= number_of_cases; temp = temp->next) + if (OPTION_ENABLED (option, LENTABLE)) + printf (" case %*d: if (len == %d) s = \"%s\"; else return 0; break;\n", + field_width, temp->hash_value - lowest_case_value, + temp->length, temp->key); + else + printf (" case %*d: s = \"%s\"; break;\n", + field_width, temp->hash_value - lowest_case_value, temp->key); + + printf (" default: return 0;\n }\n "); + printf ("return *s == *str && !%s;\n }\n", + OPTION_ENABLED (option, COMP) + ? "strncmp (s + 1, str + 1, len - 1)" : "strcmp (s + 1, str + 1)"); + curr = temp; + } + } + printf (" }\n }\n return 0;\n}\n"); +} + +/* Prints out a table of keyword lengths, for use with the + comparison code in generated function ``in_word_set.'' */ + +static void +print_keylength_table () +{ + int max_column = 15; + int index = 0; + int column = 0; + char *indent = OPTION_ENABLED (option, GLOBAL) ? "" : " "; + LIST_NODE *temp; + + if (!OPTION_ENABLED (option, DUP) && !OPTION_ENABLED (option, SWITCH)) + { + printf ("\n%sstatic %sunsigned %s lengthtable[] =\n%s%s{\n ", + indent, OPTION_ENABLED (option, CONST) ? "const " : "", + key_list.max_key_len < MAX_UNSIGNED_CHAR ? "char" : + (key_list.max_key_len < MAX_UNSIGNED_SHORT ? "short" : "long"), + indent, indent); + + for (temp = key_list.head; temp; temp = temp->next, index++) + { + + if (index < temp->hash_value) + { + + for ( ; index < temp->hash_value; index++) + printf ("%3d%s", 0, ++column % (max_column - 1) ? "," : ",\n "); + } + + printf ("%3d%s", temp->length, ++column % (max_column - 1 ) ? "," : ",\n "); + } + + printf ("\n%s%s};\n\n", indent, indent); + } +} + +/* Prints out the array containing the key words for the Perfect + hash function. */ + +static void +print_keyword_table () +{ + char *l_brace = *key_list.head->rest ? "{" : ""; + char *r_brace = *key_list.head->rest ? "}," : ""; + int doing_switch = OPTION_ENABLED (option, SWITCH); + char *indent = OPTION_ENABLED (option, GLOBAL) ? "" : " "; + int index = 0; + LIST_NODE *temp; + + printf ("\n%sstatic %s%s wordlist[] =\n%s%s{\n", + indent, OPTION_ENABLED (option, CONST) ? "const " : "", + key_list.struct_tag, indent, indent); + + /* Generate an array of reserved words at appropriate locations. */ + + for (temp = key_list.head; temp; temp = temp->next, index++) + { + temp->index = index; + + if (!doing_switch && index < temp->hash_value) + { + int column; + + printf (" "); + + for (column = 1; index < temp->hash_value; index++, column++) + printf ("%s\"\",%s %s", l_brace, r_brace, column % 9 ? "" : "\n "); + + if (column % 10) + printf ("\n"); + else + { + printf ("%s\"%s\", %s%s\n", l_brace, temp->key, temp->rest, r_brace); + continue; + } + } + + printf (" %s\"%s\", %s%s\n", l_brace, temp->key, temp->rest, r_brace); + + /* Deal with links specially. */ + if (temp->link) + { + LIST_NODE *links; + + for (links = temp->link; links; links = links->link) + { + links->index = ++index; + printf (" %s\"%s\", %s%s\n", l_brace, links->key, links->rest, r_brace); + } + } + + } + + printf ("%s%s};\n\n", indent, indent); +} + +/* Generates C code for the hash function that returns the + proper encoding for each key word. */ + +static void +print_hash_function (max_hash_value) + int max_hash_value; +{ + int max_column = 10; + int count = max_hash_value; + + /* Calculate maximum number of digits required for MAX_HASH_VALUE. */ + + while ((count /= 10) > 0) + field_width++; + + if (OPTION_ENABLED (option, GNU)) + printf ("#ifdef __GNUC__\ninline\n#endif\n"); + + printf (OPTION_ENABLED (option, ANSI) + ? "static int\n%s (register const char *str, register int len)\n{\n static %sunsigned %s hash_table[] =\n {" + : "static int\n%s (str, len)\n register char *str;\n register unsigned int len;\n{\n static %sunsigned %s hash_table[] =\n {", + GET_HASH_NAME (option), OPTION_ENABLED (option, CONST) ? "const " : "", + max_hash_value < MAX_UNSIGNED_CHAR + ? "char" : (max_hash_value < MAX_UNSIGNED_SHORT ? "short" : "int")); + + for (count = 0; count < ALPHABET_SIZE; ++count) + { + if (!(count % max_column)) + printf ("\n "); + + printf ("%*d,", field_width, occurrences[count] ? asso_values[count] : max_hash_value); + } + + /* Optimize special case of ``-k 1,$'' */ + if (OPTION_ENABLED (option, DEFAULTCHARS)) + printf ("\n };\n return %s + hash_table[str[len - 1]] + hash_table[str[0]];\n}\n\n", + OPTION_ENABLED (option, NOLENGTH) ? "0" : "len"); + else + { + int key_pos; + + RESET (option); + + /* Get first (also highest) key position. */ + key_pos = GET (option); + + /* We can perform additional optimizations here. */ + if (!OPTION_ENABLED (option, ALLCHARS) && key_pos <= key_list.min_key_len) + { + printf ("\n };\n return %s", OPTION_ENABLED (option, NOLENGTH) ? "0" : "len"); + + for ( ; key_pos != EOS && key_pos != WORD_END; key_pos = GET (option)) + printf (" + hash_table[str[%d]]", key_pos - 1); + + printf ("%s;\n}\n\n", key_pos == WORD_END ? " + hash_table[str[len - 1]]" : ""); + } + + /* We've got to use the correct, but brute force, technique. */ + else + { + printf ("\n };\n register int hval = %s;\n\n switch (%s)\n {\n default:\n", + OPTION_ENABLED (option, NOLENGTH) + ? "0" : "len", OPTION_ENABLED (option, NOLENGTH) ? "len" : "hval"); + + /* User wants *all* characters considered in hash. */ + if (OPTION_ENABLED (option, ALLCHARS)) + { + int i; + + for (i = key_list.max_key_len; i > 0; i--) + printf (" case %d:\n hval += hash_table[str[%d]];\n", i, i - 1); + + printf (" }\n return hval;\n}\n\n"); + } + else /* do the hard part... */ + { + count = key_pos + 1; + + do + { + + while (--count > key_pos) + printf (" case %d:\n", count); + + printf (" case %d:\n hval += hash_table[str[%d]];\n", + key_pos, key_pos - 1); + } + while ((key_pos = GET (option)) != EOS && key_pos != WORD_END); + + printf (" }\n return hval%s ;\n}\n\n", key_pos == WORD_END + ? " + hash_table[str[len - 1]]" : ""); + } + } + } +} + +/* Generates C code to perform the keyword lookup. */ + +static void +print_lookup_function () +{ + printf (" if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)\n {\n\ + register int key = %s (str, len);\n\n\ + if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)\n {\n\ + register %schar *s = wordlist[key]", + GET_HASH_NAME (option), OPTION_ENABLED (option, CONST) ? "const " : ""); + if (key_list.array_type != default_array_type) + printf (".%s", GET_KEY_NAME (option)); + + printf (";\n\n if (%s*s == *str && !%s)\n return %s", + OPTION_ENABLED (option, LENTABLE) ? "len == lengthtable[key]\n && " : "", + OPTION_ENABLED (option, COMP) ? "strncmp (str + 1, s + 1, len - 1)" : "strcmp (str + 1, s + 1)", + OPTION_ENABLED (option, TYPE) && OPTION_ENABLED (option, POINTER) ? "&wordlist[key]" : "s"); + printf (";\n }\n }\n return 0;\n}\n"); +} + +/* Generates the hash function and the key word recognizer function + based upon the user's Options. */ + +void +print_output () +{ + int global_table = OPTION_ENABLED (option, GLOBAL); + + printf ("%s\n", key_list.include_src); + + /* Potentially output type declaration now, reference it later on.... */ + if (OPTION_ENABLED (option, TYPE) && !OPTION_ENABLED (option, NOTYPE)) + printf ("%s;\n", key_list.array_type); + + print_hash_function (print_min_max ()); + + if (global_table) + if (OPTION_ENABLED (option, SWITCH)) + { + if (OPTION_ENABLED (option, LENTABLE) && OPTION_ENABLED (option, DUP)) + print_keylength_table (); + if (OPTION_ENABLED (option, POINTER) && OPTION_ENABLED (option, TYPE)) + print_keyword_table (); + } + else + { + if (OPTION_ENABLED (option, LENTABLE)) + print_keylength_table (); + print_keyword_table (); + } + /* Use the inline keyword to remove function overhead. */ + if (OPTION_ENABLED (option, GNU)) + printf ("#ifdef __GNUC__\ninline\n#endif\n"); + + /* Use ANSI function prototypes. */ + printf (OPTION_ENABLED (option, ANSI) + ? "%s%s\n%s (register const char *str, register int len)\n{\n" + : "%s%s\n%s (str, len)\n register char *str;\n register unsigned int len;\n{\n", + OPTION_ENABLED (option, CONST) ? "const " : "", + key_list.return_type, GET_FUNCTION_NAME (option)); + + /* Use the switch in place of lookup table. */ + if (OPTION_ENABLED (option, SWITCH)) + { + if (!global_table) + { + if (OPTION_ENABLED (option, LENTABLE) && OPTION_ENABLED (option, DUP)) + print_keylength_table (); + if (OPTION_ENABLED (option, POINTER) && OPTION_ENABLED (option, TYPE)) + print_keyword_table (); + } + print_switch (); + } + else /* Use the lookup table, in place of switch. */ + { + if (!global_table) + { + if (OPTION_ENABLED (option, LENTABLE)) + print_keylength_table (); + print_keyword_table (); + } + print_lookup_function (); + } + + if (key_list.additional_code) + { + int c; + + while ((c = getchar ()) != EOF) + putchar (c); + } + fflush (stdout); +} + +/* Sorts the keys by hash value. */ + +void +sort () +{ + key_list.hash_sort = TRUE; + key_list.occurrence_sort = FALSE; + + key_list.head = merge_sort (key_list.head); +} + +/* Dumps the key list to stderr stream. */ + +static void +dump () +{ + LIST_NODE *ptr; + + fprintf (stderr, "\nList contents are:\n(hash value, key length, index, key set, key):\n"); + + for (ptr = key_list.head; ptr; ptr = ptr->next) + fprintf (stderr, "%7d,%7d,%6d, %s, %s\n", + ptr->hash_value, ptr->length, ptr->index, + ptr->char_set, ptr->key); +} + +/* Simple-minded constructor action here... */ + +void +key_list_init () +{ + key_list.total_keys = 1; + key_list.max_key_len = NEG_MAX_INT; + key_list.min_key_len = MAX_INT; + key_list.return_type = default_return_type; + key_list.array_type = key_list.struct_tag = default_array_type; + key_list.head = NULL; + key_list.additional_code = FALSE; +} + +/* Returns the length of entire key list. */ + +int +keyword_list_length () +{ + return key_list.list_len; +} + +/* Returns length of longest key read. */ + +int +max_key_length () +{ + return key_list.max_key_len; +} + +/* DESTRUCTOR dumps diagnostics during debugging. */ + +void +key_list_destroy () +{ + if (OPTION_ENABLED (option, DEBUG)) + { + fprintf (stderr, "\nDumping key list information:\ntotal unique keywords = %d\ +\ntotal keywords = %d\nmaximum key length = %d.\n", + key_list.list_len, key_list.total_keys, key_list.max_key_len); + dump (); + fprintf (stderr, "End dumping list.\n\n"); + } +} + diff --git a/contrib/gperf/src/keylist.h b/contrib/gperf/src/keylist.h new file mode 100644 index 000000000000..38143b73cb14 --- /dev/null +++ b/contrib/gperf/src/keylist.h @@ -0,0 +1,54 @@ +/* Data and function member declarations for the keyword list class. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* The key word list is a useful abstraction that keeps track of + various pieces of information that enable that fast generation + of the Perfect.hash function. A Key_List is a singly-linked + list of List_Nodes. */ + +#ifndef _keylist_h +#define _keylist_h +#include +#include "listnode.h" + +typedef struct key_list +{ + LIST_NODE *head; /* Points to the head of the linked list. */ + char *array_type; /* Pointer to the type for word list. */ + char *return_type; /* Pointer to return type for lookup function. */ + char *struct_tag; /* Shorthand for user-defined struct tag type. */ + char *include_src; /* C source code to be included verbatim. */ + int list_len; /* Length of head's Key_List, not counting duplicates. */ + int total_keys; /* Total number of keys, counting duplicates. */ + int max_key_len; /* Maximum length of the longest keyword. */ + int min_key_len; /* Minimum length of the shortest keyword. */ + bool occurrence_sort; /* True if sorting by occurrence. */ + bool hash_sort; /* True if sorting by hash value. */ + bool additional_code; /* True if any additional C code is included. */ +} KEY_LIST; + +extern void key_list_init P ((void)); +extern void key_list_destroy P ((void)); +extern void print_output P ((void)); +extern int keyword_list_length P ((void)); +extern int max_key_length P ((void)); +extern KEY_LIST key_list; +#endif /* _keylist_h */ diff --git a/contrib/gperf/src/listnode.c b/contrib/gperf/src/listnode.c new file mode 100644 index 000000000000..2eec1a6f74f8 --- /dev/null +++ b/contrib/gperf/src/listnode.c @@ -0,0 +1,111 @@ +/* Creates and initializes a new list node. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include "options.h" +#include "listnode.h" +#include "stderr.h" + +/* See comments in perfect.cc. */ +extern int occurrences[ALPHABET_SIZE]; + +/* Sorts the key set alphabetically to speed up subsequent operations. + Uses insertion sort since the set is probably quite small. */ + +static void +set_sort (base, len) + char *base; + int len; +{ + int i, j; + + for (i = 0, j = len - 1; i < j; i++) + { + char curr, tmp; + + for (curr = i + 1, tmp = base[curr]; curr > 0 && tmp < base[curr-1]; curr--) + base[curr] = base[curr - 1]; + + base[curr] = tmp; + + } +} + +/* Initializes a List_Node. This requires obtaining memory for the KEY_SET + initializing them using the information stored in the + KEY_POSITIONS array in Options, and checking for simple errors. + It's important to note that KEY and REST are both pointers to + the different offsets into the same block of dynamic memory pointed to + by parameter K. The data member REST is used to store any additional fields + of the input file (it is set to the "" string if Option[TYPE] is not enabled). + This is useful if the user wishes to incorporate a lookup structure, + rather than just an array of keys. */ + +LIST_NODE * +make_list_node (k, len) + char *k; + int len; +{ + LIST_NODE *buffered_malloc (); + int char_set_size = OPTION_ENABLED (option, ALLCHARS) ? len : GET_CHARSET_SIZE (option) + 1; + LIST_NODE *temp = buffered_malloc (sizeof (LIST_NODE) + char_set_size); + char *ptr = temp->char_set; + + k[len] = '\0'; /* Null terminate KEY to separate it from REST. */ + temp->key = k; + temp->next = 0; + temp->index = 0; + temp->length = len; + temp->link = 0; + temp->rest = OPTION_ENABLED (option, TYPE) ? k + len + 1 : ""; + + if (OPTION_ENABLED (option, ALLCHARS)) /* Use all the character position in the KEY. */ + + for (; *k; k++, ptr++) + ++occurrences[*ptr = *k]; + + else /* Only use those character positions specified by the user. */ + { + int i; + + /* Iterate thru the list of key_positions, initializing occurrences table + and temp->char_set (via char * pointer ptr). */ + + for(RESET (option); (i = GET (option)) != EOS; ) + { + if (i == WORD_END) /* Special notation for last KEY position, i.e. '$'. */ + *ptr = temp->key[len - 1]; + else if (i <= len) /* Within range of KEY length, so we'll keep it. */ + *ptr = temp->key[i - 1]; + else /* Out of range of KEY length, so we'll just skip it. */ + continue; + ++occurrences[*ptr++]; + } + + if (ptr == temp->char_set) /* Didn't get any hits, i.e., no usable positions. */ + report_error ("can't hash keyword %s with chosen key positions\n%a", temp->key); + } + + *ptr = '\0'; /* Terminate this bastard.... */ + /* Sort the KEY_SET items alphabetically. */ + set_sort (temp->char_set, ptr - temp->char_set); + + return temp; +} diff --git a/contrib/gperf/src/listnode.h b/contrib/gperf/src/listnode.h new file mode 100644 index 000000000000..3e64709f5c10 --- /dev/null +++ b/contrib/gperf/src/listnode.h @@ -0,0 +1,43 @@ +/* Data and function members for defining values and operations of a list node. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _listnode_h +#define _listnode_h +#include "prototype.h" + +#define ALPHABET_SIZE 128 + +typedef struct list_node +{ + struct list_node *link; /* TRUE if key has an identical KEY_SET as another key. */ + struct list_node *next; /* Points to next element on the list. */ + int length; /* Length of the key. */ + int hash_value; /* Hash value for the key. */ + int occurrence; /* A metric for frequency of key set occurrences. */ + int index; /* Position of this node relative to other nodes. */ + char *key; /* Key string. */ + char *rest; /* Additional information for building hash function. */ + char char_set[1]; /* Set of characters to hash, specified by user. */ +} LIST_NODE; + +extern LIST_NODE *make_list_node P ((char *k, int len)); + +#endif _listnode_h diff --git a/contrib/gperf/src/main.c b/contrib/gperf/src/main.c new file mode 100644 index 000000000000..a54c1dfc5069 --- /dev/null +++ b/contrib/gperf/src/main.c @@ -0,0 +1,96 @@ +/* Driver program for the Perfect hash function generator. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Simple driver program for the Perfect.hash function generator. + Most of the hard work is done in class Perfect and its class methods. */ + +#include +#include +#include +#include "stderr.h" +#include "options.h" +#include "perfect.h" + +/* Calls the appropriate intialization routines for each + ADT. Note that certain initialization routines require + initialization *after* certain values are computed. Therefore, + they cannot be called here. */ + +static void +init_all (argc, argv) + int argc; + char *argv[]; +{ +#ifdef RLIMIT_STACK + /* Get rid of any avoidable limit on stack size. */ + { + struct rlimit rlim; + + /* Set the stack limit huge so that alloca does not fail. */ + getrlimit (RLIMIT_STACK, &rlim); + rlim.rlim_cur = rlim.rlim_max; + setrlimit (RLIMIT_STACK, &rlim); + } +#endif /* RLIMIT_STACK */ + + options_init (argc, argv); + key_list_init (); + perfect_init (); +} + +/* Calls appropriate destruction routines for each ADT. These + routines print diagnostics if the debugging option is enabled. */ + +static void +destroy_all () +{ + options_destroy (); + key_list_destroy (); + perfect_destroy (); +} + +/* Driver for perfect hash function generation. */ + +int +main (argc, argv) + int argc; + char *argv[]; +{ + struct tm *tm; + time_t clock; + int status; + + time (&clock); + tm = localtime (&clock); + + fprintf (stderr, "/* starting time is %d:%d:%d */\n", tm->tm_hour, tm->tm_min, tm->tm_sec); + /* Sets the options. */ + init_all (argc, argv); + + /* Generates the perfect hash table. + Also prints generated code neatly to the output. */ + status = perfect_generate (); + destroy_all (); + + time (&clock); + tm = localtime (&clock); + fprintf (stderr, "/* ending time is %d:%d:%d */\n", tm->tm_hour, tm->tm_min, tm->tm_sec); + return status; +} diff --git a/contrib/gperf/src/options.c b/contrib/gperf/src/options.c new file mode 100644 index 000000000000..40fdf0a6588e --- /dev/null +++ b/contrib/gperf/src/options.c @@ -0,0 +1,444 @@ +/* Handles parsing the Options provided to the user. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include "options.h" +#include "iterator.h" +#include "stderr.h" + +/* Current program version. */ +extern char *version_string; + +/* Size to jump on a collision. */ +#define DEFAULT_JUMP_VALUE 5 + +/* Default name for generated lookup function. */ +#define DEFAULT_NAME "in_word_set" + +/* Default name for the key component. */ +#define DEFAULT_KEY "name" + +/* Default name for generated hash function. */ +#define DEFAULT_HASH_NAME "hash" + +/* Globally visible OPTIONS object. */ +OPTIONS option; + +/* Default delimiters that separate keywords from their attributes. */ +#define DEFAULT_DELIMITERS ",\n" + +/* Prints program usage to standard error stream. */ + +void +usage () +{ + report_error ("usage: %n [-acCdDef[num]gGhHijk\ +KlnNoprsStTv].\n(type %n -h for help)\n"); +} + +/* Sorts the key positions *IN REVERSE ORDER!!* + This makes further routines more efficient. Especially when generating code. + Uses a simple Insertion Sort since the set is probably ordered. + Returns 1 if there are no duplicates, 0 otherwise. */ + +static int +key_sort (base, len) + char *base; + int len; +{ + int i, j; + + for (i = 0, j = len - 1; i < j; i++) + { + int curr, tmp; + + for (curr = i + 1,tmp = base[curr]; curr > 0 && tmp >= base[curr - 1]; curr--) + if ((base[curr] = base[curr - 1]) == tmp) /* oh no, a duplicate!!! */ + return 0; + + base[curr] = tmp; + } + + return 1; +} + +/* Dumps option status when debug is set. */ + +void +options_destroy () +{ + if (OPTION_ENABLED (option, DEBUG)) + { + char *ptr; + + fprintf (stderr, "\ndumping Options:\nDEBUG is.......: %s\nORDER is.......: %s\ +\nANSI is........: %s\nTYPE is........: %s\nGNU is.........: %s\nRANDOM is......: %s\ +\nDEFAULTCHARS is: %s\nSWITCH is......: %s\nPOINTER is.....: %s\nNOLENGTH is....: %s\ +\nLENTABLE is....: %s\nDUP is.........: %s\nCOMP is........: %s\nFAST is........: %s\ +\nNOTYPE is......: %s\nGLOBAL is......: %s\nCONST is.......: %s\niterations = %d\ +\nlookup function name = %s\nhash function name = %s\nkey name = %s\ +\njump value = %d\nmax associcated value = %d\ninitial associated value = %d\ +\ndelimiters = %s\nnumber of switch statements = %d\napproximate switch statement size = %d\n", + OPTION_ENABLED (option, DEBUG) ? "enabled" : "disabled", + OPTION_ENABLED (option, ORDER) ? "enabled" : "disabled", + OPTION_ENABLED (option, ANSI) ? "enabled" : "disabled", + OPTION_ENABLED (option, TYPE) ? "enabled" : "disabled", + OPTION_ENABLED (option, GNU) ? "enabled" : "disabled", + OPTION_ENABLED (option, RANDOM) ? "enabled" : "disabled", + OPTION_ENABLED (option, DEFAULTCHARS) ? "enabled" : "disabled", + OPTION_ENABLED (option, SWITCH) ? "enabled" : "disabled", + OPTION_ENABLED (option, POINTER) ? "enabled" : "disabled", + OPTION_ENABLED (option, NOLENGTH) ? "enabled" : "disabled", + OPTION_ENABLED (option, LENTABLE) ? "enabled" : "disabled", + OPTION_ENABLED (option, DUP) ? "enabled" : "disabled", + OPTION_ENABLED (option, COMP) ? "enabled" : "disabled", + OPTION_ENABLED (option, FAST) ? "enabled" : "disabled", + OPTION_ENABLED (option, NOTYPE) ? "enabled" : "disabled", + OPTION_ENABLED (option, GLOBAL) ? "enabled" : "disabled", + OPTION_ENABLED (option, CONST) ? "enabled" : "disabled", + option.iterations, option.function_name, option.hash_name, + option.key_name, option.jump, option.size - 1, + option.initial_asso_value, option.delimiters, option.total_switches, + keyword_list_length () / option.total_switches); + + if (OPTION_ENABLED (option, ALLCHARS)) + fprintf (stderr, "all characters are used in the hash function\n"); + fprintf (stderr, "maximum charset size = %d\nkey positions are: \n", + option.total_charset_size); + + for (ptr = option.key_positions; *ptr != EOS; ptr++) + if (*ptr == WORD_END) + fprintf (stderr, "$\n"); + else + fprintf (stderr, "%d\n", *ptr); + + fprintf (stderr, "finished dumping Options\n"); + } +} + +/* Parses the command line Options and sets appropriate flags in option.option_word. */ + +void +options_init (argc, argv) + int argc; + char *argv[]; +{ + extern int optind; + extern char *optarg; + int option_char; + + option.key_positions[0] = WORD_START; + option.key_positions[1] = WORD_END; + option.key_positions[2] = EOS; + option.total_charset_size = 2; + option.jump = DEFAULT_JUMP_VALUE; + option.option_word = (int) DEFAULTCHARS; + option.function_name = DEFAULT_NAME; + option.hash_name = DEFAULT_HASH_NAME; + option.key_name = DEFAULT_KEY; + option.delimiters = DEFAULT_DELIMITERS; + option.initial_asso_value = option.size = option.iterations = 0; + option.total_switches = 1; + option.argument_count = argc; + option.argument_vector = argv; + set_program_name (argv[0]); + + while ((option_char = getopt (argc, argv, "adcCDe:f:gGhH:i:j:k:K:lnN:oprs:S:tTv")) != EOF) + { + switch (option_char) + { + case 'a': /* Generated coded uses the ANSI prototype format. */ + { + SET_OPTION (option, ANSI); + break; + } + case 'c': /* Generate strncmp rather than strcmp. */ + { + SET_OPTION (option, COMP); + break; + } + case 'C': /* Make the generated tables readonly (const). */ + { + SET_OPTION (option, CONST); + break; + } + case 'd': /* Enable debugging option. */ + { + SET_OPTION (option, DEBUG); + report_error ("starting program %n, version %s, with debuggin on.\n", + version_string); + break; + } + case 'D': /* Enable duplicate option. */ + { + SET_OPTION (option, DUP); + break; + } + case 'e': /* Allows user to provide keyword/attribute separator */ + { + SET_DELIMITERS (option, optarg); + break; + } + case 'f': /* Generate the hash table ``fast.'' */ + { + SET_OPTION (option, FAST); + if ((option.iterations = atoi (optarg)) < 0) + { + report_error ("iterations value must not be negative, assuming 0\n"); + option.iterations = 0; + } + break; + } + case 'g': /* Use the ``inline'' keyword for generated sub-routines. */ + { + SET_OPTION (option, GNU); + break; + } + case 'G': /* Make the keyword table a global variable. */ + { + SET_OPTION (option, GLOBAL); + break; + } + case 'h': /* Displays a list of helpful Options to the user. */ + { + report_error ( +"-a\tGenerate ANSI standard C output code, i.e., function prototypes.\n\ +-c\tGenerate comparison code using strncmp rather than strcmp.\n\ +-C\tMake the contents of generated lookup tables constant, i.e., readonly.\n\ +-d\tEnables the debugging option (produces verbose output to Std_Err).\n\ +-D\tHandle keywords that hash to duplicate values. This is useful\n\ +\tfor certain highly redundant keyword sets. It enables the -S option.\n\ +-e\tAllow user to provide a string containing delimiters used to separate\n\ +\tkeywords from their attributes. Default is \",\\n\"\n\ +-f\tGenerate the perfect hash function ``fast.'' This decreases GPERF's\n\ +\trunning time at the cost of minimizing generated table-size.\n\ +\tThe numeric argument represents the number of times to iterate when\n\ +\tresolving a collision. `0' means ``iterate by the number of keywords''.\n\ +-g\tAssume a GNU compiler, e.g., g++ or gcc. This makes all generated\n\ +\troutines use the ``inline'' keyword to remove cost of function calls.\n\ +-G\tGenerate the static table of keywords as a static global variable,\n\ +\trather than hiding it inside of the lookup function (which is the\n\ +\tdefault behavior).\n\ +-h\tPrints this mesage.\n"); + report_error ( +"-H\tAllow user to specify name of generated hash function. Default is `hash'.\n\ +-i\tProvide an initial value for the associate values array. Default is 0.\n\ +\tSetting this value larger helps inflate the size of the final table.\n\ +-j\tAffects the ``jump value,'' i.e., how far to advance the associated\n\ +\tcharacter value upon collisions. Must be an odd number, default is %d.\n\ +-k\tAllows selection of the key positions used in the hash function.\n\ +\tThe allowable choices range between 1-%d, inclusive. The positions\n\ +\tare separated by commas, ranges may be used, and key positions may\n\ +\toccur in any order. Also, the meta-character '*' causes the generated\n\ +\thash function to consider ALL key positions, and $ indicates the\n\ +\t``final character'' of a key, e.g., $,1,2,4,6-10.\n\ +-K\tAllow user to select name of the keyword component in the keyword structure.\n\ +-l\tCompare key lengths before trying a string comparison. This helps\n\ +\tcut down on the number of string comparisons made during the lookup.\n\ +-n\tDo not include the length of the keyword when computing the hash function\n\ +-N\tAllow user to specify name of generated lookup function. Default\n\ +\tname is `in_word_set.'\n\ +-o\tReorders input keys by frequency of occurrence of the key sets.\n\ +\tThis should decrease the search time dramatically.\n\ +-p\tChanges the return value of the generated function ``in_word_set''\n\ +\tfrom its default boolean value (i.e., 0 or 1), to type ``pointer\n\ +\tto wordlist array'' This is most useful when the -t option, allowing\n\ +\tuser-defined structs, is used.\n", + DEFAULT_JUMP_VALUE, MAX_KEY_POS - 1); + report_error ( +"-r\tUtilizes randomness to initialize the associated values table.\n\ +-s\tAffects the size of the generated hash table. The numeric argument\n\ +\tfor this option indicates ``how many times larger'' the table range\n\ +\tshould be, in relationship to the number of keys, e.g. a value of 3\n\ +\tmeans ``make the table about 3 times larger than the number of input\n\ +\tkeys.'' A larger table should decrease the time required for an\n\ +\tunsuccessful search, at the expense of extra table space. Default\n\ +\tvalue is 1. This actual table size may vary somewhat.\n\ +-S\tCauses the generated C code to use a switch statement scheme, rather\n\ +\tthan an array lookup table. This can lead to a reduction in both\n\ +\ttime and space requirements for some keyfiles. The argument to\n\ +\tthis option determines how many switch statements are generated.\n\ +\tA value of 1 generates 1 switch containing all the elements, a value of 2\n\ +\tgenerates 2 tables with 1/2 the elements in each table, etc. This\n\ +\tis useful since many C compilers cannot correctly generate code for\n\ +\tlarge switch statements.\n\ +\tthe expense of longer time for each lookup. Mostly important for\n\ +\t*large* input sets, i.e., greater than around 100 items or so.\n\ +-t\tAllows the user to include a structured type declaration for \n\ +\tgenerated code. Any text before %%%% is consider part of the type\n\ +\tdeclaration. Key words and additional fields may follow this, one\n\ +\tgroup of fields per line.\n\ +-T\tPrevents the transfer of the type declaration to the output file.\n\ +\tUse this option if the type is already defined elsewhere.\n\ +-v\tPrints out the current version number\n%e%a\n", + usage); + } + case 'H': /* Sets the name for the hash function */ + { + option.hash_name = optarg; + break; + } + case 'i': /* Sets the initial value for the associated values array. */ + { + if ((option.initial_asso_value = atoi (optarg)) < 0) + report_error ("initial value %d must be non-zero, ignoring and continuing\n", + option.initial_asso_value); + if (OPTION_ENABLED (option, RANDOM)) + report_error ("warning, -r option superceeds -i, ignoring -i option and continuing\n"); + break; + } + case 'j': /* Sets the jump value, must be odd for later algorithms. */ + { + if ((option.jump = atoi (optarg)) < 0) + report_error ("jump value %d must be a positive number\n%e%a", + option.jump, usage); + else if (option.jump && EVEN (option.jump)) + report_error ("jump value %d should be odd, adding 1 and continuing...\n", + option.jump++); + break; + } + case 'k': /* Sets key positions used for hash function. */ + { + int BAD_VALUE = -1; + int value; + + iterator_init (optarg, 1, MAX_KEY_POS - 1, WORD_END, BAD_VALUE, EOS); + + if (*optarg == '*') /* Use all the characters for hashing!!!! */ + { + UNSET_OPTION (option, DEFAULTCHARS); + SET_OPTION (option, ALLCHARS); + } + else + { + char *key_pos; + + for (key_pos = option.key_positions; (value = next ()) != EOS; key_pos++) + if (value == BAD_VALUE) + report_error ("illegal key value or range, use 1,2,3-%d,'$' or '*'.\n%e%a", + (MAX_KEY_POS - 1),usage); + else + *key_pos = value;; + + *key_pos = EOS; + + if (! (option.total_charset_size = (key_pos - option.key_positions))) + report_error ("no keys selected\n%e%a", usage); + else if (! key_sort (option.key_positions, option.total_charset_size)) + report_error ("duplicate keys selected\n%e%a", usage); + + if (option.total_charset_size != 2 + || (option.key_positions[0] != 1 || option.key_positions[1] != WORD_END)) + UNSET_OPTION (option, DEFAULTCHARS); + } + break; + } + case 'K': /* Make this the keyname for the keyword component field. */ + { + option.key_name = optarg; + break; + } + case 'l': /* Create length table to avoid extra string compares. */ + { + SET_OPTION (option, LENTABLE); + break; + } + case 'n': /* Don't include the length when computing hash function. */ + { + SET_OPTION (option, NOLENGTH); + break; + } + case 'N': /* Make generated lookup function name be optarg */ + { + option.function_name = optarg; + break; + } + case 'o': /* Order input by frequency of key set occurrence. */ + { + SET_OPTION (option, ORDER); + break; + } + case 'p': /* Generated lookup function now a pointer instead of int. */ + { + SET_OPTION (option, POINTER); + break; + } + case 'r': /* Utilize randomness to initialize the associated values table. */ + { + SET_OPTION (option, RANDOM); + if (option.initial_asso_value != 0) + report_error ("warning, -r option superceeds -i, disabling -i option and continuing\n"); + break; + } + case 's': /* Range of associated values, determines size of final table. */ + { + if ((option.size = atoi (optarg)) <= 0) + report_error ("improper range argument %s\n%e%a", optarg, usage); + else if (option.size > 50) + report_error ("%d is excessive, did you really mean this?! (type %n -h for help)\n", + option.size); + break; + } + case 'S': /* Generate switch statement output, rather than lookup table. */ + { + SET_OPTION (option, SWITCH); + if ((option.total_switches = atoi (optarg)) <= 0) + report_error ("number of switches %s must be a positive number\n%e%a", optarg, usage); + break; + } + case 't': /* Enable the TYPE mode, allowing arbitrary user structures. */ + { + SET_OPTION (option, TYPE); + break; + } + case 'T': /* Don't print structure definition. */ + { + SET_OPTION (option, NOTYPE); + break; + } + case 'v': /* Print out the version and quit. */ + report_error ("%n: version %s\n%e%a\n", version_string, usage); + default: + report_error ("%e%a", usage); + } + } + + if (argv[optind] && ! freopen (argv[optind], "r", stdin)) + report_error ("unable to read key word file %s\n%e%a", argv[optind], usage); + + if (++optind < argc) + report_error ("extra trailing arguments to %n\n%e%a", usage); +} + +/* Output command-line Options. */ +void +print_options () +{ + int i; + + printf ("/* Command-line: "); + + for (i = 0; i < option.argument_count; i++) + printf ("%s ", option.argument_vector[i]); + + printf (" */\n\n"); +} + diff --git a/contrib/gperf/src/options.h b/contrib/gperf/src/options.h new file mode 100644 index 000000000000..1928c7383227 --- /dev/null +++ b/contrib/gperf/src/options.h @@ -0,0 +1,153 @@ +/* Handles parsing the Options provided to the user. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* This module provides a uniform interface to the various Options available + to a user of the Perfect.hash function generator. In addition to the + run-time Options, found in the Option_Type below, there is also the + hash table Size and the Keys to be used in the hashing. + The overall design of this module was an experiment in using C++ + classes as a mechanism to enhance centralization of option and + and error handling, which tend to get out of hand in a C program. */ + +#ifndef _options_h +#define _options_h + +#include +#include "prototype.h" + +/* Enumerate the potential debugging Options. */ + +enum option_type +{ + DEBUG = 01, /* Enable debugging (prints diagnostics to Std_Err). */ + ORDER = 02, /* Apply ordering heuristic to speed-up search time. */ + ANSI = 04, /* Generate ANSI prototypes. */ + ALLCHARS = 010, /* Use all characters in hash function. */ + GNU = 020, /* Assume GNU extensions (primarily function inline). */ + TYPE = 040, /* Handle user-defined type structured keyword input. */ + RANDOM = 0100, /* Randomly initialize the associated values table. */ + DEFAULTCHARS = 0200, /* Make default char positions be 1,$ (end of keyword). */ + SWITCH = 0400, /* Generate switch output to save space. */ + POINTER = 01000, /* Have in_word_set function return pointer, not boolean. */ + NOLENGTH = 02000, /* Don't include keyword length in hash computations. */ + LENTABLE = 04000, /* Generate a length table for string comparison. */ + DUP = 010000, /* Handle duplicate hash values for keywords. */ + FAST = 020000, /* Generate the hash function ``fast.'' */ + NOTYPE = 040000, /* Don't include user-defined type definition + in output -- it's already defined elsewhere. */ + COMP = 0100000, /* Generate strncmp rather than strcmp. */ + GLOBAL = 0200000, /* Make the keyword table a global variable. */ + CONST = 0400000, /* Make the generated tables readonly (const). */ +}; + +/* Define some useful constants. */ + +/* Max size of each word's key set. */ +#define MAX_KEY_POS (128 - 1) + +/* Signals the start of a word. */ +#define WORD_START 1 + +/* Signals the end of a word. */ +#define WORD_END 0 + +/* Signals end of the key list. */ +#define EOS MAX_KEY_POS + +/* Returns TRUE if option O is enabled. */ +#define OPTION_ENABLED(OW,O) (OW.option_word & (int)O) + +/* Enables option O in OPTION_WORD. */ +#define SET_OPTION(OW,O) (OW.option_word |= (int)O) + +/* Disable option O in OPTION_WORD. */ +#define UNSET_OPTION(OW,O) (OW.option_word &= ~(int)(O)) + +/* Returns total distinct key positions. */ +#define GET_CHARSET_SIZE(O) (O.total_charset_size) + +/* Set the total distinct key positions. */ +#define SET_CHARSET_SIZE(O,S) (O.total_charset_size = (S)) + +/* Initializes the key Iterator. */ +#define RESET(O) (O.key_pos = 0) + +/* Returns current key_position and advances index. */ +#define GET(O) (O.key_positions[O.key_pos++]) + +/* Sets the size of the table size. */ +#define SET_ASSO_MAX(O,R) (O.size = (R)) + +/* Returns the size of the table size. */ +#define GET_ASSO_MAX(O) (O.size) + +/* Returns the jump value. */ +#define GET_JUMP(O) (O.jump) + +/* Returns the iteration value. */ +#define GET_ITERATIONS(O) (O.iterations) + +/* Returns the lookup function name. */ +#define GET_FUNCTION_NAME(O) (O.function_name) + +/* Returns the keyword key name. */ +#define GET_KEY_NAME(O) (O.key_name) + +/* Returns the hash function name. */ +#define GET_HASH_NAME(O) (O.hash_name) + +/* Returns the initial associated character value. */ +#define INITIAL_VALUE(O) (O.initial_asso_value) + +/* Returns the string used to delimit keywords from other attributes. */ +#define GET_DELIMITER(O) (O.delimiters) + +/* Sets the keyword/attribute delimiters with value of D. */ +#define SET_DELIMITERS(O,D) (O.delimiters = (D)) + +/* Gets the total number of switch statements to generate. */ +#define GET_TOTAL_SWITCHES(O) (O.total_switches) + +/* Class manager for gperf program options. */ + +typedef struct options +{ + int option_word; /* Holds the user-specified Options. */ + int total_charset_size; /* Total number of distinct key_positions. */ + int size; /* Range of the hash table. */ + int key_pos; /* Tracks current key position for Iterator. */ + int jump; /* Jump length when trying alternative values. */ + int initial_asso_value; /* Initial value for asso_values table. */ + int argument_count; /* Records count of command-line arguments. */ + int iterations; /* Amount to iterate when a collision occurs. */ + int total_switches; /* Number of switch statements to generate. */ + char **argument_vector; /* Stores a pointer to command-line vector. */ + char *function_name; /* Name used for generated lookup function. */ + char *key_name; /* Name used for keyword key. */ + char *hash_name; /* Name used for generated hash function. */ + char *delimiters; /* Separates keywords from other attributes. */ + char key_positions[MAX_KEY_POS]; /* Contains user-specified key choices. */ +} OPTIONS; + +extern void options_init P ((int argc, char *argv[])); +extern void options_destroy P ((void)); +extern OPTIONS option; +#endif /* _options_h */ diff --git a/contrib/gperf/src/perfect.c b/contrib/gperf/src/perfect.c new file mode 100644 index 000000000000..25b958e2e44e --- /dev/null +++ b/contrib/gperf/src/perfect.c @@ -0,0 +1,350 @@ +/* Provides high-level routines to manipulate the keywork list + structures the code generation output. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include "options.h" +#include "perfect.h" +#include "stderr.h" + +/* Current release version. */ +extern char *version_string; + +/* Counts occurrences of each key set character. */ +int occurrences[ALPHABET_SIZE]; + +/* Value associated with each character. */ +int asso_values[ALPHABET_SIZE]; + +/* Locally visible PERFECT object. */ +PERFECT perfect; + +/* Efficiently returns the least power of two greater than or equal to X! */ +#define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X))) + +/* Reads input keys, possibly applies the reordering heuristic, sets the + maximum associated value size (rounded up to the nearest power of 2), + may initialize the associated values array, and determines the maximum + hash table size. Note: using the random numbers is often helpful, + though not as deterministic, of course! */ + +void +perfect_init () +{ + int asso_value_max; + int len; + + perfect.num_done = 1; + perfect.fewest_collisions = 0; + read_keys (); + if (OPTION_ENABLED (option, ORDER)) + reorder (); + asso_value_max = GET_ASSO_MAX (option); + len = keyword_list_length (); + asso_value_max = (asso_value_max ? asso_value_max * len : len); + SET_ASSO_MAX (option, POW (asso_value_max)); + + if (OPTION_ENABLED (option, RANDOM)) + { + int i; + + srandom (time (0)); + + for (i = 0; i < ALPHABET_SIZE; i++) + asso_values[i] = (random () & asso_value_max - 1); + } + else + { + int asso_value = INITIAL_VALUE (option); + if (asso_value) /* Initialize array if user requests non-zero default. */ + { + int i; + + for (i = ALPHABET_SIZE - 1; i >= 0; i--) + asso_values[i] = asso_value & GET_ASSO_MAX (option) - 1; + } + } + perfect.max_hash_value = max_key_length () + GET_ASSO_MAX (option) * + GET_CHARSET_SIZE (option); + + printf ("/* C code produced by gperf version %s */\n", version_string); + print_options (); + + if (OPTION_ENABLED (option, DEBUG)) + { + int i; + fprintf (stderr, "\nnumber of keys = %d\nmaximum associated value is %d\ +\nmaximum possible size of generated hash table is %d\n", + len, asso_value_max, perfect.max_hash_value); + } +} + +/* Merge two hash key multisets to form the ordered disjoint union of the sets. + (In a multiset, an element can occur multiple times). Precondition: both + set_1 and set_2 must be ordered. Returns the length of the combined set. */ + +static int +compute_disjoint_union (set_1, set_2, set_3) + char *set_1; + char *set_2; + char *set_3; +{ + char *base = set_3; + + while (*set_1 && *set_2) + if (*set_1 == *set_2) + set_1++, set_2++; + else + { + *set_3 = *set_1 < *set_2 ? *set_1++ : *set_2++; + if (set_3 == base || *set_3 != *(set_3-1)) set_3++; + } + + while (*set_1) + { + *set_3 = *set_1++; + if (set_3 == base || *set_3 != *(set_3-1)) set_3++; + } + + while (*set_2) + { + *set_3 = *set_2++; + if (set_3 == base || *set_3 != *(set_3-1)) set_3++; + } + *set_3 = '\0'; + return set_3 - base; +} + +/* Sort the UNION_SET in increasing frequency of occurrence. + This speeds up later processing since we may assume the resulting + set (Set_3, in this case), is ordered. Uses insertion sort, since + the UNION_SET is typically short. */ + +static void +sort_set (union_set, len) + char *union_set; + int len; +{ + int i, j; + + for (i = 0, j = len - 1; i < j; i++) + { + char curr, tmp; + + for (curr = i+1, tmp = union_set[curr]; + curr > 0 && occurrences[tmp] < occurrences[union_set[curr-1]]; + curr--) + union_set[curr] = union_set[curr - 1]; + + union_set[curr] = tmp; + } +} + +/* Generate a key set's hash value. */ + +static int +hash (key_node) + LIST_NODE *key_node; +{ + int sum = OPTION_ENABLED (option, NOLENGTH) ? 0 : key_node->length; + char *ptr; + + for (ptr = key_node->char_set; *ptr; ptr++) + sum += asso_values[*ptr]; + + return key_node->hash_value = sum; +} + +/* Find out how associated value changes affect successfully hashed items. + Returns FALSE if no other hash values are affected, else returns TRUE. + Note that because GET_ASSO_MAX (option) is a power of two we can guarantee + that all legal ASSO_VALUES are visited without repetition since + GET_JUMP (option) was forced to be an odd value! */ + +static bool +affects_prev (c, curr) + char c; + LIST_NODE *curr; +{ + int original_char = asso_values[c]; + int i = !OPTION_ENABLED (option, FAST) ? GET_ASSO_MAX (option) : + GET_ITERATIONS (option) == 0 ? key_list.list_len : GET_ITERATIONS (option); + + /* Try all asso_values. */ + + while (--i >= 0) + { + int collisions = 0; + LIST_NODE *ptr; + + asso_values[c] = asso_values[c] + (GET_JUMP (option) ? GET_JUMP (option) : random ()) + & GET_ASSO_MAX (option) - 1; + bool_array_reset (); + + /* See how this asso_value change affects previous keywords. If + it does better than before we'll take it! */ + + for (ptr = key_list.head; + !lookup (hash (ptr)) || ++collisions < perfect.fewest_collisions; + ptr = ptr->next) + if (ptr == curr) + { + perfect.fewest_collisions = collisions; + return FALSE; + } + } + + asso_values[c] = original_char; /* Restore original values, no more tries. */ + return TRUE; /* If we're this far it's time to try the next character.... */ +} + +/* Change a character value, try least-used characters first. */ + +static void +change (prior, curr) + LIST_NODE *prior; + LIST_NODE *curr; +{ + char *xmalloc (); + static char *union_set = 0; + char *temp; + LIST_NODE *ptr; + + if (!union_set) + union_set = xmalloc (2 * GET_CHARSET_SIZE (option) + 1); + + if (OPTION_ENABLED (option, DEBUG)) /* Very useful for debugging. */ + { + fprintf (stderr, "collision on keyword #%d, prior=\"%s\", curr=\"%s\", hash=%d\n", + perfect.num_done, prior->key, curr->key, curr->hash_value); + fflush (stderr); + } + sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set)); + + /* Try changing some values, if change doesn't alter other values continue normal action. */ + + perfect.fewest_collisions++; + + for (temp = union_set; *temp; temp++) + if (!affects_prev (*temp, curr)) + { + if (OPTION_ENABLED (option, DEBUG)) + { + fprintf (stderr, "- resolved by changing asso_value['%c'] (char #%d) to %d\n", + *temp, temp - union_set + 1, asso_values[*temp]); + fflush (stderr); + } + return; /* Good, doesn't affect previous hash values, we'll take it. */ + } + + for (ptr = key_list.head; ptr != curr; ptr = ptr->next) + hash (ptr); + + hash (curr); + + if (OPTION_ENABLED (option, DEBUG)) + { + fprintf (stderr, "** collision not resolved, %d duplicates remain, continuing...\n", + perfect.fewest_collisions); + fflush (stderr); + } +} + +/* Does the hard stuff.... + Initializes the Iteration Number boolean array, and then trys to find a + perfect function that will hash all the key words without getting any + duplications. This is made much easier since we aren't attempting + to generate *minimum* functions, only perfect ones. + If we can't generate a perfect function in one pass *and* the user + hasn't enabled the DUP option, we'll inform the user to try the + randomization option, use -D, or choose alternative key positions. + The alternatives (e.g., back-tracking) are too time-consuming, i.e, + exponential in the number of keys. */ + +int +perfect_generate () +{ + LIST_NODE *curr; + bool_array_init (perfect.max_hash_value); + + for (curr = key_list.head; curr; curr = curr->next) + { + LIST_NODE *ptr; + hash (curr); + + for (ptr = key_list.head; ptr != curr; ptr = ptr->next) + if (ptr->hash_value == curr->hash_value) + { + change (ptr, curr); + break; + } + perfect.num_done++; + } + + + /* Make one final check, just to make sure nothing weird happened.... */ + bool_array_reset (); + + for (curr = key_list.head; curr; curr = curr->next) + if (lookup (hash (curr))) + if (OPTION_ENABLED (option, DUP)) /* We'll try to deal with this later..... */ + break; + else /* Yow, big problems. we're outta here! */ + { + report_error ("\nInternal error, duplicate value %d:\n\ +try options -D or -r, or use new key positions.\n\n", + hash (curr)); + return 1; + } + + bool_array_destroy (); + + /* First sorts the key word list by hash value, and the outputs the + list to the proper ostream. The generated hash table code is only + output if the early stage of processing turned out O.K. */ + + sort (); + print_output (); + return 0; +} + +/* Prints out some diagnostics upon completion. */ + +void +perfect_destroy () +{ + if (OPTION_ENABLED (option, DEBUG)) + { + int i; + + fprintf (stderr, "\ndumping occurrence and associated values tables\n"); + + for (i = 0; i < ALPHABET_SIZE; i++) + if (occurrences[i]) + fprintf (stderr, "asso_values[%c] = %3d, occurrences[%c] = %3d\n", + i, asso_values[i], i, occurrences[i]); + + fprintf (stderr, "end table dumping\n"); + + } +} + diff --git a/contrib/gperf/src/perfect.h b/contrib/gperf/src/perfect.h new file mode 100644 index 000000000000..c5b9443413d5 --- /dev/null +++ b/contrib/gperf/src/perfect.h @@ -0,0 +1,45 @@ +/* Provides high-level routines to manipulate the keyword list + structures the code generation output. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _perfect_h +#define _perfect_h + +#include "prototype.h" +#include "keylist.h" +#include "boolarray.h" + +typedef struct perfect +{ + KEY_LIST list; /* List of key words provided by the user. */ + BOOL_ARRAY duplicate; /* Speeds up check for redundant hash values. */ + int max_hash_value; /* Maximum possible hash value. */ + int fewest_collisions; /* Records fewest # of collisions for asso value. */ + int num_done; /* Number of keywords processed without a collision. */ +} PERFECT; + +extern void perfect_init P ((void)); +extern void perfect_destroy P ((void)); +extern int perfect_generate P ((void)); +extern void perfect_print P ((void)); +#endif /* _perfect_h */ + + diff --git a/contrib/gperf/src/prototype.h b/contrib/gperf/src/prototype.h new file mode 100644 index 000000000000..a6077b65c206 --- /dev/null +++ b/contrib/gperf/src/prototype.h @@ -0,0 +1,15 @@ +#ifndef _prototype_h +#define _prototype_h +#ifdef __STDC__ +#define P(X) X +#else +#define P(X) () +#endif + +typedef char bool; +#define FALSE 0 +#define TRUE 1 + +#define ODD(X) ((X) & 1) +#define EVEN(X) (!((X) & 1)) +#endif /* _prototype_h */ diff --git a/contrib/gperf/src/readline.c b/contrib/gperf/src/readline.c new file mode 100644 index 000000000000..19ac5e56366c --- /dev/null +++ b/contrib/gperf/src/readline.c @@ -0,0 +1,87 @@ +/* Correctly reads an arbitrarily size string. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include "readline.h" + +/* Size of each chunk. */ +#define CHUNK_SIZE BUFSIZ + +/* Recursively fills up the buffer. */ + +static char * +readln_aux (chunks) + int chunks; +{ + char *buffered_malloc (); + char buf[CHUNK_SIZE]; + register char *bufptr = buf; + register char *ptr; + int c; + + while ((c = getchar ()) != EOF && c != '\n') /* fill the current buffer */ + { + *bufptr++ = c; + if (bufptr - buf >= CHUNK_SIZE) /* prepend remainder to ptr buffer */ + { + if (ptr = readln_aux (chunks + 1)) + + for (; bufptr != buf; *--ptr = *--bufptr); + + return ptr; + } + } + + if (c == EOF && bufptr == buf) + return NULL; + + c = (chunks * CHUNK_SIZE + bufptr - buf) + 1; + + if (ptr = buffered_malloc (c)) + { + + for (*(ptr += (c - 1)) = '\0'; bufptr != buf; *--ptr = *--bufptr) + ; + + return ptr; + } + else + return NULL; +} + +/* Returns the ``next'' line, ignoring comments beginning with '#'. */ + +char *read_line () +{ + int c; + if ((c = getchar ()) == '#') + { + while ((c = getchar ()) != '\n' && c != EOF) + ; + + return c != EOF ? read_line () : NULL; + } + else + { + ungetc (c, stdin); + return readln_aux (0); + } +} diff --git a/contrib/gperf/src/readline.h b/contrib/gperf/src/readline.h new file mode 100644 index 000000000000..13164d902bb9 --- /dev/null +++ b/contrib/gperf/src/readline.h @@ -0,0 +1,31 @@ +/* Reads arbitrarily long string from input file, returning it as a dynamic buffer. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Returns a pointer to an arbitrary length string. Returns NULL on error or EOF + The storage for the string is dynamically allocated by new. */ + +#ifndef _readline_h +#define _readline_h +#include "prototype.h" + +extern char *read_line P ((void)); +#endif /* _readline_h */ + diff --git a/contrib/gperf/src/stderr.c b/contrib/gperf/src/stderr.c new file mode 100644 index 000000000000..3f00dd50eebb --- /dev/null +++ b/contrib/gperf/src/stderr.c @@ -0,0 +1,90 @@ +/* Provides a useful variable-length argument error handling abstraction. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include "stderr.h" + +/* Holds the name of the currently active program. */ +static char *program_name; + +/* Sets name of program. */ + +void +set_program_name (prog_name) + char *prog_name; +{ + program_name = prog_name; +} + +/* Valid Options (prefixed by '%', as in printf format strings) include: + 'a': exit the program at this point + 'c': print a character + 'd': print a decimal number + 'e': call the function pointed to by the corresponding argument + 'f','g': print a double + 'n': print the name of the program (NULL if not set in constructor or elsewhere) + 'p': print out the appropriate errno value from sys_errlist + 's': print out a character string + '%': print out a single percent sign, '%' */ + +void +report_error (va_alist) + va_dcl +{ + extern int errno, sys_nerr; + extern char *sys_errlist[]; + typedef void (*PTF)(); + typedef char *CHARP; + va_list argp; + int abort = 0; + char *format; + + va_start (argp); + + for (format = va_arg (argp, char *); *format; format++) + { + if (*format != '%') + putc (*format, stderr); + else + { + switch(*++format) + { + case '%' : putc ('%', stderr); break; + case 'a' : abort = 1; break; + case 'c' : putc (va_arg (argp, int), stderr); break; + case 'd' : fprintf (stderr, "%d", va_arg (argp, int)); break; + case 'e' : (*va_arg (argp, PTF))(); break; + case 'f' : fprintf (stderr, "%g", va_arg (argp, double)); break; + case 'n' : fputs (program_name ? program_name : "error", stderr); break; + case 'p' : + if (errno < sys_nerr) + fprintf (stderr, "%s: %s", va_arg (argp, CHARP), sys_errlist[errno]); + else + fprintf (stderr, " %d", errno); + break; + case 's' : fputs (va_arg (argp, CHARP), stderr); break; + } + } + if (abort) + exit (1); + } + va_end (argp); +} diff --git a/contrib/gperf/src/stderr.h b/contrib/gperf/src/stderr.h new file mode 100644 index 000000000000..a94255e4d004 --- /dev/null +++ b/contrib/gperf/src/stderr.h @@ -0,0 +1,29 @@ +/* Provides a useful variable-length argument error handling abstraction. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _stderr_h +#define _stderr_h +#include "prototype.h" +#include + +extern void set_program_name P ((char *prog_name)); +extern void report_error (); +#endif /* _stderr_h */ diff --git a/contrib/gperf/src/version.c b/contrib/gperf/src/version.c new file mode 100644 index 000000000000..7fa142cdcc90 --- /dev/null +++ b/contrib/gperf/src/version.c @@ -0,0 +1,22 @@ +/* Current program version number. + + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +char *version_string = "2.1 (K&R C version)"; diff --git a/contrib/gperf/src/xmalloc.c b/contrib/gperf/src/xmalloc.c new file mode 100644 index 000000000000..09cc0227564c --- /dev/null +++ b/contrib/gperf/src/xmalloc.c @@ -0,0 +1,78 @@ +/* Provide a useful malloc sanity checker and an efficient buffered memory + allocator that reduces calls to malloc. + Copyright (C) 1989 Free Software Foundation, Inc. + written by Douglas C. Schmidt (schmidt@ics.uci.edu) + +This file is part of GNU GPERF. + +GNU GPERF is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 1, or (at your option) +any later version. + +GNU GPERF is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU GPERF; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include + +/* Grabs SIZE bytes of dynamic memory or dies trying! */ + +char * +xmalloc (size) + int size; +{ + char *malloc (); + char *temp = malloc (size); + + if (temp == 0) + { + fprintf (stderr, "out of virtual memory\n"); + exit (1); + } + return temp; +} + +/* Determine default alignment. If your C compiler does not + like this then try something like #define DEFAULT_ALIGNMENT 8. */ +struct fooalign {char x; double d;}; +#define ALIGNMENT ((char *)&((struct fooalign *) 0)->d - (char *)0) + +/* Provide an abstraction that cuts down on the number of + calls to MALLOC by buffering the memory pool from which + items are allocated. */ + +char * +buffered_malloc (size) + int size; +{ + char *temp; + static char *buf_start = 0; /* Large array used to reduce calls to NEW. */ + static char *buf_end = 0; /* Indicates end of BUF_START. */ + static int buf_size = 4 * BUFSIZ; /* Size of buffer pointed to by BUF_START. */ + + /* Align this on correct boundaries, just to be safe... */ + size = ((size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT; + + /* If we are about to overflow our buffer we'll just grab another + chunk of memory. Since we never free the original memory it + doesn't matter that no one points to the beginning of that + chunk. Furthermore, as a heuristic, we double the + size of the new buffer! */ + + if (buf_start + size >= buf_end) + { + buf_size = buf_size * 2 > size ? buf_size * 2 : size; + buf_start = xmalloc (buf_size); + buf_end = buf_start + buf_size; + } + + temp = buf_start; + buf_start += size; + return temp; +} diff --git a/contrib/gperf/tests/Makefile b/contrib/gperf/tests/Makefile new file mode 100644 index 000000000000..b7796d5e2f4b --- /dev/null +++ b/contrib/gperf/tests/Makefile @@ -0,0 +1,65 @@ +# Copyright (C) 1989 Free Software Foundation, Inc. +# written by Douglas C. Schmidt (schmidt@ics.uci.edu) +# +# This file is part of GNU GPERF. +# +# GNU GPERF is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 1, or (at your option) +# any later version. +# +# GNU GPERF is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU GPERF; see the file COPYING. If not, write to +# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +GPERF = gperf +CC = gcc + +all: test + +test: + @echo "performing some tests of the perfect hash generator" + $(CC) -c -O test.c + $(GPERF) -p -c -l -S1 -C -o c.gperf > cinset.c + $(CC) -O -o cout cinset.c test.o + @echo "testing ANSI C reserved words, all items should be found in the set" + ./cout -v < c.gperf + $(GPERF) -k1,4,'$$' ada.gperf > adainset.c +# double '$$' is only there since make gets confused; program wants only 1 '$' + $(CC) -O -o aout adainset.c test.o + @echo "testing Ada reserved words,all items should be found in the set" + ./aout -v < ada.gperf + $(GPERF) -p -D -S1 -k1,'$$' -s 2 -o adapredefined.gperf > preinset.c + $(CC) -O -o preout preinset.c test.o + @echo "testing Ada predefined words, all items should be found in the set" + ./preout -v < adapredefined.gperf + $(GPERF) -k1,2,'$$' -o modula3.gperf > m3inset.c + $(CC) -O -o m3out m3inset.c test.o + @echo "testing Modula3 reserved words, all items should be found in the set" + ./m3out -v < modula3.gperf + $(GPERF) -o -S1 -p < pascal.gperf > pinset.c + $(CC) -O -o pout pinset.c test.o + @echo "testing Pascal reserved words, all items should be found in the set" + ./pout -v < pascal.gperf + $(GPERF) -o -S2 -j1 -D -p -t < c++.gperf > c++inset.c + $(CC) -O -o c++out c++inset.c test.o + @echo "testing C++ reserved words, all items should be found in the set" + tail -47 c++.gperf | ./c++out -v +# these next 5 are demos that show off the generated code + $(GPERF) -p -j1 -g -o -t -N is_reserved_word -k1,3,'$$' c-parse.gperf + $(GPERF) -n -k1-8 -l modula2.gperf + $(GPERF) -p -j 1 -o -a -g -t -k1,4,$$ gplus.gperf + $(GPERF) -D -p -t < c-parse.gperf + $(GPERF) -g -o -j1 -t -p -N is_reserved_word gpc.gperf +# prints out the help message + -$(GPERF) -h + @echo "only if, do, for, case, goto, else, while, and return should be found " + ./aout -v < c.gperf + +clean: + -rm -f *.o core *~ *inset.c *out #*# diff --git a/contrib/gperf/tests/ada.gperf b/contrib/gperf/tests/ada.gperf new file mode 100644 index 000000000000..332bdc740ad7 --- /dev/null +++ b/contrib/gperf/tests/ada.gperf @@ -0,0 +1,63 @@ +else +exit +terminate +type +raise +range +reverse +declare +end +record +exception +not +then +return +separate +select +digits +renames +subtype +elsif +function +for +package +procedure +private +while +when +new +entry +delay +case +constant +at +abort +accept +and +delta +access +abs +pragma +array +use +out +do +others +of +or +all +limited +loop +null +task +in +is +if +rem +mod +begin +body +xor +goto +generic +with diff --git a/contrib/gperf/tests/adapredefined.gperf b/contrib/gperf/tests/adapredefined.gperf new file mode 100644 index 000000000000..875be69abc97 --- /dev/null +++ b/contrib/gperf/tests/adapredefined.gperf @@ -0,0 +1,54 @@ +boolean +character +constraint_error +false +float +integer +natural +numeric_error +positive +program_error +storage_error +string +tasking_error +true +address +aft +base +callable +constrained +count +delta +digits +emax +epsilon +first +firstbit +fore +image +large +last +lastbit +length +machine_emax +machine_emin +machine_mantissa +machine_overflows +machine_radix +machine_rounds +mantissa +pos +position +pred +range +safe_emax +safe_large +safe_small +size +small +storage_size +succ +terminated +val +value +width diff --git a/contrib/gperf/tests/c++.gperf b/contrib/gperf/tests/c++.gperf new file mode 100644 index 000000000000..d09b13bf223a --- /dev/null +++ b/contrib/gperf/tests/c++.gperf @@ -0,0 +1,49 @@ +struct resword {char *name;}; +%% +asm +auto +break +case +catch +char +class +const +continue +default +delete +do +double +else +enum +extern +float +for +friend +goto +if +inline +int +long +new +operator +overload +private +protected +public +register +return +short +signed +sizeof +static +struct +switch +template +this +typedef +union +unsigned +virtual +void +volatile +while diff --git a/contrib/gperf/tests/c-parse.gperf b/contrib/gperf/tests/c-parse.gperf new file mode 100644 index 000000000000..f886f586e91d --- /dev/null +++ b/contrib/gperf/tests/c-parse.gperf @@ -0,0 +1,56 @@ +%{ +/* Command-line: gperf -p -j1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */ +%} +struct resword { char *name; short token; enum rid rid; }; +%% +__alignof, ALIGNOF, NORID +__alignof__, ALIGNOF, NORID +__asm, ASM, NORID +__asm__, ASM, NORID +__attribute, ATTRIBUTE, NORID +__attribute__, ATTRIBUTE, NORID +__const, TYPE_QUAL, RID_CONST +__const__, TYPE_QUAL, RID_CONST +__inline, SCSPEC, RID_INLINE +__inline__, SCSPEC, RID_INLINE +__signed, TYPESPEC, RID_SIGNED +__signed__, TYPESPEC, RID_SIGNED +__typeof, TYPEOF, NORID +__typeof__, TYPEOF, NORID +__volatile, TYPE_QUAL, RID_VOLATILE +__volatile__, TYPE_QUAL, RID_VOLATILE +asm, ASM, NORID +auto, SCSPEC, RID_AUTO +break, BREAK, NORID +case, CASE, NORID +char, TYPESPEC, RID_CHAR +const, TYPE_QUAL, RID_CONST +continue, CONTINUE, NORID +default, DEFAULT, NORID +do, DO, NORID +double, TYPESPEC, RID_DOUBLE +else, ELSE, NORID +enum, ENUM, NORID +extern, SCSPEC, RID_EXTERN +float, TYPESPEC, RID_FLOAT +for, FOR, NORID +goto, GOTO, NORID +if, IF, NORID +inline, SCSPEC, RID_INLINE +int, TYPESPEC, RID_INT +long, TYPESPEC, RID_LONG +register, SCSPEC, RID_REGISTER +return, RETURN, NORID +short, TYPESPEC, RID_SHORT +signed, TYPESPEC, RID_SIGNED +sizeof, SIZEOF, NORID +static, SCSPEC, RID_STATIC +struct, STRUCT, NORID +switch, SWITCH, NORID +typedef, SCSPEC, RID_TYPEDEF +typeof, TYPEOF, NORID +union, UNION, NORID +unsigned, TYPESPEC, RID_UNSIGNED +void, TYPESPEC, RID_VOID +volatile, TYPE_QUAL, RID_VOLATILE +while, WHILE, NORID diff --git a/contrib/gperf/tests/c.gperf b/contrib/gperf/tests/c.gperf new file mode 100644 index 000000000000..8672d6c25ed3 --- /dev/null +++ b/contrib/gperf/tests/c.gperf @@ -0,0 +1,32 @@ +if +do +int +for +case +char +auto +goto +else +long +void +enum +float +short +union +break +while +const +double +static +extern +struct +return +sizeof +switch +signed +typedef +default +unsigned +continue +register +volatile diff --git a/contrib/gperf/tests/gpc.gperf b/contrib/gperf/tests/gpc.gperf new file mode 100644 index 000000000000..8fb469e46bc6 --- /dev/null +++ b/contrib/gperf/tests/gpc.gperf @@ -0,0 +1,48 @@ +%{ +/* ISO Pascal 7185 reserved words. + * + * For GNU Pascal compiler (GPC) by jtv@hut.fi + * + * run this through the Doug Schmidt's gperf program + * with command + * gperf -g -o -j1 -t -p -N is_reserved_word + * + */ +%} +struct resword { char *name; short token; short iclass;}; +%% +And, AND, PASCAL_ISO +Array, ARRAY, PASCAL_ISO +Begin, BEGIN_, PASCAL_ISO +Case, CASE, PASCAL_ISO +Const, CONST, PASCAL_ISO +Div, DIV, PASCAL_ISO +Do, DO, PASCAL_ISO +Downto, DOWNTO, PASCAL_ISO +Else, ELSE, PASCAL_ISO +End, END, PASCAL_ISO +File, FILE_, PASCAL_ISO +For, FOR, PASCAL_ISO +Function, FUNCTION, PASCAL_ISO +Goto, GOTO, PASCAL_ISO +If, IF, PASCAL_ISO +In, IN, PASCAL_ISO +Label, LABEL, PASCAL_ISO +Mod, MOD, PASCAL_ISO +Nil, NIL, PASCAL_ISO +Not, NOT, PASCAL_ISO +Of, OF, PASCAL_ISO +Or, OR, PASCAL_ISO +Packed, PACKED, PASCAL_ISO +Procedure, PROCEDURE, PASCAL_ISO +Program,PROGRAM,PASCAL_ISO +Record, RECORD, PASCAL_ISO +Repeat, REPEAT, PASCAL_ISO +Set, SET, PASCAL_ISO +Then, THEN, PASCAL_ISO +To, TO, PASCAL_ISO +Type, TYPE, PASCAL_ISO +Until, UNTIL, PASCAL_ISO +Var, VAR, PASCAL_ISO +While, WHILE, PASCAL_ISO +With, WITH, PASCAL_ISO diff --git a/contrib/gperf/tests/gplus.gperf b/contrib/gperf/tests/gplus.gperf new file mode 100644 index 000000000000..4a93315be52f --- /dev/null +++ b/contrib/gperf/tests/gplus.gperf @@ -0,0 +1,76 @@ +%{ +/* Command-line: gperf -p -j1 -g -o -t -N is_reserved_word -k1,4,$ gplus.gperf */ +%} +struct resword { char *name; short token; enum rid rid;}; +%% +__alignof, ALIGNOF, NORID +__alignof__, ALIGNOF, NORID +__asm, ASM, NORID +__asm__, ASM, NORID +__attribute, ATTRIBUTE, NORID +__attribute__, ATTRIBUTE, NORID +__const, TYPE_QUAL, RID_CONST +__const__, TYPE_QUAL, RID_CONST +__inline, SCSPEC, RID_INLINE +__inline__, SCSPEC, RID_INLINE +__signed, TYPESPEC, RID_SIGNED +__signed__, TYPESPEC, RID_SIGNED +__typeof, TYPEOF, NORID +__typeof__, TYPEOF, NORID +__volatile, TYPE_QUAL, RID_VOLATILE +__volatile__, TYPE_QUAL, RID_VOLATILE +all, ALL, NORID /* Extension */, +except, EXCEPT, NORID /* Extension */, +exception, AGGR, RID_EXCEPTION /* Extension */, +raise, RAISE, NORID /* Extension */, +raises, RAISES, NORID /* Extension */, +reraise, RERAISE, NORID /* Extension */, +try, TRY, NORID /* Extension */, +asm, ASM, NORID, +auto, SCSPEC, RID_AUTO, +break, BREAK, NORID, +case, CASE, NORID, +catch, CATCH, NORID, +char, TYPESPEC, RID_CHAR, +class, AGGR, RID_CLASS, +const, TYPE_QUAL, RID_CONST, +continue, CONTINUE, NORID, +default, DEFAULT, NORID, +delete, DELETE, NORID, +do, DO, NORID, +double, TYPESPEC, RID_DOUBLE, +dynamic, DYNAMIC, NORID, +else, ELSE, NORID, +enum, ENUM, NORID, +extern, SCSPEC, RID_EXTERN, +float, TYPESPEC, RID_FLOAT, +for, FOR, NORID, +friend, SCSPEC, RID_FRIEND, +goto, GOTO, NORID, +if, IF, NORID, +inline, SCSPEC, RID_INLINE, +int, TYPESPEC, RID_INT, +long, TYPESPEC, RID_LONG, +new, NEW, NORID, +operator, OPERATOR, NORID, +overload, OVERLOAD, NORID, +private, PRIVATE, NORID, +protected, PROTECTED, NORID, +public, PUBLIC, NORID, +register, SCSPEC, RID_REGISTER, +return, RETURN, NORID, +short, TYPESPEC, RID_SHORT, +signed, TYPESPEC, RID_SIGNED, +sizeof, SIZEOF, NORID, +static, SCSPEC, RID_STATIC, +struct, AGGR, RID_RECORD, +switch, SWITCH, NORID, +this, THIS, NORID, +typedef, SCSPEC, RID_TYPEDEF, +typeof, TYPEOF, NORID, +union, AGGR, RID_UNION, +unsigned, TYPESPEC, RID_UNSIGNED, +virtual, SCSPEC, RID_VIRTUAL, +void, TYPESPEC, RID_VOID, +volatile, TYPE_QUAL, RID_VOLATILE, +while, WHILE, NORID, diff --git a/contrib/gperf/tests/modula2.gperf b/contrib/gperf/tests/modula2.gperf new file mode 100644 index 000000000000..5ef9c7538354 --- /dev/null +++ b/contrib/gperf/tests/modula2.gperf @@ -0,0 +1,40 @@ +AND +ARRAY +BEGIN +BY +CASE +CONST +DEFINITION +DIV +DO +ELSE +ELSIF +END +EXIT +EXPORT +FOR +FROM +IF +IMPLEMENTATION +IMPORT +IN +LOOP +MOD +MODULE +NOT +OF +OR +POINTER +PROCEDURE +QUALIFIED +RECORD +REPEAT +RETURN +SET +THEN +TO +TYPE +UNTIL +VAR +WHILE +WITH diff --git a/contrib/gperf/tests/modula3.gperf b/contrib/gperf/tests/modula3.gperf new file mode 100644 index 000000000000..d0243460d9ba --- /dev/null +++ b/contrib/gperf/tests/modula3.gperf @@ -0,0 +1,106 @@ +AND +ARRAY +BEGIN +BITS +BY +CASE +CONST +DIV +DO +ELSE +ELSIF +END +EVAL +EXCEPT +EXCEPTION +EXIT +EXPORTS +FINALLY +FOR +FROM +IF +IMPORT +INTERFACE +IN +INLINE +LOCK +METHODS +MOD +MODULE +NOT +OBJECT +OF +OR +PROCEDURE +RAISES +READONLY +RECORD +REF +REPEAT +RETURN +SET +THEN +TO +TRY +TYPE +TYPECASE +UNSAFE +UNTIL +UNTRACED +VALUE +VAR +WHILE +WITH +and +array +begin +bits +by +case +const +div +do +else +elsif +end +eval +except +exception +exit +exports +finally +for +from +if +import +interface +in +inline +lock +methods +mod +module +not +object +of +or +procedure +raises +readonly +record +ref +repeat +return +set +then +to +try +type +typecase +unsafe +until +untraced +value +var +while +with diff --git a/contrib/gperf/tests/pascal.gperf b/contrib/gperf/tests/pascal.gperf new file mode 100644 index 000000000000..fed3fbb30eac --- /dev/null +++ b/contrib/gperf/tests/pascal.gperf @@ -0,0 +1,36 @@ +with +array +and +function +case +var +const +until +then +set +record +program +procedure +or +packed +not +nil +label +in +repeat +of +goto +forward +for +while +file +else +downto +do +div +to +type +end +mod +begin +if diff --git a/contrib/gperf/tests/test.c b/contrib/gperf/tests/test.c new file mode 100644 index 000000000000..528e2fa4c141 --- /dev/null +++ b/contrib/gperf/tests/test.c @@ -0,0 +1,32 @@ +/* + Tests the generated perfect has function. + The -v option prints diagnostics as to whether a word is in + the set or not. Without -v the program is useful for timing. +*/ + +#include + +#define MAX_LEN 80 + +#ifdef __STDC__ +int in_word_set (char *, unsigned int); +int +main (int argc, char *argv[]) +#else +int +main (argc, argv) + int argc; + char *argv[]; +#endif +{ + int verbose = argc > 1 ? 1 : 0; + char buf[MAX_LEN]; + + while (gets (buf)) + if (in_word_set (buf, strlen (buf)) && verbose) + printf ("in word set %s\n", buf); + else if (verbose) + printf ("NOT in word set %s\n", buf); + + return 0; +}