perm filename SYMSER.SAI[S,AIL] blob sn#229790 filedate 1976-08-10 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00003 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00002 00002	
C00007 00003	
C00009 ENDMK
C⊗;
COMMENT 
 SYMSER.SAI package -- LOOKUP and ENTER procedures for hashed
symbol tables -- STRINGS -- uses linear quotient hash conflict resolution.

REQUIRED -- 
 1.  DEFINE SYMNO="1 less than some prime number big
	enough to hold all entries".
 2.  DEFINE POSSIBLY_SAFE = "", to override SAFENESS of the
    	arrays you get.  Otherwise, it's OK to leave this out.
 2.  REQUIRE "SYMSER.SAI[1,DCS]" SOURCE_FILE in outer block
     	declaration code.

WHAT YOU GET ---
 1.  An array, SYM[0:SYMNO-1], to hold the (STRING) symbols
     you enter.

 2.  A parallel array, NUMBER, to hold the (INTEGER) values which
     get associated with each string, during ENTERSYM.  If you want
     more complex symbol entries, use the NUMBER array to hold some
     sort of descriptors t the more complex entries.

 3.  An integer variable, SYMBOL, which LOOKSYM (below) will set 
     to the index of the found string, etc.

 4.  An integer variable, ERRFLAG, set to TRUE if errors occur in ENTERSYM.

 5.  A Procedure, FLAG←LOOKSYM("A") which returns:
    TRUE if the string is already present in the SYM table, whence:
	SYMBOL is the index of the found string/value in the arrays.
	The form of TRUE returned is: XWD -1,symbol index.
    FALSE if the symbol is not found, whence:
	SYMBOL is -1 (table full), or is the index in the table
	  which should be used to enter the string (see below).

 6.  A Procedure, ENTERSYM("SYM",VAL).
     This should be called just after a LOOKSYM, called with the
      same string.  ENTERSYM will use the value of SYMBOL produced by
      LOOKSYM, so this is important (more efficient than doing it over).
     Entersym checks for symbol full or duplicate symbol -- if either
      error occurs, it types a message and sets ERRFLAG TRUE.
     Entersym puts SYM and VAL into SYM/NUMBER arrays at SYMBOL index.

 7.  A Procedure, SETSYM, which initializes the table.  The indices
      returned by LOOKSYM will range from 1 to SYMNO-1 -- 0 is not
      used, for a reason which I do not remember.

  Average symbol table lookup requires about two probes into the symbol
  table, for tables which are kept less than about 80% full.  More
  dense tables will not degrade this figure too much.
;

REQUIRE "" DELIMITERS; COMMENT IN CASE OTHERS HAVE BEEN SPECIFIED;
INTEGER SYMBOL,ERRFLAG;

IFC DECLARATION(POSSIBLY_SAFE)=0 THENC
     DEFINE POSSIBLY_SAFE = "SAFE"; ENDC

POSSIBLY_SAFE STRING ARRAY SYM[-1:SYMNO];
POSSIBLY_SAFE INTEGER ARRAY NUMBER[-1:SYMNO];

DEFINE NULSTR(S)="LENGTH(S)=0",PRINT="OUTSTR(",MSG="&'15&'12)";

PROCEDURE SETSYM;
BEGIN
 INTEGER I;
 FOR I←-1 STEP 1 UNTIL SYMNO DO SYM[I]←NULL;
 SYM[0]←"              ";
 ERRFLAG←FALSE
END "SETSYM";

INTEGER PROCEDURE LOOKSYM(STRING A);
BEGIN "LOOKSYM"
 INTEGER H,Q,R;

 H←CVASC(A) +LENGTH(A) LSH 6;

Comment Linear Quotient Hash Conflict Resolution method, see
        CACM 13,11 (1970), page 675;

 R←SYMBOL←(H←ABS(H⊗(H LSH 2))) MOD (SYMNO+1);
 IF EQU(SYM[SYMBOL],A) THEN RETURN((-1 LSH 18)+SYMBOL);
 IF NULSTR(SYM[SYMBOL]) THEN  RETURN(0); 

 Q←H%(SYMNO+1) MOD (SYMNO+1);
 FOR H←1 STEP 1 UNTIL SYMNO DO BEGIN "LK1"
     IF (SYMBOL←SYMBOL+H)>SYMNO THEN SYMBOL←SYMBOL-(SYMNO+1);
     IF EQU(SYM[SYMBOL],A) THEN RETURN((-1 LSH 18)+SYMBOL);
     IF NULSTR(SYM[SYMBOL]) THEN RETURN(0);
 END "LK1";
 SYMBOL←-1; RETURN(0);
END "LOOKSYM";


COMMENT ROUTINE TO ENTER A SYMBOL IN THE SYMBOL TABLE.
	IT ENTERS THE PREVIOUS WORD SCANNED BY GETWORD.
	"SYMBOL" IS THE POINTER INTO THE ARRAY WHERE THE
	SYMBOL IS STORED.;

PROCEDURE ENTERSYM(STRING WORD; INTEGER VAL);
BEGIN "ENTERSYM" 
	IF LENGTH(SYM[SYMBOL])∨SYMBOL<0 THEN
	BEGIN
	  ERRFLAG←1;
	  IF SYMBOL≥0 THEN PRINT "DUPLICATE SYMBOL "&WORD MSG
		ELSE PRINT "SYMBOL TABLE FULL" MSG
	END;
	SYM[SYMBOL]←WORD;
	NUMBER[SYMBOL]←VAL;
END "ENTERSYM";
REQUIRE UNSTACK_DELIMITERS; COMMENT REVERT;