The following is code to generate and use a directed acyclic word graph (DAWG).

A DAWG is a data structure for compressing a list of words into a form amenable to quick searching. Processing a 90000 word Scrabble dictionary obtains a 240KB DAWG file in a handful of seconds. The DAWG is suitable for use in a spell-checker, anagram finder, or word game. Its use for instantly enumerating every legal play in a game of Scrabble is described in "The World's Fastest Scrabble Program", Journal of the ACM, May 1988.

The DAWG is a cell array of 32-bit "nodes". The first cell contains the index of the root "block", which happens to be the final block in the array. A node contains a 5-bit letter, an end-of-word flag, an end-of-block flag, and the 24-bit array index of a block. A node represents an edge of the word graph labeled with a letter. It also represents a subgraph, a set of suffixes beginning with that letter. A block is a contiguous sequence of nodes representing all the possible next letters, or suffixes, of a prefix.

This page contains some base words and the DAWG generator.

BoggleGame contains sample code for using the DAWG, including a Boggle game solver. It uses a sorted string array with a binary search. AnagramFinder also makes use of the DAWG.

IanOsgood

\ DAWG.F - Directed Acyclic Word Graph
\
\ By Ian Osgood  iano@quirkster.com

\
\ TRIE/DAWG node structure definition
\
1 30 LSHIFT    CONSTANT EOB_MASK
1 29 LSHIFT    CONSTANT EOW_MASK
1 24 LSHIFT 1- CONSTANT INDEX_MASK

: Let ( node -- 1-26) 24 RSHIFT 31 AND ;
: EOW ( node -- nz )  EOW_MASK AND ;
: EOB ( node -- nz )  EOB_MASK AND ;
: Ind ( node -- index ) INDEX_MASK AND ;

: InitLet ( 1-26 -- node ) 24 LSHIFT ;

: let>c ( 1-26 -- a-z ) [CHAR] a + 1- ;
: c>let ( a-z -- 1-26 ) [CHAR] a - 1+ ;
: ?c>let ( c -- 0-26 )
  DUP [CHAR] a [CHAR] z 1+ WITHIN if c>let ELSE DROP 0 THEN ;

\
\ DAWG usage utilities  (start session with "load-dawg dawg.out")
\

VARIABLE dawg

: read-trie ( fname count -- trie^ code )
  R/O OPEN-FILE            ?DUP IF EXIT THEN ( file )
  DUP FILE-SIZE            ?DUP IF EXIT THEN ( file udsize)
  D>S DUP ALLOCATE         ?DUP IF EXIT THEN ( file size mem^ )
  DUP 2OVER SWAP READ-FILE ?DUP IF EXIT THEN ( file size mem read )
  ROT <>                   ?DUP IF EXIT THEN ( file mem )
  SWAP CLOSE-FILE ;

: load-dawg  BL PARSE read-trie ABORT" Can't load dawg!" dawg ! ;
: unload-dawg  dawg @ FREE DROP ;

: dawg-root ( -- root-block ) dawg @ DUP @ CELLS + ;
: dawg@i ( index -- block ) CELLS dawg @ + ;

: CELL-   1 CELLS - ;
: letter-in-block ( letter block-addr -- node-addr | 0 )
  CELL-
  BEGIN CELL+ 2DUP @ Let - ?DUP 0= IF NIP EXIT THEN
        0< OVER @ EOB OR
  UNTIL
  2DROP 0 ;

This is the DAWG generator. It uses a hash table.

\ MAKEDAWG.F
\
\  make-dawg - converts the file "words.txt" of lowercase words,
\              sorted alphabetically one per line, into "dawg.out"
include dawg.f

\ utility
: CELL/   2 RSHIFT ;

16 CONSTANT max-word-len   \ max len in a Scrabble dictionary is 15
VARIABLE word-len
CREATE next-word max-word-len CHARS ALLOT
VARIABLE prefix-len
CREATE prefix max-word-len CHARS ALLOT

: prefix-len+  prefix-len @ CHARS + ;

: next-word-has-prefix? ( -- nz )
  next-word prefix-len @ prefix OVER COMPARE 0= word-len @ AND ;

\
\ DAWG builder
\

VARIABLE words-file
VARIABLE dawg-file
VARIABLE cur-dawg-index

: get-next-word
  next-word max-word-len CHARS words-file @ READ-LINE 2DROP word-len ! ;

: write-to-dawg ( block size -- )
  dawg-file @ WRITE-FILE ABORT" Can't write to dawg!" ;

\
\ Hash Table for blocks
\
2311 CONSTANT hash-size
VARIABLE htab

: create-hash-table
  hash-size CELLS ALLOCATE ABORT" Hash table too big!"
  DUP hash-size CELLS ERASE htab ! ;

: htab@i ( hash-index -- head-entry-addr ) CELLS htab @ + ;
: ->next ; IMMEDIATE
: ->index CELL+ ;
: ->block CELL+ CELL+ ;

: destroy-hash-table
  htab @ hash-size 0
  DO   DUP @
       BEGIN  ?DUP 
       WHILE  DUP ->next @  SWAP FREE DROP
       REPEAT CELL+
  LOOP DROP htab @ FREE DROP ;

\ 0 for a trie, >5 for a dawg (measured no dups above size 5)
6 CELLS CONSTANT Block-size-hash-threshold

: hash-block ( block -- hash )
  0 >R CELL-
  BEGIN CELL+ DUP @ DUP R> XOR >R EOB UNTIL
  DROP R> hash-size MOD ;

: blocks-equivalent? ( block1 block2 -- TF )
  BEGIN  OVER @ OVER @ <> IF 2DROP FALSE EXIT THEN
         DUP @ EOB 0=
  WHILE  CELL+ SWAP CELL+
  REPEAT 2DROP TRUE ;

: find-hash-block ( block -- index | 0 )
  DUP hash-block htab@i    ( block hash-block-addr )
  BEGIN @ DUP
  WHILE 2DUP ->block blocks-equivalent?
        IF ->index @ NIP EXIT
        THEN ->next
  REPEAT NIP ( 0 ) ;

: add-hash-block ( size block -- )
  OVER ->block ALLOCATE ABORT" Can't allocate hash entry!" 
  OVER hash-block htab@i           ( size block h head-addr )
  2DUP @ OVER ->next ! SWAP !      \ replace the head <- h
  cur-dawg-index @ OVER ->index !
  ->block ROT MOVE ;

\ Core DAWG building algorithm

: index-for-block ( size block -- index )
  OVER Block-size-hash-threshold <
  IF   DUP find-hash-block ?DUP IF NIP NIP EXIT THEN
       2DUP add-hash-block
  THEN
  OVER write-to-dawg  ( size )
  CELL/ cur-dawg-index @ TUCK + cur-dawg-index ! ;

: append-next-letter-to-prefix ( -- a-z )
  next-word prefix-len+ C@
  prefix prefix-len+ 2DUP C@ <=
  IF 2DROP ABORT" Words out of order!" THEN
  2DUP C!  0 SWAP CHAR+ C!
  1 prefix-len +! ;

: init-node-with-letter ( node a-z -- node )
  c>let InitLet OVER !
  word-len @ prefix-len @ =
  IF   EOW_MASK OVER +!
       get-next-word
  THEN ;

: remove-letter-from-prefix   -1 prefix-len +! ;

: finish-block ( prefix-node last-node -- prefix-node )
  2DUP = IF DROP EXIT THEN
  EOB_MASK OVER +!
  OVER - OVER CELL+ 	( prefix size block )
  index-for-block OVER +! ;

: suffixes ( prefix-node-addr -- prefix-node-addr )
  DUP 	   ( prefix current )
  BEGIN  next-word-has-prefix?
  WHILE  CELL+              \ allocate a new node
         append-next-letter-to-prefix
         init-node-with-letter
         RECURSE			\ process all suffixes from this prefix
         remove-letter-from-prefix
  REPEAT   ( prefix last )
  finish-block ;

: make-dawg
  S" words.txt" R/O OPEN-FILE ABORT" No input file!" words-file !
  S" dawg.out" R/W CREATE-FILE ABORT" No output file!" dawg-file !
  create-hash-table
  100 CELLS ALLOCATE ABORT" Can't allocate block stack!"
  ( blocks ) \ max 87 for "outstunting"

  0 OVER ! DUP 4 write-to-dawg	\ skip root pointer
  1 cur-dawg-index !
  get-next-word
  0 prefix-len !  0 prefix C!
  suffixes  ( blocks[0] filled with root node index )
  0. dawg-file @ REPOSITION-FILE ABORT" Can't rewind!"
  DUP 4 write-to-dawg	\ backpatch root pointer

  FREE DROP  destroy-hash-table
  dawg-file @ CLOSE-FILE DROP
  words-file @ CLOSE-FILE DROP ;