Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom hash consing tables #21

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Code/Compiler/code-generation.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
(:interpreted
(let ((sb-ext:*evaluator-mode* :interpret))
(eval form)))
#-sbcl
(:interpreted (eval form))
(:compiled
(compile nil form))
(:literal
Expand Down
53 changes: 53 additions & 0 deletions Code/DFA-construction/hash-cons.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
(in-package :one-more-re-nightmare)

;;; A hash-table designed for hash consing. There are two main
;;; deficiencies in the hash tables provided by Common Lisp:
;;; 1. We cannot promise that a key is definitely new and avoid
;;; comparing keys. We may only "upsert" keys.
;;; 2. The hash function used by SBCL behaves very poorly with
;;; large substitution lists (as used by TAG-SET).

(eval-when (:compile-toplevel :load-toplevel :execute)
(defconstant +buckets+ 256))

(defstruct (hash-cons-table (:constructor make-hash-cons-table (test hash)))
(buckets (make-array +buckets+ :initial-element '())
:type (simple-vector #.+buckets+)
:read-only y)
(test #'equal :type function :read-only t)
(hash #'sxhash :type function :read-only t))

(declaim (inline %bucket))
(defun %bucket (hc-table key)
(mod (the fixnum (funcall (hash-cons-table-hash hc-table) key)) +buckets+))

(defun insert-hc (hc-table key value)
(push (cons key value)
(svref (hash-cons-table-buckets hc-table)
(%bucket hc-table key)))
value)

(defun lookup-hc (hc-table key)
(let ((pair (assoc key
(svref (hash-cons-table-buckets hc-table) (%bucket hc-table key))
:test (hash-cons-table-test hc-table))))
(if (null pair)
(values nil nil)
(values (cdr pair) t))))

(defun tag-set-hash (substitutions)
(let ((hash 0))
(flet ((update (x)
(setf hash (logand most-positive-fixnum (+ (* x 31) hash)))))
(loop for ((n v) . source) in (first substitutions)
do (update (sxhash n))
(update v)
(trivia:match source
('nil (update 0))
('position (update 1))
((list n v) (update (sxhash n)) (update v)))))
hash))

(defmethod print-object ((h hash-cons-table) s)
(print-unreadable-object (h s :type t)
(format s "~D entries" (reduce #'+ (hash-cons-table-buckets h) :key #'length))))
8 changes: 6 additions & 2 deletions Code/DFA-construction/make-dfa.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@
(reverse result))))

(defvar *probably-bad-limit* 1000)
(define-condition exceeded-state-limit (error)
()
(:report "Made too many states - either your regular expression is too complicated, or one-more-re-nightmare is broken.
(Either way, you're not going to get this compiled any time soon.)"))

(defun make-dfa-from-expressions (expressions)
(let ((states (make-hash-table))
(possibly-similar-states (make-hash-table))
Expand All @@ -94,8 +99,7 @@
(loop
(when (null work-list) (return))
(when (> (hash-table-count states) *probably-bad-limit*)
(error "Made too many states - either your regular expression is too complicated, or one-more-re-nightmare is broken.
(Either way, you're not going to get this compiled any time soon.)"))
(error 'exceeded-state-limit))
(let* ((expression (pop work-list))
(state (find-state expression)))
(cond
Expand Down
20 changes: 10 additions & 10 deletions Code/DFA-construction/re-types.lisp
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
(in-package :one-more-re-nightmare)

(define-types
(literal set)
(empty-string)
(repeat r min max can-empty)
(tag-set substitutions)
(alpha expression history)
(grep vector prototype)
(either r s)
(both r s)
(invert r)
(join r s))
((literal set))
((empty-string))
((repeat r min max can-empty))
((tag-set substitutions) equal tag-set-hash)
((alpha expression history))
((grep vector prototype))
((either r s))
((both r s))
((invert r))
((join r s)))

(define-rewrites (literal set)
:printer ((literal set)
Expand Down
38 changes: 19 additions & 19 deletions Code/DFA-construction/tag-sets.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -90,28 +90,28 @@
(has-tags-p r))
(_ nil))))

(defvar *allow-alpha* t)
(defun map-tags (f re)
;; Don't touch the table for UNIQUE-TAGS, since the tag set will
;; definitely not be in the table.
(defun %definitely-fresh-tag-set (set)
(let ((ts (make-instance 'tag-set)))
(setf (slot-value ts 'substitutions) set)
(insert-hc *tag-set-table* (list set) ts)
ts))

(defun unique-tags (re)
;; Return the same RE if we have no tags to replace.
(unless (has-tags-p re)
(return-from map-tags re))
(unless (has-tags-p re) (return-from unique-tags re))
(trivia:match re
((tag-set set) (tag-set (funcall f set)))
((either r s) (either (map-tags f r) (map-tags f s)))
((both r s) (both (map-tags f r) (map-tags f s)))
((join r s) (join (map-tags f r) (map-tags f s)))
((invert r) (invert (map-tags f r)))
((repeat r min max c) (repeat (map-tags f r) min max c))
((tag-set set) (%definitely-fresh-tag-set (unique-assignments set)))
((either r s) (either (unique-tags r) (unique-tags s)))
((both r s) (both (unique-tags r) (unique-tags s)))
((join r s) (join (unique-tags r) (unique-tags s)))
((invert r) (invert (unique-tags r)))
((repeat r min max c) (repeat (unique-tags r) min max c))
((alpha r old-tags)
(unless (or *allow-alpha* (eq old-tags (empty-set)))
(unless (eq old-tags (empty-set))
(error "Can't modify tags with history"))
(alpha (map-tags f r)
(map-tags f old-tags)))
(alpha (unique-tags r) (unique-tags old-tags)))
((grep r s)
(grep (map-tags f r)
(map-tags f s)))
(grep (unique-tags r) (unique-tags s)))
(_ re)))

(defun unique-tags (re)
(let ((*allow-alpha* nil))
(map-tags #'unique-assignments re)))
30 changes: 15 additions & 15 deletions Code/DFA-construction/type.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

(defvar *table-names* '())

(defmacro define-hash-consing-table (name)
(defmacro define-hash-consing-table (name &optional (test 'equal) (hash 'sxhash))
`(progn
(defvar ,name)
(pushnew ',name *table-names*)
(pushnew '(,name ,test ,hash) *table-names*)
',name))

(defconstant +uncomputed+ '+uncomputed+)
Expand All @@ -18,7 +18,9 @@
(%has-tags-p :initform +uncomputed+ :accessor cached-has-tags-p)))

(defmacro define-types (&body types)
(loop for (name . slots) in types
(loop for ((name . slots) test* hash*) in types
for test = (or test* 'equal)
for hash = (or hash* 'sxhash)
collect (let ((variables (loop for slot in slots collect (gensym (symbol-name slot))))
(internal-creator (alexandria:format-symbol t "%~a" name))
(table-name (alexandria:format-symbol '#:one-more-re-nightmare
Expand All @@ -32,14 +34,13 @@
,@(loop for slot in slots
for variable in variables
appending `((list 'slot-value instance-name '',slot) ,variable)))))
(define-hash-consing-table ,table-name)
(define-hash-consing-table ,table-name ,test ,hash)
(defun ,internal-creator ,slots
(or (gethash (list ,@slots) ,table-name)
(or (lookup-hc ,table-name (list ,@slots))
(let ((instance (make-instance ',name)))
,@(loop for slot in slots
collect `(setf (slot-value instance ',slot) ,slot))
(setf (gethash (list ,@slots) ,table-name)
instance))))))
(insert-hc ,table-name (list ,@slots) instance))))))
into forms
finally (return `(progn ,@forms))))

Expand All @@ -58,7 +59,7 @@
collect `((list ,@pattern) ,replacement))
,@(loop for ((nil . pattern) (nil . replacement)) in hash-cons
collect `((list ,@pattern)
(or (gethash (list ,@replacement) ,table-name)
(or (lookup-hc ,table-name (list ,@replacement))
(trivia.next:next))))
(_ (,internal-creator ,@slots)))))))
(indent:define-indentation define-type (4 &body))
Expand All @@ -67,11 +68,10 @@
(alexandria:once-only (table key)
(alexandria:with-gensyms (value present?)
`(multiple-value-bind (,value ,present?)
(gethash ,key ,table)
(lookup-hc ,table ,key)
(if ,present?
,value
(setf (gethash ,key ,table)
(progn ,@body)))))))
(insert-hc ,table ,key (progn ,@body)))))))

(defmacro with-slot-consing ((accessor object &key (when 't)) &body body)
(alexandria:once-only (object)
Expand All @@ -89,11 +89,11 @@
,value)))))))

(defmacro with-hash-consing-tables (() &body body)
`(let ,(loop for name in *table-names*
collect `(,name (make-hash-table :test 'equal)))
`(let ,(loop for (name test hash) in *table-names*
collect `(,name (make-hash-cons-table #',test #',hash)))
,@body))

(defmacro clear-global-tables ()
"Set up global tables for testing."
`(setf ,@(loop for name in *table-names*
append `(,name (make-hash-table :test 'equal)))))
`(setf ,@(loop for (name test hash) in *table-names*
append `(,name (make-hash-cons-table #',test #',hash)))))
1 change: 1 addition & 0 deletions Code/one-more-re-nightmare.asd
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
(:module "DFA-construction"
:components ((:file "type")
(:file "sets")
(:file "hash-cons")
(:file "re-types")
(:file "nullable")
(:file "tag-sets")
Expand Down
1 change: 1 addition & 0 deletions Code/package.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#:all-matches #:all-string-matches
#:first-match #:first-string-match
#:do-matches
#:exceeded-state-limit
#:lint-style-warning
#:not-matchable-style-warning
#:matching-too-much-style-warning))
28 changes: 28 additions & 0 deletions Documentation/linting.scrbl
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,31 @@ always be bound to an index, and never @cl{nil}, because the first two
registers designate the bounds of the entire match.

}

@subsection{"Made too many states - ..."}

@definitions{
@define-condition["exceeded-state-limit" "error"]
}

@definition-section["Explanation"]{

The compiler generates a deterministic finite automaton, which may
(in semi-rare cases) produce a number of states exponentially
proportional to the complexity of the regular expression. Complements
and intersections may produce doubly-exponential numbers of states.

It is also possible, but hopefully more rare, that the compiler lacks
rules to generate a finite number of states.

}

@definition-section["Examples"]{

@cl{(compile-regular-expression "1[01]{9}")} signals the error "Made too
many states - either your regular expression is too complicated, or
one-more-re-nightmare is broken. (Either way, you're not going to get
this compiled any time soon.)" In general, the regular expression
@${\mathtt{1} \cdot \left\{ \mathtt{0}, \mathtt{1} \right\}^n} requires
@${\mathcal{O}(2^n)} states.
}
2 changes: 1 addition & 1 deletion Tests/regrind.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
(let ((success t))
(lparallel:pdotimes (i n success n)
(let* ((*remaining-depth* depth)
#+sbcl (one-more-re-nightmare::*code-type* :interpreted)
(one-more-re-nightmare::*code-type* :interpreted)
(re (random-re))
(haystack (random-haystack)))
(handler-case
Expand Down