;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; gnfa2regexp.el: convert finite automata to regular expressions ;;; ;;; Copyright (c) 2000 by Boris Schaefer ;;; ;;; You may do as you please with this code as long as you do not ;;; remove this copyright notice or hold me liable for its use. ;;; Please send bug reports to the address provided in: ;;; ;;; http://www.uncommon-sense.net/this-site/contact.html ;;; ;;; Example: ;;; ======== ;;; ;;; Here's an example that will create a regexp that matches balanced ;;; parentheses after `(defun' up to 4 levels deep. ;;; ;;; (defvar my-gnfa '((A (B . "(defun")) ;;; (B (C . "(") (B . "[^()]") (F . ")")) ;;; (C (D . "(") (C . "[^()]") (B . ")")) ;;; (D (E . "(") (D . "[^()]") (C . ")")) ;;; (E (E . "[^)]") (D . ")")) ;;; (F . nil))) ;;; ;;; (with-output-to-temp-buffer ;;; "monster regexp" ;;; (print (gnfa-to-regexp my-gnfa)) ;;; nil) ;;; ;;; State E is different in the above example, because it is the ;;; deepest level at which correct balancing will be recognized. ;;; ;;; Beware that the resulting regexp is about 700 characters long. ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (require 'cl) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; The following is apropriate for emacs regexps. If you want to ;; create non-Emacs regexps, then rebind these. ;; (defvar regexp-group-left "\\(") (defvar regexp-group-right "\\)") (defvar regexp-kleene-star "*") (defvar regexp-union "\\|") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; some generalized set functions ;; (defun* adjoin* (elem list &key (decider '(lambda (a b) a)) (test 'eql)) "Adjoin `elem' to `list'. See `union*'." (if (null list) (list elem) (if (funcall test elem (car list)) (cons (funcall decider elem (car list)) (cdr list)) (cons (car list) (adjoin* elem (cdr list) :decider decider :test test))))) (defun* union* (list1 list2 &key (decider '(lambda (a b) a)) (test 'eql)) "Construct the union of `list1' and `list2' using. The union will be constructed using `test' as the comparison function for two elements and using `decider' to decide what to put into the result, if two elements are the same. `decider' will be called with two arguments, the first is the element from `list1' the second is the element from `list2'. `union*' assumes that `list1' and `list2' have no duplicates." (cond ((null list1) list2) (t (union* (cdr list1) (adjoin* (car list1) list2 :decider decider :test test) :decider decider :test test)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; GNFA abstractions ;; (defun gnfa-transition-target (transition) (car transition)) (defun gnfa-transition-label (transition) (cdr transition)) (defun gnfa-state-name (state) "Return the name of `state'." (car state)) (defun gnfa-state-outgoing-transitions (state) "Return the list of transitions going out from `state'. Transitions to itself are excluded." (loop for trans in (cdr state) when (not (eq (gnfa-transition-target trans) (gnfa-state-name state))) collect trans into result finally (return result))) (defun gnfa-state-self-transition (state) "Return a transition from `state' to itself or nil if none exists." (loop with name = (gnfa-state-name state) for transition in (cdr state) for target = (gnfa-transition-target transition) until (eq target name) finally (return transition))) (defun gnfa-state-find-transition (target-name state) (loop for trans in (cdr state) when (eq (gnfa-transition-target trans) target-name) return trans)) (defun gnfa-state-accepting-p (state) "Check whether `state' is an accepting state." (loop for elem in state when (eq elem nil) return t)) (defun gnfa-find-state (state-name gnfa) "Return the state with name `state-name' in `gnfa'." (assq state-name gnfa)) (defun gnfa-find-states-targeting (target gnfa) "Return a list of all states that can move to `target' on some symbol. If `target' has a transition to itself, it is not included." (loop with target-name = (gnfa-state-name target) for state in gnfa for name = (gnfa-state-name state) for transitions = (gnfa-state-outgoing-transitions state) when (and (assq target-name transitions) (not (eq target-name name))) collect state into result finally (return result))) (defun gnfa-accepting-states (gnfa) "Return all accepting states." (loop for state in gnfa when (gnfa-state-accepting-p state) collect state into accepting-states finally (return accepting-states))) (defun gnfa-size (gnfa) (length gnfa)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Here comes the code that actually does stuff. ;; (defun make-transition (target label) (cons target label)) (defun make-update-transitions (incoming ripout-state) "Join the `incoming' transition with those of `ripout-state'." (flet ((regexp-group (string) (concat regexp-group-left string regexp-group-right))) (let* ((self-trans (gnfa-state-self-transition ripout-state)) (self-label (if self-trans (gnfa-transition-label self-trans) nil)) (in-label (gnfa-transition-label incoming))) (mapcar '(lambda (tr) (make-transition (gnfa-transition-target tr) (if self-trans (concat (regexp-group in-label) (regexp-group self-label) regexp-kleene-star (regexp-group (gnfa-transition-label tr))) (concat (regexp-group in-label) (regexp-group (gnfa-transition-label tr)))))) (gnfa-state-outgoing-transitions ripout-state))))) (defun combine-transitions (state rip-state) "Join the transitions of `state' with those of `rip-state'." (let ((transitions (remove-if '(lambda (transition) (eq (gnfa-transition-target transition) (gnfa-state-name rip-state))) (gnfa-state-outgoing-transitions state))) (new-transitions (make-update-transitions (gnfa-state-find-transition (gnfa-state-name rip-state) state) rip-state))) (union* transitions new-transitions :decider '(lambda (old new) ;; build the string on which to move to the ;; target state that `old' and `rip' share. (make-transition (gnfa-transition-target old) (concat (gnfa-transition-label new) "|" (gnfa-transition-label old)))) :test '(lambda (t1 t2) (eq (gnfa-transition-target t1) (gnfa-transition-target t2)))))) (defun gnfa-get-ripout-state (gnfa) (if (> (gnfa-size gnfa) 2) (cadr gnfa) nil)) (defun gnfa-do-ripout (gnfa) (let* ((ripout-state (gnfa-get-ripout-state gnfa))) (mapcar '(lambda (state) (let ((trans (gnfa-state-find-transition (gnfa-state-name ripout-state) state))) (if trans (cons (gnfa-state-name state) (combine-transitions state ripout-state)) state))) (remove* ripout-state gnfa)))) (defun gnfa-to-regexp (gnfa) (let ((ripout-state (gnfa-get-ripout-state gnfa))) (if ripout-state (gnfa-to-regexp (gnfa-do-ripout gnfa)) ;; return the only transition that is left in the GNFA ;; using (cdadar ...) is evil, but I'm lazy now. (cdadar gnfa)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; end of gnfa2regexp.el ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;