;;; GRAMRELEVAL.LSP ;;; ;;; Copyright John Carroll, University of Sussex, 1998-2001 ;;; ;;; Evaluate parser grammatical relation output with respect to gold ;;; standard relations. Compile and load this file into a Common ;;; Lisp system that implements the ANSI standard. Tested in Macintosh ;;; CL and Franz Allegro CL. ;;; ;;; See . ;;; The data and this program are free for research purposes; for any ;;; proposed commercial use please contact John Carroll. #| The lemmatised and numbered sentences and the gold standard should follow the format in files suste-text and suste-gr respectively (as linked to by ). The parser output is assumed to be as follows: 1 (ncsubj say jury _) (dobj say Friday _) (ccomp _ say produce) (ncsubj produce investigation _) 2 (ncsubj say jury _) (ccomp _ say believe) (ncsubj believe it _) ... The function gramreleval computes evaluation statistics; the functions greval-summary, greval-relation-summary, greval-sentence-summary, and greval-confusion-summary print out the statistics at increasing levels of detail. Gramreleval takes 4 arguments: the lemmatised and numbered sentence file, the gold standard relation file, the parser output file, and the name of a file for diagnostic output. An example call is: (setq stats (gramreleval "suste-text" "suste-gr" "parses" "output")) This puts the evaluation statistics in variable stats. The statistics can then be displayed, by e.g. (greval-summary t stats) where the first argument is a Lisp output stream (the symbol t indicates output to the terminal, as usual) and the second argument is the statistics computed by gramreleval. The other functions greval-relation-summary, greval-sentence-summary, and greval-confusion-summary take the same two arguments. The output from the latter function needs to be viewed with a long linelength since it produces a table several columns wide. |# (in-package :COMMON-LISP-USER) (defparameter *greval-weight-threshold* 0.0) (defparameter +relation-subsume-one-table+ ;; exactly one level of subsumption for particular relations '((|mod| . (|ncmod| |xmod| |cmod|)) (|subj| . (|ncsubj| |xsubj| |csubj|)) (|obj| . (|dobj| |obj2| |iobj|)) (|clausal| . (|xcomp| |ccomp|)))) (defparameter +relation-subsume-any-table+ ;; >=1 levels of subsumption for all relations '((|dependent| . (|mod| |arg_mod| |arg| |ncmod| |xmod| |cmod| |detmod| |subj| |subj_or_dobj| |comp| |ncsubj| |xsubj| |csubj| |obj| |clausal| |dobj| |obj2| |iobj| |xcomp| |ccomp| |aux| |conj|)) (|mod| . (|ncmod| |xmod| |cmod| |detmod|)) (|arg| . (|subj| |subj_or_dobj| |comp| |ncsubj| |xsubj| |csubj| |obj| |clausal| |dobj| |obj2| |iobj| |xcomp| |ccomp|)) (|subj| . (|ncsubj| |xsubj| |csubj|)) (|subj_or_dobj| . (|subj| |dobj| |ncsubj| |xsubj| |csubj|)) ; N.B. dobj, not obj as in figure (|comp| . (|obj| |clausal| |dobj| |obj2| |iobj| |xcomp| |ccomp|)) (|obj| . (|dobj| |obj2| |iobj|)) (|clausal| . (|xcomp| |ccomp|)))) (defparameter +relation-depth-table+ ;; depth of each relation in hierarchy '((|dependent| . 0) (|mod| . 1) (|ncmod| . 2) (|xmod| . 2) (|cmod| . 2) (|detmod| . 2) (|arg_mod| . 1) (|arg| . 1) (|subj| . 2) (|ncsubj| . 3) (|xsubj| . 3) (|csubj| . 3) (|subj_or_dobj| . 2) (|comp| . 2) (|obj| . 3) (|dobj| . 4) (|obj2| . 4) (|iobj| . 4) (|clausal| . 3) (|xcomp| . 4) (|ccomp| . 4) (|aux| . 1) (|conj| . 1))) (defparameter +relation-slot-table+ ;; argument slots for each relation '((|dependent| . (type head dependent)) (|mod| . (type head dependent)) (|ncmod| . (type head dependent)) (|xmod| . (type head dependent)) (|cmod| . (type head dependent)) (|detmod| . (type head dependent)) (|arg_mod| . (type head dependent initial_gr)) (|arg| . (head dependent)) (|subj| . (head dependent initial_gr)) (|ncsubj| . (head dependent initial_gr)) (|xsubj| . (head dependent initial_gr)) (|csubj| . (head dependent initial_gr)) (|subj_or_dobj| . (head dependent)) (|comp| . (head dependent)) (|obj| . (head dependent)) (|dobj| . (head dependent initial_gr)) (|obj2| . (head dependent)) (|iobj| . (type head dependent)) (|clausal| . (head dependent)) ; N.B. type slot added then deleted again (|xcomp| . (type head dependent)) (|ccomp| . (type head dependent)) (|aux| . (type head dependent)) (|conj| . (type . heads)))) (defparameter +type-underspecified-relations+ ;; relations for which type field may be unfilled in test GR '(|mod| |ncmod| |xmod| |cmod| |iobj| |clausal| |xcomp| |ccomp| |aux| |conj|)) (defstruct (gr-state (:print-function (lambda (x stream level) (declare (ignore level)) (format stream "" (gr-state-nsents x))))) nsents std-total tst-total agree states confusion) ;;; Entry point (defun gramreleval (text std tst &optional (out t)) (let ((readtable (copy-readtable nil))) (setf (readtable-case readtable) :preserve) (set-syntax-from-char #\' #\a readtable) (set-syntax-from-char #\, #\a readtable) (with-open-file (text-str text :direction :input) (with-open-file (std-str std :direction :input) (with-open-file (tst-str tst :direction :input) (cond ((null out) (gramreleval1 text-str std-str tst-str (make-broadcast-stream) readtable)) ((eq out t) (gramreleval1 text-str std-str tst-str *standard-output* readtable)) (t (with-open-file (out-str out :direction :output :if-exists :supersede :if-does-not-exist :create) (gramreleval1 text-str std-str tst-str out-str readtable)) ))))))) (defun gramreleval1 (text-str std-str tst-str out-str readtable) (greval-skip-to-next std-str) (peek-char #\1 std-str) ; skip comments, bracket specs etc (greval-skip-to-next tst-str) (peek-char #\1 tst-str) (let ((gr-state (make-gr-state :nsents 0)) (errorp nil)) (loop (let ((next-text (greval-next-text text-str readtable)) (next-std (greval-next-gr std-str readtable)) (next-tst (greval-next-gr tst-str readtable))) (cond ((and (null next-text) (null next-std) (null next-tst)) (greval-sentence-summary out-str gr-state) (return (if errorp nil gr-state))) ((or (null next-text) (null next-std) (null next-tst)) (error "Reached end of file at different times")) ((= (car next-text) (car next-std) (car next-tst)) (setq errorp (greval-check-words (car next-text) (cdr next-text) (cdr next-std) (cdr next-tst) errorp)) (unless errorp (greval-sentence (cdr next-text) (cdr next-std) (cdr next-tst) out-str (car next-text) gr-state))) (t (error "Mismatch in sentence numbers: ~A (text), ~A (standard), ~ ~A (test)" (car next-text) (car next-std) (car next-tst)))))))) (defun greval-next-text (str readtable) (let ((*readtable* readtable)) (let ((n (read str nil 'eof))) (if (eq n 'eof) nil (cons n (read-line str)))))) (defun greval-next-gr (str readtable) (let ((*readtable* readtable)) (let ((n (read str nil 'eof))) (if (eq n 'eof) nil (cons n (let ((res nil) (bracesp nil) (next (greval-skip-to-next str))) (when (eql next #\{) (read-char str) (setq bracesp t)) (loop (let ((next (greval-skip-to-next str))) (if bracesp (cond ((eql next #\}) (read-char str) (return (nreverse (remove-if #'greval-gr-ignore-p res)))) ((eq next 'eof) (error "Unexpected end of file, stream ~A" str)) ((digit-char-p next) (let ((w (read str nil 'eof))) (unless (numberp w) (error "Expecting number at byte ~A, stream ~A" (file-position str) str)) (when (> w 1.0) ;; (warn "Found weight ~A>1 at byte ~A, stream ~A - setting to 1" ;; w (file-position str) str) (setq w 1.0)) (push (cons w (greval-read-gr str)) res))) (t (push (cons 1.0 (greval-read-gr str)) res))) (if (or (eq next 'eof) (digit-char-p next)) (return (nreverse (remove-if #'greval-gr-ignore-p res))) (push (cons 1.0 (greval-read-gr str)) res))))))))))) (defun greval-skip-to-next (str) (let (next) (loop (setq next (peek-char t str nil 'eof)) (unless (eql next #\;) (return next)) (peek-char #\newline str)))) (defun greval-gr-ignore-p (wgr) (cond ((member nil (cdr wgr)) ; e.g. (conj _ add NIL) ;; (warn "Unexpected slot value NIL in ~S - ignoring" (cdr wgr)) t) ((< (car wgr) *greval-weight-threshold*)) (t nil))) (defun greval-read-gr (str) (peek-char t str nil 'eof) (unless (eql (read-char str) #\() (error "Expecting left parenthesis at byte ~A, stream ~A" (file-position str) str)) (let ((res nil) (pcount 0) c) (loop (peek-char t str) (setq c (read-char str)) (let ((tok nil)) (loop (cond ((eql c #\\) (setq c (read-char str))) ((and (eql c #\)) (> pcount 0)) (decf pcount)) ((member c '(#\space #\tab #\newline #\))) (push (intern (coerce (nreverse tok) 'string) (find-package :COMMON-LISP-USER)) res) (if (eql c #\)) (return-from greval-read-gr (nreverse res)) (return)))) (when (eql c #\() (incf pcount)) (push c tok) (setq c (read-char str))))))) (defun greval-check-words (sent-no text std-list tst-list errorp) (let ((text (concatenate 'string text " "))) (labels ((slot-filler-member-p (filler text) (search (concatenate 'string (string filler) " ") text)) (check-grs (wgrs type) (dolist (wgr wgrs) (mapc #'(lambda (gr-slot slot-name) (when (or (eq slot-name 'dependent) (eql (search "HEAD" (string slot-name)) 0)) (unless (slot-filler-member-p gr-slot text) (warn "Slot filler ~A in ~A relation ~A in sentence ~A not in text" gr-slot type (cdr wgr) sent-no) (unless errorp (warn "Only checking, not scoring from now on")) (setq errorp t)))) (rest (cdr wgr)) (gr-slot-list (cdr wgr)))))) (check-grs std-list "standard") (check-grs tst-list "test") errorp))) (defun greval-sentence (text std-list tst-list out-str sentn global-state) ;; there may legally be duplicate GRs in either standard or test ;; (setq std-list (remove-duplicates std-list :test #'equal)) ;; (setq tst-list (remove-duplicates tst-list :test #'equal)) (incf (gr-state-nsents global-state)) (let ((state (make-gr-state :nsents sentn)) (common nil) (missing std-list) (extra nil)) (dolist (tst tst-list) (multiple-value-bind (std-match matchp) (gr-find tst std-list) (if matchp (progn (push (list std-match tst) common) (setq missing (remove std-match missing :test #'eq)) (setq std-list (remove std-match std-list :test #'eq))) (push tst extra)) (when std-match ;; correct heads but possibly wrong relation (push (first (cdr tst)) (getf (gr-state-confusion global-state) (first (cdr std-match))))))) (format out-str "~%------------~%~A~%~A~%~%" sentn text) (format out-str "In both:~%~:{ ~A : ~A~%~}~%" (reverse (mapcar #'(lambda (x) (list (cdr (car x)) (cdr (cadr x)))) common))) (format out-str "Standard only:~%~{ ~A~%~}~%" (mapcar #'cdr missing)) (format out-str "Test only:~%~{ ~A~%~}~%" (reverse (mapcar #'cdr extra))) (dolist (std std-list) (incf (getf (gr-state-std-total state) (first (cdr std)) 0) (car std)) (incf (getf (gr-state-std-total global-state) (first (cdr std)) 0) (car std))) (dolist (tst extra) (incf (getf (gr-state-tst-total state) (first (cdr tst)) 0) (car tst)) (incf (getf (gr-state-tst-total global-state) (first (cdr tst)) 0) (car tst))) (dolist (pair common) (incf (getf (gr-state-std-total state) (first (cdr (car pair))) 0) (car (car pair))) (incf (getf (gr-state-std-total global-state) (first (cdr (car pair))) 0) (car (car pair))) (incf (getf (gr-state-tst-total state) (first (cdr (car pair))) 0) (car (cadr pair))) (incf (getf (gr-state-tst-total global-state) (first (cdr (car pair))) 0) (car (cadr pair))) (incf (getf (gr-state-agree state) (first (cdr (car pair))) 0) (car (cadr pair))) (incf (getf (gr-state-agree global-state) (first (cdr (car pair))) 0) (car (cadr pair)))) (push state (gr-state-states global-state)))) ;;; GR matching ;;; ;;; (gr-find '(1.0 |clausal| |add| |be|) '((1.0 |ncsubj| |keep| |there| _) (1.0 |ccomp| |that| |add| |be|))) ;;; (gr-find '(1.0 |ccomp| _ |add| |be|) '((1.0 |comp| |add| |be|) (1.0 |ccomp| |that| |add| |be|))) (defun gr-find (tst std-lst) ;; fuzzy equivalent of (find tst std-list :test #'equal) with second value ;; indicating whether relation found had a compatible name (t), or a ;; non-compatible one (nil) (let ((std-slots-matching nil)) (dolist (std std-lst) (let* ((relations-match-p (gr-subsumes-or-eq (first (cdr tst)) (first (cdr std)))) (tst-slots (gr-slot-list (cdr tst))) (std-slots (gr-slot-list (cdr std))) (slots-match-p (and (if (eq (first (cdr tst)) '|conj|) ; special case (equal tst-slots std-slots) (subsetp tst-slots std-slots)) (every #'(lambda (std-slot std-arg) (let ((tst-pos (position std-slot tst-slots :test #'eq))) (if tst-pos ;; slot with same name in both std and tst so check values (let ((tst-arg (nth tst-pos (rest (cdr tst))))) (or (gr-arg-equalp tst-arg std-arg) (and (eq std-slot 'type) (member (first (cdr tst)) +type-underspecified-relations+ :test #'eq) (eq tst-arg '_)))) ;; std slot not in tst, so skip with success t))) std-slots (rest (cdr std)))))) (cond ((and relations-match-p slots-match-p) (return-from gr-find (values std t))) (slots-match-p (push std std-slots-matching))))) (if std-slots-matching (values (car std-slots-matching) nil) nil))) (defun gr-subsumes-or-eq (tst-rel std-rel) (or (eq tst-rel std-rel) (some #'(lambda (entry) ;; test relation is less specific than standard by one level? (and (eq tst-rel (car entry)) (member std-rel (cdr entry) :test #'eq))) +relation-subsume-one-table+) (some #'(lambda (entry) ;; test relation is more specific than standard to any level? (and (eq std-rel (car entry)) (member tst-rel (cdr entry) :test #'eq))) +relation-subsume-any-table+))) (defun gr-arg-equalp (arg1 arg2) (cond ((eql arg1 arg2)) ((or (eq arg1 '_) (eq arg2 '_)) nil) ;; _ inside values taken as equivalent to ((or (find #\_ (string arg1)) (find #\_ (string arg2))) (labels ((canonicalise-blank (lst) (cond ((null lst) nil) ((eql (car lst) #\_) (append (coerce "" 'list) (canonicalise-blank (cdr lst)))) (t (cons (car lst) (canonicalise-blank (cdr lst))))))) (equal (canonicalise-blank (coerce (string arg1) 'list)) (canonicalise-blank (coerce (string arg2) 'list))))))) (defun gr-slot-list (gr) (let ((slots (cdr (assoc (first gr) +relation-slot-table+ :test #'eq)))) (unless slots (error "Unknown relation name ~A" (first gr))) (cond ((not (listp (cdr (last slots)))) ; e.g. (type . heads) (let ((initial (copy-tree slots))) (setf (cdr (last initial)) nil) (unless (>= (length (cdr gr)) (length initial)) (error "Not enough arguments to relation in ~A - expecting ~ ~A" gr slots)) (append initial (let ((res nil)) (dotimes (n (- (length (cdr gr)) (length initial)) (nreverse res)) (push (intern (format nil "~A~A" (cdr (last slots)) n)) res)))))) ((not (eql (length (cdr gr)) (length slots))) (error "Wrong number of slots in relation ~A - expecting ~ ~{~A~^ ~}" gr slots)) (t slots)))) ;;; (greval-summary t aa) (defun greval-summary (out-str gr-state) (let ((std (greval-plist-+ (gr-state-std-total gr-state))) (tst (greval-plist-+ (gr-state-tst-total gr-state))) (agree (greval-plist-+ (gr-state-agree gr-state)))) (let ((p (if (> tst 0) (* 100 (/ agree tst)) (- agree))) (r (if (> std 0) (* 100 (/ agree std)) (- agree)))) (format out-str "~%Precision ~7,2F Recall~7,2F ~ F-score~7,2F tst GRs~7,2F~%" p r (greval-f-score p r) (/ tst (gr-state-nsents gr-state)))))) (defun greval-sentence-summary (out-str gr-state) (format out-str "~%------------~%Summary~%") (format out-str "~%Sentence Precision Recall ~ F-score tst GRs~%") (dolist (s (sort (copy-list (gr-state-states gr-state)) #'< :key #'gr-state-nsents)) (let ((std (greval-plist-+ (gr-state-std-total s))) (tst (greval-plist-+ (gr-state-tst-total s))) (agree (greval-plist-+ (gr-state-agree s)))) (let ((p (if (> tst 0) (* 100 (/ agree tst)) (- agree))) (r (if (> std 0) (* 100 (/ agree std)) (- agree)))) (format out-str "~4D ~7,2F ~7,2F ~ ~7,2F ~9,2F~%" (gr-state-nsents s) p r (greval-f-score p r) tst)))) (greval-summary out-str gr-state)) (defun greval-relation-summary (out-str gr-state) (format out-str "~%------------~%Summary~%") (format out-str "~%Relation Precision Recall ~ F-score tst GRs~%") (dolist (pair +relation-slot-table+) (let* ((rel (car pair)) (std (getf (gr-state-std-total gr-state) rel 0)) (tst (getf (gr-state-tst-total gr-state) rel 0)) (agree (getf (gr-state-agree gr-state) rel 0))) (dolist (desc-rel (cdr (assoc rel +relation-subsume-any-table+ :test #'eq))) (incf std (getf (gr-state-std-total gr-state) desc-rel 0)) (incf tst (getf (gr-state-tst-total gr-state) desc-rel 0)) (incf agree (getf (gr-state-agree gr-state) desc-rel 0))) (let ((p (if (> tst 0) (* 100 (/ agree tst)) (- agree))) (r (if (> std 0) (* 100 (/ agree std)) (- agree)))) (format out-str "~15A~7,2F ~7,2F ~7,2F ~9,2F~%" (format nil "~VA~A" (cdr (assoc rel +relation-depth-table+)) " " rel) p r (greval-f-score p r) tst)))) (greval-summary out-str gr-state)) (defun greval-f-score (p r) (if (< (+ p r) least-positive-short-float) 0 (/ (* 2 p r) (+ p r)))) (defun greval-plist-+ (plist) (do ((tail plist (cddr tail)) (res 0)) ((null tail) res) (incf res (cadr tail)))) ;;; (greval-confusion-summary t aa) (defun greval-confusion-summary (out-str gr-state) (let* ((matrix (gr-state-confusion gr-state)) (rel-list (mapcan #'(lambda (pair) (let ((rel (car pair))) (dolist (x matrix) (when (or (eq x rel) (and (consp x) (member rel x :test #'eq))) (return (list rel)))))) +relation-slot-table+)) (total-rels 0) (rel-totals (make-list (length rel-list) :initial-element 0))) (format out-str "~%------------~%Confusion matrix~%") (format out-str "~%Standard Test relation returned~% ~{~7@A ~}~7@A~%~%" rel-list "Totals") (dolist (rel rel-list) (let ((confused (make-list (length rel-list) :initial-element 0)) (confused-total 0)) (dolist (item (getf matrix rel)) (incf (nth (position item rel-list) confused)) (incf confused-total) (incf (nth (position item rel-list) rel-totals)) (incf total-rels)) (format out-str "~9A~{~7@A ~}~7@A~%" rel confused confused-total))) (format out-str "~9A~{~7@A ~}~7@A~%" "Totals" rel-totals total-rels) (format out-str "~%Overall precision ~7,2F, recall ~7,2F, where matching criterion is ~ head/dependent slot equality~%" (* 100 (/ total-rels (greval-plist-+ (gr-state-tst-total gr-state)))) (* 100 (/ total-rels (greval-plist-+ (gr-state-std-total gr-state))))))) ;;; End of file