izard

Developing this topic further, when I change it from handling a toy grammar parsing example to more robust code it grows way too much:

diff:

+ ; Added storage for valid parsing trees
+  (let [N (count words)
+        tree (ref (vec (take N (cycle [[]]))))
+        update-tree (fn [i toadd]
+                      (dosync (ref-set tree (vec 
+                                              (map #(if (= % i)
+                                                      (conj (nth @tree i) toadd)
+                                                      (nth @tree %))
+                                                   (range N))))))
+; changed set-word
+        set-word (fn [word index]
+                   (let [matching-words (lexicon word)
+                         filter-lexic (fn [matching-word]
+                                        (first (filter #(and (= (% :term) (matching-word :term))
+                                                             (= nil (% :left))
+                                                             ) grammar)))
+                         matching-lexic (map filter-lexic matching-words)
+                         get-prob (fn [term]
+                                    (Float. ((first (filter #(= nil (% :left)) matching-words)) :prob)))]
+                     (do
+                       (dorun (map #(aset P (% :num) index 0 (get-prob %)) matching-lexic))
+                       (dosync 
+                         (ref-set tree (vec 
+                                         (map 
+                                           (fn [i] (if (= i 0)
+                                                     (reduce conj (nth @tree i) 
+                                                             (vec (map #(hash-map :term (% :term) :start index :len 0
+                                                                                  :len1 1 :len2 1) matching-lexic)))
+                                                     (nth @tree i)))
+                                           (range N))))))))                                   
+        ; Add to tree
+        get-nodes (fn [term]
+                    (filter #(= (% :term) term) grammar))
+        new-val (fn [old rules1 start1 len1 rules2 start2 len2 p] 
+                  (let [getp #(aget P %1 %2 %3)
+                        get-maxp-index (fn [rules start len]
+                                         (apply max (map #(getp (% :num) start len) rules)))
+                        leftp (get-maxp-index rules1 start1 len1)
+                        rightp (get-maxp-index rules2 start2 len2)]
+                    (max old 
+                         (* leftp rightp p))))]

+            X (filter 
+                #(and (not (= nil (% :left)))
+                      (xor (= (% :term) :start)
+                           (< length N)))
+                grammar)] ; X = all non - terminals in grammar, start nodes are used only on full sentence

+              (update-tree (dec length) {:term (X :term) :start start :len (dec length) :prob new 
+                                         :left (X :left) :right (X :right) :len1 len1 :len2 len2})); Add current term to tree
+            (aset P (X :num) start (dec length) (Float. new))))))
+    @tree))

And that is only part of the code, with grammar augmentation with semantic rules still missing (but planned :)

S	M	T	W	T	F	S
					1	2
3	4	5	6	7	8	9
10	11	12	13	14	15	16
17	18	19	20	21	22	23
24	25	26	27	28	29	30
31

Oct. 25th, 2012

Oct. 25th, 2012

More on PCFG parse in clojure (last post)

Profile

August 2025

Most Popular Tags

Page Summary

Style Credit

Expand Cut Tags