tree-features: 1f47088da013aa5ad5b5b1e292548614b9ca34e6
1: module Features where
2:
3: import Data.Char
4: import Data.Hash.MD5
5: import Data.Maybe
6: import System.IO
7: import Text.XML.Light.Input
8: import Text.XML.Light.Types
9: import XmlHelper
10:
11: data TreeOf a = Leaf a
12: | Node [TreeOf a] deriving (Show, Eq)
13:
14: instance Functor TreeOf where
15: fmap f (Leaf x) = Leaf (f x)
16: fmap f (Node ts) = Node (map (fmap f) ts)
17:
18: type Tree = TreeOf FeatureVector
19:
20: -- | Goal and context, from Coq XML output
21: type Request = (Tree, [Tree])
22:
23: type FeatureVector = [Integer]
24:
25: parseRequest :: Integer -> Element -> Request
26: parseRequest bits e = let (goal:context) = elContent e
27: in (parseTerm bits goal, map (parseTerm bits) context)
28:
29: parseTerm :: Integer -> Content -> Tree
30: parseTerm bits (Text t) = Leaf (feature bits (cdData t))
31: parseTerm bits (Elem e) = let subtrees = map (parseTerm bits) (elContent e)
32: name = qName (elName e)
33: attrs = getAttrs e
34: fVec = features . map (feature bits) $ name : attrs
35: in Node (Leaf fVec : subtrees)
36:
37: -- | Extract a FeatureVector from a Request
38: extractFeatures :: Request -> FeatureVector
39: extractFeatures (goal, context) = features (map extractFeatures' (goal : context))
40:
41: -- | Extract a FeatureVector from a Tree
42: extractFeatures' :: Tree -> FeatureVector
43: extractFeatures' (Leaf x) = features [x]
44: extractFeatures' (Node xs) = features (map extractFeatures' xs)
45:
46: -- | Combine FeatureVectors
47: features :: [FeatureVector] -> FeatureVector
48: features = foldr zipSum []
49:
50: zipSum [] ys = ys
51: zipSum xs [] = xs
52: zipSum (x:xs) (y:ys) = (x+y) : zipSum xs ys
53:
54: -- | Extract a feature from a string, mod n
55: feature :: Integer -> String -> FeatureVector
56: feature n = setFeature . (`mod` n) . md5i . Str
57:
58: -- | Create a fresh FeatureVector with the nth feature set to 1 and the rest 0
59: setFeature :: Integer -> FeatureVector
60: setFeature 0 = [1]
61: setFeature n = 0 : setFeature (n-1)
62:
63: leaves (Leaf _) = 1
64: leaves (Node ts) = sum (map leaves ts)
Generated by git2html.