DOM 全部実装ってのはあんまり意味ないと思ったので get.. だけにしたけど、最低半分は実装しないと使い勝手が悪そうだな~ かといって何か使い道があるかというと疑問
使い方は
(setq g(parse "e:/test/test.html")) => #<hashtable 51908756> (value g) => ....... (setq m(get-elements-by-class-name g "menu")) => #<hashtable 51910572> (value m) => .............
コード
(defvar *parse-cache* nil
  "Association list of (FILE . HASH-TABLE) pairs filled in by `parse'.")
(defun parse (file)
  "Read FILE and split it into tag/text fragments.
Returns a hash table whose keys are consecutive integers starting at 0 and
whose values are the fragments in file order.  Each line is split on `>';
a fragment that contains `<' gets the `>' appended again, other fragments
are stored unchanged.
The result is remembered in *parse-cache* keyed by FILE, so a second call
with an equal FILE returns the cached table without reading the file again
(the cache is never invalidated when the file changes)."
  (let ((n -1) (ln) (cache) (hash))
    ;; Cache hit: hand back the table produced by the earlier call.
    (if (setq cache (assoc file *parse-cache* :test 'equal))
        (return-from parse (cdr cache)))
    (setq hash (make-hash-table))
    (with-open-file (f file)
      ;; read-line returns nil at end of file, which ends the loop.
      (while (setq ln (read-line f nil nil nil))
        (dolist (x (split-string ln #\>))
          (setf (gethash (incf n) hash)
                ;; split-string removed the `>'; restore it on tag fragments.
                (if (string-match "<" x)
                    (concat x ">")
                  x)))))
    ;; Entries are (FILE . HASH), so duplicates must be detected on the
    ;; file name (car), not on the hash table (cdr).
    (pushnew (cons file hash) *parse-cache*
             :test 'equal :key 'car)
    hash))
(defun dom-scanner (document regex &optional once)
  "Collect the elements of DOCUMENT whose opening fragment matches REGEX.
DOCUMENT is a hash table read with the integer keys 0 .. count-1.
REGEX must define group 1 (the text stored for the opening fragment) and
group 2 (text starting with the tag name; everything up to the first space
is taken as the tag name).
Returns a hash table keyed 0, 1, ... whose values are hash tables, one per
element, each holding that element's fragments keyed 0, 1, ...
When ONCE is non-nil the function returns as soon as the first element has
been completely collected."
  (let ((inside 0)                      ; 1 while fragments are being collected
        (depth 0)                       ; open tags of name TAG not yet closed
        (n -1)                          ; last key used in HASH
        (nn -1)                         ; last key used in RHASH
        (tag)
        (hash (make-hash-table))        ; element currently being collected
        (rhash (make-hash-table)))      ; finished elements
    (dotimes (c (hash-table-count document))
      (let ((s (gethash c document)))
        (cond
         ;; Fragment matches the search pattern: start (or continue) an element.
         ((string-match regex s)
          ;; Only the outermost match decides which tag name is tracked.
          (and (= inside 0)
               (setq tag (car (split-string (match-string 2) #\SPC))))
          (setf (gethash (incf n) hash) (match-string 1))
          (setq inside 1)
          (cond
           ((string-match (concat ".*</" tag ">") s)
            (setq inside 0))
           ((string-match (concat "<" tag ".*>") s)
            (incf depth))))
         ;; Fragment closes a tracked tag.
         ((and (< 0 depth)
               (string-match (concat "\\(.*</" tag ">\\)") s))
          (setf (gethash (incf n) hash) (match-string 1))
          (decf depth)
          (when (zerop depth)
            ;; Outermost tag closed: the element is complete.
            (setq inside 0)
            (setf (gethash (incf nn) rhash) hash)
            (setq hash (make-hash-table))
            (setq n -1)
            ;; Return only after an element was stored.  Returning on every
            ;; closing tag would hand back an empty table when a tag of the
            ;; same name is nested inside the element.
            (if once (return-from dom-scanner rhash))))
         ;; Fragment inside the element being collected.
         ((= inside 1)
          (and (string-match (concat "<" tag ".*>") s)
               (incf depth))
          (setf (gethash (incf n) hash) s))
         (t ()))))
    rhash))
;; Regex fragments that get-document joins (by index) into the pattern
;; handed to dom-scanner.
(setq get-tag-regex-list
      (list "\\(<\\("       ; 0: opens group 1 and group 2 right after `<'
            "[^<>/]+?"      ; 1: wildcard tag name
            "\\)[^<>]*?"    ; 2: closes group 2, then the rest of the tag
            ">.*\\)"))      ; 3: end of the tag, closes group 1
(defmacro get-document (document &optional once &rest arg)
  "Expand into a dom-scanner call over DOCUMENT.
The ARG forms are string pieces spliced into the search pattern between
fragments 0 and 2 of get-tag-regex-list.  When more than one ARG form is
given, the wildcard tag-name fragment (index 1) is placed in front of them;
with a single ARG form that form itself takes the tag-name position.
ONCE is passed through to dom-scanner unchanged."
  `(dom-scanner
    ,document
    (concat (nth 0 get-tag-regex-list)
            ;; The number of ARG forms is known at expansion time, so decide
            ;; here instead of evaluating every ARG form a second time at
            ;; run time just to count them.
            ,@(if (< 1 (length arg))
                  '((nth 1 get-tag-regex-list)))
            ,@arg
            (nth 2 get-tag-regex-list)
            (nth 3 get-tag-regex-list))
    ,once))
(defmacro get-value (dom)
  "Concatenate the values stored in the hash table DOM into one string.
If the value under key 0 of DOM is itself a hash table, that inner table is
used instead of DOM.  The result is nil when the table has no entries.
DOM is evaluated exactly once."
  ;; Use uninterned symbols for the temporaries so that a DOM form which
  ;; mentions a variable of the same name (e.g. `g') is not captured.
  (let ((acc (gensym))
        (table (gensym)))
    `(let ((,acc)
           (,table ,dom))
       ;; NOTE(review): values are joined in maphash order -- confirm that
       ;; this matches key order for the tables produced in this file.
       (maphash
        #'(lambda (a b)
            (setq ,acc (concat ,acc (string b))))
        (if (hash-table-p (gethash 0 ,table))
            (gethash 0 ,table)
          ,table))
       ,acc)))
;; ここから使う関数
(defun get-element-by-id (document id)
  "Scan DOCUMENT for an element whose opening tag contains id=\"ID\".
The scan stops after the first complete element; the return value is the
table produced by dom-scanner."
  (get-document document
                t
                " id=\"" id "\""))
(defun get-elements-by-name (document id)
  "Scan DOCUMENT for the elements whose opening tag contains name=\"ID\".
ID is the value of the name attribute to look for.  All matching elements
are collected, as in the other get-elements-* functions; the return value
is the table produced by dom-scanner."
  ;; The parameter is called ID; the previous body referenced an unbound
  ;; variable `name' here.
  (get-document document nil " name=\"" id "\""))
(defun get-elements-by-tag-name (document tag)
  "Scan DOCUMENT for every element whose tag name is matched by TAG.
TAG is spliced into the search pattern as-is.  The return value is the
table produced by dom-scanner."
  (get-document document
                nil
                tag))
(defun get-elements-by-class-name (document class)
  "Scan DOCUMENT for every element whose opening tag contains class=\"CLASS\".
The return value is the table produced by dom-scanner."
  (get-document document
                nil
                " class=\"" class "\""))
(defun get-attribute (dom name)
  "Return the text found immediately before `=NAME' in DOM's first fragment.
The pattern captures the run of non-space characters in front of `=' that
is followed by NAME.  DOM is an element table whose key 0 holds the opening
tag string; a result table whose key 0 holds such an element table is
accepted too.  Returns nil when nothing matches."
  (let ((head (gethash 0 dom)))
    ;; Accept a result set as well: step into its first element.
    (if (hash-table-p head)
        (setq head (gethash 0 head)))
    ;; Read the match data only after a successful match.
    (when (and (stringp head)
               (string-match (concat "\\([^ ]+?\\)=" name) head))
      (match-string 1))))
(defun get-attribute-node (dom node)
  "Return the double-quoted value that follows `NODE=' in DOM's first fragment.
The pattern only matches values without spaces.  DOM is an element table
whose key 0 holds the opening tag string; a result table whose key 0 holds
such an element table is accepted too.  Returns nil when nothing matches."
  (let ((head (gethash 0 dom)))
    ;; Accept a result set as well: step into its first element.
    (if (hash-table-p head)
        (setq head (gethash 0 head)))
    ;; Read the match data only after a successful match.
    (when (and (stringp head)
               (string-match (concat node "= *\"\\([^ ]+?\\)\"") head))
      (match-string 1))))
(defun value (dom)
  "Function wrapper around the get-value macro: the content of DOM as a string."
  (get-value dom))
(defun title (dom)
  "Return the fragments of the first title element in DOM as one string.
DOM is scanned with dom-scanner in `once' mode; the result is nil when no
title element is found."
  ;; Bind the scan result first: passing the dom-scanner call itself to
  ;; get-value would put the whole form at every place the macro uses its
  ;; argument, so the document would be scanned more than once.
  (let ((found (dom-scanner dom "\\(<\\(title\\).*?>\\)" t)))
    (get-value found)))