diff options
author | Samy Pessé <samypesse@gmail.com> | 2016-01-27 21:56:00 +0100 |
---|---|---|
committer | Samy Pessé <samypesse@gmail.com> | 2016-01-27 21:56:00 +0100 |
commit | 8d277e9108afa6a027c61feb581a2150958f8571 (patch) | |
tree | 708a272a939c3e2089a2280d1358a4a9566e412e /lib/parsers/html.js | |
parent | f305d57ab7702c3ca10fd6e32366d19e524ee1f0 (diff) | |
download | gitbook-8d277e9108afa6a027c61feb581a2150958f8571.zip gitbook-8d277e9108afa6a027c61feb581a2150958f8571.tar.gz gitbook-8d277e9108afa6a027c61feb581a2150958f8571.tar.bz2 |
Add parsing of the glossary
Diffstat (limited to 'lib/parsers/html.js')
-rw-r--r-- | lib/parsers/html.js | 90 |
1 files changed, 0 insertions, 90 deletions
```diff
diff --git a/lib/parsers/html.js b/lib/parsers/html.js
deleted file mode 100644
index 8f4ed34..0000000
--- a/lib/parsers/html.js
+++ /dev/null
@@ -1,90 +0,0 @@
-var _ = require('lodash');
-var cheerio = require('cheerio');
-
-// Parse summary and returns a list of sections
-function parseSummary(html) {
-    var sections = [];
-    var $ = cheerio.load(html);
-
-    // Find main container
-    var $body = getContainer($);
-
-    // Extract sections, and parse
-    var $lists = $body.find('> ul, > ol');
-
-    $lists.each(function() {
-        sections.push({
-            articles: parseList($(this), $)
-        });
-    });
-
-    return sections;
-}
-
-// Parse readme and extract title, description
-function parseReadme(html) {
-    var $ = cheerio.load(html);
-
-    // Find main container
-    var $body = getContainer($);
-
-    return {
-        title: $body.find('h1:first-child').text().trim(),
-        description: $body.find('div.paragraph').first().text().trim()
-    };
-}
-
-// Return a page container (html, body tag or directly the root element)
-function getContainer($) {
-    var $body = $('body, html').first();
-    if (!$body) $body = $;
-
-    return $body;
-}
-
-// Parse a ul list and return list of chapters recursvely
-function parseList($ul, $) {
-    var articles = [];
-
-    $ul.children('li').each(function() {
-        var article = {};
-
-        var $li = $(this);
-
-        var $text = $li.find('> p, > span');
-        var $a = $li.find('> a, > p a, > span a');
-
-        article.title = $text.text();
-        if ($a.length > 0) {
-            article.title = $a.first().text();
-            article.ref = $a.attr('href');
-        }
-
-        // Inner list, with children article
-        var $sub = $li.find('> ol, > ul, > .olist > ol');
-        article.articles = parseList($sub, $);
-
-        articles.push(article);
-    });
-
-    return articles;
-}
-
-
-// Inherit from the html parser
-function inherits(opts) {
-    var parser = _.defaults(opts, {
-        toHTML: _.identity
-    });
-
-    parser.readme = _.compose(opts.toHTML, parseReadme);
-    parser.summary = _.compose(opts.toHTML, parseSummary);
-
-    return parser;
-}
-
-
-module.exports = inherits({
-    extensions: ['.html']
-});
-module.exports.inherits = inherits;
```