summary | refs | log | tree | commit | diff | stats
path: root/lib/parsers/html.js
diff options
context:
space:
mode:
author: Samy Pessé <samypesse@gmail.com> 2016-01-27 21:56:00 +0100
committer: Samy Pessé <samypesse@gmail.com> 2016-01-27 21:56:00 +0100
commit: 8d277e9108afa6a027c61feb581a2150958f8571 (patch)
tree: 708a272a939c3e2089a2280d1358a4a9566e412e /lib/parsers/html.js
parent: f305d57ab7702c3ca10fd6e32366d19e524ee1f0 (diff)
download: gitbook-8d277e9108afa6a027c61feb581a2150958f8571.zip
gitbook-8d277e9108afa6a027c61feb581a2150958f8571.tar.gz
gitbook-8d277e9108afa6a027c61feb581a2150958f8571.tar.bz2
Add parsing of the glossary
Diffstat (limited to 'lib/parsers/html.js')
-rw-r--r-- lib/parsers/html.js 90
1 files changed, 0 insertions, 90 deletions
diff --git a/lib/parsers/html.js b/lib/parsers/html.js
deleted file mode 100644
index 8f4ed34..0000000
--- a/lib/parsers/html.js
+++ /dev/null
@@ -1,90 +0,0 @@
-var _ = require('lodash');
-var cheerio = require('cheerio');
-
-// Parse summary and returns a list of sections
-function parseSummary(html) {
- var sections = [];
- var $ = cheerio.load(html);
-
- // Find main container
- var $body = getContainer($);
-
- // Extract sections, and parse
- var $lists = $body.find('> ul, > ol');
-
- $lists.each(function() {
- sections.push({
- articles: parseList($(this), $)
- });
- });
-
- return sections;
-}
-
-// Parse readme and extract title, description
-function parseReadme(html) {
- var $ = cheerio.load(html);
-
- // Find main container
- var $body = getContainer($);
-
- return {
- title: $body.find('h1:first-child').text().trim(),
- description: $body.find('div.paragraph').first().text().trim()
- };
-}
-
-// Return a page container (html, body tag or directly the root element)
-function getContainer($) {
- var $body = $('body, html').first();
- if (!$body) $body = $;
-
- return $body;
-}
-
-// Parse a ul list and return list of chapters recursvely
-function parseList($ul, $) {
- var articles = [];
-
- $ul.children('li').each(function() {
- var article = {};
-
- var $li = $(this);
-
- var $text = $li.find('> p, > span');
- var $a = $li.find('> a, > p a, > span a');
-
- article.title = $text.text();
- if ($a.length > 0) {
- article.title = $a.first().text();
- article.ref = $a.attr('href');
- }
-
- // Inner list, with children article
- var $sub = $li.find('> ol, > ul, > .olist > ol');
- article.articles = parseList($sub, $);
-
- articles.push(article);
- });
-
- return articles;
-}
-
-
-// Inherit from the html parser
-function inherits(opts) {
- var parser = _.defaults(opts, {
- toHTML: _.identity
- });
-
- parser.readme = _.compose(opts.toHTML, parseReadme);
- parser.summary = _.compose(opts.toHTML, parseSummary);
-
- return parser;
-}
-
-
-module.exports = inherits({
- extensions: ['.html']
-});
-module.exports.inherits = inherits;