diff options
author | Samy Pessé <samypesse@gmail.com> | 2016-02-19 16:12:43 +0100 |
---|---|---|
committer | Samy Pessé <samypesse@gmail.com> | 2016-12-22 12:32:14 +0100 |
commit | 4c44d677117d926b6dcc164f55fe34079c2ca3c7 (patch) | |
tree | 71d0fca56421f3a7b68d60a446f89d3735828f03 /packages/gitbook-html/lib | |
parent | 9e99b5850fd866fc2f9196993a0ae7e342311558 (diff) | |
download | gitbook-4c44d677117d926b6dcc164f55fe34079c2ca3c7.zip gitbook-4c44d677117d926b6dcc164f55fe34079c2ca3c7.tar.gz gitbook-4c44d677117d926b6dcc164f55fe34079c2ca3c7.tar.bz2 |
Improve summary parser
Diffstat (limited to 'packages/gitbook-html/lib')
-rw-r--r-- | packages/gitbook-html/lib/dom.js | 11 | ||||
-rwxr-xr-x | packages/gitbook-html/lib/glossary.js | 5 | ||||
-rwxr-xr-x | packages/gitbook-html/lib/index.js | 27 | ||||
-rwxr-xr-x | packages/gitbook-html/lib/langs.js | 4 | ||||
-rwxr-xr-x | packages/gitbook-html/lib/page.js | 6 | ||||
-rwxr-xr-x | packages/gitbook-html/lib/summary.js | 97 |
6 files changed, 103 insertions, 47 deletions
diff --git a/packages/gitbook-html/lib/dom.js b/packages/gitbook-html/lib/dom.js index 2c2eaf7..df4de90 100644 --- a/packages/gitbook-html/lib/dom.js +++ b/packages/gitbook-html/lib/dom.js @@ -3,12 +3,18 @@ var cheerio = require('cheerio'); // Parse an HTML string and return its content function parse(html) { - var $ = cheerio.load('<div>'+html+'</div>'); + var $ = cheerio.load(html); var $el = $('html, body').first(); return $el.length > 0? $el : $; } +// Return main element +function root($) { + var $el = $('html, body, > div').first(); + return $el.length > 0? $el : $.root(); +} + // Return text node of an element function textNode($el) { return _.reduce($el.children, function(text, e) { @@ -19,5 +25,6 @@ function textNode($el) { module.exports = { parse: parse, - textNode: textNode + textNode: textNode, + root: root }; diff --git a/packages/gitbook-html/lib/glossary.js b/packages/gitbook-html/lib/glossary.js index 648ba1a..9d3799b 100755 --- a/packages/gitbook-html/lib/glossary.js +++ b/packages/gitbook-html/lib/glossary.js @@ -7,9 +7,10 @@ function parseGlossary(html) { var entries = []; - $("h2").each(function() { + $('h2').each(function() { var $heading = $(this); - var $p = $heading.next(); + var $next = $heading.next() + var $p = $next.is('p')? $next.first() : $next.find('p').first(); var entry = {}; diff --git a/packages/gitbook-html/lib/index.js b/packages/gitbook-html/lib/index.js index a7c478c..0e67c94 100755 --- a/packages/gitbook-html/lib/index.js +++ b/packages/gitbook-html/lib/index.js @@ -1,8 +1,33 @@ +var _ = require('lodash'); -module.exports = { +var htmlParser = { summary: require("./summary"), glossary: require("./glossary"), langs: require("./langs"), readme: require("./readme"), page: require("./page") }; + +// Compose a function with a transform function for the first args +function compose(toHTML, fn) { + return function() { + var args = _.toArray(arguments); + args[0] = toHTML(args[0]); + + return fn.apply(null, args); + } +} + +// Create a GitBook parser +function createParser(toHTML) { + return { + summary: compose(toHTML, htmlParser.summary), + glossary: compose(toHTML, htmlParser.glossary), + langs: compose(toHTML, htmlParser.langs), + readme: compose(toHTML, htmlParser.readme), + page: compose(toHTML, htmlParser.page) + } +} + +module.exports = htmlParser; +module.exports.createParser = createParser; diff --git a/packages/gitbook-html/lib/langs.js b/packages/gitbook-html/lib/langs.js index 1042dcb..270a9f6 100755 --- a/packages/gitbook-html/lib/langs.js +++ b/packages/gitbook-html/lib/langs.js @@ -1,9 +1,9 @@ var _ = require('lodash'); -var parseEntries = require('./summary').entries; +var parseSummary = require('./summary'); // HTML -> Languages function parseLangs(content) { - return parseEntries(content); + return parseSummary(content).parts[0].articles; } // Languages -> HTML diff --git a/packages/gitbook-html/lib/page.js b/packages/gitbook-html/lib/page.js index e687050..6f056fc 100755 --- a/packages/gitbook-html/lib/page.js +++ b/packages/gitbook-html/lib/page.js @@ -2,8 +2,10 @@ var Q = require('q'); var _ = require('lodash'); // HTML -> HTML -function parsePage(src) { - return src; +function parsePage(html) { + return { + content: html + }; } module.exports = parsePage; diff --git a/packages/gitbook-html/lib/summary.js b/packages/gitbook-html/lib/summary.js index 1e2d63d..e71d6b5 100755 --- a/packages/gitbook-html/lib/summary.js +++ b/packages/gitbook-html/lib/summary.js @@ -1,14 +1,17 @@ var _ = require('lodash'); var dom = require('./dom'); +var SELECTOR_LIST = '.olist > ol, ol, ul'; +var SELECTOR_LINK = 'a, p > a'; + +var BL = '\n'; // parse a ul list and return list of chapters recursvely function parseList($ul, $) { var articles = []; - $ul.children('>li').each(function() { + $ul.children('li').each(function() { var article = {}; - var $li = $(this); // Get text for the entry @@ -16,14 +19,14 @@ function parseList($ul, $) { article.title = $p.text() || dom.textNode($li.get(0)); // Parse link - var $a = $li.find('> a, > p > a'); + var $a = $li.children(SELECTOR_LINK); if ($a.length > 0) { article.title = $a.first().text(); article.path = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, '') } // Sub articles - var $sub = $li.children('> .olist > ol, > ol, > ul'); + var $sub = $li.children(SELECTOR_LIST).first(); article.articles = parseList($sub, $); articles.push(article); @@ -32,55 +35,73 @@ function parseList($ul, $) { return articles; } -// Return a list of entries in a div -function parseEntries (html) { +// HTML -> Summary +function parseSummary(html) { var $ = dom.parse(html); - var chapters = parseList($("> ol, > ul").first(), $); - return chapters; -} + var $root = dom.root($); -// HTML -> Summary -function parseSummary(src) { - var chapters = parseEntries(src); + var $lists = $root.children(SELECTOR_LIST); + var parts = []; + + $lists.each(function() { + var $list = $(this); + + parts.push({ + articles: parseList($(SELECTOR_LIST).first(), $) + }); + }); return { - chapters: chapters + parts: parts }; } // Summary -> HTML -function summaryToText(summary) { - var bl = '\n'; - - var _base = function(article) { - if (article.path) { - return '<a href="'+article.path+'">'+article.title+'</a>'; - } else { - return article.title; - } - }; +function textPrefix(d) { + return Array(d*4).join(' '); +} - var convertArticle = function(article, d) { - var content = Array(d+2).join(' ') + '<li>' + _base(article); +function articleToText(article, d) { + var prefix = textPrefix(d); + var content = prefix + '<li>'; - if (article.articles.length > 0) { - content += convertArticles(article.articles, d); - } - return content + '</li>' + bl; - }; + if (article.path) { + content += '<a href="'+article.path+'">'+article.title+'</a>'; + } else { + content += article.title; + } - var convertArticles = function(articles, d) { - var content = '<ul>' + bl; - _.each(articles, function(_article) { - content += convertArticle(_article, d + 1); - }); - return content + '<ul>' + bl; + if (article.articles.length > 0) { + content += BL + articlesToText(article.articles, d) + prefix; } + content += '</li>' + BL; + + return content; +} + +function articlesToText(articles, d) { + var prefix = textPrefix(d); + var content = prefix + '<ul>' + BL; + _.each(articles, function(_article) { + content += articleToText(_article, d + 1); + }); + return content + '</ul>' + BL; +} + +function partsToText(part) { + return articlesToText(part.articles, 0) + BL + BL; +} + +function summaryToText(summary) { + var content = '<h1>Summary</h1>' + BL; + + _.each(summary.parts, function(part) { + content += partsToText(part); + }); - return '<h1>Summary</h1>'+ bl+bl + convertArticles(summary.chapters, 0) + bl; + return content + BL; }; module.exports = parseSummary; -module.exports.entries = parseEntries; module.exports.toText = summaryToText; |