diff options
author | Soreine <soreine.plume@gmail.com> | 2016-06-06 11:50:06 +0200 |
---|---|---|
committer | Samy Pessé <samypesse@gmail.com> | 2016-12-22 12:32:18 +0100 |
commit | 22b22dea1ca61285d5228f95a4fc008709e38d7d (patch) | |
tree | be3539b3e0ab6db4adbf77df4170800b3e764b92 /packages/gitbook-html/lib/summary.js | |
parent | 67424a9091663e19c9ddbd23fc0cc1619c23c843 (diff) | |
download | gitbook-22b22dea1ca61285d5228f95a4fc008709e38d7d.zip gitbook-22b22dea1ca61285d5228f95a4fc008709e38d7d.tar.gz gitbook-22b22dea1ca61285d5228f95a4fc008709e38d7d.tar.bz2 |
Parsing empty parts
Diffstat (limited to 'packages/gitbook-html/lib/summary.js')
-rwxr-xr-x | packages/gitbook-html/lib/summary.js | 96 |
1 files changed, 82 insertions, 14 deletions
```diff
diff --git a/packages/gitbook-html/lib/summary.js b/packages/gitbook-html/lib/summary.js
index e55719b..0a9ddb5 100755
--- a/packages/gitbook-html/lib/summary.js
+++ b/packages/gitbook-html/lib/summary.js
@@ -3,6 +3,8 @@ var dom = require('./dom');
 var SELECTOR_LIST = 'ol, ul';
 var SELECTOR_LINK = '> a, p > a';
 
+var SELECTOR_PART = 'h2, h3, h4';
+
 var BL = '\n';
 
 /**
@@ -15,7 +17,7 @@ function findList($parent) {
     var $container = $parent.children('.olist');
     if ($container.length > 0) $parent = $container.first();
 
-    return $parent.children('ul, ol');
+    return $parent.children(SELECTOR_LIST);
 }
 
 /**
@@ -40,7 +42,7 @@ function parseList($ul, $) {
         var $a = $li.find(SELECTOR_LINK);
         if ($a.length > 0) {
             article.title = $a.first().text();
-            article.ref = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, '')
+            article.ref = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, '');
         }
 
         // Sub articles
@@ -55,6 +57,71 @@ function parseList($ul, $) {
 }
 
 /**
+    Find all parts and their corresponding lists
+
+    @param {cheerio.Node}
+    @param {cheerio.DOM}
+    @return {Array<{title: String, list: cheerio.Node}>}
+ */
+function findParts($parent, $) {
+    // Find parts and lists
+    // TODO asciidoc compatibility
+    var partsAndLists = $parent.children(SELECTOR_LIST + ', ' + SELECTOR_PART);
+
+    // Group each part with the list after
+    var parts = [];
+    var previousPart = null;
+
+    partsAndLists.each(function (i, el) {
+        if (isPartNode(el)) {
+            if (previousPart !== null) {
+                // The previous part was empty
+                parts.push(previousPart);
+            }
+            previousPart = {
+                title: getPartTitle(el, $),
+                list: null
+            };
+
+        } else { // It is a list
+            if (previousPart !== null) {
+                previousPart.list = el;
+            } else {
+                previousPart = {
+                    title: '',
+                    list: el
+                };
+            }
+            parts.push(previousPart);
+            previousPart = null;
+        }
+    });
+
+    return parts;
+}
+
+/**
+    True if the element is a part
+
+    @param el
+    @return {Boolean}
+ */
+function isPartNode(el) {
+    return SELECTOR_PART.indexOf(el.name) !== -1;
+}
+
+/**
+    Parse the title of a part element
+
+    @param el
+    @param {cheerio.DOM} $
+    @return {String}
+ */
+function getPartTitle(el, $) {
+    return $(el).text().trim();
+}
+
+/**
     Parse an HTML content into a tree of articles/parts
 
     @param {String} html
@@ -64,21 +131,22 @@ function parseSummary(html) {
     var $ = dom.parse(html);
     var $root = dom.cleanup(dom.root($), $);
 
-    var $lists = findList($root);
-    var parts = [];
-
-    $lists.each(function() {
-        var $list = $(this);
-        var $title = $list.prevUntil(SELECTOR_LIST, 'h2, h3, h4').first();
-
-        parts.push({
-            title: $title.text().trim(),
-            articles: parseList($list, $)
+    var parts = findParts($root, $);
+
+    // Parse each list
+    debugger;
+    var parsedParts = [];
+    var part;
+    for (var i = 0; i < parts.length; ++i) {
+        part = parts[i];
+        parsedParts.push({
+            title: part.title,
+            articles: parseList($(part.list), $)
         });
-    });
+    }
 
     return {
-        parts: parts
+        parts: parsedParts
     };
 }
 
```