summaryrefslogtreecommitdiffstats
path: root/packages/gitbook-html/src/summary.js
diff options
context:
space:
mode:
Diffstat (limited to 'packages/gitbook-html/src/summary.js')
-rwxr-xr-xpackages/gitbook-html/src/summary.js148
1 files changed, 148 insertions, 0 deletions
diff --git a/packages/gitbook-html/src/summary.js b/packages/gitbook-html/src/summary.js
new file mode 100755
index 0000000..1dda344
--- /dev/null
+++ b/packages/gitbook-html/src/summary.js
@@ -0,0 +1,148 @@
+const dom = require('./dom');
+
+const SELECTOR_LIST = 'ol, ul';
+const SELECTOR_LINK = '> a, p > a';
+const SELECTOR_PART = 'h2, h3, h4';
+
+/**
+ * Find a list.
+ * @param {cheerio.Node}
+ * @return {cheerio.Node}
+ */
+function findList($parent) {
+ const $container = $parent.children('.olist');
+ if ($container.length > 0) $parent = $container.first();
+
+ return $parent.children(SELECTOR_LIST);
+}
+
+/**
+ * Parse a ul list and return list of chapters recursvely.
+ * @param {cheerio.Node}
+ * @param {cheerio.DOM}
+ * @return {Array}
+ */
+function parseList($ul, $) {
+ const articles = [];
+
+ $ul.children('li').each(function() {
+ const article = {};
+ const $li = $(this);
+
+ // Get text for the entry
+ const $p = $li.children('p');
+ article.title = ($p.text() || dom.textNode($li.get(0))).trim();
+
+ // Parse link
+ const $a = $li.find(SELECTOR_LINK);
+ if ($a.length > 0) {
+ article.title = $a.first().text();
+ article.ref = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, '');
+ }
+
+ // Sub articles
+ const $sub = findList($li);
+ article.articles = parseList($sub, $);
+
+ if (!article.title) return;
+ articles.push(article);
+ });
+
+ return articles;
+}
+
+/**
+ * Find all parts and their corresponding lists.
+ * @param {cheerio.Node}
+ * @param {cheerio.DOM}
+ * @return {Array<{title: String, list: cheerio.Node}>}
+ */
+function findParts($parent, $) {
+ // Find parts and lists
+ // TODO asciidoc compatibility
+ const partsAndLists = $parent.children(SELECTOR_LIST + ', ' + SELECTOR_PART);
+
+ // Group each part with the list after
+ const parts = [];
+ let previousPart = null;
+
+ partsAndLists.each((i, el) => {
+ if (isPartNode(el)) {
+ if (previousPart !== null) {
+ // The previous part was empty
+ parts.push(previousPart);
+ }
+ previousPart = {
+ title: getPartTitle(el, $),
+ list: null
+ };
+
+ } else { // It is a list
+ if (previousPart !== null) {
+ previousPart.list = el;
+ } else {
+ previousPart = {
+ title: '',
+ list: el
+ };
+ }
+ parts.push(previousPart);
+ previousPart = null;
+ }
+ });
+
+ // Last part might be empty
+ if (previousPart !== null) {
+ parts.push(previousPart);
+ }
+
+ return parts;
+}
+
+/**
+ * True if the element is a part.
+ * @param el
+ * @return {Boolean}
+ */
+function isPartNode(el) {
+ return SELECTOR_PART.indexOf(el.name) !== -1;
+}
+
+/**
+ * Parse the title of a part element.
+ * @param el
+ * @param {cheerio.DOM} $
+ * @return {String}
+ */
+function getPartTitle(el, $) {
+ return $(el).text().trim();
+}
+
+/**
+ * Parse an HTML content into a tree of articles/parts.
+ * @param {String} html
+ * @return {Object}
+ */
+function parseSummary(html) {
+ const $ = dom.parse(html);
+ const $root = dom.cleanup(dom.root($), $);
+
+ const parts = findParts($root, $);
+
+ // Parse each list
+ const parsedParts = [];
+ let part;
+ for (let i = 0; i < parts.length; ++i) {
+ part = parts[i];
+ parsedParts.push({
+ title: part.title,
+ articles: parseList($(part.list), $)
+ });
+ }
+
+ return {
+ parts: parsedParts
+ };
+}
+
+module.exports = parseSummary;