summary refs log tree commit diff stats
path: root/packages/gitbook-html/src
diff options
context:
space:
mode:
Diffstat (limited to 'packages/gitbook-html/src')
-rw-r--r-- packages/gitbook-html/src/dom.js 62
-rwxr-xr-x packages/gitbook-html/src/glossary.js 30
-rwxr-xr-x packages/gitbook-html/src/index.js 50
-rwxr-xr-x packages/gitbook-html/src/langs.js 17
-rwxr-xr-x packages/gitbook-html/src/page.js 12
-rwxr-xr-x packages/gitbook-html/src/readme.js 18
-rwxr-xr-x packages/gitbook-html/src/summary.js 148
-rw-r--r-- packages/gitbook-html/src/totext.js 172
8 files changed, 509 insertions, 0 deletions
diff --git a/packages/gitbook-html/src/dom.js b/packages/gitbook-html/src/dom.js
new file mode 100644
index 0000000..9c5e070
--- /dev/null
+++ b/packages/gitbook-html/src/dom.js
@@ -0,0 +1,62 @@
+const cheerio = require('cheerio');
+
+/**
+ * Parse an HTML string into a queryable cheerio document.
+ * @param {String} html HTML source to load
+ * @return {cheerio.DOM} the loaded `$` function, callable as `$(selector)`
+ */
+function parse(html) {
+    // Return the callable `$` itself, never an `html`/`body` selection: a
+    // selection cannot be invoked as `$(selector)`, which is how the result
+    // is used. `root($)` is the way to reach the main element.
+    return cheerio.load(html);
+}
+
+/**
+ * Pick the main element of a parsed document.
+ * @param {cheerio.DOM} $ loaded document
+ * @return {cheerio.Node} first match of `html, body, > div`, else `$.root()`
+ */
+function root($) {
+    const $main = $('html, body, > div').first();
+    return $main.length === 0 ? $.root() : $main;
+}
+
+/**
+ * Concatenate the text held directly by an element, ignoring nested tags.
+ * Text inside child elements is not included.
+ * @param {cheerio.Node} $el node exposing a `children` array
+ * @return {String} `data` of each direct child of type 'text', joined in order
+ */
+function textNode($el) {
+    // Strict equality avoids coercion; `join` also renders a text node with
+    // missing `data` as '' instead of appending the string "undefined".
+    return $el.children
+        .filter(child => child.type === 'text')
+        .map(child => child.data)
+        .join('');
+}
+
+/**
+ * Unwrap every div found under an element, keeping each div's inner HTML.
+ * @param {cheerio.Node} $el element whose descendant divs are unwrapped (mutated)
+ * @param {cheerio.DOM} $ document function used to wrap each matched div
+ * @return {cheerio.Node} the same `$el` that was passed in
+ */
+function cleanup($el, $) {
+ $el.find('div').each(function() {
+ const $div = $(this);
+ cleanup($div, $); // unwrap the divs nested inside this one first
+
+ $div.replaceWith($div.html()); // swap the div for its own inner HTML
+ });
+
+ return $el;
+}
+
+module.exports = {
+ parse,
+ textNode,
+ root,
+ cleanup
+};
diff --git a/packages/gitbook-html/src/glossary.js b/packages/gitbook-html/src/glossary.js
new file mode 100755
index 0000000..a4269fe
--- /dev/null
+++ b/packages/gitbook-html/src/glossary.js
@@ -0,0 +1,30 @@
+const dom = require('./dom');
+
+/**
+ * Extract glossary entries from HTML: one entry per `h2` heading.
+ *
+ * @param {String} html
+ * @return {Array} entries as `{ name, description }` objects
+ */
+function parseGlossary(html) {
+    const $ = dom.parse(html);
+    const entries = [];
+
+    $('h2').each((index, heading) => {
+        const $title = $(heading);
+        const $sibling = $title.next();
+
+        // The description is the following element when it is a paragraph,
+        // otherwise the first paragraph found inside that element.
+        const $paragraph = ($sibling.is('p') ? $sibling : $sibling.find('p')).first();
+
+        entries.push({
+            name: $title.text(),
+            description: $paragraph.text()
+        });
+    });
+
+    return entries;
+}
+
+module.exports = parseGlossary;
diff --git a/packages/gitbook-html/src/index.js b/packages/gitbook-html/src/index.js
new file mode 100755
index 0000000..9d560f1
--- /dev/null
+++ b/packages/gitbook-html/src/index.js
@@ -0,0 +1,50 @@
+const ToText = require('./totext');
+
+const htmlParser = { // HTML parsers, keyed by the kind of content they read
+ summary: require('./summary'),
+ glossary: require('./glossary'),
+ langs: require('./langs'),
+ readme: require('./readme'),
+ page: require('./page')
+};
+
+// Wrap `fn` so its first argument is converted by `toHTML`; the rest pass through
+function compose(toHTML, fn) {
+    return (...args) => {
+        const [first, ...others] = args;
+        return fn(toHTML(first), ...others);
+    };
+}
+
+/**
+ * Build a GitBook parser on top of a markup-to-HTML converter.
+ * @param {Object} toHTML converter functions
+ *      {Function} [toHTML.inline] applied to the input of `parser.inline`
+ *      {Function} [toHTML.block] applied to the input of every other parser
+ * @param {Object} toText members handed to the ToText constructor
+ * @return {Object} parser functions; summary/langs/glossary also carry `toText`
+ */
+function createParser(toHTML, toText = {}) {
+    const parser = {};
+
+    // Block-level parsers all share the same HTML conversion step
+    ['summary', 'glossary', 'langs', 'readme', 'page'].forEach((name) => {
+        parser[name] = compose(toHTML.block, htmlParser[name]);
+    });
+    // `inline` reuses the page parser but converts with `toHTML.inline`
+    parser.inline = compose(toHTML.inline, htmlParser.page);
+
+    // Serializers turning parsed structures back into markup text
+    const serializer = new ToText(toText);
+    parser.summary.toText = parts => serializer.summary(parts);
+    parser.langs.toText = languages => serializer.langs(languages);
+    parser.glossary.toText = entries => serializer.glossary(entries);
+
+    return parser;
+}
+
+module.exports = createParser({ // default export: parser whose input is already HTML
+ block: html => html, // identity: nothing to convert
+ inline: html => html
+});
+module.exports.createParser = createParser; // also expose the factory itself
diff --git a/packages/gitbook-html/src/langs.js b/packages/gitbook-html/src/langs.js
new file mode 100755
index 0000000..2c3523f
--- /dev/null
+++ b/packages/gitbook-html/src/langs.js
@@ -0,0 +1,17 @@
+const parseSummary = require('./summary');
+
+/**
+ * Parse an HTML content into a list of languages.
+ * @param {String} content HTML, parsed the same way as a summary
+ * @return {Array} articles of the first summary part, or [] when there is none
+ */
+function parseLangs(content) {
+    const { parts } = parseSummary(content);
+    // Languages are the articles of the first part only
+    if (parts.length === 0) {
+        return [];
+    }
+    return parts[0].articles;
+}
+
+module.exports = parseLangs;
diff --git a/packages/gitbook-html/src/page.js b/packages/gitbook-html/src/page.js
new file mode 100755
index 0000000..c4982b5
--- /dev/null
+++ b/packages/gitbook-html/src/page.js
@@ -0,0 +1,12 @@
+/**
+ * Wrap the HTML of a page in the object shape returned for pages.
+ * @param {String} html
+ * @return {Object} `{ content }` holding the HTML unchanged
+ */
+function parsePage(html) {
+    const page = { content: html };
+
+    return page;
+}
+
+module.exports = parsePage;
diff --git a/packages/gitbook-html/src/readme.js b/packages/gitbook-html/src/readme.js
new file mode 100755
index 0000000..18b0e62
--- /dev/null
+++ b/packages/gitbook-html/src/readme.js
@@ -0,0 +1,18 @@
+const dom = require('./dom');
+
+/**
+ * Extract the title and description of a readme from its HTML.
+ *
+ * @param {String} html
+ * @return {Object} `{ title, description }`, both trimmed
+ */
+function parseReadme(html) {
+    const $ = dom.parse(html);
+
+    // Title: text of `h1:first-child`; description: first `div.paragraph` or `p`
+    const title = $('h1:first-child').text().trim();
+    const description = $('div.paragraph,p').first().text().trim();
+    return { title, description };
+}
+
+module.exports = parseReadme;
diff --git a/packages/gitbook-html/src/summary.js b/packages/gitbook-html/src/summary.js
new file mode 100755
index 0000000..1dda344
--- /dev/null
+++ b/packages/gitbook-html/src/summary.js
@@ -0,0 +1,148 @@
+const dom = require('./dom');
+
+const SELECTOR_LIST = 'ol, ul';
+const SELECTOR_LINK = '> a, p > a';
+const SELECTOR_PART = 'h2, h3, h4';
+
+/**
+ * Locate the list(s) directly under a node.
+ * @param {cheerio.Node} $parent
+ * @return {cheerio.Node} `ol`/`ul` children of the first `.olist` child if any, else of `$parent`
+ */
+function findList($parent) {
+    const $wrappers = $parent.children('.olist');
+    const $scope = $wrappers.length > 0 ? $wrappers.first() : $parent;
+
+    return $scope.children(SELECTOR_LIST);
+}
+
+/**
+ * Parse a ul/ol list and return its chapters recursively.
+ * @param {cheerio.Node} $ul list whose direct `li` children are the entries
+ * @param {cheerio.DOM} $ document function used to wrap each `li`
+ * @return {Array} articles as `{ title, ref?, articles }`; untitled entries are skipped
+ */
+function parseList($ul, $) {
+    const articles = [];
+
+    $ul.children('li').each(function() {
+        const article = {};
+        const $li = $(this);
+
+        // Title: text of the paragraph children, else the item's own text nodes
+        const $p = $li.children('p');
+        article.title = ($p.text() || dom.textNode($li.get(0))).trim();
+
+        // A link overrides the title. `attr()` yields undefined when href is
+        // missing, so the ref is only set (and normalized) for a real href.
+        const $a = $li.find(SELECTOR_LINK).first();
+        const href = $a.attr('href');
+        if ($a.length > 0) article.title = $a.text();
+        if (typeof href === 'string') {
+            article.ref = href.replace(/\\/g, '/').replace(/^\/+/, '');
+        }
+
+        // Sub articles
+        article.articles = parseList(findList($li), $);
+
+        if (article.title) articles.push(article);
+    });
+
+    return articles;
+}
+
+/**
+ * Pair each part heading with the list that follows it.
+ * @param {cheerio.Node} $parent element whose direct children are scanned
+ * @param {cheerio.DOM} $ document function, used to read heading titles
+ * @return {Array<{title: String, list: cheerio.Node}>} `list` is null for a part without list
+ */
+function findParts($parent, $) {
+    // Headings and lists among the direct children, in document order
+    // TODO asciidoc compatibility
+    const selector = [SELECTOR_LIST, SELECTOR_PART].join(', ');
+    const nodes = $parent.children(selector);
+
+    const parts = [];
+    // Heading already seen but still waiting for its list
+    let pending = null;
+
+    // Emit the waiting heading (if any) as a part of its own
+    const flushPending = () => {
+        if (pending !== null) {
+            parts.push(pending);
+        }
+        pending = null;
+    };
+
+    nodes.each((index, node) => {
+        if (isPartNode(node)) {
+            // A new heading closes the previous one, which had no list
+            flushPending();
+            pending = {
+                title: getPartTitle(node, $),
+                list: null
+            };
+            return;
+        }
+
+        // A list completes the waiting heading, or forms an untitled part
+        if (pending === null) {
+            pending = { title: '', list: null };
+        }
+        pending.list = node;
+        flushPending();
+    });
+
+    // The last heading may have no list after it
+    flushPending();
+    return parts;
+}
+
+/**
+ * True if the element's tag name is exactly one of the tags in SELECTOR_PART.
+ * @param el node exposing its tag name as `name`
+ * @return {Boolean} false for names that are mere substrings of the selector ('', 'h', ...)
+ */
+function isPartNode(el) {
+    return SELECTOR_PART.split(/\s*,\s*/).indexOf(el.name) !== -1;
+}
+
+/**
+ * Read the text of a part heading, without surrounding whitespace.
+ * @param el heading node
+ * @param {cheerio.DOM} $ document function used to wrap `el`
+ * @return {String}
+ */
+function getPartTitle(el, $) {
+    return $(el).text().trim();
+}
+
+/**
+ * Parse an HTML content into a tree of parts and articles.
+ * @param {String} html
+ * @return {Object} `{ parts }`, each part being `{ title, articles }`
+ */
+function parseSummary(html) {
+    const $ = dom.parse(html);
+
+    // Work on the main element, once its divs are unwrapped
+    const $main = dom.root($);
+    const $content = dom.cleanup($main, $);
+
+    // Replace the raw list of every part by its parsed articles
+    // (`part.list` is null for a heading that had no list)
+    const parts = findParts($content, $);
+    const parsedParts = parts.map((part) => {
+        return {
+            title: part.title,
+            articles: parseList($(part.list), $)
+        };
+    });
+
+    return {
+        parts: parsedParts
+    };
+}
+
+module.exports = parseSummary;
diff --git a/packages/gitbook-html/src/totext.js b/packages/gitbook-html/src/totext.js
new file mode 100644
index 0000000..6e71cd3
--- /dev/null
+++ b/packages/gitbook-html/src/totext.js
@@ -0,0 +1,172 @@
+
+/*
+    Base serializer turning GitBook metadata back into markup text.
+    gitbook-markdown and gitbook-asciidoc extend this class to generate
+    markdown/asciidoc; the callbacks defined here emit HTML.
+*/
+
+class ToText {
+    // `markup` members are copied onto the instance, replacing the defaults below
+    constructor(markup) {
+        Object.assign(this, markup);
+    }
+
+    // Break line
+    onBL() {
+        return '\n';
+    }
+
+    // Text is emitted as received
+    onText(text) {
+        return text;
+    }
+
+    // Horizontal rule
+    onHR() {
+        return '<hr />';
+    }
+
+    // ---- TITLES
+
+    onTitleStart(level) {
+        return `<h${level}>`;
+    }
+    onTitleEnd(level) {
+        return `</h${level}>`;
+    }
+
+    // ---- PARAGRAPHS / SECTIONS
+
+    onParagraphStart() {
+        return '<p>';
+    }
+    onParagraphEnd() {
+        return '</p>';
+    }
+
+    // A section break is a plain line break by default
+    onSection() {
+        return this.onBL();
+    }
+
+    // ---- LINKS
+
+    onLinkStart(href) {
+        return `<a href="${href}">`;
+    }
+    onLinkEnd(href) {
+        return '</a>';
+    }
+
+    // ---- LISTS (4 spaces of indent per level, items one step deeper)
+
+    onListItemStart(level) {
+        const indent = this._spaces((level + 1) * 4);
+        return indent + '<li>';
+    }
+    onListItemEnd(level) {
+        const indent = this._spaces((level + 1) * 4);
+        return indent + '</li>' + this.onBL();
+    }
+    onListStart(level) {
+        const indent = this._spaces(level * 4);
+        return indent + '<ul>' + this.onBL();
+    }
+    onListEnd(level) {
+        const indent = this._spaces(level * 4);
+        return indent + '</ul>' + this.onBL();
+    }
+
+    // ------ LANGS
+
+    // Serialize languages (articles `{ title, ref, articles }`) under a title
+    langs(languages) {
+        const heading = this.onTitleStart(1) + this.onText('Languages') + this.onTitleEnd(1);
+        const section = this.onSection();
+
+        return `${heading}${section}${this._summaryArticles(languages)}`;
+    }
+
+    // ------ GLOSSARY
+
+    // Serialize glossary entries (`{ name, description }`) under a title
+    glossary(glossary) {
+        const heading = this.onTitleStart(1) + this.onText('Glossary') + this.onTitleEnd(1);
+        let output = `${heading}${this.onSection()}`;
+
+        glossary.forEach((entry) => {
+            const title = this.onTitleStart(2) + this.onText(entry.name) + this.onTitleEnd(2);
+            const opening = this.onParagraphStart();
+            const body = this.onText(entry.description);
+            const closing = this.onParagraphEnd();
+            output += `${title}${opening}${body}${closing}${this.onSection()}`;
+        });
+
+        return output;
+    }
+
+    // ------ SUMMARY
+
+    // One article as a list item: optional link, title, then nested articles
+    _summaryArticle(article, level) {
+        const opening = this.onListItemStart(level);
+        // Only an article carrying a ref is wrapped in a link
+        const label = article.ref
+            ? `${this.onLinkStart(article.ref)}${this.onText(article.title)}${this.onLinkEnd(article.ref)}`
+            : `${this.onText(article.title)}`;
+        const lineEnd = this.onBL();
+
+        const hasChildren = article.articles && article.articles.length > 0;
+        const nested = hasChildren ? this._summaryArticles(article.articles, level + 1) : '';
+
+        return `${opening}${label}${lineEnd}${nested}${this.onListItemEnd(level)}`;
+    }
+    // A whole list of articles; `level` is the nesting depth (0 when omitted)
+    _summaryArticles(articles, level) {
+        const depth = level || 0;
+        let list = `${this.onListStart(depth)}`;
+
+        articles.forEach((article) => {
+            list += this._summaryArticle(article, depth);
+        });
+
+        return list + this.onListEnd(depth);
+    }
+    // A part: its title as level-2 heading when it has one, then its articles
+    _summaryPart(part) {
+        const heading = part.title
+            ? this.onTitleStart(2) + this.onText(part.title) + this.onTitleEnd(2)
+            : '';
+
+        return `${heading}${this._summaryArticles(part.articles)}`;
+    }
+
+    // The full summary: a title followed by every part
+    summary(summary) {
+        const heading = this.onTitleStart(1) + this.onText('Summary') + this.onTitleEnd(1);
+        let output = `${heading}${this.onSection()}`;
+
+        summary.parts.forEach((part, index) => {
+            const following = summary.parts[index + 1];
+            output += this._summaryPart(part);
+
+            // A rule is emitted before an untitled part, a section break otherwise
+            if (following && !following.title) {
+                output += this.onBL() + this.onHR() + this.onBL();
+            } else {
+                output += this.onSection();
+            }
+        });
+
+        return output;
+    }
+
+    // ---- Utilities
+
+    // Repeat `s` (a single space when omitted or empty) `n` times
+    _spaces(n, s) {
+        return new Array(n + 1).join(s || ' ');
+    }
+}
+
+module.exports = ToText;