diff options
Diffstat (limited to 'packages/gitbook-html')
26 files changed, 964 insertions, 446 deletions
diff --git a/packages/gitbook-html/.gitignore b/packages/gitbook-html/.gitignore deleted file mode 100644 index 9550e4f..0000000 --- a/packages/gitbook-html/.gitignore +++ /dev/null @@ -1,28 +0,0 @@ -# Logs -logs -*.log - -# Runtime data -pids -*.pid -*.seed - -# Directory for instrumented libs generated by jscoverage/JSCover -lib-cov - -# Coverage directory used by tools like istanbul -coverage - -# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) -.grunt - -# Compiled binary addons (http://nodejs.org/api/addons.html) -build/Release - -# Dependency directory -# Deployed apps should consider commenting this line out: -# see https://npmjs.org/doc/faq.html#Should-I-check-my-node_modules-folder-into-git -node_modules - -# vim swapfile -*.swp diff --git a/packages/gitbook-html/.npmignore b/packages/gitbook-html/.npmignore new file mode 100644 index 0000000..85de9cf --- /dev/null +++ b/packages/gitbook-html/.npmignore @@ -0,0 +1 @@ +src diff --git a/packages/gitbook-html/.travis.yml b/packages/gitbook-html/.travis.yml deleted file mode 100644 index 603536b..0000000 --- a/packages/gitbook-html/.travis.yml +++ /dev/null @@ -1,4 +0,0 @@ -language: node_js -node_js: - - "stable" - - "0.10"
\ No newline at end of file diff --git a/packages/gitbook-html/README.md b/packages/gitbook-html/README.md index bd9f006..aa6374b 100644 --- a/packages/gitbook-html/README.md +++ b/packages/gitbook-html/README.md @@ -1,9 +1,3 @@ -# GitBook HTML Parser - -[](https://travis-ci.org/GitbookIO/gitbook-html) -[](http://badge.fury.io/js/gitbook-html) - -This node module parse HTML for gitbook (SUMMARY.html, README.html). - -It is a dependency of **gitbook-asciidoc** and **gitbook-markdown**. +# `gitbook-html` +> HTML parser interface for GitBook diff --git a/packages/gitbook-html/lib/dom.js b/packages/gitbook-html/lib/dom.js index 819ced0..96d370e 100644 --- a/packages/gitbook-html/lib/dom.js +++ b/packages/gitbook-html/lib/dom.js @@ -1,52 +1,49 @@ -var _ = require('lodash'); +'use strict'; + var cheerio = require('cheerio'); /** - Parse an HTML string and return its content - - @param {String} - @return {cheerio.DOM} -*/ + * Parse an HTML string and return its content. + * @param {String} + * @return {cheerio.DOM} + */ function parse(html) { var $ = cheerio.load(html); var $el = $('html, body').first(); - return $el.length > 0? $el : $; + return $el.length > 0 ? $el : $; } /** - Return main element for a DOM - - @param {cheerio.DOM} - @return {cheerio.Node} -*/ + * Return main element for a DOM. + * @param {cheerio.DOM} + * @return {cheerio.Node} + */ function root($) { var $el = $('html, body, > div').first(); - return $el.length > 0? $el : $.root(); + return $el.length > 0 ? $el : $.root(); } /** - Return text node of an element - - @param {cheerio.Node} - @return {String} -*/ + * Return text node of an element. + * @param {cheerio.Node} + * @return {String} + */ function textNode($el) { - return _.reduce($el.children, function(text, e) { + return $el.children.reduce(function (text, e) { if (e.type == 'text') text += e.data; return text; }, ''); } /** - Cleanup a DOM by removing all useless divs - - @param {cheerio.Node} - @param {cheerio.DOM} - @return {cheerio.Node} -*/ + * Cleanup a DOM by removing all useless divs. + * @param {cheerio.Node} + * @param {cheerio.DOM} + * @return {cheerio.Node} + */ function cleanup($el, $) { - $el.find('div').each(function() { + $el.find('div').each(function () { var $div = $(this); cleanup($div, $); @@ -62,3 +59,4 @@ module.exports = { root: root, cleanup: cleanup }; +//# sourceMappingURL=dom.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/glossary.js b/packages/gitbook-html/lib/glossary.js index 26787ab..c8c5bf5 100755 --- a/packages/gitbook-html/lib/glossary.js +++ b/packages/gitbook-html/lib/glossary.js @@ -1,21 +1,22 @@ -var _ = require('lodash'); +'use strict'; + var dom = require('./dom'); /** - Parse an HTML content into a list of glossary entry - - @param {String} html - @return {Array} -*/ + * Parse an HTML content into a list of glossary entry. + * + * @param {String} html + * @return {Array} entries + */ function parseGlossary(html) { var $ = dom.parse(html); var entries = []; - $('h2').each(function() { + $('h2').each(function () { var $heading = $(this); - var $next = $heading.next() - var $p = $next.is('p')? $next.first() : $next.find('p').first(); + var $next = $heading.next(); + var $p = $next.is('p') ? $next.first() : $next.find('p').first(); var entry = {}; @@ -29,3 +30,4 @@ function parseGlossary(html) { } module.exports = parseGlossary; +//# sourceMappingURL=glossary.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/index.js b/packages/gitbook-html/lib/index.js index 2658914..b6fa18c 100755 --- a/packages/gitbook-html/lib/index.js +++ b/packages/gitbook-html/lib/index.js @@ -1,4 +1,5 @@ -var _ = require('lodash'); +'use strict'; + var ToText = require('./totext'); var htmlParser = { @@ -11,22 +12,26 @@ var htmlParser = { // Compose a function with a transform function for the first argument only function compose(toHTML, fn) { - return function() { - var args = _.toArray(arguments); - args[0] = toHTML(args[0]); + return function () { + for (var _len = arguments.length, args = Array(_len), _key = 0; _key < _len; _key++) { + args[_key] = arguments[_key]; + } - return fn.apply(null, args); - } + args[0] = toHTML(args[0]); + return fn.apply(undefined, args); + }; } -// Create a GitBook parser from an HTML converter -function createParser(toHTML, toText) { - if (_.isFunction(toHTML)) { - toHTML = { - inline: toHTML, - block: toHTML - }; - } +/** + * Create a GitBook parser from an HTML converter. + * @param {Object} toHTML + * {Function} [toHTML.inline] + * {Function} [toHTML.block] + * @param {Object} toText + * @return {[type]} [description] + */ +function createParser(toHTML) { + var toText = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; var parser = { summary: compose(toHTML.block, htmlParser.summary), @@ -38,12 +43,27 @@ function createParser(toHTML, toText) { }; var _toText = new ToText(toText); - parser.summary.toText =_toText.summary; - parser.langs.toText =_toText.langs; - parser.glossary.toText =_toText.glossary; + + parser.summary.toText = function (summary) { + return _toText.summary(summary); + }; + parser.langs.toText = function (langs) { + return _toText.langs(langs); + }; + parser.glossary.toText = function (glossary) { + return _toText.glossary(glossary); + }; return parser; } -module.exports = createParser(_.identity); +module.exports = createParser({ + block: function block(html) { + return html; + }, + inline: function inline(html) { + return html; + } +}); module.exports.createParser = createParser; +//# sourceMappingURL=index.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/langs.js b/packages/gitbook-html/lib/langs.js index a06d3ee..f799b50 100755 --- a/packages/gitbook-html/lib/langs.js +++ b/packages/gitbook-html/lib/langs.js @@ -1,12 +1,12 @@ -var _ = require('lodash'); +'use strict'; + var parseSummary = require('./summary'); /** - Parse an HTML content into a list of language - - @param {String} html - @return {Array} -*/ + * Parse an HTML content into a list of language. + * @param {String} html + * @return {Array} + */ function parseLangs(content) { var parts = parseSummary(content).parts; if (parts.length > 0) { @@ -17,4 +17,4 @@ function parseLangs(content) { } module.exports = parseLangs; - +//# sourceMappingURL=langs.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/page.js b/packages/gitbook-html/lib/page.js index 56d8984..25fd637 100755 --- a/packages/gitbook-html/lib/page.js +++ b/packages/gitbook-html/lib/page.js @@ -1,12 +1,10 @@ -var Q = require('q'); -var _ = require('lodash'); +"use strict"; /** - Parse content of a page - - @param {String} html - @return {Object} -*/ + * Parse content of a page. + * @param {String} html + * @return {Object} + */ function parsePage(html) { return { content: html @@ -14,3 +12,4 @@ function parsePage(html) { } module.exports = parsePage; +//# sourceMappingURL=page.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/readme.js b/packages/gitbook-html/lib/readme.js index 34e447e..771b525 100755 --- a/packages/gitbook-html/lib/readme.js +++ b/packages/gitbook-html/lib/readme.js @@ -1,12 +1,13 @@ -var _ = require('lodash'); +'use strict'; + var dom = require('./dom'); /** - Parse an HTML content into metadata about a readme - - @param {String} html - @return {Object} -*/ + * Parse an HTML content into metadata about a readme + * + * @param {String} html + * @return {Object} + */ function parseReadme(html) { var $ = dom.parse(html); @@ -16,6 +17,5 @@ function parseReadme(html) { }; } - -// Exports module.exports = parseReadme; +//# sourceMappingURL=readme.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/summary.js b/packages/gitbook-html/lib/summary.js index 4b263c9..a7d05a3 100755 --- a/packages/gitbook-html/lib/summary.js +++ b/packages/gitbook-html/lib/summary.js @@ -1,18 +1,16 @@ -var _ = require('lodash'); +'use strict'; + var dom = require('./dom'); var SELECTOR_LIST = 'ol, ul'; var SELECTOR_LINK = '> a, p > a'; var SELECTOR_PART = 'h2, h3, h4'; -var BL = '\n'; - /** - Find a list - - @param {cheerio.Node} - @return {cheerio.Node} -*/ + * Find a list. + * @param {cheerio.Node} + * @return {cheerio.Node} + */ function findList($parent) { var $container = $parent.children('.olist'); if ($container.length > 0) $parent = $container.first(); @@ -21,22 +19,21 @@ function findList($parent) { } /** - Parse a ul list and return list of chapters recursvely - - @param {cheerio.Node} - @param {cheerio.DOM} - @return {Array} -*/ + * Parse a ul list and return list of chapters recursvely. + * @param {cheerio.Node} + * @param {cheerio.DOM} + * @return {Array} + */ function parseList($ul, $) { var articles = []; - $ul.children('li').each(function() { + $ul.children('li').each(function () { var article = {}; var $li = $(this); // Get text for the entry var $p = $li.children('p'); - article.title = ($p.text() || dom.textNode($li.get(0))).trim(); + article.title = ($p.text() || dom.textNode($li.get(0))).trim(); // Parse link var $a = $li.find(SELECTOR_LINK); @@ -57,11 +54,10 @@ function parseList($ul, $) { } /** - Find all parts and their corresponding lists - - @param {cheerio.Node} - @param {cheerio.DOM} - @return {Array<{title: String, list: cheerio.Node}>} + * Find all parts and their corresponding lists. + * @param {cheerio.Node} + * @param {cheerio.DOM} + * @return {Array<{title: String, list: cheerio.Node}>} */ function findParts($parent, $) { // Find parts and lists @@ -82,8 +78,8 @@ function findParts($parent, $) { title: getPartTitle(el, $), list: null }; - - } else { // It is a list + } else { + // It is a list if (previousPart !== null) { previousPart.list = el; } else { @@ -106,32 +102,29 @@ function findParts($parent, $) { } /** - True if the element is a part - - @param el - @return {Boolean} - */ + * True if the element is a part. + * @param el + * @return {Boolean} + */ function isPartNode(el) { return SELECTOR_PART.indexOf(el.name) !== -1; } /** - Parse the title of a part element - - @param el - @param {cheerio.DOM} $ - @return {String} + * Parse the title of a part element. + * @param el + * @param {cheerio.DOM} $ + * @return {String} */ function getPartTitle(el, $) { return $(el).text().trim(); } /** - Parse an HTML content into a tree of articles/parts - - @param {String} html - @return {Object} -*/ + * Parse an HTML content into a tree of articles/parts. + * @param {String} html + * @return {Object} + */ function parseSummary(html) { var $ = dom.parse(html); var $root = dom.cleanup(dom.root($), $); @@ -140,7 +133,7 @@ function parseSummary(html) { // Parse each list var parsedParts = []; - var part; + var part = void 0; for (var i = 0; i < parts.length; ++i) { part = parts[i]; parsedParts.push({ @@ -155,3 +148,4 @@ function parseSummary(html) { } module.exports = parseSummary; +//# sourceMappingURL=summary.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/lib/totext.js b/packages/gitbook-html/lib/totext.js index 368d62e..0baa48a 100644 --- a/packages/gitbook-html/lib/totext.js +++ b/packages/gitbook-html/lib/totext.js @@ -1,181 +1,230 @@ -var _ = require('lodash'); +'use strict'; + +var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); + +function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } /* This class is extended by gitbook-markdown and gitbook-asciidoc to generate back markdown/asciidoc from GitBook metadata. */ +var ToText = function () { + function ToText(markup) { + _classCallCheck(this, ToText); -function ToText(markup) { - if (!(this instanceof ToText)) { - return new ToText(markup); + Object.assign(this, markup); } - _.extend(this, markup || {}); - _.bindAll(this, _.functionsIn(this)); -}; - -// Break line -ToText.prototype.onBL = function() { - return '\n'; -}; - -ToText.prototype.onText = function(text) { - return text; -}; - -ToText.prototype.onHR = function() { - return '<hr />'; -}; - -// ---- TITLES - -ToText.prototype.onTitleStart = function(level) { - return '<h'+level+'>'; -}; -ToText.prototype.onTitleEnd = function(level) { - return '</h'+level+'>'; -}; - -// ---- PARAGRAPHS / SECTIONS -ToText.prototype.onParagraphStart = function() { - return '<p>'; -}; -ToText.prototype.onParagraphEnd = function() { - return '</p>'; -}; - - -ToText.prototype.onSection = function() { - return this.onBL(); -}; - -// ---- LINKS -ToText.prototype.onLinkStart = function(href) { - return '<a href="' + href + '">'; -}; -ToText.prototype.onLinkEnd = function(href) { - return '</a>'; -}; - -// ---- LISTS -ToText.prototype.onListItemStart = function(level) { - return this._spaces((level + 1) * 4) + '<li>'; -}; -ToText.prototype.onListItemEnd = function(level) { - return this._spaces((level + 1) * 4) + '</li>' + this.onBL(); -}; -ToText.prototype.onListStart = function(level) { - return this._spaces(level * 4) + '<ul>' + this.onBL(); -}; -ToText.prototype.onListEnd = function(level) { - return this._spaces(level * 4) + '</ul>' + this.onBL(); -}; - -// ------ LANGS - -ToText.prototype.langs = function(languages) { - var content = ''; - content += this.onTitleStart(1) + this.onText('Languages') + this.onTitleEnd(1); - content += this.onSection(); - - content += this._summaryArticles(languages); - - return content; -}; - -// ------ GLOSSARY - -ToText.prototype.glossary = function(glossary) { - var that = this; - var content = ''; - - content += that.onTitleStart(1) + that.onText('Glossary') + that.onTitleEnd(1); - content += that.onSection(); - - _.each(glossary, function(entry) { - content += that.onTitleStart(2) + that.onText(entry.name) + that.onTitleEnd(2); - content += that.onParagraphStart(); - content += that.onText(entry.description); - content += that.onParagraphEnd(); - content += that.onSection(); - }); - - return content; -}; - -// ------ SUMMARY - -ToText.prototype._summaryArticle = function(article, level) { - var content = ''; - - content += this.onListItemStart(level); - - if (article.ref) content += this.onLinkStart(article.ref) - content += this.onText(article.title) - if (article.ref) content += this.onLinkEnd(article.ref); - content += this.onBL(); - - if (article.articles && article.articles.length > 0) { - content += this._summaryArticles(article.articles, level + 1); - } + // Break line - content += this.onListItemEnd(level); - return content; -}; -ToText.prototype._summaryArticles = function(articles, level) { - var that = this; - var content = ''; + _createClass(ToText, [{ + key: 'onBL', + value: function onBL() { + return '\n'; + } + }, { + key: 'onText', + value: function onText(text) { + return text; + } + }, { + key: 'onHR', + value: function onHR() { + return '<hr />'; + } - level = level || 0; + // ---- TITLES - content += that.onListStart(level); - _.each(articles, function(article) { - content += that._summaryArticle(article, level); - }); - content += that.onListEnd(level); + }, { + key: 'onTitleStart', + value: function onTitleStart(level) { + return '<h' + level + '>'; + } + }, { + key: 'onTitleEnd', + value: function onTitleEnd(level) { + return '</h' + level + '>'; + } - return content; -}; -ToText.prototype._summaryPart = function(part) { - var content = ''; + // ---- PARAGRAPHS / SECTIONS - if (part.title) content += this.onTitleStart(2) + this.onText(part.title) + this.onTitleEnd(2); + }, { + key: 'onParagraphStart', + value: function onParagraphStart() { + return '<p>'; + } + }, { + key: 'onParagraphEnd', + value: function onParagraphEnd() { + return '</p>'; + } + }, { + key: 'onSection', + value: function onSection() { + return this.onBL(); + } - content += this._summaryArticles(part.articles); + // ---- LINKS - return content; -}; + }, { + key: 'onLinkStart', + value: function onLinkStart(href) { + return '<a href="' + href + '">'; + } + }, { + key: 'onLinkEnd', + value: function onLinkEnd(href) { + return '</a>'; + } -ToText.prototype.summary = function(summary) { - var that = this; - var content = ''; + // ---- LISTS - content += that.onTitleStart(1) + that.onText('Summary') + that.onTitleEnd(1); - content += that.onSection(); + }, { + key: 'onListItemStart', + value: function onListItemStart(level) { + return this._spaces((level + 1) * 4) + '<li>'; + } + }, { + key: 'onListItemEnd', + value: function onListItemEnd(level) { + return this._spaces((level + 1) * 4) + '</li>' + this.onBL(); + } + }, { + key: 'onListStart', + value: function onListStart(level) { + return this._spaces(level * 4) + '<ul>' + this.onBL(); + } + }, { + key: 'onListEnd', + value: function onListEnd(level) { + return this._spaces(level * 4) + '</ul>' + this.onBL(); + } + + // ------ LANGS - _.each(summary.parts, function(part, i) { - var next = summary.parts[i + 1]; + }, { + key: 'langs', + value: function langs(languages) { + var content = ''; + content += this.onTitleStart(1) + this.onText('Languages') + this.onTitleEnd(1); + content += this.onSection(); - content += that._summaryPart(part); + content += this._summaryArticles(languages); - if (next && !next.title) { - content += that.onBL() + that.onHR() + that.onBL(); - } else { - content += that.onSection(); + return content; } - }); + // ------ GLOSSARY - return content; -}; + }, { + key: 'glossary', + value: function glossary(_glossary) { + var _this = this; -// ---- Utilities + var content = ''; -ToText.prototype._spaces = function(n, s) { - return Array(n + 1).join(s || ' '); -} + content += this.onTitleStart(1) + this.onText('Glossary') + this.onTitleEnd(1); + content += this.onSection(); -module.exports = ToText; + _glossary.forEach(function (entry) { + content += _this.onTitleStart(2) + _this.onText(entry.name) + _this.onTitleEnd(2); + content += _this.onParagraphStart(); + content += _this.onText(entry.description); + content += _this.onParagraphEnd(); + content += _this.onSection(); + }); + + return content; + } + + // ------ SUMMARY + + }, { + key: '_summaryArticle', + value: function _summaryArticle(article, level) { + var content = ''; + + content += this.onListItemStart(level); + + if (article.ref) content += this.onLinkStart(article.ref); + content += this.onText(article.title); + if (article.ref) content += this.onLinkEnd(article.ref); + content += this.onBL(); + if (article.articles && article.articles.length > 0) { + content += this._summaryArticles(article.articles, level + 1); + } + + content += this.onListItemEnd(level); + + return content; + } + }, { + key: '_summaryArticles', + value: function _summaryArticles(articles, level) { + var _this2 = this; + + var content = ''; + + level = level || 0; + + content += this.onListStart(level); + articles.forEach(function (article) { + content += _this2._summaryArticle(article, level); + }); + content += this.onListEnd(level); + + return content; + } + }, { + key: '_summaryPart', + value: function _summaryPart(part) { + var content = ''; + + if (part.title) content += this.onTitleStart(2) + this.onText(part.title) + this.onTitleEnd(2); + + content += this._summaryArticles(part.articles); + + return content; + } + }, { + key: 'summary', + value: function summary(_summary) { + var _this3 = this; + + var content = ''; + + content += this.onTitleStart(1) + this.onText('Summary') + this.onTitleEnd(1); + content += this.onSection(); + + _summary.parts.forEach(function (part, i) { + var next = _summary.parts[i + 1]; + + content += _this3._summaryPart(part); + + if (next && !next.title) { + content += _this3.onBL() + _this3.onHR() + _this3.onBL(); + } else { + content += _this3.onSection(); + } + }); + + return content; + } + + // ---- Utilities + + }, { + key: '_spaces', + value: function _spaces(n, s) { + return Array(n + 1).join(s || ' '); + } + }]); + + return ToText; +}(); + +module.exports = ToText; +//# sourceMappingURL=totext.js.map
\ No newline at end of file diff --git a/packages/gitbook-html/package.json b/packages/gitbook-html/package.json index b7c3513..2346220 100644 --- a/packages/gitbook-html/package.json +++ b/packages/gitbook-html/package.json @@ -1,37 +1,23 @@ { - "name": "gitbook-html", - "version": "1.3.3", - "homepage": "https://www.gitbook.com", - "description": "Parse HTML content for gitbook", - "main": "lib/index.js", - "dependencies": { - "q": "^1.1.2", - "lodash": "^4.13.1", - "cheerio": "^0.20.0 && >=0.20.0" - }, - "devDependencies": { - "mocha": "^2.3.2" - }, - "scripts": { - "test": "export TESTING=true; mocha --reporter spec --bail" - }, - "repository": { - "type": "git", - "url": "https://github.com/GitbookIO/gitbook-html.git" - }, - "author": "FriendCode Inc. <contact@gitbook.com>", - "license": "Apache-2.0", - "bugs": { - "url": "https://github.com/GitbookIO/gitbook-html/issues" - }, - "contributors": [ - { - "name": "Aaron O'Mullan", - "email": "aaron@gitbook.com" - }, - { - "name": "Samy Pessé", - "email": "samy@gitbook.com" - } - ] -}
\ No newline at end of file + "name": "gitbook-html", + "version": "4.0.0", + "homepage": "https://www.gitbook.com", + "description": "HTML parser interface for GitBook", + "main": "lib/index.js", + "dependencies": { + "cheerio": "^0.20.0 && >=0.20.0" + }, + "scripts": { + "test": "mocha --reporter spec --bail", + "prepublish": "rm -rf lib/ && babel -d lib/ src/ --source-maps" + }, + "repository": { + "type": "git", + "url": "https://github.com/GitbookIO/gitbook-html.git" + }, + "author": "FriendCode Inc. <contact@gitbook.com>", + "license": "Apache-2.0", + "bugs": { + "url": "https://github.com/GitbookIO/gitbook-html/issues" + } +} diff --git a/packages/gitbook-html/src/dom.js b/packages/gitbook-html/src/dom.js new file mode 100644 index 0000000..9c5e070 --- /dev/null +++ b/packages/gitbook-html/src/dom.js @@ -0,0 +1,62 @@ +const cheerio = require('cheerio'); + +/** + * Parse an HTML string and return its content. + * @param {String} + * @return {cheerio.DOM} + */ +function parse(html) { + const $ = cheerio.load(html); + const $el = $('html, body').first(); + + return $el.length > 0 ? $el : $; +} + +/** + * Return main element for a DOM. + * @param {cheerio.DOM} + * @return {cheerio.Node} + */ +function root($) { + const $el = $('html, body, > div').first(); + return $el.length > 0 ? $el : $.root(); +} + +/** + * Return text node of an element. + * @param {cheerio.Node} + * @return {String} + */ +function textNode($el) { + return $el.children.reduce( + (text, e) => { + if (e.type == 'text') text += e.data; + return text; + }, + '' + ); +} + +/** + * Cleanup a DOM by removing all useless divs. + * @param {cheerio.Node} + * @param {cheerio.DOM} + * @return {cheerio.Node} + */ +function cleanup($el, $) { + $el.find('div').each(function() { + const $div = $(this); + cleanup($div, $); + + $div.replaceWith($div.html()); + }); + + return $el; +} + +module.exports = { + parse, + textNode, + root, + cleanup +}; diff --git a/packages/gitbook-html/src/glossary.js b/packages/gitbook-html/src/glossary.js new file mode 100755 index 0000000..a4269fe --- /dev/null +++ b/packages/gitbook-html/src/glossary.js @@ -0,0 +1,30 @@ +const dom = require('./dom'); + +/** + * Parse an HTML content into a list of glossary entry. + * + * @param {String} html + * @return {Array} entries + */ +function parseGlossary(html) { + const $ = dom.parse(html); + + const entries = []; + + $('h2').each(function() { + const $heading = $(this); + const $next = $heading.next(); + const $p = $next.is('p') ? $next.first() : $next.find('p').first(); + + const entry = {}; + + entry.name = $heading.text(); + entry.description = $p.text(); + + entries.push(entry); + }); + + return entries; +} + +module.exports = parseGlossary; diff --git a/packages/gitbook-html/src/index.js b/packages/gitbook-html/src/index.js new file mode 100755 index 0000000..9d560f1 --- /dev/null +++ b/packages/gitbook-html/src/index.js @@ -0,0 +1,50 @@ +const ToText = require('./totext'); + +const htmlParser = { + summary: require('./summary'), + glossary: require('./glossary'), + langs: require('./langs'), + readme: require('./readme'), + page: require('./page') +}; + +// Compose a function with a transform function for the first argument only +function compose(toHTML, fn) { + return (...args) => { + args[0] = toHTML(args[0]); + return fn(...args); + }; +} + +/** + * Create a GitBook parser from an HTML converter. + * @param {Object} toHTML + * {Function} [toHTML.inline] + * {Function} [toHTML.block] + * @param {Object} toText + * @return {[type]} [description] + */ +function createParser(toHTML, toText = {}) { + const parser = { + summary: compose(toHTML.block, htmlParser.summary), + glossary: compose(toHTML.block, htmlParser.glossary), + langs: compose(toHTML.block, htmlParser.langs), + readme: compose(toHTML.block, htmlParser.readme), + page: compose(toHTML.block, htmlParser.page), + inline: compose(toHTML.inline, htmlParser.page) + }; + + const _toText = new ToText(toText); + + parser.summary.toText = summary => _toText.summary(summary); + parser.langs.toText = langs => _toText.langs(langs); + parser.glossary.toText = glossary => _toText.glossary(glossary); + + return parser; +} + +module.exports = createParser({ + block: html => html, + inline: html => html +}); +module.exports.createParser = createParser; diff --git a/packages/gitbook-html/src/langs.js b/packages/gitbook-html/src/langs.js new file mode 100755 index 0000000..2c3523f --- /dev/null +++ b/packages/gitbook-html/src/langs.js @@ -0,0 +1,17 @@ +const parseSummary = require('./summary'); + +/** + * Parse an HTML content into a list of language. + * @param {String} html + * @return {Array} + */ +function parseLangs(content) { + const parts = parseSummary(content).parts; + if (parts.length > 0) { + return parts[0].articles; + } + + return []; +} + +module.exports = parseLangs; diff --git a/packages/gitbook-html/src/page.js b/packages/gitbook-html/src/page.js new file mode 100755 index 0000000..c4982b5 --- /dev/null +++ b/packages/gitbook-html/src/page.js @@ -0,0 +1,12 @@ +/** + * Parse content of a page. + * @param {String} html + * @return {Object} + */ +function parsePage(html) { + return { + content: html + }; +} + +module.exports = parsePage; diff --git a/packages/gitbook-html/src/readme.js b/packages/gitbook-html/src/readme.js new file mode 100755 index 0000000..18b0e62 --- /dev/null +++ b/packages/gitbook-html/src/readme.js @@ -0,0 +1,18 @@ +const dom = require('./dom'); + +/** + * Parse an HTML content into metadata about a readme + * + * @param {String} html + * @return {Object} + */ +function parseReadme(html) { + const $ = dom.parse(html); + + return { + title: $('h1:first-child').text().trim(), + description: $('div.paragraph,p').first().text().trim() + }; +} + +module.exports = parseReadme; diff --git a/packages/gitbook-html/src/summary.js b/packages/gitbook-html/src/summary.js new file mode 100755 index 0000000..1dda344 --- /dev/null +++ b/packages/gitbook-html/src/summary.js @@ -0,0 +1,148 @@ +const dom = require('./dom'); + +const SELECTOR_LIST = 'ol, ul'; +const SELECTOR_LINK = '> a, p > a'; +const SELECTOR_PART = 'h2, h3, h4'; + +/** + * Find a list. + * @param {cheerio.Node} + * @return {cheerio.Node} + */ +function findList($parent) { + const $container = $parent.children('.olist'); + if ($container.length > 0) $parent = $container.first(); + + return $parent.children(SELECTOR_LIST); +} + +/** + * Parse a ul list and return list of chapters recursvely. + * @param {cheerio.Node} + * @param {cheerio.DOM} + * @return {Array} + */ +function parseList($ul, $) { + const articles = []; + + $ul.children('li').each(function() { + const article = {}; + const $li = $(this); + + // Get text for the entry + const $p = $li.children('p'); + article.title = ($p.text() || dom.textNode($li.get(0))).trim(); + + // Parse link + const $a = $li.find(SELECTOR_LINK); + if ($a.length > 0) { + article.title = $a.first().text(); + article.ref = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, ''); + } + + // Sub articles + const $sub = findList($li); + article.articles = parseList($sub, $); + + if (!article.title) return; + articles.push(article); + }); + + return articles; +} + +/** + * Find all parts and their corresponding lists. + * @param {cheerio.Node} + * @param {cheerio.DOM} + * @return {Array<{title: String, list: cheerio.Node}>} + */ +function findParts($parent, $) { + // Find parts and lists + // TODO asciidoc compatibility + const partsAndLists = $parent.children(SELECTOR_LIST + ', ' + SELECTOR_PART); + + // Group each part with the list after + const parts = []; + let previousPart = null; + + partsAndLists.each((i, el) => { + if (isPartNode(el)) { + if (previousPart !== null) { + // The previous part was empty + parts.push(previousPart); + } + previousPart = { + title: getPartTitle(el, $), + list: null + }; + + } else { // It is a list + if (previousPart !== null) { + previousPart.list = el; + } else { + previousPart = { + title: '', + list: el + }; + } + parts.push(previousPart); + previousPart = null; + } + }); + + // Last part might be empty + if (previousPart !== null) { + parts.push(previousPart); + } + + return parts; +} + +/** + * True if the element is a part. + * @param el + * @return {Boolean} + */ +function isPartNode(el) { + return SELECTOR_PART.indexOf(el.name) !== -1; +} + +/** + * Parse the title of a part element. + * @param el + * @param {cheerio.DOM} $ + * @return {String} + */ +function getPartTitle(el, $) { + return $(el).text().trim(); +} + +/** + * Parse an HTML content into a tree of articles/parts. + * @param {String} html + * @return {Object} + */ +function parseSummary(html) { + const $ = dom.parse(html); + const $root = dom.cleanup(dom.root($), $); + + const parts = findParts($root, $); + + // Parse each list + const parsedParts = []; + let part; + for (let i = 0; i < parts.length; ++i) { + part = parts[i]; + parsedParts.push({ + title: part.title, + articles: parseList($(part.list), $) + }); + } + + return { + parts: parsedParts + }; +} + +module.exports = parseSummary; diff --git a/packages/gitbook-html/src/totext.js b/packages/gitbook-html/src/totext.js new file mode 100644 index 0000000..6e71cd3 --- /dev/null +++ b/packages/gitbook-html/src/totext.js @@ -0,0 +1,172 @@ + +/* + This class is extended by gitbook-markdown and gitbook-asciidoc + to generate back markdown/asciidoc from GitBook metadata. +*/ + +class ToText { + constructor(markup) { + Object.assign(this, markup); + } + + // Break line + onBL() { + return '\n'; + } + + onText(text) { + return text; + } + + onHR() { + return '<hr />'; + } + + // ---- TITLES + + onTitleStart(level) { + return '<h' + level + '>'; + } + onTitleEnd(level) { + return '</h' + level + '>'; + } + + // ---- PARAGRAPHS / SECTIONS + onParagraphStart() { + return '<p>'; + } + onParagraphEnd() { + return '</p>'; + } + + + onSection() { + return this.onBL(); + } + + // ---- LINKS + onLinkStart(href) { + return '<a href="' + href + '">'; + } + onLinkEnd(href) { + return '</a>'; + } + + // ---- LISTS + onListItemStart(level) { + return this._spaces((level + 1) * 4) + '<li>'; + } + onListItemEnd(level) { + return this._spaces((level + 1) * 4) + '</li>' + this.onBL(); + } + onListStart(level) { + return this._spaces(level * 4) + '<ul>' + this.onBL(); + } + onListEnd(level) { + return this._spaces(level * 4) + '</ul>' + this.onBL(); + } + + // ------ LANGS + + langs(languages) { + let content = ''; + content += this.onTitleStart(1) + this.onText('Languages') + this.onTitleEnd(1); + content += this.onSection(); + + content += this._summaryArticles(languages); + + return content; + } + + // ------ GLOSSARY + + glossary(glossary) { + let content = ''; + + content += this.onTitleStart(1) + this.onText('Glossary') + this.onTitleEnd(1); + content += this.onSection(); + + glossary.forEach((entry) => { + content += this.onTitleStart(2) + this.onText(entry.name) + this.onTitleEnd(2); + content += this.onParagraphStart(); + content += this.onText(entry.description); + content += this.onParagraphEnd(); + content += this.onSection(); + }); + + return content; + } + + // ------ SUMMARY + + _summaryArticle(article, level) { + let content = ''; + + content += this.onListItemStart(level); + + if (article.ref) content += this.onLinkStart(article.ref); + content += this.onText(article.title); + if (article.ref) content += this.onLinkEnd(article.ref); + content += this.onBL(); + + if (article.articles && article.articles.length > 0) { + content += this._summaryArticles(article.articles, level + 1); + } + + content += this.onListItemEnd(level); + + return content; + } + _summaryArticles(articles, level) { + let content = ''; + + level = level || 0; + + content += this.onListStart(level); + articles.forEach((article) => { + content += this._summaryArticle(article, level); + }); + content += this.onListEnd(level); + + return content; + } + _summaryPart(part) { + let content = ''; + + if (part.title) content += this.onTitleStart(2) + this.onText(part.title) + this.onTitleEnd(2); + + content += this._summaryArticles(part.articles); + + return content; + } + + summary(summary) { + let content = ''; + + content += this.onTitleStart(1) + this.onText('Summary') + this.onTitleEnd(1); + content += this.onSection(); + + summary.parts.forEach((part, i) => { + const next = summary.parts[i + 1]; + + content += this._summaryPart(part); + + if (next && !next.title) { + content += this.onBL() + this.onHR() + this.onBL(); + } else { + content += this.onSection(); + } + + }); + + return content; + } + + // ---- Utilities + + _spaces(n, s) { + return Array(n + 1).join(s || ' '); + } +} + +module.exports = ToText; diff --git a/packages/gitbook-html/test/glossary.js b/packages/gitbook-html/test/glossary.js index 8bd77d6..e7175ea 100755 --- a/packages/gitbook-html/test/glossary.js +++ b/packages/gitbook-html/test/glossary.js @@ -1,29 +1,29 @@ -var fs = require('fs'); -var path = require('path'); -var assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const expect = require('expect'); -var glossary = require('../').glossary; +const glossary = require('../src').glossary; -describe('Glossary parsing', function () { - var LEXED; +describe('Glossary', () => { + let LEXED; - before(function() { - var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/GLOSSARY.html'), 'utf8'); + before(() => { + const CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/GLOSSARY.html'), 'utf8'); LEXED = glossary(CONTENT); }); - it('should only get heading + paragraph pairs', function() { - assert.equal(LEXED.length, 5); + it('should only get heading + paragraph pairs', () => { + expect(LEXED.length).toBe(5); }); - it('should output simple name/description objects', function() { - assert.equal(true, !(LEXED.some(function(e) { - return !Boolean(e.name && e.description); - }))); + it('should output simple name/description objects', () => { + expect(!(LEXED.some(e => !Boolean(e.name && e.description)))).toBe(true); }); - it('should correctly convert it to text', function() { - var text = glossary.toText(LEXED); - assertObjectsEqual(glossary(text), LEXED); + it('should correctly convert it to text', () => { + const text = glossary.toText(LEXED); + const parsed = glossary(text); + + expect(parsed).toEqual(LEXED); }); }); diff --git a/packages/gitbook-html/test/helper.js b/packages/gitbook-html/test/helper.js deleted file mode 100644 index 1e310f7..0000000 --- a/packages/gitbook-html/test/helper.js +++ /dev/null @@ -1,6 +0,0 @@ -var assert = require("assert"); - -global.assertObjectsEqual = function(o1, o2) { - assert.equal(JSON.stringify(o1, null, 4), JSON.stringify(o2, null, 4)); -}; - diff --git a/packages/gitbook-html/test/langs.js b/packages/gitbook-html/test/langs.js index ab002a1..dfa640d 100755 --- a/packages/gitbook-html/test/langs.js +++ b/packages/gitbook-html/test/langs.js @@ -1,27 +1,29 @@ -var fs = require('fs'); -var path = require('path'); -var assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const expect = require('expect'); -var langs = require('../').langs; +const langs = require('../src').langs; -describe('Languages parsing', function () { - var LEXED; +describe('Languages', () => { + let LEXED; - before(function() { - var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/LANGS.html'), 'utf8'); + before(() => { + const CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/LANGS.html'), 'utf8'); LEXED = langs(CONTENT); }); - it('should detect paths and titles', function() { - assert.equal(LEXED[0].ref,'en/'); - assert.equal(LEXED[0].title,'English'); + it('should detect paths and titles', () => { + expect(LEXED[0].ref).toBe('en/'); + expect(LEXED[0].title).toBe('English'); - assert.equal(LEXED[1].ref,'fr/'); - assert.equal(LEXED[1].title,'French'); + expect(LEXED[1].ref).toBe('fr/'); + expect(LEXED[1].title).toBe('French'); }); - it('should correctly convert it to text', function() { - var text = langs.toText(LEXED); - assertObjectsEqual(langs(text), LEXED); + it('should correctly convert it to text', () => { + const text = langs.toText(LEXED); + const parsed = langs(text); + + expect(parsed).toEqual(LEXED); }); }); diff --git a/packages/gitbook-html/test/readme.js b/packages/gitbook-html/test/readme.js index f38f40b..929ba10 100755 --- a/packages/gitbook-html/test/readme.js +++ b/packages/gitbook-html/test/readme.js @@ -1,30 +1,30 @@ -var fs = require('fs'); -var path = require('path'); -var assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const expect = require('expect'); -var readme = require('../').readme; +const readme = require('../src').readme; -describe('Readme parsing', function () { - var LEXED; +describe('Readme', () => { + let LEXED; - before(function() { - var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/README.html'), 'utf8'); + before(() => { + const CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/README.html'), 'utf8'); LEXED = readme(CONTENT); }); - it('should contain a title', function() { - assert(LEXED.title); + it('should contain a title', () => { + expect(LEXED.title).toExist(); }); - it('should contain a description', function() { - assert(LEXED.description); + it('should contain a description', () => { + expect(LEXED.description).toExist(); }); - it('should extract the right title', function() { - assert.equal(LEXED.title, "This is the title"); + it('should extract the right title', () => { + expect(LEXED.title).toBe('This is the title'); }); - it('should extract the right description', function() { - assert.equal(LEXED.description, "This is the book description."); + it('should extract the right description', () => { + expect(LEXED.description).toBe('This is the book description.'); }); }); diff --git a/packages/gitbook-html/test/summary.js b/packages/gitbook-html/test/summary.js index 03be73f..ea27fb3 100755 --- a/packages/gitbook-html/test/summary.js +++ b/packages/gitbook-html/test/summary.js @@ -1,40 +1,40 @@ -var fs = require('fs'); -var path = require('path'); -var assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const expect = require('expect'); -var summary = require('../').summary; +const summary = require('../src').summary; -describe('Summary parsing', function () { - var LEXED, PART; - var LEXED_EMPTY; +describe('Summary', () => { + let LEXED, PART; + let LEXED_EMPTY; - before(function() { - var CONTENT = fs.readFileSync( + before(() => { + const CONTENT = fs.readFileSync( path.join(__dirname, './fixtures/SUMMARY.html'), 'utf8'); LEXED = summary(CONTENT); PART = LEXED.parts[0]; - var CONTENT_EMPTY = fs.readFileSync( + const CONTENT_EMPTY = fs.readFileSync( path.join(__dirname, './fixtures/SUMMARY-EMPTY.html'), 'utf8'); LEXED_EMPTY = summary(CONTENT_EMPTY); }); - describe('Parts', function() { - it('should detect parts', function() { - assert.equal(LEXED.parts.length, 3); + describe('Parts', () => { + it('should detect parts', () => { + expect(LEXED.parts.length).toBe(3); }); - it('should detect title', function() { - assert.equal(LEXED.parts[0].title, ''); - assert.equal(LEXED.parts[1].title, 'Part 2'); - assert.equal(LEXED.parts[2].title, ''); + it('should detect title', () => { + expect(LEXED.parts[0].title).toBe(''); + expect(LEXED.parts[1].title).toBe('Part 2'); + expect(LEXED.parts[2].title).toBe(''); }); - it('should detect empty parts', function() { - var partTitles = LEXED_EMPTY.parts.map(function (part) { + it('should detect empty parts', () => { + const partTitles = LEXED_EMPTY.parts.map((part) => { return part.title; }); - var expectedTitles = [ + const expectedTitles = [ 'First empty part', 'Part 1', '', @@ -43,49 +43,51 @@ describe('Summary parsing', function () { 'Penultimate empty part', 'Last empty part' ]; - assert.equal(LEXED_EMPTY.parts.length, 7); - expectedTitles.forEach(function (title, index) { - assert.equal(partTitles[index], title); + expect(LEXED_EMPTY.parts.length).toBe(7); + expectedTitles.forEach((title, index) => { + expect(partTitles[index]).toBe(title); }); }); }); - it('should detect chapters', function() { - assert.equal(PART.articles.length, 5); + it('should detect chapters', () => { + expect(PART.articles.length).toBe(5); }); - it('should detect chapters in other parts', function() { - assert.equal(LEXED.parts[1].articles.length, 1); + it('should detect chapters in other parts', () => { + expect(LEXED.parts[1].articles.length).toBe(1); }); - it('should support articles', function() { - assert.equal(PART.articles[0].articles.length, 2); - assert.equal(PART.articles[1].articles.length, 0); - assert.equal(PART.articles[2].articles.length, 0); + it('should support articles', () => { + expect(PART.articles[0].articles.length).toBe(2); + expect(PART.articles[1].articles.length).toBe(0); + expect(PART.articles[2].articles.length).toBe(0); }); - it('should detect paths and titles', function() { - assert(PART.articles[0].ref); - assert(PART.articles[1].ref); - assert(PART.articles[2].ref); - assert(PART.articles[3].ref); - assert.equal(PART.articles[4].ref, null); + it('should detect paths and titles', () => { + expect(PART.articles[0].ref).toExist(); + expect(PART.articles[1].ref).toExist(); + expect(PART.articles[2].ref).toExist(); + expect(PART.articles[3].ref).toExist(); + expect(PART.articles[4].ref).toNotExist(); - assert(PART.articles[0].title); - assert(PART.articles[1].title); - assert(PART.articles[2].title); - assert(PART.articles[3].title); - assert(PART.articles[4].title); + expect(PART.articles[0].title).toExist(); + expect(PART.articles[1].title).toExist(); + expect(PART.articles[2].title).toExist(); + expect(PART.articles[3].title).toExist(); + expect(PART.articles[4].title).toExist(); }); - it('should normalize paths from .md', function() { - assert.equal(PART.articles[0].ref,'chapter-1/README.md'); - assert.equal(PART.articles[1].ref,'chapter-2/README.md'); - assert.equal(PART.articles[2].ref,'chapter-3/README.md'); + it('should normalize paths from .md', () => { + expect(PART.articles[0].ref).toBe('chapter-1/README.md'); + expect(PART.articles[1].ref).toBe('chapter-2/README.md'); + expect(PART.articles[2].ref).toBe('chapter-3/README.md'); }); - it('should correctly convert it to text', function() { - var text = summary.toText(LEXED); - assertObjectsEqual(summary(text), LEXED); + it('should correctly convert it to text', () => { + const text = summary.toText(LEXED); + const parsed = summary(text); + + expect(parsed).toEqual(LEXED); }); }); |