summary refs log tree commit diff stats
path: root/packages/gitbook-html
diff options
context:
space:
mode:
author Samy Pessé <samypesse@gmail.com> 2016-02-19 16:12:43 +0100
committer Samy Pessé <samypesse@gmail.com> 2016-12-22 12:32:14 +0100
commit 4c44d677117d926b6dcc164f55fe34079c2ca3c7 (patch)
tree 71d0fca56421f3a7b68d60a446f89d3735828f03 /packages/gitbook-html
parent 9e99b5850fd866fc2f9196993a0ae7e342311558 (diff)
download gitbook-4c44d677117d926b6dcc164f55fe34079c2ca3c7.zip
gitbook-4c44d677117d926b6dcc164f55fe34079c2ca3c7.tar.gz
gitbook-4c44d677117d926b6dcc164f55fe34079c2ca3c7.tar.bz2
Improve summary parser
Diffstat (limited to 'packages/gitbook-html')
-rw-r--r-- packages/gitbook-html/lib/dom.js 11
-rwxr-xr-x packages/gitbook-html/lib/glossary.js 5
-rwxr-xr-x packages/gitbook-html/lib/index.js 27
-rwxr-xr-x packages/gitbook-html/lib/langs.js 4
-rwxr-xr-x packages/gitbook-html/lib/page.js 6
-rwxr-xr-x packages/gitbook-html/lib/summary.js 97
-rw-r--r-- packages/gitbook-html/package.json 4
-rwxr-xr-x packages/gitbook-html/test/fixtures/SUMMARY.html 14
-rwxr-xr-x packages/gitbook-html/test/glossary.js 10
-rwxr-xr-x packages/gitbook-html/test/langs.js 10
-rwxr-xr-x packages/gitbook-html/test/readme.js 10
-rwxr-xr-x packages/gitbook-html/test/summary.js 50
12 files changed, 168 insertions, 80 deletions
diff --git a/packages/gitbook-html/lib/dom.js b/packages/gitbook-html/lib/dom.js
index 2c2eaf7..df4de90 100644
--- a/packages/gitbook-html/lib/dom.js
+++ b/packages/gitbook-html/lib/dom.js
@@ -3,12 +3,18 @@ var cheerio = require('cheerio');
// Parse an HTML string and return its content
function parse(html) {
- var $ = cheerio.load('<div>'+html+'</div>');
+ var $ = cheerio.load(html);
var $el = $('html, body').first();
return $el.length > 0? $el : $;
}
+// Return main element
+function root($) {
+ var $el = $('html, body, > div').first();
+ return $el.length > 0? $el : $.root();
+}
+
// Return text node of an element
function textNode($el) {
return _.reduce($el.children, function(text, e) {
@@ -19,5 +25,6 @@ function textNode($el) {
module.exports = {
parse: parse,
- textNode: textNode
+ textNode: textNode,
+ root: root
};
diff --git a/packages/gitbook-html/lib/glossary.js b/packages/gitbook-html/lib/glossary.js
index 648ba1a..9d3799b 100755
--- a/packages/gitbook-html/lib/glossary.js
+++ b/packages/gitbook-html/lib/glossary.js
@@ -7,9 +7,10 @@ function parseGlossary(html) {
var entries = [];
- $("h2").each(function() {
+ $('h2').each(function() {
var $heading = $(this);
- var $p = $heading.next();
+ var $next = $heading.next()
+ var $p = $next.is('p')? $next.first() : $next.find('p').first();
var entry = {};
diff --git a/packages/gitbook-html/lib/index.js b/packages/gitbook-html/lib/index.js
index a7c478c..0e67c94 100755
--- a/packages/gitbook-html/lib/index.js
+++ b/packages/gitbook-html/lib/index.js
@@ -1,8 +1,33 @@
+var _ = require('lodash');
-module.exports = {
+var htmlParser = {
summary: require("./summary"),
glossary: require("./glossary"),
langs: require("./langs"),
readme: require("./readme"),
page: require("./page")
};
+
+// Compose a function with a transform function applied to its first argument
+function compose(toHTML, fn) {
+ return function() {
+ var args = _.toArray(arguments);
+ args[0] = toHTML(args[0]);
+
+ return fn.apply(null, args);
+ }
+}
+
+// Create a GitBook parser
+function createParser(toHTML) {
+ return {
+ summary: compose(toHTML, htmlParser.summary),
+ glossary: compose(toHTML, htmlParser.glossary),
+ langs: compose(toHTML, htmlParser.langs),
+ readme: compose(toHTML, htmlParser.readme),
+ page: compose(toHTML, htmlParser.page)
+ }
+}
+
+module.exports = htmlParser;
+module.exports.createParser = createParser;
diff --git a/packages/gitbook-html/lib/langs.js b/packages/gitbook-html/lib/langs.js
index 1042dcb..270a9f6 100755
--- a/packages/gitbook-html/lib/langs.js
+++ b/packages/gitbook-html/lib/langs.js
@@ -1,9 +1,9 @@
var _ = require('lodash');
-var parseEntries = require('./summary').entries;
+var parseSummary = require('./summary');
// HTML -> Languages
function parseLangs(content) {
- return parseEntries(content);
+ return parseSummary(content).parts[0].articles;
}
// Languages -> HTML
diff --git a/packages/gitbook-html/lib/page.js b/packages/gitbook-html/lib/page.js
index e687050..6f056fc 100755
--- a/packages/gitbook-html/lib/page.js
+++ b/packages/gitbook-html/lib/page.js
@@ -2,8 +2,10 @@ var Q = require('q');
var _ = require('lodash');
// HTML -> HTML
-function parsePage(src) {
- return src;
+function parsePage(html) {
+ return {
+ content: html
+ };
}
module.exports = parsePage;
diff --git a/packages/gitbook-html/lib/summary.js b/packages/gitbook-html/lib/summary.js
index 1e2d63d..e71d6b5 100755
--- a/packages/gitbook-html/lib/summary.js
+++ b/packages/gitbook-html/lib/summary.js
@@ -1,14 +1,17 @@
var _ = require('lodash');
var dom = require('./dom');
+var SELECTOR_LIST = '.olist > ol, ol, ul';
+var SELECTOR_LINK = 'a, p > a';
+
+var BL = '\n';
// parse a ul list and return list of chapters recursively
function parseList($ul, $) {
var articles = [];
- $ul.children('>li').each(function() {
+ $ul.children('li').each(function() {
var article = {};
-
var $li = $(this);
// Get text for the entry
@@ -16,14 +19,14 @@ function parseList($ul, $) {
article.title = $p.text() || dom.textNode($li.get(0));
// Parse link
- var $a = $li.find('> a, > p > a');
+ var $a = $li.children(SELECTOR_LINK);
if ($a.length > 0) {
article.title = $a.first().text();
article.path = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, '')
}
// Sub articles
- var $sub = $li.children('> .olist > ol, > ol, > ul');
+ var $sub = $li.children(SELECTOR_LIST).first();
article.articles = parseList($sub, $);
articles.push(article);
@@ -32,55 +35,73 @@ function parseList($ul, $) {
return articles;
}
-// Return a list of entries in a div
-function parseEntries (html) {
+// HTML -> Summary: every list that is a direct child of the root becomes one part
+function parseSummary(html) {
var $ = dom.parse(html);
- var chapters = parseList($("> ol, > ul").first(), $);
- return chapters;
-}
+ var $root = dom.root($);
-// HTML -> Summary
-function parseSummary(src) {
- var chapters = parseEntries(src);
+ var $lists = $root.children(SELECTOR_LIST);
+ var parts = [];
+
+ $lists.each(function() {
+ var $list = $(this);
+
+ parts.push({
+ articles: parseList($list, $) // parse the list being iterated, not the first list in the document
+ });
+ });
return {
- chapters: chapters
+ parts: parts
};
}
// Summary -> HTML
-function summaryToText(summary) {
- var bl = '\n';
-
- var _base = function(article) {
- if (article.path) {
- return '<a href="'+article.path+'">'+article.title+'</a>';
- } else {
- return article.title;
- }
- };
+function textPrefix(d) {
+ return Array(d*4).join(' ');
+}
- var convertArticle = function(article, d) {
- var content = Array(d+2).join(' ') + '<li>' + _base(article);
+function articleToText(article, d) {
+ var prefix = textPrefix(d);
+ var content = prefix + '<li>';
- if (article.articles.length > 0) {
- content += convertArticles(article.articles, d);
- }
- return content + '</li>' + bl;
- };
+ if (article.path) {
+ content += '<a href="'+article.path+'">'+article.title+'</a>';
+ } else {
+ content += article.title;
+ }
- var convertArticles = function(articles, d) {
- var content = '<ul>' + bl;
- _.each(articles, function(_article) {
- content += convertArticle(_article, d + 1);
- });
- return content + '<ul>' + bl;
+ if (article.articles.length > 0) {
+ content += BL + articlesToText(article.articles, d) + prefix;
}
+ content += '</li>' + BL;
+
+ return content;
+}
+
+function articlesToText(articles, d) {
+ var prefix = textPrefix(d);
+ var content = prefix + '<ul>' + BL;
+ _.each(articles, function(_article) {
+ content += articleToText(_article, d + 1);
+ });
+ return content + '</ul>' + BL;
+}
+
+function partsToText(part) {
+ return articlesToText(part.articles, 0) + BL + BL;
+}
+
+function summaryToText(summary) {
+ var content = '<h1>Summary</h1>' + BL;
+
+ _.each(summary.parts, function(part) {
+ content += partsToText(part);
+ });
- return '<h1>Summary</h1>'+ bl+bl + convertArticles(summary.chapters, 0) + bl;
+ return content + BL;
};
module.exports = parseSummary;
-module.exports.entries = parseEntries;
module.exports.toText = summaryToText;
diff --git a/packages/gitbook-html/package.json b/packages/gitbook-html/package.json
index ff28ca6..a21259b 100644
--- a/packages/gitbook-html/package.json
+++ b/packages/gitbook-html/package.json
@@ -7,13 +7,13 @@
"dependencies": {
"q": "^1.1.2",
"lodash": "^3.2.0",
- "cheerio": "^0.19.0"
+ "cheerio": "^0.20.0"
},
"devDependencies": {
"mocha": "^2.3.2"
},
"scripts": {
- "test": "export TESTING=true; mocha --reporter list --bail"
+ "test": "export TESTING=true; mocha --reporter spec --bail"
},
"repository": {
"type": "git",
diff --git a/packages/gitbook-html/test/fixtures/SUMMARY.html b/packages/gitbook-html/test/fixtures/SUMMARY.html
index f469249..bae97f3 100755
--- a/packages/gitbook-html/test/fixtures/SUMMARY.html
+++ b/packages/gitbook-html/test/fixtures/SUMMARY.html
@@ -23,4 +23,16 @@
</ul>
</li>
<li>Unfinished Chapter</li>
-</ul> \ No newline at end of file
+</ul>
+
+<ul>
+ <li>
+ <a href="chapter-1/README.md">Chapter 1</a>
+ </li>
+</ul>
+
+<ul>
+ <li>
+ <a href="chapter-1/README.md">Chapter 1</a>
+ </li>
+</ul>
diff --git a/packages/gitbook-html/test/glossary.js b/packages/gitbook-html/test/glossary.js
index 250c6f1..8bd77d6 100755
--- a/packages/gitbook-html/test/glossary.js
+++ b/packages/gitbook-html/test/glossary.js
@@ -4,10 +4,14 @@ var assert = require('assert');
var glossary = require('../').glossary;
-var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/GLOSSARY.html'), 'utf8');
-var LEXED = glossary(CONTENT);
-
describe('Glossary parsing', function () {
+ var LEXED;
+
+ before(function() {
+ var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/GLOSSARY.html'), 'utf8');
+ LEXED = glossary(CONTENT);
+ });
+
it('should only get heading + paragraph pairs', function() {
assert.equal(LEXED.length, 5);
});
diff --git a/packages/gitbook-html/test/langs.js b/packages/gitbook-html/test/langs.js
index c51cf2d..6b5e00b 100755
--- a/packages/gitbook-html/test/langs.js
+++ b/packages/gitbook-html/test/langs.js
@@ -4,10 +4,14 @@ var assert = require('assert');
var langs = require('../').langs;
-var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/LANGS.html'), 'utf8');
-var LEXED = langs(CONTENT);
-
describe('Languages parsing', function () {
+ var LEXED;
+
+ before(function() {
+ var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/LANGS.html'), 'utf8');
+ LEXED = langs(CONTENT);
+ });
+
it('should detect paths and titles', function() {
assert.equal(LEXED[0].path,'en/');
assert.equal(LEXED[0].title,'English');
diff --git a/packages/gitbook-html/test/readme.js b/packages/gitbook-html/test/readme.js
index 9d9ca29..f38f40b 100755
--- a/packages/gitbook-html/test/readme.js
+++ b/packages/gitbook-html/test/readme.js
@@ -4,11 +4,13 @@ var assert = require('assert');
var readme = require('../').readme;
-
-var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/README.html'), 'utf8');
-var LEXED = readme(CONTENT);
-
describe('Readme parsing', function () {
+ var LEXED;
+
+ before(function() {
+ var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/README.html'), 'utf8');
+ LEXED = readme(CONTENT);
+ });
it('should contain a title', function() {
assert(LEXED.title);
diff --git a/packages/gitbook-html/test/summary.js b/packages/gitbook-html/test/summary.js
index 8d686fc..4d06c32 100755
--- a/packages/gitbook-html/test/summary.js
+++ b/packages/gitbook-html/test/summary.js
@@ -4,37 +4,47 @@ var assert = require('assert');
var summary = require('../').summary;
-var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/SUMMARY.html'), 'utf8');
-var LEXED = summary(CONTENT);
describe('Summary parsing', function () {
+ var LEXED, PART;
+
+ before(function() {
+ var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/SUMMARY.html'), 'utf8');
+ LEXED = summary(CONTENT);
+ PART = LEXED.parts[0];
+ });
+
+ it('should detect parts', function() {
+ assert.equal(LEXED.parts.length, 3);
+ });
+
it('should detect chapters', function() {
- assert.equal(LEXED.chapters.length, 5);
+ assert.equal(PART.articles.length, 5);
});
it('should support articles', function() {
- assert.equal(LEXED.chapters[0].articles.length, 2);
- assert.equal(LEXED.chapters[1].articles.length, 0);
- assert.equal(LEXED.chapters[2].articles.length, 0);
+ assert.equal(PART.articles[0].articles.length, 2);
+ assert.equal(PART.articles[1].articles.length, 0);
+ assert.equal(PART.articles[2].articles.length, 0);
});
it('should detect paths and titles', function() {
- assert(LEXED.chapters[0].path);
- assert(LEXED.chapters[1].path);
- assert(LEXED.chapters[2].path);
- assert(LEXED.chapters[3].path);
- assert.equal(LEXED.chapters[4].path, null);
-
- assert(LEXED.chapters[0].title);
- assert(LEXED.chapters[1].title);
- assert(LEXED.chapters[2].title);
- assert(LEXED.chapters[3].title);
- assert(LEXED.chapters[4].title);
+ assert(PART.articles[0].path);
+ assert(PART.articles[1].path);
+ assert(PART.articles[2].path);
+ assert(PART.articles[3].path);
+ assert.equal(PART.articles[4].path, null);
+
+ assert(PART.articles[0].title);
+ assert(PART.articles[1].title);
+ assert(PART.articles[2].title);
+ assert(PART.articles[3].title);
+ assert(PART.articles[4].title);
});
it('should normalize paths from .md', function() {
- assert.equal(LEXED.chapters[0].path,'chapter-1/README.md');
- assert.equal(LEXED.chapters[1].path,'chapter-2/README.md');
- assert.equal(LEXED.chapters[2].path,'chapter-3/README.md');
+ assert.equal(PART.articles[0].path,'chapter-1/README.md');
+ assert.equal(PART.articles[1].path,'chapter-2/README.md');
+ assert.equal(PART.articles[2].path,'chapter-3/README.md');
});
it('should correctly convert it to text', function() {