summaryrefslogtreecommitdiffstats
path: root/packages/gitbook-html
diff options
context:
space:
mode:
Diffstat (limited to 'packages/gitbook-html')
-rw-r--r--packages/gitbook-html/.gitignore28
-rw-r--r--packages/gitbook-html/.travis.yml4
-rw-r--r--packages/gitbook-html/README.md9
-rw-r--r--packages/gitbook-html/lib/dom.js23
-rwxr-xr-xpackages/gitbook-html/lib/glossary.js38
-rwxr-xr-xpackages/gitbook-html/lib/index.js8
-rwxr-xr-xpackages/gitbook-html/lib/langs.js24
-rwxr-xr-xpackages/gitbook-html/lib/page.js9
-rwxr-xr-xpackages/gitbook-html/lib/readme.js16
-rwxr-xr-xpackages/gitbook-html/lib/summary.js86
-rw-r--r--packages/gitbook-html/package.json37
-rwxr-xr-xpackages/gitbook-html/test/fixtures/GLOSSARY.html39
-rwxr-xr-xpackages/gitbook-html/test/fixtures/LANGS.html6
-rwxr-xr-xpackages/gitbook-html/test/fixtures/PAGE.adoc14
-rwxr-xr-xpackages/gitbook-html/test/fixtures/README.html6
-rwxr-xr-xpackages/gitbook-html/test/fixtures/SUMMARY.html26
-rwxr-xr-xpackages/gitbook-html/test/glossary.js25
-rw-r--r--packages/gitbook-html/test/helper.js6
-rwxr-xr-xpackages/gitbook-html/test/langs.js23
-rwxr-xr-xpackages/gitbook-html/test/readme.js28
-rwxr-xr-xpackages/gitbook-html/test/summary.js44
21 files changed, 499 insertions, 0 deletions
diff --git a/packages/gitbook-html/.gitignore b/packages/gitbook-html/.gitignore
new file mode 100644
index 0000000..9550e4f
--- /dev/null
+++ b/packages/gitbook-html/.gitignore
@@ -0,0 +1,28 @@
+# Logs
+logs
+*.log
+
+# Runtime data
+pids
+*.pid
+*.seed
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Compiled binary addons (http://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directory
+# Deployed apps should consider commenting this line out:
+# see https://npmjs.org/doc/faq.html#Should-I-check-my-node_modules-folder-into-git
+node_modules
+
+# vim swapfile
+*.swp
diff --git a/packages/gitbook-html/.travis.yml b/packages/gitbook-html/.travis.yml
new file mode 100644
index 0000000..603536b
--- /dev/null
+++ b/packages/gitbook-html/.travis.yml
@@ -0,0 +1,4 @@
+language: node_js
+node_js:
+ - "stable"
+ - "0.10" \ No newline at end of file
diff --git a/packages/gitbook-html/README.md b/packages/gitbook-html/README.md
new file mode 100644
index 0000000..bd9f006
--- /dev/null
+++ b/packages/gitbook-html/README.md
@@ -0,0 +1,9 @@
+# GitBook HTML Parser
+
+[![Build Status](https://travis-ci.org/GitbookIO/gitbook-html.png?branch=master)](https://travis-ci.org/GitbookIO/gitbook-html)
+[![NPM version](https://badge.fury.io/js/gitbook-html.svg)](http://badge.fury.io/js/gitbook-html)
+
+This node module parses HTML for gitbook (SUMMARY.html, README.html).
+
+It is a dependency of **gitbook-asciidoc** and **gitbook-markdown**.
+
diff --git a/packages/gitbook-html/lib/dom.js b/packages/gitbook-html/lib/dom.js
new file mode 100644
index 0000000..2c2eaf7
--- /dev/null
+++ b/packages/gitbook-html/lib/dom.js
@@ -0,0 +1,23 @@
+var _ = require('lodash');
+var cheerio = require('cheerio');
+
+// Parse an HTML string and return its content
+function parse(html) {
+ var $ = cheerio.load('<div>'+html+'</div>');
+ var $el = $('html, body').first();
+
+ return $el.length > 0? $el : $;
+}
+
+// Return text node of an element
+function textNode($el) {
+ return _.reduce($el.children, function(text, e) {
+ if (e.type == 'text') text += e.data;
+ return text;
+ }, '');
+}
+
+module.exports = {
+ parse: parse,
+ textNode: textNode
+};
diff --git a/packages/gitbook-html/lib/glossary.js b/packages/gitbook-html/lib/glossary.js
new file mode 100755
index 0000000..648ba1a
--- /dev/null
+++ b/packages/gitbook-html/lib/glossary.js
@@ -0,0 +1,38 @@
+var _ = require('lodash');
+var dom = require('./dom');
+
+// HTML -> Glossary
+function parseGlossary(html) {
+ var $ = dom.parse(html);
+
+ var entries = [];
+
+ $("h2").each(function() {
+ var $heading = $(this);
+ var $p = $heading.next();
+
+ var entry = {};
+
+ entry.name = $heading.text();
+ entry.description = $p.text();
+
+ entries.push(entry);
+ });
+
+ return entries;
+}
+
+// Glossary -> HTML
+function glossaryToText(glossary) {
+ var bl = '\n';
+
+ var body = _.map(glossary, function(entry) {
+ return '<h2>' + entry.name + '</h2>' + bl + bl
+ + '<p>' + entry.description + '</p>';
+ }).join(bl+bl);
+
+ return '<h1>Glossary</h1>'+bl+bl+body;
+}
+
+module.exports = parseGlossary;
+module.exports.toText = glossaryToText;
diff --git a/packages/gitbook-html/lib/index.js b/packages/gitbook-html/lib/index.js
new file mode 100755
index 0000000..a7c478c
--- /dev/null
+++ b/packages/gitbook-html/lib/index.js
@@ -0,0 +1,8 @@
+
+module.exports = {
+ summary: require("./summary"), // parseSummary, plus .entries and .toText
+ glossary: require("./glossary"), // parseGlossary, plus .toText
+ langs: require("./langs"), // parseLangs, plus .toText
+ readme: require("./readme"), // parseReadme
+ page: require("./page") // parsePage
+};
diff --git a/packages/gitbook-html/lib/langs.js b/packages/gitbook-html/lib/langs.js
new file mode 100755
index 0000000..1042dcb
--- /dev/null
+++ b/packages/gitbook-html/lib/langs.js
@@ -0,0 +1,24 @@
+var _ = require('lodash');
+var parseEntries = require('./summary').entries;
+
+// HTML -> Languages (list of entries, same shape as summary entries)
+function parseLangs(content) {
+ return parseEntries(content); // delegates to the `entries` export of ./summary
+}
+
+// Languages -> HTML
+function langsToText(langs) {
+ var bl = '\n';
+ var content = '<h1>Languages</h1>'+bl+bl;
+
+ content += '<ul>' + bl;
+ _.each(langs, function(lang) {
+ content = content + ' <li><a href="'+lang.path+'">'+lang.title+'</a></li>'+bl;
+ });
+ content += '</ul>' + bl;
+
+ return content;
+}
+
+module.exports = parseLangs;
+module.exports.toText = langsToText;
diff --git a/packages/gitbook-html/lib/page.js b/packages/gitbook-html/lib/page.js
new file mode 100755
index 0000000..e687050
--- /dev/null
+++ b/packages/gitbook-html/lib/page.js
@@ -0,0 +1,9 @@
+var Q = require('q');
+var _ = require('lodash');
+
+// HTML -> HTML (no transformation is applied to pages)
+function parsePage(src) {
+ return src; // identity: the input is returned unchanged
+}
+
+module.exports = parsePage;
diff --git a/packages/gitbook-html/lib/readme.js b/packages/gitbook-html/lib/readme.js
new file mode 100755
index 0000000..0d179ad
--- /dev/null
+++ b/packages/gitbook-html/lib/readme.js
@@ -0,0 +1,16 @@
+var _ = require('lodash');
+var dom = require('./dom');
+
+// HTML -> Readme
+function parseReadme(html) {
+ var $ = dom.parse(html);
+
+ return {
+ title: $('h1:first-child').text().trim(),
+ description: $('div.paragraph,p').first().text().trim()
+ };
+}
+
+
+// Exports
+module.exports = parseReadme;
diff --git a/packages/gitbook-html/lib/summary.js b/packages/gitbook-html/lib/summary.js
new file mode 100755
index 0000000..1e2d63d
--- /dev/null
+++ b/packages/gitbook-html/lib/summary.js
@@ -0,0 +1,86 @@
+var _ = require('lodash');
+var dom = require('./dom');
+
+
+// parse a ul list and return list of chapters recursvely
+function parseList($ul, $) {
+ var articles = [];
+
+ $ul.children('>li').each(function() {
+ var article = {};
+
+ var $li = $(this);
+
+ // Get text for the entry
+ var $p = $li.children('> p');
+ article.title = $p.text() || dom.textNode($li.get(0));
+
+ // Parse link
+ var $a = $li.find('> a, > p > a');
+ if ($a.length > 0) {
+ article.title = $a.first().text();
+ article.path = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, '')
+ }
+
+ // Sub articles
+ var $sub = $li.children('> .olist > ol, > ol, > ul');
+ article.articles = parseList($sub, $);
+
+ articles.push(article);
+ });
+
+ return articles;
+}
+
+// Return a list of entries in a div
+function parseEntries (html) {
+ var $ = dom.parse(html);
+ var chapters = parseList($("> ol, > ul").first(), $);
+ return chapters;
+}
+
+// HTML -> Summary
+function parseSummary(src) {
+ var chapters = parseEntries(src);
+
+ return {
+ chapters: chapters
+ };
+}
+
+// Summary -> HTML
+function summaryToText(summary) {
+ var bl = '\n';
+
+ var _base = function(article) {
+ if (article.path) {
+ return '<a href="'+article.path+'">'+article.title+'</a>';
+ } else {
+ return article.title;
+ }
+ };
+
+ var convertArticle = function(article, d) {
+ var content = Array(d+2).join(' ') + '<li>' + _base(article);
+
+ if (article.articles.length > 0) {
+ content += convertArticles(article.articles, d);
+ }
+ return content + '</li>' + bl;
+ };
+
+ var convertArticles = function(articles, d) {
+ var content = '<ul>' + bl;
+ _.each(articles, function(_article) {
+ content += convertArticle(_article, d + 1);
+ });
+ return content + '<ul>' + bl;
+ }
+
+ return '<h1>Summary</h1>'+ bl+bl + convertArticles(summary.chapters, 0) + bl;
+};
+
+
+module.exports = parseSummary;
+module.exports.entries = parseEntries;
+module.exports.toText = summaryToText;
diff --git a/packages/gitbook-html/package.json b/packages/gitbook-html/package.json
new file mode 100644
index 0000000..ff28ca6
--- /dev/null
+++ b/packages/gitbook-html/package.json
@@ -0,0 +1,37 @@
+{
+ "name": "gitbook-html",
+ "version": "0.0.0",
+ "homepage": "https://www.gitbook.com",
+ "description": "Parse HTML content for gitbook",
+ "main": "lib/index.js",
+ "dependencies": {
+ "q": "^1.1.2",
+ "lodash": "^3.2.0",
+ "cheerio": "^0.19.0"
+ },
+ "devDependencies": {
+ "mocha": "^2.3.2"
+ },
+ "scripts": {
+ "test": "export TESTING=true; mocha --reporter list --bail"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/GitbookIO/gitbook-html.git"
+ },
+ "author": "FriendCode Inc. <contact@gitbook.com>",
+ "license": "Apache-2.0",
+ "bugs": {
+ "url": "https://github.com/GitbookIO/gitbook-html/issues"
+ },
+ "contributors": [
+ {
+ "name": "Aaron O'Mullan",
+ "email": "aaron@gitbook.com"
+ },
+ {
+ "name": "Samy Pessé",
+ "email": "samy@gitbook.com"
+ }
+ ]
+} \ No newline at end of file
diff --git a/packages/gitbook-html/test/fixtures/GLOSSARY.html b/packages/gitbook-html/test/fixtures/GLOSSARY.html
new file mode 100755
index 0000000..3be80a6
--- /dev/null
+++ b/packages/gitbook-html/test/fixtures/GLOSSARY.html
@@ -0,0 +1,39 @@
+<h1>Glossary</h1>
+
+<h2>Magic</h2>
+
+<p>Sufficiently advanced technology, beyond the understanding of the observer producing a sense of wonder.</p>
+
+<p>Hello, I am random noise in the middle of this beautiful Glossary. (Really astonishing !)</p>
+
+<h2>PHP</h2>
+
+<p>An atrocious language, invented for the sole purpose of inflicting pain and suffering amongst the proframming wizards of this world.</p>
+
+<h2>Clojure</h2>
+
+<p>Lisp re-invented for hipsters.</p>
+
+<h2>Go</h2>
+
+<p>Go Go Google <a href="https://www.google.com">Wow</a></p>
+
+<p>Fantastic, I love code too ! :</p>
+
+<p>```</p>
+
+<p>def f(x):
+ return x * 4</p>
+
+<h1>Wow this is some really awesome code</h1>
+
+<h1>totally mind blowing</h1>
+
+<h1>but we don't care, it shouldn't be in our glossary !</h1>
+
+<p>print(f(9))
+```</p>
+
+<h2>Gitbook</h2>
+
+<p>Awesome project. Really amazing, I'm really at a loss for words ...</p>
diff --git a/packages/gitbook-html/test/fixtures/LANGS.html b/packages/gitbook-html/test/fixtures/LANGS.html
new file mode 100755
index 0000000..be0b717
--- /dev/null
+++ b/packages/gitbook-html/test/fixtures/LANGS.html
@@ -0,0 +1,6 @@
+<h1>Languages</h1>
+
+<ul>
+<li><a href="en/">English</a></li>
+<li><a href="fr/">French</a></li>
+</ul>
diff --git a/packages/gitbook-html/test/fixtures/PAGE.adoc b/packages/gitbook-html/test/fixtures/PAGE.adoc
new file mode 100755
index 0000000..a9afbac
--- /dev/null
+++ b/packages/gitbook-html/test/fixtures/PAGE.adoc
@@ -0,0 +1,14 @@
+= Python basics
+
+Python is a nice language, you can add stuff. Bla bla bla.
+
+Some more nice content ....
+
+[Cool stuff](http://gitbook.io)
+
+[Link to another Markdown file](./xyz/file.md)
+
+And look at this pretty picture:
+![Pretty](../assets/my-pretty-picture.png "Pretty")
+
+Lets go for another exercise but this time with some context :
diff --git a/packages/gitbook-html/test/fixtures/README.html b/packages/gitbook-html/test/fixtures/README.html
new file mode 100755
index 0000000..d73a058
--- /dev/null
+++ b/packages/gitbook-html/test/fixtures/README.html
@@ -0,0 +1,6 @@
+<h1>This is the title</h1>
+
+<p>This is the book description.</p>
+
+<p>other content
+...</p> \ No newline at end of file
diff --git a/packages/gitbook-html/test/fixtures/SUMMARY.html b/packages/gitbook-html/test/fixtures/SUMMARY.html
new file mode 100755
index 0000000..f469249
--- /dev/null
+++ b/packages/gitbook-html/test/fixtures/SUMMARY.html
@@ -0,0 +1,26 @@
+<h1>Summary</h1>
+
+<ul>
+ <li>
+ <a href="chapter-1/README.md">Chapter 1</a>
+ <ul>
+ <li><a href="chapter-1/ARTICLE1.md">Article 1</a></li>
+ <li
+ ><a href="chapter-1/ARTICLE2.md">Article 2</a>
+ <ul>
+ <li><a href="\chapter-1\ARTICLE-1-2-1.md">article 1.2.1</a></li>
+ <li><a href="/chapter-1/ARTICLE-1-2-2.md">article 1.2.2</a></li>
+ </ul>
+ </li>
+ </ul>
+ </li>
+ <li><a href="chapter-2/README.md">Chapter 2</a></li>
+ <li><a href="chapter-3/README.md">Chapter 3</a></li>
+ <li>
+ <a href="chapter-4/README.md">Chapter 4</a>
+ <ul>
+ <li>Unfinished article</li>
+ </ul>
+ </li>
+ <li>Unfinished Chapter</li>
+</ul> \ No newline at end of file
diff --git a/packages/gitbook-html/test/glossary.js b/packages/gitbook-html/test/glossary.js
new file mode 100755
index 0000000..250c6f1
--- /dev/null
+++ b/packages/gitbook-html/test/glossary.js
@@ -0,0 +1,25 @@
+var fs = require('fs');
+var path = require('path');
+var assert = require('assert');
+
+var glossary = require('../').glossary;
+
+var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/GLOSSARY.html'), 'utf8');
+var LEXED = glossary(CONTENT);
+
+describe('Glossary parsing', function () {
+ it('should only get heading + paragraph pairs', function() {
+ assert.equal(LEXED.length, 5); // one entry per <h2> in fixtures/GLOSSARY.html
+ });
+
+ it('should output simple name/description objects', function() {
+ assert.equal(true, !(LEXED.some(function(e) { // i.e. no entry may have a falsy name or description
+ return !Boolean(e.name && e.description);
+ })));
+ });
+
+ it('should correctly convert it to text', function() {
+ var text = glossary.toText(LEXED);
+ assertObjectsEqual(glossary(text), LEXED); // round trip; assertObjectsEqual is the global set in test/helper.js
+ });
+});
diff --git a/packages/gitbook-html/test/helper.js b/packages/gitbook-html/test/helper.js
new file mode 100644
index 0000000..44cfb24
--- /dev/null
+++ b/packages/gitbook-html/test/helper.js
@@ -0,0 +1,6 @@
+var assert = require("assert");
+
+global.assertObjectsEqual = function(o1, o2) {
+ assert.equal(JSON.stringify(o1, null, 4), JSON.stringify(o2, null, 4));
+};
+
diff --git a/packages/gitbook-html/test/langs.js b/packages/gitbook-html/test/langs.js
new file mode 100755
index 0000000..c51cf2d
--- /dev/null
+++ b/packages/gitbook-html/test/langs.js
@@ -0,0 +1,23 @@
+var fs = require('fs');
+var path = require('path');
+var assert = require('assert');
+
+var langs = require('../').langs;
+
+var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/LANGS.html'), 'utf8');
+var LEXED = langs(CONTENT);
+
+describe('Languages parsing', function () {
+ it('should detect paths and titles', function() {
+ assert.equal(LEXED[0].path,'en/'); // first <li> link in fixtures/LANGS.html
+ assert.equal(LEXED[0].title,'English');
+
+ assert.equal(LEXED[1].path,'fr/'); // second <li> link in fixtures/LANGS.html
+ assert.equal(LEXED[1].title,'French');
+ });
+
+ it('should correctly convert it to text', function() {
+ var text = langs.toText(LEXED);
+ assertObjectsEqual(langs(text), LEXED); // round trip; assertObjectsEqual is the global set in test/helper.js
+ });
+});
diff --git a/packages/gitbook-html/test/readme.js b/packages/gitbook-html/test/readme.js
new file mode 100755
index 0000000..9d9ca29
--- /dev/null
+++ b/packages/gitbook-html/test/readme.js
@@ -0,0 +1,28 @@
+var fs = require('fs');
+var path = require('path');
+var assert = require('assert');
+
+var readme = require('../').readme;
+
+
+var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/README.html'), 'utf8');
+var LEXED = readme(CONTENT);
+
+describe('Readme parsing', function () {
+
+ it('should contain a title', function() {
+ assert(LEXED.title); // truthiness only: fails on a missing or empty title
+ });
+
+ it('should contain a description', function() {
+ assert(LEXED.description); // truthiness only: fails on a missing or empty description
+ });
+
+ it('should extract the right title', function() {
+ assert.equal(LEXED.title, "This is the title"); // text of the <h1> in fixtures/README.html
+ });
+
+ it('should extract the right description', function() {
+ assert.equal(LEXED.description, "This is the book description."); // first <p> of the fixture only
+ });
+});
diff --git a/packages/gitbook-html/test/summary.js b/packages/gitbook-html/test/summary.js
new file mode 100755
index 0000000..8d686fc
--- /dev/null
+++ b/packages/gitbook-html/test/summary.js
@@ -0,0 +1,44 @@
+var fs = require('fs');
+var path = require('path');
+var assert = require('assert');
+
+var summary = require('../').summary;
+
+var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/SUMMARY.html'), 'utf8');
+var LEXED = summary(CONTENT);
+describe('Summary parsing', function () {
+ it('should detect chapters', function() {
+ assert.equal(LEXED.chapters.length, 5); // five top-level <li> in fixtures/SUMMARY.html
+ });
+
+ it('should support articles', function() {
+ assert.equal(LEXED.chapters[0].articles.length, 2); // Chapter 1 has two direct sub-items in the fixture
+ assert.equal(LEXED.chapters[1].articles.length, 0);
+ assert.equal(LEXED.chapters[2].articles.length, 0);
+ });
+
+ it('should detect paths and titles', function() {
+ assert(LEXED.chapters[0].path);
+ assert(LEXED.chapters[1].path);
+ assert(LEXED.chapters[2].path);
+ assert(LEXED.chapters[3].path);
+ assert.equal(LEXED.chapters[4].path, null); // loose equality: also passes when path is undefined (entry has no link)
+
+ assert(LEXED.chapters[0].title);
+ assert(LEXED.chapters[1].title);
+ assert(LEXED.chapters[2].title);
+ assert(LEXED.chapters[3].title);
+ assert(LEXED.chapters[4].title);
+ });
+
+ it('should normalize paths from .md', function() {
+ assert.equal(LEXED.chapters[0].path,'chapter-1/README.md');
+ assert.equal(LEXED.chapters[1].path,'chapter-2/README.md');
+ assert.equal(LEXED.chapters[2].path,'chapter-3/README.md');
+ });
+
+ it('should correctly convert it to text', function() {
+ var text = summary.toText(LEXED);
+ assertObjectsEqual(summary(text), LEXED); // round trip; assertObjectsEqual is the global set in test/helper.js
+ });
+});