diff options
Diffstat (limited to 'packages/gitbook-html')
21 files changed, 499 insertions, 0 deletions
diff --git a/packages/gitbook-html/.gitignore b/packages/gitbook-html/.gitignore new file mode 100644 index 0000000..9550e4f --- /dev/null +++ b/packages/gitbook-html/.gitignore @@ -0,0 +1,28 @@ +# Logs +logs +*.log + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + +# Dependency directory +# Deployed apps should consider commenting this line out: +# see https://npmjs.org/doc/faq.html#Should-I-check-my-node_modules-folder-into-git +node_modules + +# vim swapfile +*.swp diff --git a/packages/gitbook-html/.travis.yml b/packages/gitbook-html/.travis.yml new file mode 100644 index 0000000..603536b --- /dev/null +++ b/packages/gitbook-html/.travis.yml @@ -0,0 +1,4 @@ +language: node_js +node_js: + - "stable" + - "0.10"
\ No newline at end of file diff --git a/packages/gitbook-html/README.md b/packages/gitbook-html/README.md new file mode 100644 index 0000000..bd9f006 --- /dev/null +++ b/packages/gitbook-html/README.md @@ -0,0 +1,9 @@ +# GitBook HTML Parser + +[](https://travis-ci.org/GitbookIO/gitbook-html) +[](http://badge.fury.io/js/gitbook-html) + +This node module parses HTML for gitbook (SUMMARY.html, README.html). + +It is a dependency of **gitbook-asciidoc** and **gitbook-markdown**. + diff --git a/packages/gitbook-html/lib/dom.js b/packages/gitbook-html/lib/dom.js new file mode 100644 index 0000000..2c2eaf7 --- /dev/null +++ b/packages/gitbook-html/lib/dom.js @@ -0,0 +1,23 @@ +var _ = require('lodash'); +var cheerio = require('cheerio'); + +// Parse an HTML string and return its content +function parse(html) { + var $ = cheerio.load('<div>'+html+'</div>'); + var $el = $('html, body').first(); + + return $el.length > 0? $el : $; +} + +// Return text node of an element +function textNode($el) { + return _.reduce($el.children, function(text, e) { + if (e.type == 'text') text += e.data; + return text; + }, ''); +} + +module.exports = { + parse: parse, + textNode: textNode +}; diff --git a/packages/gitbook-html/lib/glossary.js b/packages/gitbook-html/lib/glossary.js new file mode 100755 index 0000000..648ba1a --- /dev/null +++ b/packages/gitbook-html/lib/glossary.js @@ -0,0 +1,38 @@ +var _ = require('lodash'); +var dom = require('./dom'); + +// HTML -> Glossary +function parseGlossary(html) { + var $ = dom.parse(html); + + var entries = []; + + $("h2").each(function() { + var $heading = $(this); + var $p = $heading.next(); + + var entry = {}; + + entry.name = $heading.text(); + entry.description = $p.text(); + + entries.push(entry); + }); + + return entries; +} + +// Glossary -> HTML +function glossaryToText(glossary) { + var bl = '\n'; + + var body = _.map(glossary, function(entry) { + return '<h2>' + entry.name + '</h2>' + bl + bl + + '<p>' + entry.description + '</p>'; 
}).join(bl+bl); + + return '<h1>Glossary</h1>'+bl+bl+body; +} + +module.exports = parseGlossary; +module.exports.toText = glossaryToText; diff --git a/packages/gitbook-html/lib/index.js b/packages/gitbook-html/lib/index.js new file mode 100755 index 0000000..a7c478c --- /dev/null +++ b/packages/gitbook-html/lib/index.js @@ -0,0 +1,8 @@ + +module.exports = { + summary: require("./summary"), + glossary: require("./glossary"), + langs: require("./langs"), + readme: require("./readme"), + page: require("./page") +}; diff --git a/packages/gitbook-html/lib/langs.js b/packages/gitbook-html/lib/langs.js new file mode 100755 index 0000000..1042dcb --- /dev/null +++ b/packages/gitbook-html/lib/langs.js @@ -0,0 +1,24 @@ +var _ = require('lodash'); +var parseEntries = require('./summary').entries; + +// HTML -> Languages +function parseLangs(content) { + return parseEntries(content); +} + +// Languages -> HTML +function langsToText(langs) { + var bl = '\n'; + var content = '<h1>Languages</h1>'+bl+bl; + + content += '<ul>' + bl; + _.each(langs, function(lang) { + content = content + ' <li><a href="'+lang.path+'">'+lang.title+'</a></li>'+bl; + }); + content += '</ul>' + bl; + + return content; +} + +module.exports = parseLangs; +module.exports.toText = langsToText; diff --git a/packages/gitbook-html/lib/page.js b/packages/gitbook-html/lib/page.js new file mode 100755 index 0000000..e687050 --- /dev/null +++ b/packages/gitbook-html/lib/page.js @@ -0,0 +1,9 @@ +var Q = require('q'); +var _ = require('lodash'); + +// HTML -> HTML +function parsePage(src) { + return src; +} + +module.exports = parsePage; diff --git a/packages/gitbook-html/lib/readme.js b/packages/gitbook-html/lib/readme.js new file mode 100755 index 0000000..0d179ad --- /dev/null +++ b/packages/gitbook-html/lib/readme.js @@ -0,0 +1,16 @@ +var _ = require('lodash'); +var dom = require('./dom'); + +// HTML -> Readme +function parseReadme(html) { + var $ = dom.parse(html); + + return { + title: 
$('h1:first-child').text().trim(), + description: $('div.paragraph,p').first().text().trim() + }; +} + + +// Exports +module.exports = parseReadme; diff --git a/packages/gitbook-html/lib/summary.js b/packages/gitbook-html/lib/summary.js new file mode 100755 index 0000000..1e2d63d --- /dev/null +++ b/packages/gitbook-html/lib/summary.js @@ -0,0 +1,86 @@ +var _ = require('lodash'); +var dom = require('./dom'); + + +// Parse a list element and return its <li> children as chapters, recursing into nested lists +function parseList($ul, $) { + var articles = []; + + $ul.children('>li').each(function() { + var article = {}; + + var $li = $(this); + + // Get text for the entry + var $p = $li.children('> p'); + article.title = $p.text() || dom.textNode($li.get(0)); + + // Parse link (backslashes become forward slashes, leading slashes are stripped) + var $a = $li.find('> a, > p > a'); + if ($a.length > 0) { + article.title = $a.first().text(); + article.path = $a.attr('href').replace(/\\/g, '/').replace(/^\/+/, ''); + } + + // Sub articles + var $sub = $li.children('> .olist > ol, > ol, > ul'); + article.articles = parseList($sub, $); + + articles.push(article); + }); + + return articles; +} + +// Return a list of entries in a div +function parseEntries (html) { + var $ = dom.parse(html); + var chapters = parseList($("> ol, > ul").first(), $); + return chapters; +} + +// HTML -> Summary +function parseSummary(src) { + var chapters = parseEntries(src); + + return { + chapters: chapters + }; +} + +// Summary -> HTML: an <h1> heading followed by nested <ul> lists, each opened with <ul> and closed with </ul> +function summaryToText(summary) { + var bl = '\n'; + + var _base = function(article) { + if (article.path) { + return '<a href="'+article.path+'">'+article.title+'</a>'; + } else { + return article.title; + } + }; + + var convertArticle = function(article, d) { + var content = Array(d+2).join(' ') + '<li>' + _base(article); + + if (article.articles.length > 0) { + content += convertArticles(article.articles, d); + } + return content + '</li>' + bl; + }; + + var convertArticles = function(articles, d) { + var content = '<ul>' + bl; + _.each(articles, 
function(_article) { + content += convertArticle(_article, d + 1); + }); + return content + '</ul>' + bl; + } + + return '<h1>Summary</h1>'+ bl+bl + convertArticles(summary.chapters, 0) + bl; +}; + + +module.exports = parseSummary; +module.exports.entries = parseEntries; +module.exports.toText = summaryToText; diff --git a/packages/gitbook-html/package.json b/packages/gitbook-html/package.json new file mode 100644 index 0000000..ff28ca6 --- /dev/null +++ b/packages/gitbook-html/package.json @@ -0,0 +1,37 @@ +{ + "name": "gitbook-html", + "version": "0.0.0", + "homepage": "https://www.gitbook.com", + "description": "Parse HTML content for gitbook", + "main": "lib/index.js", + "dependencies": { + "q": "^1.1.2", + "lodash": "^3.2.0", + "cheerio": "^0.19.0" + }, + "devDependencies": { + "mocha": "^2.3.2" + }, + "scripts": { + "test": "export TESTING=true; mocha --reporter list --bail" + }, + "repository": { + "type": "git", + "url": "https://github.com/GitbookIO/gitbook-html.git" + }, + "author": "FriendCode Inc. <contact@gitbook.com>", + "license": "Apache-2.0", + "bugs": { + "url": "https://github.com/GitbookIO/gitbook-html/issues" + }, + "contributors": [ + { + "name": "Aaron O'Mullan", + "email": "aaron@gitbook.com" + }, + { + "name": "Samy Pessé", + "email": "samy@gitbook.com" + } + ] +}
\ No newline at end of file diff --git a/packages/gitbook-html/test/fixtures/GLOSSARY.html b/packages/gitbook-html/test/fixtures/GLOSSARY.html new file mode 100755 index 0000000..3be80a6 --- /dev/null +++ b/packages/gitbook-html/test/fixtures/GLOSSARY.html @@ -0,0 +1,39 @@ +<h1>Glossary</h1> + +<h2>Magic</h2> + +<p>Sufficiently advanced technology, beyond the understanding of the observer producing a sense of wonder.</p> + +<p>Hello, I am random noise in the middle of this beautiful Glossary. (Really astonishing !)</p> + +<h2>PHP</h2> + +<p>An atrocious language, invented for the sole purpose of inflicting pain and suffering amongst the proframming wizards of this world.</p> + +<h2>Clojure</h2> + +<p>Lisp re-invented for hipsters.</p> + +<h2>Go</h2> + +<p>Go Go Google <a href="https://www.google.com">Wow</a></p> + +<p>Fantastic, I love code too ! :</p> + +<p>```</p> + +<p>def f(x): + return x * 4</p> + +<h1>Wow this is some really awesome code</h1> + +<h1>totally mind blowing</h1> + +<h1>but we don't care, it shouldn't be in our glossary !</h1> + +<p>print(f(9)) +```</p> + +<h2>Gitbook</h2> + +<p>Awesome project. Really amazing, I'm really at a loss for words ...</p> diff --git a/packages/gitbook-html/test/fixtures/LANGS.html b/packages/gitbook-html/test/fixtures/LANGS.html new file mode 100755 index 0000000..be0b717 --- /dev/null +++ b/packages/gitbook-html/test/fixtures/LANGS.html @@ -0,0 +1,6 @@ +<h1>Languages</h1> + +<ul> +<li><a href="en/">English</a></li> +<li><a href="fr/">French</a></li> +</ul> diff --git a/packages/gitbook-html/test/fixtures/PAGE.adoc b/packages/gitbook-html/test/fixtures/PAGE.adoc new file mode 100755 index 0000000..a9afbac --- /dev/null +++ b/packages/gitbook-html/test/fixtures/PAGE.adoc @@ -0,0 +1,14 @@ += Python basics + +Python is a nice language, you can add stuff. Bla bla bla. + +Some more nice content .... 
+ +[Cool stuff](http://gitbook.io) + +[Link to another Markdown file](./xyz/file.md) + +And look at this pretty picture: + + +Lets go for another exercise but this time with some context : diff --git a/packages/gitbook-html/test/fixtures/README.html b/packages/gitbook-html/test/fixtures/README.html new file mode 100755 index 0000000..d73a058 --- /dev/null +++ b/packages/gitbook-html/test/fixtures/README.html @@ -0,0 +1,6 @@ +<h1>This is the title</h1> + +<p>This is the book description.</p> + +<p>other content +...</p>
\ No newline at end of file diff --git a/packages/gitbook-html/test/fixtures/SUMMARY.html b/packages/gitbook-html/test/fixtures/SUMMARY.html new file mode 100755 index 0000000..f469249 --- /dev/null +++ b/packages/gitbook-html/test/fixtures/SUMMARY.html @@ -0,0 +1,26 @@ +<h1>Summary</h1> + +<ul> + <li> + <a href="chapter-1/README.md">Chapter 1</a> + <ul> + <li><a href="chapter-1/ARTICLE1.md">Article 1</a></li> + <li + ><a href="chapter-1/ARTICLE2.md">Article 2</a> + <ul> + <li><a href="\chapter-1\ARTICLE-1-2-1.md">article 1.2.1</a></li> + <li><a href="/chapter-1/ARTICLE-1-2-2.md">article 1.2.2</a></li> + </ul> + </li> + </ul> + </li> + <li><a href="chapter-2/README.md">Chapter 2</a></li> + <li><a href="chapter-3/README.md">Chapter 3</a></li> + <li> + <a href="chapter-4/README.md">Chapter 4</a> + <ul> + <li>Unfinished article</li> + </ul> + </li> + <li>Unfinished Chapter</li> +</ul>
\ No newline at end of file diff --git a/packages/gitbook-html/test/glossary.js b/packages/gitbook-html/test/glossary.js new file mode 100755 index 0000000..250c6f1 --- /dev/null +++ b/packages/gitbook-html/test/glossary.js @@ -0,0 +1,25 @@ +var fs = require('fs'); +var path = require('path'); +var assert = require('assert'); + +var glossary = require('../').glossary; + +var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/GLOSSARY.html'), 'utf8'); +var LEXED = glossary(CONTENT); /* fixture is parsed once at load time; the tests below only inspect this result */ + +describe('Glossary parsing', function () { + it('should only get heading + paragraph pairs', function() { + assert.equal(LEXED.length, 5); /* one entry per <h2> heading in the fixture */ + }); + + it('should output simple name/description objects', function() { + assert.equal(true, !(LEXED.some(function(e) { + return !Boolean(e.name && e.description); + }))); + }); + + it('should correctly convert it to text', function() { + var text = glossary.toText(LEXED); + assertObjectsEqual(glossary(text), LEXED); /* round trip: re-parsing the serialised text must give the same entries */ + }); +}); diff --git a/packages/gitbook-html/test/helper.js b/packages/gitbook-html/test/helper.js new file mode 100644 index 0000000..44cfb24 --- /dev/null +++ b/packages/gitbook-html/test/helper.js @@ -0,0 +1,6 @@ +var assert = require("assert"); + +global.assertObjectsEqual = function(o1, o2) { /* shared helper: two values are equal when their 4-space-indented JSON text is equal */ + assert.equal(JSON.stringify(o1, null, 4), JSON.stringify(o2, null, 4)); +}; + diff --git a/packages/gitbook-html/test/langs.js b/packages/gitbook-html/test/langs.js new file mode 100755 index 0000000..c51cf2d --- /dev/null +++ b/packages/gitbook-html/test/langs.js @@ -0,0 +1,23 @@ +var fs = require('fs'); +var path = require('path'); +var assert = require('assert'); + +var langs = require('../').langs; + +var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/LANGS.html'), 'utf8'); +var LEXED = langs(CONTENT); /* fixture is parsed once at load time */ + +describe('Languages parsing', function () { + it('should detect paths and titles', function() { + assert.equal(LEXED[0].path,'en/'); + assert.equal(LEXED[0].title,'English'); + + 
assert.equal(LEXED[1].path,'fr/'); + assert.equal(LEXED[1].title,'French'); + }); + + it('should correctly convert it to text', function() { + var text = langs.toText(LEXED); + assertObjectsEqual(langs(text), LEXED); /* round trip: re-parsing the serialised text must give the same entries */ + }); +}); diff --git a/packages/gitbook-html/test/readme.js b/packages/gitbook-html/test/readme.js new file mode 100755 index 0000000..9d9ca29 --- /dev/null +++ b/packages/gitbook-html/test/readme.js @@ -0,0 +1,28 @@ +var fs = require('fs'); +var path = require('path'); +var assert = require('assert'); + +var readme = require('../').readme; + + +var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/README.html'), 'utf8'); +var LEXED = readme(CONTENT); /* fixture is parsed once at load time */ + +describe('Readme parsing', function () { + + it('should contain a title', function() { + assert(LEXED.title); + }); + + it('should contain a description', function() { + assert(LEXED.description); + }); + + it('should extract the right title', function() { + assert.equal(LEXED.title, "This is the title"); + }); + + it('should extract the right description', function() { + assert.equal(LEXED.description, "This is the book description."); + }); +}); diff --git a/packages/gitbook-html/test/summary.js b/packages/gitbook-html/test/summary.js new file mode 100755 index 0000000..8d686fc --- /dev/null +++ b/packages/gitbook-html/test/summary.js @@ -0,0 +1,44 @@ +var fs = require('fs'); +var path = require('path'); +var assert = require('assert'); + +var summary = require('../').summary; + +var CONTENT = fs.readFileSync(path.join(__dirname, './fixtures/SUMMARY.html'), 'utf8'); +var LEXED = summary(CONTENT); /* fixture is parsed once at load time */ +describe('Summary parsing', function () { + it('should detect chapters', function() { + assert.equal(LEXED.chapters.length, 5); /* one chapter per top-level <li> in the fixture */ + }); + + it('should support articles', function() { + assert.equal(LEXED.chapters[0].articles.length, 2); + assert.equal(LEXED.chapters[1].articles.length, 0); + assert.equal(LEXED.chapters[2].articles.length, 0); + }); + + it('should detect paths and titles', function() 
{ + assert(LEXED.chapters[0].path); + assert(LEXED.chapters[1].path); + assert(LEXED.chapters[2].path); + assert(LEXED.chapters[3].path); + assert.equal(LEXED.chapters[4].path, null); /* entry without a link: the parser only sets path when an <a> is found, and assert.equal's loose comparison accepts the missing value as null */ + + assert(LEXED.chapters[0].title); + assert(LEXED.chapters[1].title); + assert(LEXED.chapters[2].title); + assert(LEXED.chapters[3].title); + assert(LEXED.chapters[4].title); + }); + + it('should normalize paths from .md', function() { + assert.equal(LEXED.chapters[0].path,'chapter-1/README.md'); + assert.equal(LEXED.chapters[1].path,'chapter-2/README.md'); + assert.equal(LEXED.chapters[2].path,'chapter-3/README.md'); + }); + + it('should correctly convert it to text', function() { + var text = summary.toText(LEXED); + assertObjectsEqual(summary(text), LEXED); /* round trip: re-parsing the serialised summary must give the same tree */ + }); +}); |