summaryrefslogtreecommitdiffstats
path: root/lib/page
diff options
context:
space:
mode:
Diffstat (limited to 'lib/page')
-rw-r--r--lib/page/html.js280
-rw-r--r--lib/page/index.js250
2 files changed, 530 insertions, 0 deletions
diff --git a/lib/page/html.js b/lib/page/html.js
new file mode 100644
index 0000000..bce6cd2
--- /dev/null
+++ b/lib/page/html.js
@@ -0,0 +1,280 @@
+var _ = require('lodash');
+var url = require('url');
+var cheerio = require('cheerio');
+var domSerializer = require('dom-serializer');
+var slug = require('github-slugid');
+
+var Promise = require('../utils/promise');
+var location = require('../utils/location');
+
+// Selector of the elements whose text is never annotated: elements matching
+// it, and everything nested in them, are skipped by applyAnnotations
+var ANNOTATION_IGNORE = '.no-glossary,code,pre,a,script,h1,h2,h3,h4,h5,h6';
+
+function HTMLPipeline(htmlString, opts) {
+ _.bindAll(this);
+
+ this.opts = _.defaults(opts || {}, {
+ // Called once the description has been found
+ onDescription: function(description) { },
+
+ // Calcul new href for a relative link
+ onRelativeLink: _.identity,
+
+ // Output an image
+ onImage: _.identity,
+
+ // Syntax highlighting
+ onCodeBlock: _.identity,
+
+ // Output a svg, if returns null the svg is kept inlined
+ onOutputSVG: _.constant(null),
+
+ // Words to annotate
+ annotations: [],
+
+ // When an annotation is applied
+ onAnnotation: function () { }
+ });
+
+ this.$ = cheerio.load(htmlString, {
+ // We should parse html without trying to normalize too much
+ xmlMode: false,
+
+ // SVG need some attributes to use uppercases
+ lowerCaseAttributeNames: false,
+ lowerCaseTags: false
+ });
+}
+
+// Transform a query of elements in the page
+HTMLPipeline.prototype._transform = function(query, fn) {
+ var that = this;
+
+ var $elements = this.$(query);
+
+ return Promise.serie($elements, function(el) {
+ var $el = that.$(el);
+ return fn.call(that, $el);
+ });
+};
+
+// Normalize links
+HTMLPipeline.prototype.transformLinks = function() {
+ return this._transform('a', function($a) {
+ var href = $a.attr('href');
+ if (!href) return;
+
+ if (location.isAnchor(href)) {
+ // Don't "change" anchor links
+ } else if (location.isRelative(href)) {
+ // Preserve anchor
+ var parsed = url.parse(href);
+ var filename = this.opts.onRelativeLink(parsed.pathname);
+
+ $a.attr('href', filename + (parsed.hash || ''));
+ } else {
+ // External links
+ $a.attr('target', '_blank');
+ }
+ });
+};
+
+// Normalize images
+HTMLPipeline.prototype.transformImages = function() {
+ return this._transform('img', function($img) {
+ return Promise(this.opts.onImage($img.attr('src')))
+ .then(function(filename) {
+ $img.attr('src', filename);
+ });
+ });
+};
+
+// Normalize code blocks
+HTMLPipeline.prototype.transformCodeBlocks = function() {
+ return this._transform('code', function($code) {
+ // Extract language
+ var lang = _.chain(
+ ($code.attr('class') || '').split(' ')
+ )
+ .map(function(cl) {
+ // Markdown
+ if (cl.search('lang-') === 0) return cl.slice('lang-'.length);
+
+ // Asciidoc
+ if (cl.search('language-') === 0) return cl.slice('language-'.length);
+
+ return null;
+ })
+ .compact()
+ .first()
+ .value();
+
+ var source = $code.text();
+
+ return Promise(this.opts.onCodeBlock(source, lang))
+ .then(function(blk) {
+ if (blk.html === false) {
+ $code.text(blk.body);
+ } else {
+ $code.html(blk.body);
+ }
+ });
+ });
+};
+
+// Add ID to headings
+HTMLPipeline.prototype.transformHeadings = function() {
+ var that = this;
+
+ this.$('h1,h2,h3,h4,h5,h6').each(function() {
+ var $h = that.$(this);
+
+ // Already has an ID?
+ if ($h.attr('id')) return;
+ $h.attr('id', slug($h.text()));
+ });
+};
+
+// Outline SVG from the HML
+HTMLPipeline.prototype.transformSvgs = function() {
+ var that = this;
+
+ return this._transform('svg', function($svg) {
+ var content = [
+ '<?xml version="1.0" encoding="UTF-8"?>',
+ renderDOM(that.$, $svg)
+ ].join('\n');
+
+ return Promise(that.opts.onOutputSVG(content))
+ .then(function(filename) {
+ if (!filename) return;
+
+ $svg.replaceWith(that.$('<img>').attr('src', filename));
+ });
+ });
+};
+
+// Annotate the content
+HTMLPipeline.prototype.applyAnnotations = function() {
+ var that = this;
+
+ _.each(this.opts.annotations, function(annotation) {
+ var searchRegex = new RegExp( '\\b(' + pregQuote(annotation.name.toLowerCase()) + ')\\b' , 'gi' );
+
+ that.$('*').each(function() {
+ var $this = that.$(this);
+
+ if (
+ $this.is(ANNOTATION_IGNORE) ||
+ $this.parents(ANNOTATION_IGNORE).length > 0
+ ) return;
+
+ replaceText(that.$, this, searchRegex, function(match) {
+ that.opts.onAnnotation(annotation);
+
+ return '<a href="' + that.opts.onRelativeLink(annotation.href) + '" '
+ + 'class="glossary-term" title="'+_.escape(annotation.description)+'">'
+ + match
+ + '</a>';
+ });
+ });
+ });
+};
+
+// Extract page description from html
+// This can totally be improved
+HTMLPipeline.prototype.extractDescription = function() {
+ var $p = this.$('p').first();
+ var description = $p.text().trim().slice(0, 155);
+
+ this.opts.onDescription(description);
+};
+
+// Write content to the pipeline
+HTMLPipeline.prototype.output = function() {
+ var that = this;
+
+ return Promise()
+ .then(this.extractDescription)
+ .then(this.transformImages)
+ .then(this.transformHeadings)
+ .then(this.transformCodeBlocks)
+ .then(this.transformSvgs)
+ .then(this.applyAnnotations)
+
+ // Transform of links should be applied after annotations
+ // because annotations are created as links
+ .then(this.transformLinks)
+
+ .then(function() {
+ return renderDOM(that.$);
+ });
+};
+
+
+// Render a cheerio DOM as html
+function renderDOM($, dom, options) {
+ if (!dom && $._root && $._root.children) {
+ dom = $._root.children;
+ }
+ options = options|| dom.options || $._options;
+ return domSerializer(dom, options);
+}
+
+// Replace text in an element
+function replaceText($, el, search, replace, text_only ) {
+ return $(el).each(function(){
+ var node = this.firstChild,
+ val,
+ new_val,
+
+ // Elements to be removed at the end.
+ remove = [];
+
+ // Only continue if firstChild exists.
+ if ( node ) {
+
+ // Loop over all childNodes.
+ while (node) {
+
+ // Only process text nodes.
+ if ( node.nodeType === 3 ) {
+
+ // The original node value.
+ val = node.nodeValue;
+
+ // The new value.
+ new_val = val.replace( search, replace );
+
+ // Only replace text if the new value is actually different!
+ if ( new_val !== val ) {
+
+ if ( !text_only && /</.test( new_val ) ) {
+ // The new value contains HTML, set it in a slower but far more
+ // robust way.
+ $(node).before( new_val );
+
+ // Don't remove the node yet, or the loop will lose its place.
+ remove.push( node );
+ } else {
+ // The new value contains no HTML, so it can be set in this
+ // very fast, simple way.
+ node.nodeValue = new_val;
+ }
+ }
+ }
+
+ node = node.nextSibling;
+ }
+ }
+
+ // Time to remove those elements!
+ if (remove.length) $(remove).remove();
+ });
+}
+
+function pregQuote( str ) {
+ return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, '\\$1');
+}
+
+module.exports = HTMLPipeline;
diff --git a/lib/page/index.js b/lib/page/index.js
new file mode 100644
index 0000000..f3a8f39
--- /dev/null
+++ b/lib/page/index.js
@@ -0,0 +1,250 @@
+var _ = require('lodash');
+var path = require('path');
+var direction = require('direction');
+var fm = require('front-matter');
+
+var error = require('../utils/error');
+var pathUtil = require('../utils/path');
+var location = require('../utils/location');
+var parsers = require('../parsers');
+var gitbook = require('../gitbook');
+var pluginCompatibility = require('../plugins/compatibility');
+var HTMLPipeline = require('./html');
+
+/*
+A page represents a parsable file of the book (Markdown, Asciidoc, etc.)
+*/
+
+function Page(book, filename) {
+ if (!(this instanceof Page)) return new Page(book, filename);
+ var extension;
+ _.bindAll(this);
+
+ this.book = book;
+ this.log = this.book.log;
+
+ // Current content
+ this.content = '';
+
+ // Short description for the page
+ this.description = '';
+
+ // Relative path to the page
+ this.path = location.normalize(filename);
+
+ // Absolute path to the page
+ this.rawPath = this.book.resolve(filename);
+
+ // Last modification date
+ this.mtime = 0;
+
+ // Can we parse it?
+ extension = path.extname(this.path);
+ this.parser = parsers.get(extension);
+ if (!this.parser) throw error.ParsingError(new Error('Can\'t parse file "'+this.path+'"'));
+
+ this.type = this.parser.name;
+}
+
+// Return the filename of the page with another extension
+// "README.md" -> "README.html"
+Page.prototype.withExtension = function(ext) {
+ return pathUtil.setExtension(this.path, ext);
+};
+
+// Resolve a filename relative to this page
+// It returns a path relative to the book root folder
+Page.prototype.resolveLocal = function() {
+ var dir = path.dirname(this.path);
+ var file = path.join.apply(path, _.toArray(arguments));
+
+ return location.toAbsolute(file, dir, '');
+};
+
+// Resolve a filename relative to this page
+// It returns an absolute path for the FS
+Page.prototype.resolve = function() {
+ return this.book.resolve(this.resolveLocal.apply(this, arguments));
+};
+
+// Convert an absolute path (in the book) to a relative path from this page
+Page.prototype.relative = function(name) {
+ // Convert /test.png -> test.png
+ name = location.toAbsolute(name, '', '');
+
+ return location.relative(
+ this.resolve('.') + '/',
+ this.book.resolve(name)
+ );
+};
+
+// Return a page result of a relative page from this page
+Page.prototype.followPage = function(filename) {
+ var absPath = this.resolveLocal(filename);
+ return this.book.getPage(absPath);
+};
+
+// Update content of the page
+// Only the content kept in memory is replaced, nothing else is done here
+Page.prototype.update = function(content) {
+ this.content = content;
+};
+
+// Read the page as a string
+Page.prototype.read = function() {
+ var that = this;
+
+ return this.book.statFile(this.path)
+ .then(function(stat) {
+ that.mtime = stat.mtime;
+ return that.book.readFile(that.path);
+ })
+ .then(this.update);
+};
+
+// Return templating context for this page
+// This is used both for themes and page parsing
+Page.prototype.getContext = function() {
+ var article = this.book.summary.getArticle(this);
+ var next = article? article.next() : null;
+ var prev = article? article.prev() : null;
+
+ // Detect text direction in this page
+ var dir = this.book.config.get('direction');
+ if (!dir) {
+ dir = direction(this.content);
+ if (dir == 'neutral') dir = null;
+ }
+
+ return _.extend(
+ {
+ file: {
+ path: this.path,
+ mtime: this.mtime,
+ type: this.type
+ },
+ page: {
+ title: article? article.title : null,
+ description: this.description,
+ next: next? next.getContext() : null,
+ previous: prev? prev.getContext() : null,
+ level: article? article.level : null,
+ depth: article? article.depth : 0,
+ content: this.content,
+ dir: dir
+ }
+ },
+ gitbook.getContext(),
+ this.book.getContext(),
+ this.book.langs.getContext(),
+ this.book.summary.getContext(),
+ this.book.glossary.getContext(),
+ this.book.config.getContext()
+ );
+};
+
+// Parse the page and return its content
+//
+// Steps, in this order: read the file, extract the YAML front matter, call
+// the "page:before" hook, prepare the content with the parser, render the
+// template, render the markup, post-process the template, normalize the
+// HTML (HTMLPipeline), then call the "page" hook.
+// The content of the page is updated after each step.
+//
+// "output" provides the plugins, the template engine and the callbacks
+// resolving links, images and svg.
+// It returns a promise for the final content of the page.
+Page.prototype.toHTML = function(output) {
+ var that = this;
+
+ this.log.debug.ln('start parsing file', this.path);
+
+ // Call a hook in the output
+ // using an utility to "keep" compatibility with gitbook 2
+ function hook(name) {
+ return pluginCompatibility.pageHook(that, function(ctx) {
+ return output.plugins.hook(name, ctx);
+ })
+ .then(function(result) {
+ // Only a string returned by the hook replaces the content
+ if(_.isString(result)) that.update(result);
+ });
+ }
+
+ return this.read()
+
+ // Parse yaml front matter
+ .then(function() {
+ var parsed = fm(that.content);
+
+ // Extend page with the frontmatter attribute
+ // (the description is '' when the front matter has none)
+ that.description = parsed.attributes.description || '';
+
+ // Keep only the body
+ that.update(parsed.body);
+ })
+
+ .then(function() {
+ return hook('page:before');
+ })
+
+ // Pre-process page with parser
+ .then(function() {
+ return that.parser.page.prepare(that.content)
+ .then(that.update);
+ })
+
+ // Render template
+ .then(function() {
+ return output.template.render(that.content, that.getContext(), {
+ path: that.path
+ })
+ .then(that.update);
+ })
+
+ // Render markup using the parser
+ .then(function() {
+ return that.parser.page(that.content)
+ .then(function(out) {
+ that.update(out.content);
+ });
+ })
+
+ // Post process templating
+ .then(function() {
+ return output.template.postProcess(that.content)
+ .then(that.update);
+ })
+
+ // Normalize HTML output
+ .then(function() {
+ // Callbacks of the output receive this page as first argument
+ var pipelineOpts = {
+ onRelativeLink: _.partial(output.onRelativeLink, that),
+ onImage: _.partial(output.onOutputImage, that),
+ onOutputSVG: _.partial(output.onOutputSVG, that),
+
+ // Use 'code' template block
+ onCodeBlock: function(source, lang) {
+ return output.template.applyBlock('code', {
+ body: source,
+ kwargs: {
+ language: lang
+ }
+ });
+ },
+
+ // Extract description from page's content if no frontmatter
+ onDescription: function(description) {
+ if (that.description) return;
+ that.description = description;
+ },
+
+ // Convert glossary entries to annotations
+ annotations: that.book.glossary.annotations()
+ };
+ var pipeline = new HTMLPipeline(that.content, pipelineOpts);
+
+ return pipeline.output()
+ .then(that.update);
+ })
+
+ .then(function() {
+ return hook('page');
+ })
+
+ // Return content itself
+ .then(function() {
+ return that.content;
+ });
+};
+
+
+module.exports = Page;