2 files changed, 530 insertions, 0 deletions
diff --git a/lib/page/html.js b/lib/page/html.js
new file mode 100644
index 0000000..bce6cd2
--- /dev/null
+++ b/lib/page/html.js
@@ -0,0 +1,280 @@
+var _ = require('lodash');
+var url = require('url');
+var cheerio = require('cheerio');
+var domSerializer = require('dom-serializer');
+var slug = require('github-slugid');
+
+var Promise = require('../utils/promise');
+var location = require('../utils/location');
+
+// Selector of the elements whose text must not be annotated (matched on the element itself or any ancestor)
+var ANNOTATION_IGNORE = '.no-glossary,code,pre,a,script,h1,h2,h3,h4,h5,h6';
+
+function HTMLPipeline(htmlString, opts) {
+    _.bindAll(this);
+
+    this.opts = _.defaults(opts || {}, {
+        // Called once the description has been found
+        onDescription: function(description) { },
+
+        // Calcul new href for a relative link
+        onRelativeLink: _.identity,
+
+        // Output an image
+        onImage: _.identity,
+
+        // Syntax highlighting
+        onCodeBlock: _.identity,
+
+        // Output a svg, if returns null the svg is kept inlined
+        onOutputSVG: _.constant(null),
+
+        // Words to annotate
+        annotations: [],
+
+        // When an annotation is applied
+        onAnnotation: function () { }
+    });
+
+    this.$ = cheerio.load(htmlString, {
+        // We should parse html without trying to normalize too much
+        xmlMode: false,
+
+        // SVG need some attributes to use uppercases
+        lowerCaseAttributeNames: false,
+        lowerCaseTags: false
+    });
+}
+
+// Transform a query of elements in the page
+HTMLPipeline.prototype._transform = function(query, fn) {
+    var that = this;
+
+    var $elements = this.$(query);
+
+    return Promise.serie($elements, function(el) {
+        var $el = that.$(el);
+        return fn.call(that, $el);
+    });
+};
+
+// Normalize links
+HTMLPipeline.prototype.transformLinks = function() {
+    return this._transform('a', function($a) {
+        var href = $a.attr('href');
+        if (!href) return;
+
+        if (location.isAnchor(href)) {
+            // Don't "change" anchor links
+        } else if (location.isRelative(href)) {
+            // Preserve anchor
+            var parsed = url.parse(href);
+            var filename = this.opts.onRelativeLink(parsed.pathname);
+
+            $a.attr('href', filename + (parsed.hash || ''));
+        } else {
+            // External links
+            $a.attr('target', '_blank');
+        }
+    });
+};
+
+// Normalize images
+HTMLPipeline.prototype.transformImages = function() {
+    return this._transform('img', function($img) {
+        return Promise(this.opts.onImage($img.attr('src')))
+        .then(function(filename) {
+            $img.attr('src', filename);
+        });
+    });
+};
+
+// Normalize code blocks
+HTMLPipeline.prototype.transformCodeBlocks = function() {
+    return this._transform('code', function($code) {
+        // Extract language
+        var lang = _.chain(
+                ($code.attr('class') || '').split(' ')
+            )
+            .map(function(cl) {
+                // Markdown
+                if (cl.search('lang-') === 0) return cl.slice('lang-'.length);
+
+                // Asciidoc
+                if (cl.search('language-') === 0) return cl.slice('language-'.length);
+
+                return null;
+            })
+            .compact()
+            .first()
+            .value();
+
+        var source = $code.text();
+
+        return Promise(this.opts.onCodeBlock(source, lang))
+        .then(function(blk) {
+            if (blk.html === false) {
+                $code.text(blk.body);
+            } else {
+                $code.html(blk.body);
+            }
+        });
+    });
+};
+
+// Add ID to headings
+HTMLPipeline.prototype.transformHeadings = function() {
+    var that = this;
+
+    this.$('h1,h2,h3,h4,h5,h6').each(function() {
+        var $h = that.$(this);
+
+        // Already has an ID?
+        if ($h.attr('id')) return;
+        $h.attr('id', slug($h.text()));
+    });
+};
+
+// Outline SVG from the HML
+HTMLPipeline.prototype.transformSvgs = function() {
+    var that = this;
+
+    return this._transform('svg', function($svg) {
+        var content = [
+            '<?xml version="1.0" encoding="UTF-8"?>',
+            renderDOM(that.$, $svg)
+        ].join('\n');
+
+        return Promise(that.opts.onOutputSVG(content))
+        .then(function(filename) {
+            if (!filename) return;
+
+            $svg.replaceWith(that.$('<img>').attr('src', filename));
+        });
+    });
+};
+
+// Annotate the content: wrap occurrences of each annotation name in a glossary link
+HTMLPipeline.prototype.applyAnnotations = function() {
+    var that = this;
+    // One pass over all elements per annotation, with a whole-word, case-insensitive regex of its name
+    _.each(this.opts.annotations, function(annotation) {
+        var searchRegex =  new RegExp( '\\b(' + pregQuote(annotation.name.toLowerCase()) + ')\\b' , 'gi' );
+
+        that.$('*').each(function() {
+            var $this = that.$(this);
+            // Skip ignored elements and anything nested inside one of them
+            if (
+                $this.is(ANNOTATION_IGNORE) ||
+                $this.parents(ANNOTATION_IGNORE).length > 0
+            ) return;
+            // replaceText only looks at the direct text children of the element
+            replaceText(that.$, this, searchRegex, function(match) {
+                that.opts.onAnnotation(annotation); // fired once per replaced occurrence
+                // NOTE(review): href and match are inserted unescaped, and transformLinks later runs on these links too - confirm onRelativeLink is safe to apply twice
+                return '<a href="' + that.opts.onRelativeLink(annotation.href) + '" '
+                    + 'class="glossary-term" title="'+_.escape(annotation.description)+'">'
+                    + match
+                    + '</a>';
+            });
+        });
+    });
+};
+
+// Extract page description from html
+// This can totally be improved
+HTMLPipeline.prototype.extractDescription = function() {
+    var $p = this.$('p').first();
+    var description = $p.text().trim().slice(0, 155);
+
+    this.opts.onDescription(description);
+};
+
+// Write content to the pipeline
+HTMLPipeline.prototype.output = function() {
+    var that = this;
+
+    return Promise()
+    .then(this.extractDescription)
+    .then(this.transformImages)
+    .then(this.transformHeadings)
+    .then(this.transformCodeBlocks)
+    .then(this.transformSvgs)
+    .then(this.applyAnnotations)
+
+    // Transform of links should be applied after annotations
+    // because annotations are created as links
+    .then(this.transformLinks)
+
+    .then(function() {
+        return renderDOM(that.$);
+    });
+};
+
+
+// Render a cheerio DOM as html
+function renderDOM($, dom, options) {
+    if (!dom && $._root && $._root.children) {
+        dom = $._root.children;
+    }
+    options = options|| dom.options || $._options;
+    return domSerializer(dom, options);
+}
+
+// Replace text in the direct text-node children of `el` (child elements are not descended into).
+function replaceText($, el, search, replace, text_only ) {
+    return $(el).each(function(){
+        var node = this.firstChild,
+            val,
+            new_val,
+
+            // Elements to be removed at the end.
+            remove = [];
+        // `search` and `replace` are passed as-is to String.prototype.replace on each text node
+        // Only continue if firstChild exists.
+        if ( node ) {
+
+            // Loop over all childNodes.
+            while (node) {
+
+                // Only process text nodes.
+                if ( node.nodeType === 3 ) {
+
+                    // The original node value.
+                    val = node.nodeValue;
+
+                    // The new value.
+                    new_val = val.replace( search, replace );
+
+                    // Only replace text if the new value is actually different!
+                    if ( new_val !== val ) {
+                        // NOTE(review): unless text_only is set, the whole new value (including the untouched text around the match) is inserted as HTML - confirm a literal '<' in the page text cannot reach here
+                        if ( !text_only && /</.test( new_val ) ) {
+                            // The new value contains HTML, set it in a slower but far more
+                            // robust way.
+                            $(node).before( new_val );
+
+                            // Don't remove the node yet, or the loop will lose its place.
+                            remove.push( node );
+                        } else {
+                            // The new value contains no HTML, so it can be set in this
+                            // very fast, simple way.
+                            node.nodeValue = new_val;
+                        }
+                    }
+                }
+
+                node = node.nextSibling;
+            }
+        }
+
+        // Time to remove those elements!
+        if (remove.length) $(remove).remove();
+    });
+}
+
+function pregQuote( str ) { // Escape the regex metacharacters of `str` so it can be embedded in a RegExp source
+    return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, '\\$1'); // str is coerced to a string; each listed character gets a leading backslash
+}
+
+module.exports = HTMLPipeline;
diff --git a/lib/page/index.js b/lib/page/index.js
new file mode 100644
index 0000000..f3a8f39
--- /dev/null
+++ b/lib/page/index.js
@@ -0,0 +1,250 @@
+var _ = require('lodash');
+var path = require('path');
+var direction = require('direction');
+var fm = require('front-matter');
+
+var error = require('../utils/error');
+var pathUtil = require('../utils/path');
+var location = require('../utils/location');
+var parsers = require('../parsers');
+var gitbook = require('../gitbook');
+var pluginCompatibility = require('../plugins/compatibility');
+var HTMLPipeline = require('./html');
+
+/*
+A page represents a parsable file in the book (Markdown, Asciidoc, etc.)
+*/
+
+function Page(book, filename) {
+    if (!(this instanceof Page)) return new Page(book, filename);
+    var extension;
+    _.bindAll(this);
+
+    this.book = book;
+    this.log = this.book.log;
+
+    // Current content
+    this.content = '';
+
+    // Short description for the page
+    this.description = '';
+
+    // Relative path to the page
+    this.path = location.normalize(filename);
+
+    // Absolute path to the page
+    this.rawPath = this.book.resolve(filename);
+
+    // Last modification date
+    this.mtime = 0;
+
+    // Can we parse it?
+    extension = path.extname(this.path);
+    this.parser = parsers.get(extension);
+    if (!this.parser) throw error.ParsingError(new Error('Can\'t parse file "'+this.path+'"'));
+
+    this.type = this.parser.name;
+}
+
+// Return the filename of the page with another extension
+// "README.md" -> "README.html" (delegates to pathUtil.setExtension on this.path)
+Page.prototype.withExtension = function(ext) {
+    return pathUtil.setExtension(this.path, ext);
+};
+
+// Resolve a filename relative to this page
+// It returns a path relative to the book root folder
+Page.prototype.resolveLocal = function() {
+    var dir = path.dirname(this.path);
+    var file = path.join.apply(path, _.toArray(arguments));
+
+    return location.toAbsolute(file, dir, '');
+};
+
+// Resolve a filename relative to this page
+// It returns an absolute path for the FS
+Page.prototype.resolve = function() {
+    return this.book.resolve(this.resolveLocal.apply(this, arguments));
+};
+
+// Convert an absolute path (in the book) to a relative path from this page
+Page.prototype.relative = function(name) {
+    // Convert /test.png -> test.png
+    name = location.toAbsolute(name, '', '');
+
+    return location.relative(
+        this.resolve('.') + '/',
+        this.book.resolve(name)
+    );
+};
+
+// Return a page result of a relative page from this page
+Page.prototype.followPage = function(filename) {
+    var absPath = this.resolveLocal(filename);
+    return this.book.getPage(absPath);
+};
+
+// Update content of the page: replaces this.content with the given value
+Page.prototype.update = function(content) {
+    this.content = content;
+};
+
+// Read the page as a string
+Page.prototype.read = function() {
+    var that = this;
+
+    return this.book.statFile(this.path)
+    .then(function(stat) {
+        that.mtime = stat.mtime;
+        return that.book.readFile(that.path);
+    })
+    .then(this.update);
+};
+
+// Return templating context for this page
+// This is used both for themes and page parsing
+Page.prototype.getContext = function() {
+    var article = this.book.summary.getArticle(this);
+    var next = article? article.next() : null;
+    var prev = article? article.prev() : null;
+
+    // Detect text direction in this page
+    var dir = this.book.config.get('direction');
+    if (!dir) {
+        dir = direction(this.content);
+        if (dir == 'neutral') dir = null;
+    }
+
+    return _.extend(
+        {
+            file: {
+                path: this.path,
+                mtime: this.mtime,
+                type: this.type
+            },
+            page: {
+                title: article? article.title : null,
+                description: this.description,
+                next: next? next.getContext() : null,
+                previous: prev? prev.getContext() : null,
+                level: article? article.level : null,
+                depth: article? article.depth : 0,
+                content: this.content,
+                dir: dir
+            }
+        },
+        gitbook.getContext(),
+        this.book.getContext(),
+        this.book.langs.getContext(),
+        this.book.summary.getContext(),
+        this.book.glossary.getContext(),
+        this.book.config.getContext()
+    );
+};
+
+// Parse the page and return its content: reads the file, then chains front matter, hooks, templating, markup rendering and HTML normalization
+Page.prototype.toHTML = function(output) {
+    var that = this;
+    // `output` supplies plugins.hook(), template.render/postProcess/applyBlock() and the link/image/svg callbacks
+    this.log.debug.ln('start parsing file', this.path);
+
+    // Call a hook in the output
+    // using an utility to "keep" compatibility with gitbook 2
+    function hook(name) {
+        return pluginCompatibility.pageHook(that, function(ctx) {
+            return output.plugins.hook(name, ctx);
+        })
+        .then(function(result) {
+            if(_.isString(result)) that.update(result);
+        });
+    }
+    // Each step below stores its result through update(), so that.content always holds the latest stage
+    return this.read()
+
+    // Parse yaml front matter
+    .then(function() {
+        var parsed = fm(that.content);
+
+        // Extend page with the frontmatter description attribute
+        that.description = parsed.attributes.description || '';
+
+        // Keep only the body
+        that.update(parsed.body);
+    })
+    // 'page:before' hook: a string result replaces the content
+    .then(function() {
+        return hook('page:before');
+    })
+
+    // Pre-process page with parser
+    .then(function() {
+        return that.parser.page.prepare(that.content)
+        .then(that.update);
+    })
+
+    // Render template
+    .then(function() {
+        return output.template.render(that.content, that.getContext(), {
+            path: that.path
+        })
+        .then(that.update);
+    })
+
+    // Render markup using the parser
+    .then(function() {
+        return that.parser.page(that.content)
+        .then(function(out) {
+            that.update(out.content);
+        });
+    })
+
+    // Post process templating
+    .then(function() {
+        return output.template.postProcess(that.content)
+        .then(that.update);
+    })
+
+    // Normalize HTML output
+    .then(function() {
+        var pipelineOpts = { // the output callbacks get the page bound as first argument
+            onRelativeLink: _.partial(output.onRelativeLink, that),
+            onImage: _.partial(output.onOutputImage, that),
+            onOutputSVG: _.partial(output.onOutputSVG, that),
+
+            // Use 'code' template block
+            onCodeBlock: function(source, lang) {
+                return output.template.applyBlock('code', {
+                    body: source,
+                    kwargs: {
+                        language: lang
+                    }
+                });
+            },
+
+            // Extract description from page's content if no frontmatter
+            onDescription: function(description) {
+                if (that.description) return;
+                that.description = description;
+            },
+
+            // Convert glossary entries to annotations
+            annotations: that.book.glossary.annotations()
+        };
+        var pipeline = new HTMLPipeline(that.content, pipelineOpts);
+
+        return pipeline.output()
+        .then(that.update);
+    })
+    // 'page' hook, run once the HTML has been normalized
+    .then(function() {
+        return hook('page');
+    })
+
+    // Return content itself
+    .then(function() {
+        return that.content;
+    });
+};
+
+
+module.exports = Page;