diff options
Diffstat (limited to 'lib/page')
-rw-r--r-- | lib/page/html.js | 280 | ||||
-rw-r--r-- | lib/page/index.js | 250 |
2 files changed, 530 insertions, 0 deletions
// diff --git a/lib/page/html.js b/lib/page/html.js
// new file mode 100644, index 0000000..bce6cd2 (--- /dev/null, +++ b/lib/page/html.js, @@ -0,0 +1,280 @@)

var _ = require('lodash');
var url = require('url');
var cheerio = require('cheerio');
var domSerializer = require('dom-serializer');
var slug = require('github-slugid');

var Promise = require('../utils/promise');
var location = require('../utils/location');

// Selector of the elements that must never be annotated.
// applyAnnotations skips both the matching elements and their descendants.
var ANNOTATION_IGNORE = '.no-glossary,code,pre,a,script,h1,h2,h3,h4,h5,h6';

// Pipeline normalizing the HTML of a page: links, images, code blocks,
// headings, inline SVGs and glossary annotations.
//
// htmlString: HTML source to load in cheerio
// opts: optional callbacks/settings, see the defaults below
function HTMLPipeline(htmlString, opts) {
    _.bindAll(this);

    // Defaults are merged into a fresh object so that the options object
    // owned by the caller is never mutated
    this.opts = _.defaults({}, opts, {
        // Called once the description has been found
        onDescription: function(description) { },

        // Compute the new href for a relative link
        onRelativeLink: _.identity,

        // Output an image
        onImage: _.identity,

        // Syntax highlighting
        onCodeBlock: _.identity,

        // Output a svg, if returns null the svg is kept inlined
        onOutputSVG: _.constant(null),

        // Words to annotate
        annotations: [],

        // When an annotation is applied
        onAnnotation: function () { }
    });

    this.$ = cheerio.load(htmlString, {
        // We should parse html without trying to normalize too much
        xmlMode: false,

        // SVG need some attributes to use uppercases
        lowerCaseAttributeNames: false,
        lowerCaseTags: false
    });
}

// Transform a query of elements in the page
// "fn" is called with the pipeline as "this" and the cheerio-wrapped element,
// one element after the other (through Promise.serie)
HTMLPipeline.prototype._transform = function(query, fn) {
    var that = this;

    var $elements = this.$(query);

    return Promise.serie($elements, function(el) {
        var $el = that.$(el);
        return fn.call(that, $el);
    });
};

// Normalize links:
//  - anchor links are left untouched
//  - relative links are rewritten using "onRelativeLink" (the hash is preserved)
//  - all other links are opened in a new tab
HTMLPipeline.prototype.transformLinks = function() {
    return this._transform('a', function($a) {
        var href = $a.attr('href');
        if (!href) return;

        if (location.isAnchor(href)) {
            // Don't "change" anchor links
        } else if (location.isRelative(href)) {
            // Preserve anchor
            var parsed = url.parse(href);
            var filename = this.opts.onRelativeLink(parsed.pathname);

            $a.attr('href', filename + (parsed.hash || ''));
        } else {
            // External links
            $a.attr('target', '_blank');
        }
    });
};

// Normalize images: "src" is replaced by the result of "onImage"
HTMLPipeline.prototype.transformImages = function() {
    return this._transform('img', function($img) {
        var src = $img.attr('src');

        // Nothing to resolve for an image without source
        // (same guard as the one used for links without href)
        if (!src) return;

        return Promise(this.opts.onImage(src))
        .then(function(filename) {
            $img.attr('src', filename);
        });
    });
};

// Normalize code blocks: each <code> is passed to "onCodeBlock" with its
// source and language, and its content is replaced by the resulting body
HTMLPipeline.prototype.transformCodeBlocks = function() {
    return this._transform('code', function($code) {
        // Extract language from the first "lang-*" or "language-*" class
        var lang = _.chain(
                ($code.attr('class') || '').split(' ')
            )
            .map(function(cl) {
                // Markdown
                if (cl.indexOf('lang-') === 0) return cl.slice('lang-'.length);

                // Asciidoc
                if (cl.indexOf('language-') === 0) return cl.slice('language-'.length);

                return null;
            })
            .compact()
            .first()
            .value();

        var source = $code.text();

        return Promise(this.opts.onCodeBlock(source, lang))
        .then(function(blk) {
            // "html: false" means the body is plain text and must be escaped
            if (blk.html === false) {
                $code.text(blk.body);
            } else {
                $code.html(blk.body);
            }
        });
    });
};

// Add ID to headings (synchronous)
HTMLPipeline.prototype.transformHeadings = function() {
    var that = this;

    this.$('h1,h2,h3,h4,h5,h6').each(function() {
        var $h = that.$(this);

        // Already has an ID?
        if ($h.attr('id')) return;
        $h.attr('id', slug($h.text()));
    });
};

// Outline SVG from the HTML
// Each inline <svg> is serialized and passed to "onOutputSVG"; if a filename
// is returned, the svg is replaced by an <img> pointing to it
HTMLPipeline.prototype.transformSvgs = function() {
    var that = this;

    return this._transform('svg', function($svg) {
        var content = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            renderDOM(that.$, $svg)
        ].join('\n');

        return Promise(that.opts.onOutputSVG(content))
        .then(function(filename) {
            // Keep the svg inlined
            if (!filename) return;

            $svg.replaceWith(that.$('<img>').attr('src', filename));
        });
    });
};

// Annotate the content: occurrences of each annotation name are wrapped
// in a link with the class "glossary-term"
HTMLPipeline.prototype.applyAnnotations = function() {
    var that = this;
    var annotations = this.opts.annotations;

    if (_.isEmpty(annotations)) return;

    // Elements that can be annotated are listed once for all annotations:
    // the only elements created while annotating are <a>, which are ignored
    var targets = [];
    that.$('*').each(function() {
        var $this = that.$(this);

        if (
            $this.is(ANNOTATION_IGNORE) ||
            $this.parents(ANNOTATION_IGNORE).length > 0
        ) return;

        targets.push(this);
    });

    _.each(annotations, function(annotation) {
        var searchRegex = new RegExp( '\\b(' + pregQuote(annotation.name.toLowerCase()) + ')\\b' , 'gi' );

        _.each(targets, function(el) {
            replaceText(that.$, el, searchRegex, function(match) {
                that.opts.onAnnotation(annotation);

                // The result is parsed as HTML: every dynamic value is escaped
                return '<a href="' + _.escape(that.opts.onRelativeLink(annotation.href)) + '" '
                    + 'class="glossary-term" title="'+_.escape(annotation.description)+'">'
                    + _.escape(match)
                    + '</a>';
            });
        });
    });
};

// Extract page description from html
// It uses the first 155 characters of the first paragraph
// This can totally be improved
HTMLPipeline.prototype.extractDescription = function() {
    var $p = this.$('p').first();
    var description = $p.text().trim().slice(0, 155);

    this.opts.onDescription(description);
};

// Write content to the pipeline
// Run all the transformations and return a promise for the resulting HTML
HTMLPipeline.prototype.output = function() {
    var that = this;

    return Promise()
    .then(this.extractDescription)
    .then(this.transformImages)
    .then(this.transformHeadings)
    .then(this.transformCodeBlocks)
    .then(this.transformSvgs)
    .then(this.applyAnnotations)

    // Transform of links should be applied after annotations
    // because annotations are created as links
    .then(this.transformLinks)

    .then(function() {
        return renderDOM(that.$);
    });
};

// Render a cheerio DOM as html
// Without "dom", the children of the document root are rendered
function renderDOM($, dom, options) {
    if (!dom && $._root && $._root.children) {
        dom = $._root.children;
    }
    options = options|| dom.options || $._options;
    return domSerializer(dom, options);
}

// Apply a replacement on the value of a text node
// Returns {text, html}:
//  - text: exactly what val.replace(search, replace) returns
//  - html: same content, safe to be parsed as HTML when "replace" is a
//    function: the untouched parts of the text are escaped, only the values
//    returned by "replace" are kept as markup
function replaceInTextValue(val, search, replace) {
    // A replacement pattern (string) can't be split per match,
    // the replaced text is used as it is
    if (!_.isFunction(replace)) {
        var replaced = val.replace(search, replace);
        return { text: replaced, html: replaced };
    }

    var text = '';
    var html = '';
    var last = 0;

    // "replace" is called exactly once per match, with the native arguments
    val.replace(search, function(match) {
        // Arguments are (match, p1..pn, offset, string[, groups])
        var tail = arguments.length - 1;
        if (typeof arguments[tail] !== 'string') tail--;
        var offset = arguments[tail - 1];

        var literal = val.slice(last, offset);
        var replacement = String(replace.apply(null, arguments));

        text += literal + replacement;
        html += _.escape(literal) + replacement;
        last = offset + match.length;

        return match;
    });

    var rest = val.slice(last);

    return {
        text: text + rest,
        html: html + _.escape(rest)
    };
}

// Replace text in an element
// Only the direct text nodes of "el" are processed. When the new value
// contains HTML (and "text_only" is not set), the text node is replaced
// by the parsed markup.
function replaceText($, el, search, replace, text_only ) {
    return $(el).each(function(){
        var node = this.firstChild;

        // Elements to be removed at the end.
        var remove = [];

        // Loop over all childNodes.
        while (node) {

            // Only process text nodes.
            if ( node.nodeType === 3 ) {

                // The original node value.
                var val = node.nodeValue;

                // The new value, as text and as HTML.
                var result = replaceInTextValue(val, search, replace);

                // Only replace text if the new value is actually different!
                if ( result.text !== val ) {

                    if ( !text_only && /</.test( result.text ) ) {
                        // The new value contains HTML, set it in a slower but far more
                        // robust way. The untouched text has been escaped, so it is
                        // not interpreted as markup.
                        $(node).before( result.html );

                        // Don't remove the node yet, or the loop will lose its place.
                        remove.push( node );
                    } else {
                        // The new value contains no HTML, so it can be set in this
                        // very fast, simple way.
                        node.nodeValue = result.text;
                    }
                }
            }

            node = node.nextSibling;
        }

        // Time to remove those elements!
        if (remove.length) $(remove).remove();
    });
}

// Escape a string to be used as a literal in a regular expression
function pregQuote( str ) {
    return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, '\\$1');
}

module.exports = HTMLPipeline;

// diff --git a/lib/page/index.js b/lib/page/index.js
// new file mode 100644, index 0000000..f3a8f39 (--- /dev/null, +++ b/lib/page/index.js, @@ -0,0 +1,250 @@)

var _ = require('lodash');
var path = require('path');
var direction = require('direction');
var fm = require('front-matter');

var error = require('../utils/error');
var pathUtil = require('../utils/path');
var location = require('../utils/location');
var parsers = require('../parsers');
var gitbook = require('../gitbook');
var pluginCompatibility = require('../plugins/compatibility');
var HTMLPipeline = require('./html');

/*
A page represent a parsable file in the book (Markdown, Asciidoc, etc)

It can be called with or without "new".
It throws a ParsingError if no parser handles the extension of the file.
*/

function Page(book, filename) {
    if (!(this instanceof Page)) return new Page(book, filename);
    var extension;
    _.bindAll(this);

    this.book = book;
    this.log = this.book.log;

    // Current content
    this.content = '';

    // Short description for the page
    this.description = '';

    // Relative path to the page
    this.path = location.normalize(filename);

    // Absolute path to the page
    this.rawPath = this.book.resolve(filename);

    // Last modification date
    this.mtime = 0;

    // Can we parse it?
    extension = path.extname(this.path);
    this.parser = parsers.get(extension);
    if (!this.parser) throw error.ParsingError(new Error('Can\'t parse file "'+this.path+'"'));

    this.type = this.parser.name;
}

// Return the filename of the page with another extension
// "README.md" -> "README.html"
Page.prototype.withExtension = function(ext) {
    return pathUtil.setExtension(this.path, ext);
};

// Resolve a filename relative to this page
// It returns a path relative to the book root folder
Page.prototype.resolveLocal = function() {
    var dir = path.dirname(this.path);
    var file = path.join.apply(path, _.toArray(arguments));

    return location.toAbsolute(file, dir, '');
};

// Resolve a filename relative to this page
// It returns an absolute path for the FS
Page.prototype.resolve = function() {
    return this.book.resolve(this.resolveLocal.apply(this, arguments));
};

// Convert an absolute path (in the book) to a relative path from this page
Page.prototype.relative = function(name) {
    // Convert /test.png -> test.png
    name = location.toAbsolute(name, '', '');

    return location.relative(
        this.resolve('.') + '/',
        this.book.resolve(name)
    );
};

// Return a page result of a relative page from this page
Page.prototype.followPage = function(filename) {
    var absPath = this.resolveLocal(filename);
    return this.book.getPage(absPath);
};

// Update content of the page
Page.prototype.update = function(content) {
    this.content = content;
};

// Read the page as a string
// It updates the modification time and the content of the page
Page.prototype.read = function() {
    var that = this;

    return this.book.statFile(this.path)
    .then(function(stat) {
        that.mtime = stat.mtime;
        return that.book.readFile(that.path);
    })
    .then(this.update);
};

// Return templating context for this page
// This is used both for themes and page parsing
Page.prototype.getContext = function() {
    var article = this.book.summary.getArticle(this);
    var next = article? article.next() : null;
    var prev = article? article.prev() : null;

    // Detect text direction in this page
    // The direction from the configuration has the priority
    var dir = this.book.config.get('direction');
    if (!dir) {
        dir = direction(this.content);
        if (dir === 'neutral') dir = null;
    }

    return _.extend(
        {
            file: {
                path: this.path,
                mtime: this.mtime,
                type: this.type
            },
            page: {
                title: article? article.title : null,
                description: this.description,
                next: next? next.getContext() : null,
                previous: prev? prev.getContext() : null,
                level: article? article.level : null,
                depth: article? article.depth : 0,
                content: this.content,
                dir: dir
            }
        },
        gitbook.getContext(),
        this.book.getContext(),
        this.book.langs.getContext(),
        this.book.summary.getContext(),
        this.book.glossary.getContext(),
        this.book.config.getContext()
    );
};

// Parse the page and return its content
// Steps: read, front matter, hook "page:before", parser preparation,
// templating, markup rendering, templating post-processing,
// HTML normalization, hook "page"
Page.prototype.toHTML = function(output) {
    var that = this;

    this.log.debug.ln('start parsing file', this.path);

    // Call a hook in the output
    // using an utility to "keep" compatibility with gitbook 2
    // The content is updated only when the hook results in a string
    function hook(name) {
        return pluginCompatibility.pageHook(that, function(ctx) {
            return output.plugins.hook(name, ctx);
        })
        .then(function(result) {
            if(_.isString(result)) that.update(result);
        });
    }

    return this.read()

    // Parse yaml front matter
    .then(function() {
        var parsed = fm(that.content);

        // Extend page with the frontmatter attribute
        that.description = parsed.attributes.description || '';

        // Keep only the body
        that.update(parsed.body);
    })

    .then(function() {
        return hook('page:before');
    })

    // Pre-process page with parser
    .then(function() {
        return that.parser.page.prepare(that.content)
        .then(that.update);
    })

    // Render template
    .then(function() {
        return output.template.render(that.content, that.getContext(), {
            path: that.path
        })
        .then(that.update);
    })

    // Render markup using the parser
    .then(function() {
        return that.parser.page(that.content)
        .then(function(out) {
            that.update(out.content);
        });
    })

    // Post process templating
    .then(function() {
        return output.template.postProcess(that.content)
        .then(that.update);
    })

    // Normalize HTML output
    .then(function() {
        var pipelineOpts = {
            onRelativeLink: _.partial(output.onRelativeLink, that),
            onImage: _.partial(output.onOutputImage, that),
            onOutputSVG: _.partial(output.onOutputSVG, that),

            // Use 'code' template block
            onCodeBlock: function(source, lang) {
                return output.template.applyBlock('code', {
                    body: source,
                    kwargs: {
                        language: lang
                    }
                });
            },

            // Extract description from page's content if no frontmatter
            onDescription: function(description) {
                if (that.description) return;
                that.description = description;
            },

            // Convert glossary entries to annotations
            annotations: that.book.glossary.annotations()
        };
        var pipeline = new HTMLPipeline(that.content, pipelineOpts);

        return pipeline.output()
        .then(that.update);
    })

    .then(function() {
        return hook('page');
    })

    // Return content itself
    .then(function() {
        return that.content;
    });
};


module.exports = Page;