diff options
Diffstat (limited to 'lib/page')
-rw-r--r-- | lib/page/html.js | 290 | ||||
-rw-r--r-- | lib/page/index.js | 246 |
2 files changed, 0 insertions, 536 deletions
diff --git a/lib/page/html.js b/lib/page/html.js deleted file mode 100644 index e8d3a85..0000000 --- a/lib/page/html.js +++ /dev/null @@ -1,290 +0,0 @@ -var _ = require('lodash'); -var url = require('url'); -var cheerio = require('cheerio'); -var domSerializer = require('dom-serializer'); -var slug = require('github-slugid'); - -var Promise = require('../utils/promise'); -var location = require('../utils/location'); - -// Selector to ignore -var ANNOTATION_IGNORE = '.no-glossary,code,pre,a,script,h1,h2,h3,h4,h5,h6'; - -function HTMLPipeline(htmlString, opts) { - _.bindAll(this); - - this.opts = _.defaults(opts || {}, { - // Called once the description has been found - onDescription: function(description) { }, - - // Calcul new href for a relative link - onRelativeLink: _.identity, - - // Output an image - onImage: _.identity, - - // Syntax highlighting - onCodeBlock: _.identity, - - // Output a svg, if returns null the svg is kept inlined - onOutputSVG: _.constant(null), - - // Words to annotate - annotations: [], - - // When an annotation is applied - onAnnotation: function () { } - }); - - this.$ = cheerio.load(htmlString, { - // We should parse html without trying to normalize too much - xmlMode: false, - - // SVG need some attributes to use uppercases - lowerCaseAttributeNames: false, - lowerCaseTags: false - }); -} - -// Transform a query of elements in the page -HTMLPipeline.prototype._transform = function(query, fn) { - var that = this; - - var $elements = this.$(query); - - return Promise.serie($elements, function(el) { - var $el = that.$(el); - return fn.call(that, $el); - }); -}; - -// Normalize links -HTMLPipeline.prototype.transformLinks = function() { - return this._transform('a', function($a) { - var href = $a.attr('href'); - if (!href) return; - - if (location.isAnchor(href)) { - // Don't "change" anchor links - } else if (location.isRelative(href)) { - // Preserve anchor - var parsed = url.parse(href); - var filename = this.opts.onRelativeLink(parsed.pathname); - - $a.attr('href', filename + (parsed.hash || '')); - } else { - // External links - $a.attr('target', '_blank'); - } - }); -}; - -// Normalize images -HTMLPipeline.prototype.transformImages = function() { - return this._transform('img', function($img) { - return Promise(this.opts.onImage($img.attr('src'))) - .then(function(filename) { - $img.attr('src', filename); - }); - }); -}; - -// Normalize code blocks -HTMLPipeline.prototype.transformCodeBlocks = function() { - return this._transform('code', function($code) { - // Extract language - var lang = _.chain( - ($code.attr('class') || '').split(' ') - ) - .map(function(cl) { - // Markdown - if (cl.search('lang-') === 0) return cl.slice('lang-'.length); - - // Asciidoc - if (cl.search('language-') === 0) return cl.slice('language-'.length); - - return null; - }) - .compact() - .first() - .value(); - - var source = $code.text(); - - return Promise(this.opts.onCodeBlock(source, lang)) - .then(function(blk) { - if (blk.html === false) { - $code.text(blk.body); - } else { - $code.html(blk.body); - } - }); - }); -}; - -// Add ID to headings -HTMLPipeline.prototype.transformHeadings = function() { - var that = this; - - this.$('h1,h2,h3,h4,h5,h6').each(function() { - var $h = that.$(this); - - // Already has an ID? - if ($h.attr('id')) return; - $h.attr('id', slug($h.text())); - }); -}; - -// Outline SVG from the HML -HTMLPipeline.prototype.transformSvgs = function() { - var that = this; - - return this._transform('svg', function($svg) { - var content = [ - '<?xml version="1.0" encoding="UTF-8"?>', - renderDOM(that.$, $svg) - ].join('\n'); - - return Promise(that.opts.onOutputSVG(content)) - .then(function(filename) { - if (!filename) return; - - $svg.replaceWith(that.$('<img>').attr('src', filename)); - }); - }); -}; - -// Annotate the content -HTMLPipeline.prototype.applyAnnotations = function() { - var that = this; - - _.each(this.opts.annotations, function(annotation) { - var searchRegex = new RegExp( '\\b(' + pregQuote(annotation.name.toLowerCase()) + ')\\b' , 'gi' ); - - that.$('*').each(function() { - var $this = that.$(this); - - if ( - $this.is(ANNOTATION_IGNORE) || - $this.parents(ANNOTATION_IGNORE).length > 0 - ) return; - - replaceText(that.$, this, searchRegex, function(match) { - that.opts.onAnnotation(annotation); - - return '<a href="' + that.opts.onRelativeLink(annotation.href) + '" ' - + 'class="glossary-term" title="'+_.escape(annotation.description)+'">' - + match - + '</a>'; - }); - }); - }); -}; - -// Extract page description from html -// This can totally be improved -HTMLPipeline.prototype.extractDescription = function() { - var $ = this.$; - var $p = $('p').first(); - var $next = $p.nextUntil('h1,h2,h3,h4,h5,h6,pre,blockquote,ul,ol,div'); - - var description = $p.text().trim(); - - $next.each(function() { - description += ' ' + $(this).text().trim(); - }); - - // Truncate description - description = _.trunc(description, 300); - - this.opts.onDescription(description); -}; - -// Write content to the pipeline -HTMLPipeline.prototype.output = function() { - var that = this; - - return Promise() - .then(this.extractDescription) - .then(this.transformImages) - .then(this.transformHeadings) - .then(this.transformCodeBlocks) - .then(this.transformSvgs) - .then(this.applyAnnotations) - - // Transform of links should be applied after annotations - // because annotations are created as links - .then(this.transformLinks) - - .then(function() { - return renderDOM(that.$); - }); -}; - - -// Render a cheerio DOM as html -function renderDOM($, dom, options) { - if (!dom && $._root && $._root.children) { - dom = $._root.children; - } - options = options|| dom.options || $._options; - return domSerializer(dom, options); -} - -// Replace text in an element -function replaceText($, el, search, replace, text_only ) { - return $(el).each(function(){ - var node = this.firstChild, - val, - new_val, - - // Elements to be removed at the end. - remove = []; - - // Only continue if firstChild exists. - if ( node ) { - - // Loop over all childNodes. - while (node) { - - // Only process text nodes. - if ( node.nodeType === 3 ) { - - // The original node value. - val = node.nodeValue; - - // The new value. - new_val = val.replace( search, replace ); - - // Only replace text if the new value is actually different! - if ( new_val !== val ) { - - if ( !text_only && /</.test( new_val ) ) { - // The new value contains HTML, set it in a slower but far more - // robust way. - $(node).before( new_val ); - - // Don't remove the node yet, or the loop will lose its place. - remove.push( node ); - } else { - // The new value contains no HTML, so it can be set in this - // very fast, simple way. - node.nodeValue = new_val; - } - } - } - - node = node.nextSibling; - } - } - - // Time to remove those elements! - if (remove.length) $(remove).remove(); - }); -} - -function pregQuote( str ) { - return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, '\\$1'); -} - -module.exports = HTMLPipeline; diff --git a/lib/page/index.js b/lib/page/index.js deleted file mode 100644 index f0d7f57..0000000 --- a/lib/page/index.js +++ /dev/null @@ -1,246 +0,0 @@ -var _ = require('lodash'); -var path = require('path'); -var direction = require('direction'); -var fm = require('front-matter'); - -var error = require('../utils/error'); -var pathUtil = require('../utils/path'); -var location = require('../utils/location'); -var parsers = require('../parsers'); -var pluginCompatibility = require('../plugins/compatibility'); -var HTMLPipeline = require('./html'); - -/* -A page represent a parsable file in the book (Markdown, Asciidoc, etc) -*/ - -function Page(book, filename) { - if (!(this instanceof Page)) return new Page(book, filename); - var extension; - _.bindAll(this); - - this.book = book; - this.log = this.book.log; - - // Map of attributes from YAML frontmatter - // Description is also extracted by default from content - this.attributes = {}; - - // Current content - this.content = ''; - - // Relative path to the page - this.path = location.normalize(filename); - - // Absolute path to the page - this.rawPath = this.book.resolve(filename); - - // Last modification date - this.mtime = 0; - - // Can we parse it? - extension = path.extname(this.path); - this.parser = parsers.getByExt(extension); - if (!this.parser) throw error.ParsingError(new Error('Can\'t parse file "'+this.path+'"')); - - this.type = this.parser.name; -} - -// Return the filename of the page with another extension -// "README.md" -> "README.html" -Page.prototype.withExtension = function(ext) { - return pathUtil.setExtension(this.path, ext); -}; - -// Resolve a filename relative to this page -// It returns a path relative to the book root folder -Page.prototype.resolveLocal = function() { - var dir = path.dirname(this.path); - var file = path.join.apply(path, _.toArray(arguments)); - - return location.toAbsolute(file, dir, ''); -}; - -// Resolve a filename relative to this page -// It returns an absolute path for the FS -Page.prototype.resolve = function() { - return this.book.resolve(this.resolveLocal.apply(this, arguments)); -}; - -// Convert an absolute path (in the book) to a relative path from this page -Page.prototype.relative = function(name) { - // Convert /test.png -> test.png - name = location.toAbsolute(name, '', ''); - - return location.relative( - this.resolve('.') + '/', - this.book.resolve(name) - ); -}; - -// Return a page result of a relative page from this page -Page.prototype.followPage = function(filename) { - var absPath = this.resolveLocal(filename); - return this.book.getPage(absPath); -}; - -// Update content of the page -Page.prototype.update = function(content) { - this.content = content; -}; - -// Read the page as a string -Page.prototype.read = function() { - var that = this; - - return this.book.statFile(this.path) - .then(function(stat) { - that.mtime = stat.mtime; - return that.book.readFile(that.path); - }) - .then(this.update); -}; - -// Return templating context for this page -// This is used both for themes and page parsing -Page.prototype.getContext = function() { - var article = this.book.summary.getArticle(this); - var next = article? article.next() : null; - var prev = article? article.prev() : null; - - // Detect text direction in this page - var dir = this.book.config.get('direction'); - if (!dir) { - dir = direction(this.content); - if (dir == 'neutral') dir = null; - } - - return { - file: { - path: this.path, - mtime: this.mtime, - type: this.type - }, - page: _.extend({}, this.attributes, { - title: article? article.title : null, - next: next? next.getContext() : null, - previous: prev? prev.getContext() : null, - level: article? article.level : null, - depth: article? article.depth() : 0, - content: this.content, - dir: dir - }) - }; -}; - -// Return complete context for templating (page + book + summary + ...) -Page.prototype.getOutputContext = function(output) { - return _.extend({}, this.getContext(), output.getContext()); -}; - -// Parse the page and return its content -Page.prototype.toHTML = function(output) { - var that = this; - - this.log.debug.ln('start parsing file', this.path); - - // Call a hook in the output - // using an utility to "keep" compatibility with gitbook 2 - function hook(name) { - return pluginCompatibility.pageHook(that, function(ctx) { - return output.plugins.hook(name, ctx); - }) - .then(function(result) { - if(_.isString(result)) that.update(result); - }); - } - - return this.read() - - // Parse yaml front matter - .then(function() { - var parsed = fm(that.content); - - // Extract attributes - that.attributes = parsed.attributes; - - // Keep only the body - that.update(parsed.body); - }) - - .then(function() { - return hook('page:before'); - }) - - // Pre-process page with parser - .then(function() { - return that.parser.page.prepare(that.content) - .then(that.update); - }) - - // Render template - .then(function() { - return output.template.render(that.content, that.getOutputContext(output), { - path: that.path - }) - .then(that.update); - }) - - // Render markup using the parser - .then(function() { - return that.parser.page(that.content) - .then(function(out) { - that.update(out.content); - }); - }) - - // Post process templating - .then(function() { - return output.template.postProcess(that.content) - .then(that.update); - }) - - // Normalize HTML output - .then(function() { - var pipelineOpts = { - onRelativeLink: _.partial(output.onRelativeLink, that), - onImage: _.partial(output.onOutputImage, that), - onOutputSVG: _.partial(output.onOutputSVG, that), - - // Use 'code' template block - onCodeBlock: function(source, lang) { - return output.template.applyBlock('code', { - body: source, - kwargs: { - language: lang - } - }); - }, - - // Extract description from page's content if no frontmatter - onDescription: function(description) { - if (that.attributes.description) return; - that.attributes.description = description; - }, - - // Convert glossary entries to annotations - annotations: that.book.glossary.annotations() - }; - var pipeline = new HTMLPipeline(that.content, pipelineOpts); - - return pipeline.output() - .then(that.update); - }) - - .then(function() { - return hook('page'); - }) - - // Return content itself - .then(function() { - return that.content; - }); -}; - - -module.exports = Page; |