diff options
author | Samy Pesse <samypesse@gmail.com> | 2016-04-23 17:10:16 +0200 |
---|---|---|
committer | Samy Pesse <samypesse@gmail.com> | 2016-04-23 17:10:16 +0200 |
commit | e1fa977b5b1b3c03790de6e2c21ee39ba55d9555 (patch) | |
tree | a1ad4386100b5779ef930845093c14639a3353b5 /lib/page/html.js | |
parent | ce95f316b9ce1eac1e615db3540c4d0f30408d63 (diff) | |
download | gitbook-e1fa977b5b1b3c03790de6e2c21ee39ba55d9555.zip gitbook-e1fa977b5b1b3c03790de6e2c21ee39ba55d9555.tar.gz gitbook-e1fa977b5b1b3c03790de6e2c21ee39ba55d9555.tar.bz2 |
Add json encoding utils
Diffstat (limited to 'lib/page/html.js')
-rw-r--r-- | lib/page/html.js | 290 |
1 files changed, 0 insertions, 290 deletions
diff --git a/lib/page/html.js b/lib/page/html.js deleted file mode 100644 index e8d3a85..0000000 --- a/lib/page/html.js +++ /dev/null @@ -1,290 +0,0 @@ -var _ = require('lodash'); -var url = require('url'); -var cheerio = require('cheerio'); -var domSerializer = require('dom-serializer'); -var slug = require('github-slugid'); - -var Promise = require('../utils/promise'); -var location = require('../utils/location'); - -// Selector to ignore -var ANNOTATION_IGNORE = '.no-glossary,code,pre,a,script,h1,h2,h3,h4,h5,h6'; - -function HTMLPipeline(htmlString, opts) { - _.bindAll(this); - - this.opts = _.defaults(opts || {}, { - // Called once the description has been found - onDescription: function(description) { }, - - // Calcul new href for a relative link - onRelativeLink: _.identity, - - // Output an image - onImage: _.identity, - - // Syntax highlighting - onCodeBlock: _.identity, - - // Output a svg, if returns null the svg is kept inlined - onOutputSVG: _.constant(null), - - // Words to annotate - annotations: [], - - // When an annotation is applied - onAnnotation: function () { } - }); - - this.$ = cheerio.load(htmlString, { - // We should parse html without trying to normalize too much - xmlMode: false, - - // SVG need some attributes to use uppercases - lowerCaseAttributeNames: false, - lowerCaseTags: false - }); -} - -// Transform a query of elements in the page -HTMLPipeline.prototype._transform = function(query, fn) { - var that = this; - - var $elements = this.$(query); - - return Promise.serie($elements, function(el) { - var $el = that.$(el); - return fn.call(that, $el); - }); -}; - -// Normalize links -HTMLPipeline.prototype.transformLinks = function() { - return this._transform('a', function($a) { - var href = $a.attr('href'); - if (!href) return; - - if (location.isAnchor(href)) { - // Don't "change" anchor links - } else if (location.isRelative(href)) { - // Preserve anchor - var parsed = url.parse(href); - var filename = this.opts.onRelativeLink(parsed.pathname); - - $a.attr('href', filename + (parsed.hash || '')); - } else { - // External links - $a.attr('target', '_blank'); - } - }); -}; - -// Normalize images -HTMLPipeline.prototype.transformImages = function() { - return this._transform('img', function($img) { - return Promise(this.opts.onImage($img.attr('src'))) - .then(function(filename) { - $img.attr('src', filename); - }); - }); -}; - -// Normalize code blocks -HTMLPipeline.prototype.transformCodeBlocks = function() { - return this._transform('code', function($code) { - // Extract language - var lang = _.chain( - ($code.attr('class') || '').split(' ') - ) - .map(function(cl) { - // Markdown - if (cl.search('lang-') === 0) return cl.slice('lang-'.length); - - // Asciidoc - if (cl.search('language-') === 0) return cl.slice('language-'.length); - - return null; - }) - .compact() - .first() - .value(); - - var source = $code.text(); - - return Promise(this.opts.onCodeBlock(source, lang)) - .then(function(blk) { - if (blk.html === false) { - $code.text(blk.body); - } else { - $code.html(blk.body); - } - }); - }); -}; - -// Add ID to headings -HTMLPipeline.prototype.transformHeadings = function() { - var that = this; - - this.$('h1,h2,h3,h4,h5,h6').each(function() { - var $h = that.$(this); - - // Already has an ID? - if ($h.attr('id')) return; - $h.attr('id', slug($h.text())); - }); -}; - -// Outline SVG from the HML -HTMLPipeline.prototype.transformSvgs = function() { - var that = this; - - return this._transform('svg', function($svg) { - var content = [ - '<?xml version="1.0" encoding="UTF-8"?>', - renderDOM(that.$, $svg) - ].join('\n'); - - return Promise(that.opts.onOutputSVG(content)) - .then(function(filename) { - if (!filename) return; - - $svg.replaceWith(that.$('<img>').attr('src', filename)); - }); - }); -}; - -// Annotate the content -HTMLPipeline.prototype.applyAnnotations = function() { - var that = this; - - _.each(this.opts.annotations, function(annotation) { - var searchRegex = new RegExp( '\\b(' + pregQuote(annotation.name.toLowerCase()) + ')\\b' , 'gi' ); - - that.$('*').each(function() { - var $this = that.$(this); - - if ( - $this.is(ANNOTATION_IGNORE) || - $this.parents(ANNOTATION_IGNORE).length > 0 - ) return; - - replaceText(that.$, this, searchRegex, function(match) { - that.opts.onAnnotation(annotation); - - return '<a href="' + that.opts.onRelativeLink(annotation.href) + '" ' - + 'class="glossary-term" title="'+_.escape(annotation.description)+'">' - + match - + '</a>'; - }); - }); - }); -}; - -// Extract page description from html -// This can totally be improved -HTMLPipeline.prototype.extractDescription = function() { - var $ = this.$; - var $p = $('p').first(); - var $next = $p.nextUntil('h1,h2,h3,h4,h5,h6,pre,blockquote,ul,ol,div'); - - var description = $p.text().trim(); - - $next.each(function() { - description += ' ' + $(this).text().trim(); - }); - - // Truncate description - description = _.trunc(description, 300); - - this.opts.onDescription(description); -}; - -// Write content to the pipeline -HTMLPipeline.prototype.output = function() { - var that = this; - - return Promise() - .then(this.extractDescription) - .then(this.transformImages) - .then(this.transformHeadings) - .then(this.transformCodeBlocks) - .then(this.transformSvgs) - .then(this.applyAnnotations) - - // Transform of links should be applied after annotations - // because annotations are created as links - .then(this.transformLinks) - - .then(function() { - return renderDOM(that.$); - }); -}; - - -// Render a cheerio DOM as html -function renderDOM($, dom, options) { - if (!dom && $._root && $._root.children) { - dom = $._root.children; - } - options = options|| dom.options || $._options; - return domSerializer(dom, options); -} - -// Replace text in an element -function replaceText($, el, search, replace, text_only ) { - return $(el).each(function(){ - var node = this.firstChild, - val, - new_val, - - // Elements to be removed at the end. - remove = []; - - // Only continue if firstChild exists. - if ( node ) { - - // Loop over all childNodes. - while (node) { - - // Only process text nodes. - if ( node.nodeType === 3 ) { - - // The original node value. - val = node.nodeValue; - - // The new value. - new_val = val.replace( search, replace ); - - // Only replace text if the new value is actually different! - if ( new_val !== val ) { - - if ( !text_only && /</.test( new_val ) ) { - // The new value contains HTML, set it in a slower but far more - // robust way. - $(node).before( new_val ); - - // Don't remove the node yet, or the loop will lose its place. - remove.push( node ); - } else { - // The new value contains no HTML, so it can be set in this - // very fast, simple way. - node.nodeValue = new_val; - } - } - } - - node = node.nextSibling; - } - } - - // Time to remove those elements! - if (remove.length) $(remove).remove(); - }); -} - -function pregQuote( str ) { - return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, '\\$1'); -} - -module.exports = HTMLPipeline; |