summaryrefslogtreecommitdiffstats
path: root/lib/page/html.js
diff options
context:
space:
mode:
Diffstat (limited to 'lib/page/html.js')
-rw-r--r--lib/page/html.js290
1 files changed, 0 insertions, 290 deletions
diff --git a/lib/page/html.js b/lib/page/html.js
deleted file mode 100644
index e8d3a85..0000000
--- a/lib/page/html.js
+++ /dev/null
@@ -1,290 +0,0 @@
-var _ = require('lodash');
-var url = require('url');
-var cheerio = require('cheerio');
-var domSerializer = require('dom-serializer');
-var slug = require('github-slugid');
-
-var Promise = require('../utils/promise');
-var location = require('../utils/location');
-
-// Selector to ignore
-var ANNOTATION_IGNORE = '.no-glossary,code,pre,a,script,h1,h2,h3,h4,h5,h6';
-
-function HTMLPipeline(htmlString, opts) {
- _.bindAll(this);
-
- this.opts = _.defaults(opts || {}, {
- // Called once the description has been found
- onDescription: function(description) { },
-
- // Calcul new href for a relative link
- onRelativeLink: _.identity,
-
- // Output an image
- onImage: _.identity,
-
- // Syntax highlighting
- onCodeBlock: _.identity,
-
- // Output a svg, if returns null the svg is kept inlined
- onOutputSVG: _.constant(null),
-
- // Words to annotate
- annotations: [],
-
- // When an annotation is applied
- onAnnotation: function () { }
- });
-
- this.$ = cheerio.load(htmlString, {
- // We should parse html without trying to normalize too much
- xmlMode: false,
-
- // SVG need some attributes to use uppercases
- lowerCaseAttributeNames: false,
- lowerCaseTags: false
- });
-}
-
-// Transform a query of elements in the page
-HTMLPipeline.prototype._transform = function(query, fn) {
- var that = this;
-
- var $elements = this.$(query);
-
- return Promise.serie($elements, function(el) {
- var $el = that.$(el);
- return fn.call(that, $el);
- });
-};
-
-// Normalize links
-HTMLPipeline.prototype.transformLinks = function() {
- return this._transform('a', function($a) {
- var href = $a.attr('href');
- if (!href) return;
-
- if (location.isAnchor(href)) {
- // Don't "change" anchor links
- } else if (location.isRelative(href)) {
- // Preserve anchor
- var parsed = url.parse(href);
- var filename = this.opts.onRelativeLink(parsed.pathname);
-
- $a.attr('href', filename + (parsed.hash || ''));
- } else {
- // External links
- $a.attr('target', '_blank');
- }
- });
-};
-
-// Normalize images
-HTMLPipeline.prototype.transformImages = function() {
- return this._transform('img', function($img) {
- return Promise(this.opts.onImage($img.attr('src')))
- .then(function(filename) {
- $img.attr('src', filename);
- });
- });
-};
-
-// Normalize code blocks
-HTMLPipeline.prototype.transformCodeBlocks = function() {
- return this._transform('code', function($code) {
- // Extract language
- var lang = _.chain(
- ($code.attr('class') || '').split(' ')
- )
- .map(function(cl) {
- // Markdown
- if (cl.search('lang-') === 0) return cl.slice('lang-'.length);
-
- // Asciidoc
- if (cl.search('language-') === 0) return cl.slice('language-'.length);
-
- return null;
- })
- .compact()
- .first()
- .value();
-
- var source = $code.text();
-
- return Promise(this.opts.onCodeBlock(source, lang))
- .then(function(blk) {
- if (blk.html === false) {
- $code.text(blk.body);
- } else {
- $code.html(blk.body);
- }
- });
- });
-};
-
-// Add ID to headings
-HTMLPipeline.prototype.transformHeadings = function() {
- var that = this;
-
- this.$('h1,h2,h3,h4,h5,h6').each(function() {
- var $h = that.$(this);
-
- // Already has an ID?
- if ($h.attr('id')) return;
- $h.attr('id', slug($h.text()));
- });
-};
-
-// Outline SVG from the HML
-HTMLPipeline.prototype.transformSvgs = function() {
- var that = this;
-
- return this._transform('svg', function($svg) {
- var content = [
- '<?xml version="1.0" encoding="UTF-8"?>',
- renderDOM(that.$, $svg)
- ].join('\n');
-
- return Promise(that.opts.onOutputSVG(content))
- .then(function(filename) {
- if (!filename) return;
-
- $svg.replaceWith(that.$('<img>').attr('src', filename));
- });
- });
-};
-
-// Annotate the content
-HTMLPipeline.prototype.applyAnnotations = function() {
- var that = this;
-
- _.each(this.opts.annotations, function(annotation) {
- var searchRegex = new RegExp( '\\b(' + pregQuote(annotation.name.toLowerCase()) + ')\\b' , 'gi' );
-
- that.$('*').each(function() {
- var $this = that.$(this);
-
- if (
- $this.is(ANNOTATION_IGNORE) ||
- $this.parents(ANNOTATION_IGNORE).length > 0
- ) return;
-
- replaceText(that.$, this, searchRegex, function(match) {
- that.opts.onAnnotation(annotation);
-
- return '<a href="' + that.opts.onRelativeLink(annotation.href) + '" '
- + 'class="glossary-term" title="'+_.escape(annotation.description)+'">'
- + match
- + '</a>';
- });
- });
- });
-};
-
-// Extract page description from html
-// This can totally be improved
-HTMLPipeline.prototype.extractDescription = function() {
- var $ = this.$;
- var $p = $('p').first();
- var $next = $p.nextUntil('h1,h2,h3,h4,h5,h6,pre,blockquote,ul,ol,div');
-
- var description = $p.text().trim();
-
- $next.each(function() {
- description += ' ' + $(this).text().trim();
- });
-
- // Truncate description
- description = _.trunc(description, 300);
-
- this.opts.onDescription(description);
-};
-
-// Write content to the pipeline
-HTMLPipeline.prototype.output = function() {
- var that = this;
-
- return Promise()
- .then(this.extractDescription)
- .then(this.transformImages)
- .then(this.transformHeadings)
- .then(this.transformCodeBlocks)
- .then(this.transformSvgs)
- .then(this.applyAnnotations)
-
- // Transform of links should be applied after annotations
- // because annotations are created as links
- .then(this.transformLinks)
-
- .then(function() {
- return renderDOM(that.$);
- });
-};
-
-
-// Render a cheerio DOM as html
-function renderDOM($, dom, options) {
- if (!dom && $._root && $._root.children) {
- dom = $._root.children;
- }
- options = options|| dom.options || $._options;
- return domSerializer(dom, options);
-}
-
-// Replace text in an element
-function replaceText($, el, search, replace, text_only ) {
- return $(el).each(function(){
- var node = this.firstChild,
- val,
- new_val,
-
- // Elements to be removed at the end.
- remove = [];
-
- // Only continue if firstChild exists.
- if ( node ) {
-
- // Loop over all childNodes.
- while (node) {
-
- // Only process text nodes.
- if ( node.nodeType === 3 ) {
-
- // The original node value.
- val = node.nodeValue;
-
- // The new value.
- new_val = val.replace( search, replace );
-
- // Only replace text if the new value is actually different!
- if ( new_val !== val ) {
-
- if ( !text_only && /</.test( new_val ) ) {
- // The new value contains HTML, set it in a slower but far more
- // robust way.
- $(node).before( new_val );
-
- // Don't remove the node yet, or the loop will lose its place.
- remove.push( node );
- } else {
- // The new value contains no HTML, so it can be set in this
- // very fast, simple way.
- node.nodeValue = new_val;
- }
- }
- }
-
- node = node.nextSibling;
- }
- }
-
- // Time to remove those elements!
- if (remove.length) $(remove).remove();
- });
-}
-
-function pregQuote( str ) {
- return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, '\\$1');
-}
-
-module.exports = HTMLPipeline;