1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
var _ = require('lodash');
var cheerio = require('cheerio');
var domSerializer = require('dom-serializer');
var slug = require('github-slugid');
var Promise = require('../utils/promise');
var location = require('../utils/location');
/**
 * Render a cheerio DOM as html.
 *
 * @param {Function} $ - cheerio instance; its `_root.children` and `_options`
 *      are used as fallbacks
 * @param {Object|Array} [dom] - node(s) to serialize; defaults to the children
 *      of the document root
 * @param {Object} [options] - serializer options; defaults to `dom.options`,
 *      then to `$._options`
 * @returns {string} output of dom-serializer, or '' when there is nothing to render
 */
function renderDOM($, dom, options) {
    if (!dom) {
        var root = $._root;

        // No explicit node and no parsed root: nothing to serialize.
        // Bail out instead of failing on `dom.options` below.
        if (!root || !root.children) return '';

        dom = root.children;
    }

    options = options || dom.options || $._options;
    return domSerializer(dom, options);
}
/**
 * Pipeline that loads an HTML string with cheerio and exposes transformations
 * to apply on it.
 *
 * @param {string} htmlString - markup to load
 * @param {Object} [opts] - hooks; this object is left untouched
 * @param {Function} [opts.onRelativeLink] - receives the href of a relative
 *      link, returns the href to write back
 * @param {Function} [opts.onImage] - receives the `src` of an image; its
 *      (possibly promised) result becomes the new `src`
 * @param {Function} [opts.onCodeBlock] - receives `(source, lang)`; its
 *      (possibly promised) result is read as `{ body, html }`
 * @param {Function} [opts.onOutputSVG] - receives the serialized svg; a falsy
 *      result keeps the svg inlined, otherwise it is used as an image `src`
 */
function HTMLPipeline(htmlString, opts) {
    // Methods are later handed over as bare callbacks (see output()), so they
    // must stay bound to this instance.
    // NOTE(review): binding every method when no names are passed is the
    // behavior of older lodash releases - confirm against the installed version.
    _.bindAll(this);

    // Merge into a fresh object so the caller's `opts` is never mutated
    this.opts = _.defaults({}, opts, {
        // Compute the new href for a relative link
        onRelativeLink: _.identity,

        // Output an image
        onImage: _.identity,

        // Syntax highlighting
        onCodeBlock: _.identity,

        // Output a svg, if it returns null the svg is kept inlined
        onOutputSVG: _.constant(null)
    });

    this.$ = cheerio.load(htmlString, {
        // We should parse html without trying to normalize too much
        xmlMode: false,

        // SVG needs some attributes and tags to keep their uppercase letters
        lowerCaseAttributeNames: false,
        lowerCaseTags: false
    });
}
/**
 * Apply a function to every element of the page matching a query.
 *
 * @param {string} query - selector passed to the cheerio instance
 * @param {Function} fn - called with the pipeline as `this` and the matched
 *      element wrapped by cheerio as only argument
 * @returns {*} result of `Promise.serie` over the matched elements
 */
HTMLPipeline.prototype._transform = function(query, fn) {
    var pipeline = this;

    // Wrap the raw node before handing it over to the transformation
    function visit(node) {
        return fn.call(pipeline, pipeline.$(node));
    }

    return Promise.serie(pipeline.$(query), visit);
};
/**
 * Normalize links: relative hrefs go through the `onRelativeLink` hook,
 * external links open in a new window, anchors are left as they are.
 *
 * @returns {*} result of `_transform` over all `a` elements
 */
HTMLPipeline.prototype.transformLinks = function() {
    return this._transform('a', function($link) {
        var target = $link.attr('href');

        // Nothing to do for links without href, nor for anchor links
        if (!target || location.isAnchor(target)) return;

        if (location.isRelative(target)) {
            $link.attr('href', this.opts.onRelativeLink(target));
            return;
        }

        // Anything else is an external link
        $link.attr('target', '_blank');
    });
};
/**
 * Normalize images: the `src` of each image is replaced by the (possibly
 * promised) result of the `onImage` hook.
 *
 * @returns {*} result of `_transform` over all `img` elements
 */
HTMLPipeline.prototype.transformImages = function() {
    return this._transform('img', function($image) {
        var pending = Promise(this.opts.onImage($image.attr('src')));

        function updateSource(filename) {
            $image.attr('src', filename);
        }

        return pending.then(updateSource);
    });
};
/**
 * Normalize code blocks: each `code` element is passed through the
 * `onCodeBlock` hook along with the language found in its class names.
 *
 * @returns {*} result of `_transform` over all `code` elements
 */
HTMLPipeline.prototype.transformCodeBlocks = function() {
    // Find the first class name carrying a non-empty language.
    // Returns undefined when no class matches.
    function extractLanguage(classAttr) {
        var classNames = (classAttr || '').split(' ');

        for (var i = 0; i < classNames.length; i++) {
            var className = classNames[i];
            var found = null;

            if (className.indexOf('lang-') === 0) {
                // Markdown
                found = className.slice('lang-'.length);
            } else if (className.indexOf('language-') === 0) {
                // Asciidoc
                found = className.slice('language-'.length);
            }

            // A bare prefix gives an empty name: keep looking
            if (found) return found;
        }

        return undefined;
    }

    return this._transform('code', function($block) {
        var lang = extractLanguage($block.attr('class'));
        var source = $block.text();
        var highlighted = Promise(this.opts.onCodeBlock(source, lang));

        return highlighted.then(function(result) {
            // `html: false` asks for the body to be inserted as plain text
            var writer = (result.html === false) ? 'text' : 'html';
            $block[writer](result.body);
        });
    });
};
/**
 * Add an ID to every heading (h1 to h6) that does not have one yet.
 * The ID is the slug of the heading's text.
 */
HTMLPipeline.prototype.transformHeadings = function() {
    var pipeline = this;

    pipeline.$('h1,h2,h3,h4,h5,h6').each(function(index, heading) {
        var $heading = pipeline.$(heading);

        // Keep IDs that are already set
        if (!$heading.attr('id')) {
            $heading.attr('id', slug($heading.text()));
        }
    });
};
/**
 * Extract inlined SVG from the HTML: each `svg` element is serialized and
 * given to the `onOutputSVG` hook. When the hook yields a filename, the svg
 * is replaced by an image pointing to it; otherwise it is kept inlined.
 *
 * @returns {*} result of `_transform` over all `svg` elements
 */
HTMLPipeline.prototype.transformSvgs = function() {
    var pipeline = this;
    var xmlHeader = '<?xml version="1.0" encoding="UTF-8"?>';

    return pipeline._transform('svg', function($svg) {
        var markup = renderDOM(pipeline.$, $svg);
        var content = [xmlHeader, markup].join('\n');
        var written = Promise(pipeline.opts.onOutputSVG(content));

        return written.then(function(filename) {
            if (filename) {
                var $image = pipeline.$('<img>');
                $image.attr('src', filename);
                $svg.replaceWith($image);
            }
        });
    });
};
/**
 * Run every transformation in order, then render the resulting DOM.
 *
 * @returns {*} chain started with `Promise()`, resolving with the output of
 *      `renderDOM` for the transformed page
 */
HTMLPipeline.prototype.output = function() {
    var pipeline = this;

    // Order matters: each step only starts once the previous one is done
    var steps = [
        pipeline.transformLinks,
        pipeline.transformImages,
        pipeline.transformHeadings,
        pipeline.transformCodeBlocks,
        pipeline.transformSvgs
    ];

    var transformed = steps.reduce(function(chain, step) {
        return chain.then(step);
    }, Promise());

    return transformed.then(function() {
        return renderDOM(pipeline.$);
    });
};
// The HTMLPipeline constructor is the only export of this module
module.exports = HTMLPipeline;
|