var Q = require("q");
var _ = require("lodash");
var url = require("url");
var path = require("path");
var cheerio = require("cheerio");
var domSerializer = require("dom-serializer");
var request = require("request");
var crc = require("crc");
var links = require("./links");
var imgUtils = require("./images");
var fs = require("./fs");
var batch = require("./batch");
var parsableExtensions = require("gitbook-parsers").extensions;
// Serialize a cheerio dom to an html string.
//
// $       - cheerio instance the dom belongs to
// dom     - node(s) to serialize; when omitted, the children of the
//           document root (`$._root.children`) are used if available
// options - serializer options; falls back to the options attached to the
//           dom, then to the options of the cheerio instance
//
// Returns whatever `domSerializer` produces for the chosen dom/options.
var renderDom = function($, dom, options) {
    var target = dom;
    if (!target) {
        var root = $._root;
        if (root && root.children) {
            target = root.children;
        }
    }

    var serializerOptions = options;
    if (!serializerOptions) {
        serializerOptions = target.options || $._options;
    }

    return domSerializer(target, serializerOptions);
};
// Map of images that have been converted.
// Keyed first by the book's output root, then by the absolute source path of
// the image; the value is the "/"-prefixed path of the png it is converted to.
var imgConversionCache = {};
// NOTE(review): the end of replaceText and the start of pregQuote were missing
// from this file (everything between a "<" and a ">" had been dropped); both
// were restored here - confirm against version history.

// Replace text inside the direct child text nodes of the element(s) `el`.
//
// $         - cheerio instance used to wrap nodes
// el        - element(s)/selector whose child nodes are scanned
// search    - string or RegExp handed to String.prototype.replace
// replace   - replacement string or callback handed to String.prototype.replace
// text_only - when truthy, the new value is always written back as plain
//             text, even if it contains markup
//
// Returns the result of `$(el).each(...)`.
function replaceText($, el, search, replace, text_only) {
    return $(el).each(function() {
        var node = this.firstChild,
            val,
            new_val,
            // Nodes to be removed at the end.
            remove = [];

        // Loop over all child nodes (no-op when there is no first child).
        while (node) {
            // Only process text nodes.
            if (node.nodeType === 3) {
                // The original node value.
                val = node.nodeValue;

                // The new value.
                new_val = val.replace(search, replace);

                // Only replace text if the new value is actually different!
                if (new_val !== val) {
                    if (!text_only && /</.test(new_val)) {
                        // The new value contains markup: insert it as html
                        // in front of the text node.
                        $(node).before(new_val);

                        // Don't remove the node yet, or the loop would lose
                        // its place in the sibling chain.
                        remove.push(node);
                    } else {
                        // No markup (or text only): set the value directly.
                        node.nodeValue = new_val;
                    }
                }
            }

            node = node.nextSibling;
        }

        // Time to remove the text nodes that were replaced by html.
        if (remove.length) $(remove).remove();
    });
}

// Escape the characters of `str` that have a special meaning in a regular
// expression, so the result can be embedded in a `new RegExp(...)` source.
// Non-string values are converted to strings first.
function pregQuote(str) {
    return (str+"").replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, "\\$1");
}
// Adapt an html snippet to be relative to a base folder.
//
// src     - html source of a page section
// options - normalization settings (see the defaults in normalizePage):
//           book, convertImages, input, navigation, base, output, glossary
//
// Returns an object { html, images }: `html` is the rewritten markup and
// `images` lists the files that still have to be downloaded and/or converted
// (entries carry `origin`, `content`, `source` and `dest` as relevant).
function normalizeHtml(src, options) {
    var $ = cheerio.load(src, {
        // We should parse html without trying to normalize too much
        xmlMode: false,

        // SVG need some attributes to use uppercases
        lowerCaseAttributeNames: false,
        lowerCaseTags: false
    });
    var toConvert = [];
    var svgContent = {};
    var outputRoot = options.book.options.output;

    imgConversionCache[outputRoot] = imgConversionCache[outputRoot] || {};

    // Find inline svg to extract: each one is replaced by an <img> pointing to
    // a generated .svg file, and its markup is kept to be written later
    if (options.convertImages) {
        $("svg").each(function() {
            var content = renderDom($, $(this));
            var svgId = _.uniqueId("svg");
            var dest = svgId+".svg";

            // Generate filename
            dest = "/"+fs.getUniqueFilename(outputRoot, dest);

            svgContent[dest] = ""+content;
            $(this).replaceWith($("<img/>").attr("src", dest));
        });
    }

    // Find images to normalize
    $("img").each(function() {
        var origin;
        var src = $(this).attr("src");

        if (!src) return;
        var isExternal = links.isExternal(src);

        // Transform as relative to the bases
        if (links.isRelative(src)) {
            src = links.toAbsolute(src, options.base, options.output);
        }

        // Convert if needed
        if (options.convertImages) {
            // If image is external and ebook, then download the image
            // under a name derived from the checksum of its url
            if (isExternal) {
                origin = src;
                src = "/"+crc.crc32(origin).toString(16)+path.extname(origin);
                src = links.toAbsolute(src, options.base, options.output);
                isExternal = false;
            }

            var ext = path.extname(src);
            var srcAbs = links.join("/", options.base, src);

            // Test image extension
            if (_.contains(imgUtils.INVALID, ext)) {
                if (imgConversionCache[outputRoot][srcAbs]) {
                    // Already converted
                    src = imgConversionCache[outputRoot][srcAbs];
                } else {
                    // Not converted yet
                    var dest = "";

                    // Replace extension
                    dest = links.join(path.dirname(srcAbs), path.basename(srcAbs, ext)+".png");
                    dest = dest[0] == "/"? dest.slice(1) : dest;

                    // Get a name that doesn't exist yet
                    dest = fs.getUniqueFilename(outputRoot, dest);

                    options.book.log.debug.ln("detect invalid image (will be converted to png):", srcAbs);

                    // Add to cache
                    imgConversionCache[outputRoot][srcAbs] = "/"+dest;

                    // Push to convert
                    toConvert.push({
                        origin: origin,
                        content: svgContent[srcAbs],
                        source: isExternal? srcAbs : path.join("./", srcAbs),
                        dest: path.join("./", dest)
                    });

                    src = links.join("/", dest);
                }

                // Reset as relative to output
                src = links.toAbsolute(src, options.base, options.output);
            }

            else if (origin) {
                // Need to download image (no conversion required)
                toConvert.push({
                    origin: origin,
                    source: path.join("./", srcAbs)
                });
            }
        }

        $(this).attr("src", src);
    });

    // Normalize links
    $("a").each(function() {
        var href = $(this).attr("href");
        if (!href) return;

        if (links.isAnchor(href)) {
            // Keep it as it is
        } else if (links.isRelative(href)) {
            var parts = url.parse(href);

            var absolutePath = links.join(options.base, parts.pathname);
            var anchor = parts.hash || "";

            // If is in navigation relative: transform as content
            if (options.navigation[absolutePath]) {
                absolutePath = options.book.contentLink(absolutePath);
            }

            // If md/adoc/rst files is not in summary
            // or for ebook, signal all files that are outside the summary
            else if (_.contains(parsableExtensions, path.extname(absolutePath)) ||
                _.contains(["epub", "pdf", "mobi"], options.book.options.generator)) {
                options.book.log.warn.ln("page", options.input, "contains an hyperlink to resource outside spine \""+href+"\"");
            }

            // Transform as absolute
            href = links.toAbsolute("/"+absolutePath, options.base, options.output)+anchor;
        } else {
            // External links
            $(this).attr("target", "_blank");
        }

        // Transform extension
        $(this).attr("href", href);
    });

    // Highlight code blocks
    $("code").each(function() {
        // Normalize language: first class prefixed by "lang-" or "language-"
        var lang = _.chain(
                ($(this).attr("class") || "").split(" ")
            )
            .map(function(cl) {
                // Markdown
                if (cl.search("lang-") === 0) return cl.slice("lang-".length);

                // Asciidoc
                if (cl.search("language-") === 0) return cl.slice("language-".length);

                return null;
            })
            .compact()
            .first()
            .value();

        var source = $(this).text();
        var blk = options.book.template.applyBlock("code", {
            body: source,
            kwargs: {
                language: lang
            }
        });

        // The block tells whether its body is plain text or html
        if (blk.html === false) $(this).text(blk.body);
        else $(this).html(blk.body);
    });

    // Replace glossary terms, longest names first so that a longer term wins
    // over a shorter term it contains
    var glossary = _.sortBy(options.glossary, function(term) {
        return -term.name.length;
    });

    _.each(glossary, function(term) {
        var r = new RegExp( "\\b(" + pregQuote(term.name.toLowerCase()) + ")\\b" , "gi" );
        var includedInFiles = false;

        $("*").each(function() {
            // Ignore codeblocks and links
            if (_.contains(["code", "pre", "a"], this.name.toLowerCase())) return;

            replaceText($, this, r, function(match) {
                // Add to files index in glossary
                if (!includedInFiles) {
                    includedInFiles = true;
                    term.files = term.files || [];
                    term.files.push(options.navigation[options.input]);
                }

                // NOTE(review): the markup around the match had been lost (the
                // callback returned the match unchanged, so no term was ever
                // marked up). Restored as a link into the glossary page;
                // confirm `term.id`, `term.description` and the
                // "/GLOSSARY.html" target against the glossary parser/output.
                return "<a href='"+links.toAbsolute("/GLOSSARY.html", options.base, options.output)+"#"+term.id+"' class='glossary-term' title='"+_.escape(term.description)+"'>"+match+"</a>";
            });
        });
    });

    return {
        html: renderDom($),
        images: toConvert
    };
}
// Download and/or convert the images collected while normalizing pages.
//
// images  - list of entries produced by normalizeHtml; each entry has a
//           `source` (path relative to the output folder) and optionally
//           `origin` (url to download), `content` (inline svg markup to write)
//           and `dest` (path of the png to produce)
// options - normalization options; nothing is done unless
//           `options.convertImages` is truthy
//
// Returns a promise settled once every image has been processed.
function convertImages(images, options) {
    if (!options.convertImages) return Q();

    // Download promises keyed by the absolute path being written. The previous
    // guard (`!origin && !contains(downloaded, origin)`) could never skip an
    // image with an origin, so a repeated entry was fetched again; with the
    // memo a file is fetched once and later entries wait for the same write.
    var downloads = {};

    options.book.log.debug.ln("convert ", images.length, "images to png");

    return batch.execEach(images, {
        max: 100,
        fn: function(image) {
            var imgin = path.resolve(options.book.options.output, image.source);

            return Q()

            // Write image if need to be download
            .then(function() {
                if (!image.origin) return;

                if (!downloads[imgin]) {
                    options.book.log.debug("download image", image.origin, "...");
                    downloads[imgin] = options.book.log.debug.promise(fs.writeStream(imgin, request(image.origin)))
                    .fail(function(err) {
                        // Make sure the failing url is part of the message
                        if (!_.isError(err)) err = new Error(err);

                        err.message = "Fail downloading "+image.origin+": "+err.message;
                        throw err;
                    });
                }

                return downloads[imgin];
            })

            // Write svg if content
            .then(function() {
                if (!image.content) return;
                return fs.writeFile(imgin, image.content);
            })

            // Convert
            .then(function() {
                if (!image.dest) return;

                var imgout = path.resolve(options.book.options.output, image.dest);
                options.book.log.debug("convert image", image.source, "to", image.dest, "...");
                return options.book.log.debug.promise(imgUtils.convertSVG(imgin, imgout));
            });
        }
    })
    .then(function() {
        options.book.log.debug.ok(images.length+" images converted with success");
    });
}
// Adapt the content of a page so that it is relative to a base folder.
//
// sections - sections of the page; only those whose `type` is "normal" are
//            rewritten (their `content` is replaced in place)
// options  - see the defaults below; missing keys are filled in on the
//            object that was passed
//
// Returns a promise resolved with the sections once the images collected
// during normalization have been handed to convertImages.
function normalizePage(sections, options) {
    var fallbacks = {
        // Current book
        book: null,

        // Do we need to convert svg?
        convertImages: false,

        // Current file path
        input: ".",

        // Navigation to use to transform path
        navigation: {},

        // Directory parent of the file currently in rendering process
        base: "./",

        // Directory parent from the html output
        output: "./",

        // Glossary terms
        glossary: []
    };
    options = _.defaults(options || {}, fallbacks);

    // Images reported by the sections, still to be converted
    var pendingImages = [];

    var normalized = _.map(sections, function(section) {
        if (section.type == "normal") {
            var result = normalizeHtml(section.content, options);

            pendingImages = pendingImages.concat(result.images);
            section.content = result.html;
        }

        return section;
    });

    return Q()
    .then(function() {
        // An image referenced several times is only processed once
        return convertImages(_.uniq(pendingImages, "source"), options);
    })
    .thenResolve(normalized);
}
// Extract text from sections.
//
// sections - sections of a page; only those whose `type` is "normal" are read
//
// Returns a single string: for every element of every normal section, a
// space followed by the text of that element.
function extractText(sections) {
    var text = "";

    _.each(sections, function(section) {
        if (section.type != "normal") return;

        var $ = cheerio.load(section.content);
        $("*").each(function() {
            text += " " + $(this).text();
        });
    });

    return text;
}
// Public API of this module:
// - normalize: rewrite the sections of a page (normalizePage)
// - extractText: collect the text of the sections of a page
module.exports = {
normalize: normalizePage,
extractText: extractText
};