summaryrefslogtreecommitdiffstats
path: root/lib/utils/page.js
diff options
context:
space:
mode:
authorSamy Pessé <samypesse@gmail.com>2015-03-09 10:43:12 +0100
committerSamy Pessé <samypesse@gmail.com>2015-03-09 10:43:12 +0100
commit34fc2831e0cf0fed01c71cec28d93472d87f455b (patch)
treea803cc907c20491ba02863b5d3dd5aedf6bfed10 /lib/utils/page.js
parente1594cde2c32e4ff48f6c4eff3d3d461743d74e1 (diff)
parent1bf68a5aa0703b5a1815cfe4ebb731b5fb6ed9d2 (diff)
downloadgitbook-34fc2831e0cf0fed01c71cec28d93472d87f455b.zip
gitbook-34fc2831e0cf0fed01c71cec28d93472d87f455b.tar.gz
gitbook-34fc2831e0cf0fed01c71cec28d93472d87f455b.tar.bz2
Merge branch 'version/2.0'
Diffstat (limited to 'lib/utils/page.js')
-rw-r--r--lib/utils/page.js343
1 files changed, 343 insertions, 0 deletions
diff --git a/lib/utils/page.js b/lib/utils/page.js
new file mode 100644
index 0000000..effa24f
--- /dev/null
+++ b/lib/utils/page.js
@@ -0,0 +1,343 @@
+var Q = require('q');
+var _ = require('lodash');
+var url = require('url');
+var path = require('path');
+var cheerio = require('cheerio');
+var domSerializer = require('dom-serializer');
+var request = require('request');
+var crc = require("crc");
+
+var links = require('./links');
+var imgUtils = require('./images');
+var fs = require('./fs');
+var batch = require('./batch');
+
+// Render a cheerio dom as html
+var renderDom = function($, dom, options) {
+ if (!dom && $._root && $._root.children) {
+ dom = $._root.children;
+ }
+
+ options = options|| dom.options || $._options;
+ return domSerializer(dom, options);
+};
+
+// Map of images that have been converted
+var imgConversionCache = {};
+
+function replaceText($, el, search, replace, text_only ) {
+ return $(el).each(function(){
+ var node = this.firstChild,
+ val,
+ new_val,
+
+ // Elements to be removed at the end.
+ remove = [];
+
+ // Only continue if firstChild exists.
+ if ( node ) {
+
+ // Loop over all childNodes.
+ do {
+ // Only process text nodes.
+ if ( node.nodeType === 3 ) {
+
+ // The original node value.
+ val = node.nodeValue;
+
+ // The new value.
+ new_val = val.replace( search, replace );
+
+ // Only replace text if the new value is actually different!
+ if ( new_val !== val ) {
+
+ if ( !text_only && /</.test( new_val ) ) {
+ // The new value contains HTML, set it in a slower but far more
+ // robust way.
+ $(node).before( new_val );
+
+ // Don't remove the node yet, or the loop will lose its place.
+ remove.push( node );
+ } else {
+ // The new value contains no HTML, so it can be set in this
+ // very fast, simple way.
+ node.nodeValue = new_val;
+ }
+ }
+ }
+
+ } while ( node = node.nextSibling );
+ }
+
+ // Time to remove those elements!
+ remove.length && $(remove).remove();
+ });
+};
+
+function pregQuote( str ) {
+ return (str+'').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, "\\$1");
+};
+
+
+// Adapt an html snippet to be relative to a base folder
+function normalizeHtml(src, options) {
+ var $ = cheerio.load(src, {
+ // We should parse html without trying to normalize too much
+ xmlMode: false,
+
+ // SVG need some attributes to use uppercases
+ lowerCaseAttributeNames: false,
+ lowerCaseTags: false
+ });
+ var toConvert = [];
+ var svgContent = {};
+ var outputRoot = options.book.options.output;
+
+ imgConversionCache[outputRoot] = imgConversionCache[outputRoot] || {};
+
+ // Find svg images to extract and process
+ if (options.convertImages) {
+ $("svg").each(function() {
+ var content = renderDom($, $(this));
+ var svgId = _.uniqueId("svg");
+ var dest = svgId+".svg";
+
+ // Generate filename
+ dest = "/"+fs.getUniqueFilename(outputRoot, dest);
+
+ svgContent[dest] = content;
+ $(this).replaceWith($("<img>").attr("src", dest));
+ });
+ }
+
+ // Find images to normalize
+ $("img").each(function() {
+ var origin = undefined;
+ var src = $(this).attr("src");
+ if (!src) return;
+ var isExternal = links.isExternal(src);
+
+ // Transform as relative to the bases
+ if (links.isRelative(src)) {
+ src = links.toAbsolute(src, options.base, options.output);
+ }
+
+ // Convert if needed
+ if (options.convertImages) {
+ // If image is external and ebook, then downlaod the images
+ if (isExternal) {
+ origin = src;
+ src = "/"+crc.crc32(origin).toString(16)+path.extname(origin);
+ src = links.toAbsolute(src, options.base, options.output);
+ isExternal = false;
+ }
+
+ var ext = path.extname(src);
+ var srcAbs = path.join("/", options.base, src);
+
+ // Test image extension
+ if (_.contains(imgUtils.INVALID, ext)) {
+ if (imgConversionCache[outputRoot][srcAbs]) {
+ // Already converted
+ src = imgConversionCache[outputRoot][srcAbs];
+ } else {
+ // Not converted yet
+ var dest = "";
+
+ // Replace extension
+ dest = path.join(path.dirname(srcAbs), path.basename(srcAbs, ext)+".png");
+ dest = dest[0] == "/"? dest.slice(1) : dest;
+
+ // Get a name that doesn't exists
+ dest = fs.getUniqueFilename(outputRoot, dest);
+
+ options.book.log.debug.ln("detect invalid image (will be converted to png):", srcAbs);
+
+ // Add to cache
+ imgConversionCache[outputRoot][srcAbs] = "/"+dest;
+
+ // Push to convert
+ toConvert.push({
+ origin: origin,
+ content: svgContent[srcAbs],
+ source: isExternal? srcAbs : path.join("./", srcAbs),
+ dest: path.join("./", dest)
+ });
+
+ src = path.join("/", dest);
+ }
+
+ // Reset as relative to output
+ src = links.toAbsolute(src, options.base, options.output);
+ }
+
+ else if (origin) {
+ // Need to downlaod image
+ toConvert.push({
+ origin: origin,
+ source: path.join("./", srcAbs)
+ });
+ }
+ }
+
+ $(this).attr("src", src);
+ });
+
+ $("a").each(function() {
+ var href = $(this).attr("href");
+ if (!href) return;
+
+ if (links.isAnchor(href)) {
+ // Keep it as it is
+ } else if (links.isRelative(href)) {
+ var parts = url.parse(path.join(options.base, href));
+ var absolutePath = parts.pathname;
+ var anchor = parts.hash;
+
+ // If is in navigation relative: transform as content
+ if (options.navigation[absolutePath]) {
+ href = options.book.contentLink(href);
+ }
+
+ // Transform as absolute
+ href = links.toAbsolute(href, options.base, options.output)+anchor;
+ } else {
+ // External links
+ $(this).attr("target", "_blank");
+ }
+
+ // Transform extension
+ $(this).attr("href", href);
+ });
+
+ // Replace glossayr terms
+ _.each(options.glossary, function(term) {
+ var r = new RegExp( "\\b(" + pregQuote(term.name.toLowerCase()) + ")\\b" , 'gi' );
+ var includedInFiles = false;
+
+ $("*").each(function() {
+ replaceText($, this, r, function(match) {
+ // Add to files index in glossary
+ if (!includedInFiles) {
+ includedInFiles = true;
+ term.files = term.files || [];
+ term.files.push(options.navigation[options.input]);
+ }
+ return "<a href='"+links.toAbsolute("/GLOSSARY.html", options.base, options.output)+"#"+term.id+"' class='glossary-term' title='"+_.escape(term.description)+"'>"+match+"</a>";
+ });
+ });
+ });
+
+ return {
+ html: renderDom($),
+ images: toConvert
+ };
+};
+
+// Convert svg images to png
+function convertImages(images, options) {
+ if (!options.convertImages) return Q();
+
+ var downloaded = [];
+ options.book.log.debug.ln("convert ", images.length, "images to png");
+
+ return batch.execEach(images, {
+ max: 100,
+ fn: function(image) {
+ var imgin = path.resolve(options.book.options.output, image.source);
+
+ return Q()
+
+ // Write image if need to be download
+ .then(function() {
+ if (!image.origin && !_.contains(downloaded, image.origin)) return;
+ options.book.log.debug("download image", image.origin, "...");
+ downloaded.push(image.origin);
+ return options.book.log.debug.promise(fs.writeStream(imgin, request(image.origin)));
+ })
+
+ // Write svg if content
+ .then(function() {
+ if (!image.content) return;
+ return fs.writeFile(imgin, image.content);
+ })
+
+ // Convert
+ .then(function() {
+ if (!image.dest) return;
+ var imgout = path.resolve(options.book.options.output, image.dest);
+ options.book.log.debug("convert image", image.source, "to", image.dest, "...");
+ return options.book.log.debug.promise(imgUtils.convertSVG(imgin, imgout));
+ });
+ }
+ })
+ .then(function() {
+ options.book.log.debug.ok(images.length+" images converted with success");
+ });
+};
+
+
+// Adapt page content to be relative to a base folder
+function normalizePage(sections, options) {
+ options = _.defaults(options || {}, {
+ // Current book
+ book: null,
+
+ // Do we need to convert svg?
+ convertImages: false,
+
+ // Current file path
+ input: ".",
+
+ // Navigation to use to transform path
+ navigation: {},
+
+ // Directory parent of the file currently in rendering process
+ base: "./",
+
+ // Directory parent from the html output
+ output: "./",
+
+ // Glossary terms
+ glossary: []
+ });
+
+ // List of images to convert
+ var toConvert = [];
+
+ sections = _.map(sections, function(section) {
+ if (section.type != "normal") return section;
+
+ var out = normalizeHtml(section.content, options);;
+
+ toConvert = toConvert.concat(out.images);
+ section.content = out.html;
+ return section;
+ });
+
+ return Q()
+ .then(function() {
+ toConvert = _.uniq(toConvert, 'source');
+ return convertImages(toConvert, options);
+ })
+ .thenResolve(sections);
+};
+
+// Extract text from sections
+function extractText(sections) {
+ return _.reduce(sections, function(prev, section) {
+ if (section.type != "normal") return prev;
+
+ var $ = cheerio.load(section.content);
+ $("*").each(function() {
+ prev = prev+" "+$(this).text();
+ });
+
+ return prev;
+ }, "");
+};
+
+module.exports = {
+ normalize: normalizePage,
+ extractText: extractText
+};