diff options
Diffstat (limited to 'lib/utils/page.js')
-rw-r--r-- | lib/utils/page.js | 17 |
1 files changed, 16 insertions, 1 deletions
diff --git a/lib/utils/page.js b/lib/utils/page.js
index 0168831..525722e 100644
--- a/lib/utils/page.js
+++ b/lib/utils/page.js
@@ -139,6 +139,21 @@ function normalizePage(sections, options) {
     });
 };
 
+// Extract text from sections
+function extractText(sections) {
+    return _.reduce(sections, function(prev, section) {
+        if (section.type != "normal") return prev;
+
+        var $ = cheerio.load(section.content);
+        $("*").each(function() {
+            prev = prev+" "+$(this).text();
+        });
+
+        return prev;
+    }, "");
+};
+
 module.exports = {
-    normalize: normalizePage
+    normalize: normalizePage,
+    extractText: extractText
 };