diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/page/html.js | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/lib/page/html.js b/lib/page/html.js
index 5a13dcf..e8d3a85 100644
--- a/lib/page/html.js
+++ b/lib/page/html.js
@@ -184,12 +184,18 @@ HTMLPipeline.prototype.applyAnnotations = function() {
 // Extract page description from html
 // This can totally be improved
 HTMLPipeline.prototype.extractDescription = function() {
-    var $p = this.$('p');
+    var $ = this.$;
+    var $p = $('p').first();
+    var $next = $p.nextUntil('h1,h2,h3,h4,h5,h6,pre,blockquote,ul,ol,div');
     var description = $p.text().trim();
-    if (description.length > 300) {
-        description = description.slice(0, 300).trim()+'...';
-    }
+
+    $next.each(function() {
+        description += ' ' + $(this).text().trim();
+    });
+
+    // Truncate description
+    description = _.trunc(description, 300);
     this.opts.onDescription(description);
 };