1 files changed, 103 insertions, 0 deletions
diff --git a/lib/generate/site/glossary_indexer.js b/lib/generate/site/glossary_indexer.js
new file mode 100644
index 0000000..d46e393
--- /dev/null
+++ b/lib/generate/site/glossary_indexer.js
@@ -0,0 +1,103 @@
+var _ = require("lodash");
+
+var kramed = require('kramed');
+var textRenderer = require('kramed-text-renderer');
+
+var entryId = require('../../parse/glossary').entryId;
+
+
+/**
+ * Builds a glossary index mapping page urls to glossary terms and back.
+ * Callable with or without `new`.
+ *
+ * @param {Array} [glossary] Glossary entries; each entry's `id` is a term.
+ */
+function Indexer(glossary) {
+    if(!(this instanceof Indexer)) {
+        return new Indexer(glossary);
+    }
+
+    // Bind methods to this instance; add passes this.text as a bare callback
+    _.bindAll(this);
+
+    this.glossary = glossary || [];
+
+    // Entries with a missing/empty id are skipped: an undefined id would
+    // throw in regexEscape and an empty alternative would match everywhere
+    this.glossaryTerms = _.compact(_.pluck(this.glossary, "id"));
+
+    // Alternation takes the first alternative that matches, so try longer
+    // terms first; otherwise "foo" would shadow "foo bar"
+    var byLengthDesc = this.glossaryTerms.slice().sort(function(a, b) {
+        return b.length - a.length;
+    });
+
+    // Regex for searching for terms through body (global, case-insensitive).
+    // With no terms, "(?!)" makes it match nothing instead of "" everywhere
+    this.termsRegex = new RegExp(
+        "(" + (byLengthDesc.map(regexEscape).join('|') || "(?!)") + ")",
+        "gi"
+    );
+
+    // page url => terms, e.g. "a/b.html": ["one word", "second word"]
+    this.idx = {};
+
+    // term => page urls, e.g. "word1": ["page1.html", "page2.html"]
+    this.invertedIdx = {};
+
+    // Use text renderer
+    this.renderer = textRenderer();
+}
+
+// Render a section's nodes through kramed using this.renderer
+Indexer.prototype.text = function(nodes) {
+    // kramed defaults, with our renderer swapped in
+    var parserOpts = _.extend({}, kramed.defaults, {
+        renderer: this.renderer
+    });
+
+    // Work on a copy so the caller's nodes never get the `links` property;
+    // kramed expects `links` on the section, though we don't use it yet
+    var tokens = _.toArray(nodes);
+    tokens.links = {};
+    return kramed.parser(tokens, parserOpts);
+};
+
+// Add page to glossary index: idx[url] gets the term ids found in the page's
+// 'normal' sections, invertedIdx[term] gets url. No-op on empty glossary.
+Indexer.prototype.add = function(sections, url) {
+    if(!(this.glossary && this.glossary.length > 0)) {
+        return;
+    }
+
+    var textblob = _.where(sections, { type: 'normal' })
+    .map(function(section) { return this.text(section); }, this)
+    .join('\n');
+
+    var matches = _(textblob.match(this.termsRegex) || [])
+    .map(entryId)
+    .uniq()
+    .value();
+
+    // Add idx for book
+    this.idx[url] = matches;
+
+    // Add to inverted idx. hasOwnProperty keeps ids like "constructor" off
+    // Object.prototype; indexOf keeps a re-added page from duplicating urls
+    var inverted = this.invertedIdx, owns = Object.prototype.hasOwnProperty;
+    matches.forEach(function(match) {
+        var urls = owns.call(inverted, match) ? inverted[match] : (inverted[match] = []);
+        if(urls.indexOf(url) === -1) {
+            urls.push(url);
+        }
+    });
+};
+
+
+
+function regexEscape(s) { // make `s` safe to embed literally in a RegExp source
+    return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, function(ch) { return '\\' + ch; });
+}
+
+// Exports
+module.exports = Indexer;