seo

author: Thomas Davis <thomasalwyndavis@gmail.com> 2012-10-15 11:25:11 +0000
committer: Thomas Davis <thomasalwyndavis@gmail.com> 2012-10-15 11:25:11 +0000
commit: 953c25778e0a89d4682f00b448f83bd0b6a9d78f (patch)
tree: 2e7fe80933b9b9b34f700b3fd9fbacfbb1f2e52b
parent: 14b8dd07cbc6a780e63076a232e3bf52b99e4849 (diff)
download: backbonetutorials-953c25778e0a89d4682f00b448f83bd0b6a9d78f.zip
backbonetutorials-953c25778e0a89d4682f00b448f83bd0b6a9d78f.tar.gz
backbonetutorials-953c25778e0a89d4682f00b448f83bd0b6a9d78f.tar.bz2
1 files changed, 102 insertions, 0 deletions
diff --git a/_posts/2012-8-1-seo-for-single-page-apps.md b/_posts/2012-8-1-seo-for-single-page-apps.md
index 9a427c4..0f9b2f3 100644
--- a/_posts/2012-8-1-seo-for-single-page-apps.md
+++ b/_posts/2012-8-1-seo-for-single-page-apps.md
@@ -19,7 +19,109 @@ Using modern headless browsers, we can easily return the fully rendered content
 
 ![headless seo](http://acris.googlecode.com/svn/wiki/images/seo_google_crawlability.png)
 
+<div style='clear: both;'></div>
 
+## Implementation using Phantom.js
+
+[Phantom.js](http://phantomjs.org/) is a headless webkit browser. We are going to set up a node.js server that, given a URL, fully renders the page content. Then we will redirect bots to this server to retrieve the correct content.
+
+You will need to install node.js and phantom.js onto a box. Then start up the server below. There are two files: one is the web server and the other is a phantomjs script that renders the page.
+
+{% highlight javascript %}
+// web.js
+
+// Express is our web server that can handle request
+var express = require('express');
+var app = express();
+
+
+var getContent = function(url, callback) {
+  var content = '';
+  // Here we spawn a phantom.js process, the first element of the 
+  // array is our phantomjs script and the second element is our url 
+  var phantom = require('child_process').spawn('phantomjs', ['phantom-server.js', url]);
+  phantom.stdout.setEncoding('utf8');
+  phantom.stdout.on('data', function(data) {
+    content += data.toString();
+  });
+  phantom.on('exit', function(code) {
+    if (code !== 0) {
+      console.log('We have an error');
+    } else {
+      callback(content);
+    }
+  });
+};
+
+var respond = function (req, res) {
+  url = 'http://' + req.headers['x-forwarded-host'] + req.params[0];
+  getContent(url, function (content) {
+    res.send(content);
+  });
+}
+
+app.get(/(.*)/, respond); // match every path; the captured path is read as req.params[0] in respond
+app.listen(3000); // same port the RewriteRule proxies to
+{% endhighlight %}
+
+The script below is `phantom-server.js` and will be in charge of fully rendering the content. We don't return the content until the page is fully rendered. We hook into the resource listeners to do this.
+
+{% highlight javascript %}
+var page = require('webpage').create(); // the page that loads the requested url
+var system = require('system'); // gives access to the command line arguments
+
+var lastReceived = new Date().getTime(); // time (ms) of the latest counted response
+var requestCount = 0; // number of distinct request ids seen
+var responseCount = 0; // number of tracked requests that got a response
+var requestIds = []; // ids of requests seen; an entry is set to null once answered
+var startTime = new Date().getTime(); // start of the run, used for the 5000 ms cap
+
+page.onResourceReceived = function (response) {
+    if(requestIds.indexOf(response.id) !== -1) {
+        lastReceived = new Date().getTime();
+        responseCount++;
+        requestIds[requestIds.indexOf(response.id)] = null;
+    }
+};
+page.onResourceRequested = function (request) {
+    if(requestIds.indexOf(request.id) === -1) {
+        requestIds.push(request.id);
+        requestCount++;
+    }
+};
+page.open(system.args[1], function () { // system.args[1] is the url argument given on the command line
+  // Nothing to do here: the page content is printed by the checkComplete poller.
+});
+
+var checkComplete = function () {
+  
+  if((new Date().getTime() - lastReceived > 300 && requestCount === responseCount) || new Date().getTime() - startTime > 5000)  {
+    clearInterval(checkCompleteInterval);
+    console.log(page.content);
+    phantom.exit();
+  }
+}
+var checkCompleteInterval = setInterval(checkComplete, 1);
+{% endhighlight %}
+
+Once we have this server up and running we just redirect bots to the server in our client's webserver configuration.
+
+## Redirecting bots
+
+If you are using Apache, we can edit our `.htaccess` such that Google requests are proxied to our middleman phantom.js server.
+
+{% highlight apache %}
+RewriteEngine on
+RewriteCond %{QUERY_STRING} ^_escaped_fragment_=(.*)$
+RewriteRule (.*) http://webserver:3000/%1? [P]
+{% endhighlight %}
+
+Google won't use the `_escaped_fragment_` query string unless we tell it to, either by including a meta tag:
+`<meta name="fragment" content="!">`
+or
+by using `#!` URLs in our links.
+
+You will most likely have to use both.
 
 ### Relevant Links
author	Thomas Davis <thomasalwyndavis@gmail.com>	2012-10-15 11:25:11 +0000
committer	Thomas Davis <thomasalwyndavis@gmail.com>	2012-10-15 11:25:11 +0000
commit	953c25778e0a89d4682f00b448f83bd0b6a9d78f (patch)
tree	2e7fe80933b9b9b34f700b3fd9fbacfbb1f2e52b
parent	14b8dd07cbc6a780e63076a232e3bf52b99e4849 (diff)
download	backbonetutorials-953c25778e0a89d4682f00b448f83bd0b6a9d78f.zip backbonetutorials-953c25778e0a89d4682f00b448f83bd0b6a9d78f.tar.gz backbonetutorials-953c25778e0a89d4682f00b448f83bd0b6a9d78f.tar.bz2