<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">

<channel>
	<title>blog.mhartl | Michael Hartl's (mostly Ruby on Rails) tech blog</title>
	
	<link>http://blog.mhartl.com</link>
	<description>Michael Hartl's (mostly Ruby on Rails) tech blog</description>
	<lastBuildDate>Fri, 15 May 2009 23:05:24 +0000</lastBuildDate>
	<generator>http://wordpress.com/</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<cloud domain="blog.mhartl.com" port="80" path="/?rsscloud=notify" registerProcedure="" protocol="http-post" />
<image>
		<url>http://www.gravatar.com/blavatar/4e51c93cc9bb9db581ae0a1592703f01?s=96&amp;d=http://s.wordpress.com/i/buttonw-com.png</url>
		<title>blog.mhartl | Michael Hartl's (mostly Ruby on Rails) tech blog</title>
		<link>http://blog.mhartl.com</link>
	</image>
			<atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" href="http://feeds.feedburner.com/mhartl" type="application/rss+xml" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com" /><item>
		<title>Running rcov with RSpec</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/WeiaK0ag50o/</link>
		<comments>http://blog.mhartl.com/2009/05/15/running-rcov-with-rspec/#comments</comments>
		<pubDate>Fri, 15 May 2009 23:01:08 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[RSpec]]></category>
		<category><![CDATA[Ruby]]></category>
		<category><![CDATA[Ruby on Rails]]></category>

		<guid isPermaLink="false">http://blog.mhartl.com/?p=236</guid>
		<description><![CDATA[I recently wanted to run rcov, the Ruby code coverage tool, on a project tested with RSpec. I think I&#8217;d done it once before, but I&#8217;d forgotten how. After searching to no avail (both the rcov home page and the RSpec page on rcov proved unhelpful), I applied the tried-and-true Wild-Assed Guess&#8482; method and typed [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=236&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>I recently wanted to run <tt>rcov</tt>, the <a href="http://eigenclass.org/hiki.rb?rcov">Ruby code coverage tool</a>, on a project tested with <a href="http://rspec.info">RSpec</a>. I think I&#8217;d done it once before, but I&#8217;d forgotten how. After searching to no avail (both the <a href="http://eigenclass.org/hiki.rb?rcov">rcov home page</a> and <a href="http://rspec.info/documentation/tools/rcov.html">the RSpec page on rcov</a> proved unhelpful), I applied the tried-and-true Wild-Assed Guess&trade; method and typed </p>
<pre>$ rake spec:rcov</pre>
<p>That worked.</p>
<p>The reports themselves are in the <tt>coverage/</tt> directory:</p>
<pre>$ open coverage/index.html</pre>
<p>(or navigate your browser to <tt>file:///path/to/project/coverage/index.html</tt>). If you&#8217;re using Git, add <tt>coverage/*</tt> to your .gitignore file.</p>
<p>N.B. The inverse Rake task also exists:</p>
<pre>$ rake -T rcov
rake spec:clobber_rcov  # Remove rcov products for rcov
rake spec:rcov          # Run all specs in spec directory with RCov (excluding plugin specs)</pre>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/236/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/236/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/236/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/236/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/236/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/236/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/236/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/236/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/236/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/236/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=236&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2009/05/15/running-rcov-with-rspec/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2009/05/15/running-rcov-with-rspec/</feedburner:origLink></item>
		<item>
		<title>New RSS feed</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/SK0bULIQav0/</link>
		<comments>http://blog.mhartl.com/2009/04/28/new-rss-feed/#comments</comments>
		<pubDate>Tue, 28 Apr 2009 15:22:51 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://blog.mhartl.com/?p=225</guid>
		<description><![CDATA[This blog&#8217;s RSS feed has changed; please re-subscribe here: http://feeds2.feedburner.com/mhartl. (It might take an hour or two to go live. If it doesn&#8217;t work for you now, come back in a bit and try again.)
       <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=225&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>This blog&#8217;s RSS feed has changed; please re-subscribe here: <a title="Michael Hartl's tech blog RSS feed" href="http://feeds2.feedburner.com/mhartl">http://feeds2.feedburner.com/mhartl</a>. (It might take an hour or two to go live. If it doesn&#8217;t work for you now, come back in a bit and try again.)</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/225/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/225/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/225/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/225/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/225/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/225/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/225/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/225/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/225/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/225/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=225&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2009/04/28/new-rss-feed/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2009/04/28/new-rss-feed/</feedburner:origLink></item>
		<item>
		<title>Using a temporary branch when doing Git merges</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/Jg2X4_2s5bY/</link>
		<comments>http://blog.mhartl.com/2008/10/28/using-a-temporary-branch-when-doing-git-merges/#comments</comments>
		<pubDate>Tue, 28 Oct 2008 22:54:10 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Git]]></category>
		<category><![CDATA[Insoshi]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=202</guid>
		<description><![CDATA[Merging branches in Git is wonderfully easy compared to many other version control tools, but sometimes merging causes problems you&#8217;d rather undo.  One common merge side effect is the creation of code conflicts, and sometimes a merge causes so many conflicts that you end up regretting doing the merge in the first place.  [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=202&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>Merging branches in Git is wonderfully easy compared to many other version control tools, but sometimes merging causes problems you&#8217;d rather undo.  One common merge side effect is the creation of code conflicts, and sometimes a merge causes so many conflicts that you end up regretting doing the merge in the first place.  In addition, for projects with many contributors (such as <a href="http://insoshi.com/">Insoshi</a>), sometimes you aren&#8217;t sure if you will even want to use the contribution on the branch you&#8217;re merging in.  Unfortunately, merges are very difficult to undo, so if you just do a direct merge of, say, a contributor branch into your main development branch, you&#8217;re stuck if you decide you don&#8217;t want the changes after all:</p>
<pre># Don't do this!
$ git checkout master
$ git merge contributor_branch</pre>
<p>The solution is always to use a <em>temporary branch</em> when doing any merge whose changes you&#8217;re not <em>sure</em> you&#8217;ll want to keep; if the merge proves intractable due to conflicts, or you just don&#8217;t want to use the contribution, then you can simply delete the temp branch.  Here&#8217;s how it works:</p>
<pre>$ git checkout master
$ git checkout -b temp_branch
$ git merge contributor_branch</pre>
<p>Then you can do stuff like</p>
<pre>$ git status
$ git diff master
&lt;resolve conflicts, polish contributed code&gt;</pre>
<p>If the new branch passes muster, you can then merge it in:</p>
<pre>$ git checkout master
$ git merge temp_branch
$ git branch -D temp_branch</pre>
<p>(Note here that I&#8217;ve deleted the temp branch in the final step, just to clean up.)  If, on the other hand, you decide not to continue with the merge, you can just delete the temp branch without merging it in:</p>
<pre>$ git checkout master
$ git branch -D temp_branch</pre>
<p>Either way, the lesson is the same: consistently using temp branches when doing dangerous merges is a great way to avoid the agony of merge remorse.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/202/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/202/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/202/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/202/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/202/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/202/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/202/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/202/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/202/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/202/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=202&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/10/28/using-a-temporary-branch-when-doing-git-merges/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/10/28/using-a-temporary-branch-when-doing-git-merges/</feedburner:origLink></item>
		<item>
		<title>Setting up your Git repositories for open source projects at GitHub</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/F4VLsjJyJ2U/</link>
		<comments>http://blog.mhartl.com/2008/10/14/setting-up-your-git-repositories-for-open-source-projects-at-github/#comments</comments>
		<pubDate>Tue, 14 Oct 2008 19:08:53 +0000</pubDate>
		<dc:creator>long</dc:creator>
				<category><![CDATA[Git]]></category>
		<category><![CDATA[Insoshi]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=150</guid>
		<description><![CDATA[[This is a guest post from Long Nguyen. &#8212;mhartl]
Like a lot of projects in the Ruby on Rails world, the Insoshi social networking platform uses Git and GitHub to manage its open source development and contributions.  In setting up the repositories for Insoshi, I&#8217;ve applied the version control experience I gained  at Discover, [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=150&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>[This is a guest post from Long Nguyen. &mdash;mhartl]</p>
<p>Like a lot of projects in the Ruby on Rails world, the <a href="http://insoshi.com/">Insoshi social networking platform</a> uses <a href="http://git.or.cz/">Git</a> and <a href="http://github.com/">GitHub</a> to manage its open source development and contributions.  In setting up the repositories for Insoshi, I&#8217;ve applied the version control experience I gained  at <a href="http://www.discovercard.com/">Discover</a>, where I was technical lead for the software configuration management (SCM) team. Since some aspects of our setup aren&#8217;t obvious if you haven&#8217;t managed large projects before, we at Insoshi decided to share the details so that other GitHub projects might benefit as well.</p>
<p>We&#8217;ll start by reviewing the typical Git workflow based on <em>pull requests</em>, then discuss some problems you might run into with a &#8220;typical&#8221; repository setup, and finally explain the details of preparing the Insoshi Git repository for collaboration.</p>
<h4>Why Pull Requests?</h4>
<p>Git was originally developed by Linus Torvalds to host the Linux kernel, and <a href="http://github.com/guides/pull-requests">pull requests</a> are the de-facto standard for submitting contributions in Git because that&#8217;s what Linus does.  (He talked about this in his <a href="http://www.youtube.com/watch?v=4XpnKHJAok8">Google Tech Talk on Git</a>.)  The concept of the pull request is straightforward: You notify someone that you&#8217;ve made an update via email, messaging on GitHub, etc. and let them know where to find it.  They can then pull in your changes and merge them with their work.</p>
<p>Except for that interaction, everyone works within their own repository and on their own schedule.  There&#8217;s no process waiting to be completed that blocks you from moving on to whatever you need/want to do next.  And you&#8217;re not forcing anyone to drop what they&#8217;re doing right now to handle your request.</p>
<p>It&#8217;s all very polite.  And it works well in the context of distributed development since you avoid all kinds of coordination issues.</p>
<h4>What&#8217;s needed?</h4>
<p>If you want to contribute to an open source project, here&#8217;s really all that you need:</p>
<ol>
<li>A publicly accessible repository where your changes can be found</li>
<li>A local repository for your development</li>
</ol>
<p>Even if you&#8217;re new to <a href="http://git.or.cz/">Git</a>, these both seem like pretty straightforward things to do—especially if you&#8217;re using <a href="http://github.com">GitHub</a> for the public repository:  your repository is just a fork of the main project repository.</p>
<p>Let&#8217;s set up our repository by going to the <a href="http://github.com/insoshi/insoshi/">official Insoshi repository</a> and clicking on the fork button:</p>
<p style="text-align:center;"><a href="http://github.com/insoshi/insoshi/fork"><img class="aligncenter size-medium wp-image-190" title="Forking Insoshi at GitHub" src="http://insoshi.files.wordpress.com/2008/10/forking-insoshi-at-github.png?w=300&#038;h=184" alt="" width="300" height="184" /></a></p>
<p>I&#8217;ll need to make note of the public clone URL for the official repository and my private clone URL for my newly created fork:</p>
<ul>
<li>Official Insoshi public clone URL<br />
<tt>git://github.com/insoshi/insoshi.git</tt></li>
<li>My fork&#8217;s private clone URL<br />
<tt>git@github.com:long/insoshi.git</tt></li>
</ul>
<h4>Your local repository: The &#8220;obvious&#8221; thing to do</h4>
<p>At this point, I&#8217;ll be tempted to go ahead and make a local clone of my fork:</p>
<pre>$ git clone git@github.com:long/insoshi.git</pre>
<p>and immediately get to work.</p>
<p>Technically, there&#8217;s nothing wrong with that.  And as an individual developer starting a new project, it&#8217;s what you do, but there are several disadvantages to this seemingly straightforward approach.  One of the major benefits of a distributed version control system like Git is that each repository is on an equal footing; in particular, we would like <em>every</em> fork to have the same master branch, so that if the &#8220;official&#8221; Insoshi repository should ever be lost there would be plenty of redundant backups.  We also want it to be easy for each developer to pull in changes from the official repository; the &#8220;obvious&#8221; approach isn&#8217;t set up for that.  Finally, it&#8217;s a bad idea in general to work on the master branch; experienced Git users typically work on separate development branches and then merge those branches into master when they&#8217;re done.</p>
<p>What we&#8217;d like is a way to connect up the local repository in a way that will</p>
<ul>
<li>Keep the repositories in sync so that each contains the full &#8220;official&#8221; repository</li>
<li>Allow developers to pull in official updates</li>
<li>Encourage working on branches other than master</li>
</ul>
<p>In the &#8220;obvious&#8221; configuration, I&#8217;m not set up to do any of that:</p>
<ul>
<li>There&#8217;s no local connection to the official repository for updates</li>
<li>There&#8217;s no mechanism in place to push official updates to my fork on GitHub</li>
<li>We&#8217;re working directly on the master branch</li>
</ul>
<h4>Your local repository: The &#8220;right&#8221; way</h4>
<p>Keeping the big picture in mind, here are the commands I&#8217;ve run to set up my local repository (using the GitHub id <tt>long</tt>):</p>
<pre>$ git clone git://github.com/insoshi/insoshi.git
$ cd insoshi
$ git branch --track edge origin/edge
$ git branch long edge
$ git checkout long
$ git remote add long git@github.com:long/insoshi.git
$ git fetch long
$ git push long long:refs/heads/long
$ git config branch.long.remote long
$ git config branch.long.merge refs/heads/long</pre>
<p>Let&#8217;s take a detailed look at what these steps accomplish.</p>
<h4>So what does it all mean?</h4>
<h5>Step one</h5>
<p>Create a local clone of the Insoshi repository:</p>
<pre>$ git clone git://github.com/insoshi/insoshi.git</pre>
<p>You should note that the Git URL for the clone references the <em>official</em> Insoshi repository and not the URL of my own fork (i.e., the clone URL is <tt>git://github.com/insoshi/insoshi.git</tt> instead of <tt>git@github.com:long/insoshi.git</tt>).  This way, the official repository is the default remote (aka &#8216;origin&#8217;), and the local master branch tracks the official master.</p>
<h5>Step two</h5>
<p>I have to change into the repository to perform additional git setup:</p>
<pre>$ cd insoshi</pre>
<h5>Step three</h5>
<p>Insoshi also has an &#8216;edge&#8217; branch for changes that we want to make public but may require a bit more polishing before we&#8217;d consider them production-ready (in the past this has included migrating to Rails 2.1 and Sphinx/Ultrasphinx).  Our typical development lifecycle looks something like</p>
<p style="text-align:center;">development -&gt; edge -&gt; master</p>
<p>I want to create a local tracking branch for it:</p>
<pre>$ git branch --track edge origin/edge</pre>
<h5>Steps four and five</h5>
<p>As I mentioned before, I&#8217;m resisting the temptation to immediately start working on the local &#8216;master&#8217; and &#8216;edge&#8217; branches.  I want to keep those in sync with the official Insoshi repository.</p>
<p>I&#8217;ll keep my changes separate by creating a new branch &#8216;long&#8217; that&#8217;s based off edge and checking it out:</p>
<pre>$ git branch long edge
$ git checkout long</pre>
<p>By the way, you can actually combine the two commands if you like, using just the &#8216;git checkout&#8217; command with the <tt>-b</tt> flag:</p>
<pre>$ git checkout -b long edge</pre>
<p>You can name this branch anything that you want, but I&#8217;ve chosen my GitHub id so that it&#8217;s easy to identify.</p>
<p>I&#8217;m starting my changes off of &#8216;edge&#8217; since that contains all the latest updates and any contribution I submit a pull request for will be merged first into the official Insoshi &#8216;edge&#8217; branch to allow for public testing before it&#8217;s merged into the &#8216;master&#8217;.</p>
<h5>Steps six and seven</h5>
<p>I&#8217;m finally adding the remote reference to my fork on GitHub:</p>
<pre>$ git remote add long git@github.com:long/insoshi.git</pre>
<p>I&#8217;ve used my GitHub id once again, this time as the remote nickname.</p>
<p>We should run a fetch immediately in order to sync up the local repository with the fork:</p>
<pre>$ git fetch long</pre>
<h5>Step eight</h5>
<p>I&#8217;m pushing my new local branch up to my fork.  Since it&#8217;ll be a new branch on the remote end, I need to fully specify the remote refspec:</p>
<pre>$ git push long long:refs/heads/long</pre>
<h5>Steps nine and ten</h5>
<p>Now that the new branch is up on my fork, I want to set the branch configuration to track it:</p>
<pre>$ git config branch.long.remote long
$ git config branch.long.merge refs/heads/long</pre>
<p>Setting the remote lets me just simply use</p>
<pre>$ git push</pre>
<p>to push changes on my development branch up to my fork.</p>
<p>Setting the merge configuration is mainly for completeness at this point.  But if you end up working on more than one machine (work/home, desktop/laptop, etc.), it&#8217;ll allow you to just use</p>
<pre>$ git pull</pre>
<p>to grab the changes you&#8217;ve pushed up to your fork.</p>
<h4>Isn&#8217;t that a lot of extra work to do?</h4>
<p>This may seem like a lot of work up front, but it&#8217;s all configuration work that you&#8217;d eventually do anyway.  If you&#8217;re really that concerned about the extra typing, I&#8217;ve got <a href="http://docs.insoshi.com/Git-Guides/quick-git-setup/configure_insoshi_local.sh?attredirects=0">a shell script for you</a>.</p>
<p>The extra work is worth the effort, because with this configuration</p>
<ul>
<li>My changes will be easily identifiable in my named branch</li>
<li>I can easily get updates from the main Insoshi repository</li>
<li>Any updates I&#8217;ve pulled into master and edge are automatically pushed up to my fork on GitHub</li>
</ul>
<p>The last one is a bonus because the default refspec for remotes is <tt>refs/heads/*:refs/heads/*</tt>. This means that the simple &#8216;git push&#8217; command will push up changes for all local branches that have a matching branch on the remote. And if I make it a point to pull in updates to my local master and edge but not work directly on them, my fork will match up with the official repository.</p>
<p>So what is the benefit of all this to open source projects like Insoshi?</p>
<ul>
<li>The easier it is for the contributor to pull in updates, the more likely it will be that the pull request will be for code that merges easily with the latest releases (with few conflicts)</li>
<li>You can tell if someone is pulling updates by looking at their master and edge branches and seeing if they match up with the latest branches on the main repository</li>
<li>By getting contributors in the habit of working on branches, you&#8217;re going to get better organized code contributions</li>
</ul>
<p>Basically, the less effort that&#8217;s required to bring in code via a pull request, the sooner it can be added to the project release.  And at the end of the day, that&#8217;s really what it&#8217;s all about.</p>
<h4>Putting (pushing and pulling) it all together</h4>
<p>Now that we&#8217;ve covered all the details, let&#8217;s go through the full set of steps needed to make a contribution to a project like Insoshi:</p>
<ol>
<li>Fork the Insoshi repository on GitHub:
<p style="text-align:center;"><a href="http://github.com/insoshi/insoshi/fork"><img class="aligncenter size-medium wp-image-190" title="Forking Insoshi at GitHub" src="http://insoshi.files.wordpress.com/2008/10/forking-insoshi-at-github.png?w=300&#038;h=184" alt="" width="300" height="184" /></a></p>
</li>
<li>Follow the Git steps above or use the <a href="http://docs.insoshi.com/Git-Guides/quick-git-setup/configure_insoshi_local.sh?attredirects=0">shell script</a> to set up your local repository</li>
<li>Check out the local branch, just to be sure:
<pre>$ git checkout long</pre>
</li>
<li>Make some changes (and remember your development branch is against &#8216;edge&#8217;) and commit them:
<pre>[make changes in a text editor]
$ git commit -m "My great contribution"
$ git push</pre>
</li>
<li>Go to your fork and branch at GitHub (I&#8217;m at <a href="http://github.com/long/insoshi/tree/long">long/insoshi @ long</a>) and click on the pull request button:
<p style="text-align:center;"><a href="http://insoshi.files.wordpress.com/2008/10/pull-request-from-branch-at-github.png"><img class="aligncenter size-medium wp-image-189" title="Pull Request from Branch at GitHub" src="http://insoshi.files.wordpress.com/2008/10/pull-request-from-branch-at-github.png?w=300&#038;h=194" alt="" width="300" height="194" /></a></p>
</li>
<li>Tell us about what you just did and make sure &#8220;insoshi&#8221; is a recipient:<a href="http://insoshi.files.wordpress.com/2008/10/pull-request-message.png"><img class="aligncenter size-medium wp-image-191" title="Pull Request Message" src="http://insoshi.files.wordpress.com/2008/10/pull-request-message.png?w=300&#038;h=223" alt="" width="300" height="223" /></a></li>
<li>Bask in the glory of being an open-source contributor!</li>
</ol>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/150/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/150/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/150/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/150/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/150/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/150/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/150/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/150/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/150/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/150/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=150&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/10/14/setting-up-your-git-repositories-for-open-source-projects-at-github/feed/</wfw:commentRss>
		<slash:comments>16</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/25498f6df76787138fcd0a976abf2c0f?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">long</media:title>
		</media:content>

		<media:content url="http://insoshi.files.wordpress.com/2008/10/forking-insoshi-at-github.png?w=300" medium="image">
			<media:title type="html">Forking Insoshi at GitHub</media:title>
		</media:content>

		<media:content url="http://insoshi.files.wordpress.com/2008/10/forking-insoshi-at-github.png?w=300" medium="image">
			<media:title type="html">Forking Insoshi at GitHub</media:title>
		</media:content>

		<media:content url="http://insoshi.files.wordpress.com/2008/10/pull-request-from-branch-at-github.png?w=300" medium="image">
			<media:title type="html">Pull Request from Branch at GitHub</media:title>
		</media:content>

		<media:content url="http://insoshi.files.wordpress.com/2008/10/pull-request-message.png?w=300" medium="image">
			<media:title type="html">Pull Request Message</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/10/14/setting-up-your-git-repositories-for-open-source-projects-at-github/</feedburner:origLink></item>
		<item>
		<title>Using Rails to serve different content to humans and robots</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/c7ju-v5mi5k/</link>
		<comments>http://blog.mhartl.com/2008/09/26/using-rails-to-serve-different-content-to-humans-and-robots/#comments</comments>
		<pubDate>Fri, 26 Sep 2008 18:38:54 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Insoshi]]></category>
		<category><![CDATA[Ruby on Rails]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=134</guid>
		<description><![CDATA[This post answers the question, How do you use Rails to do one thing for robots, and another thing for humans?  
Why would you want to do this?  In our case, the Insoshi home page forwards to a portal page that uses frames in order to have an interface that unifies the sites [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=134&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>This post answers the question, How do you use Rails to do one thing for robots, and another thing for humans?  </p>
<p>Why would you want to do this?  In our case, the <a href="http://insoshi.com/">Insoshi home page</a> forwards to a <a href="http://portal.insoshi.com/">portal page</a> that uses frames in order to have an interface that unifies the sites on the <tt>insoshi.com</tt> domain with those off-site, such as our <a href="http://github.com/insoshi/insoshi/tree">GitHub repository</a> and <a href="http://insoshi.lighthouseapp.com/projects/9331-insoshi/overview">bug tracker</a>.  The frames page is horribly search-unfriendly, though, so we serve bots the actual content of <tt>http://insoshi.com/home/index</tt> (our routes map <tt>/</tt> to <tt>/home/index</tt>).  (<em>Note</em>:  The front of the portal page as seen by a human is the <em>same</em> as the index page served to bots; be careful about doing anything else, since bots can punish you if you use this technique for anything slimy.)</p>
<p>Our method is to use a before filter in the Home controller.  Here&#8217;s the code (minus some irrelevant bits):</p>
<pre class="brush: ruby;">
class HomeController &lt; ApplicationController
  before_filter :forward_nonbots_to_portal, :only =&gt; &quot;index&quot;

  .
  .
  .

  private

    # Return true if the user agent is a bot.
    def robot?
      bot = /(Baidu|bot|Google|SiteUptime|Slurp|WordPress|ZIBB|ZyBorg)/i
      request.user_agent =~ bot
    end

    # Allow an explicit override of the forward_nonbots_to_portal.
    def no_redirect?
      params[:redirect] == 'false' or RAILS_ENV != 'production'
    end

    def forward_nonbots_to_portal
      redirect_to &quot;http://portal.insoshi.com&quot; unless robot? or no_redirect?
    end
end
</pre>
<p>The key here is the <tt>robot?</tt> method, which has a regex with a list of the most common bot user agents:</p>
<pre class="brush: ruby;">
    # Return true if the user agent is a bot.
    def robot?
      bot = /(Baidu|bot|Google|SiteUptime|Slurp|WordPress|ZIBB|ZyBorg)/i
      request.user_agent =~ bot
    end
</pre>
<p>If the user <em>isn&#8217;t</em> a bot, we redirect them to the portal.  </p>
<p>N.B.  The second boolean, <tt>no_redirect?</tt>, prevents the forwarding in development mode and also allows us to override the redirect by passing a <tt>redirect=false</tt> parameter.  This latter condition allows us to link directly to the home page, <em>without</em> a redirect, by using <tt>http://insoshi.com/?redirect=false</tt>.  In particular, the portal menu home link itself uses this URL, because otherwise clicking on the home link repeatedly would cause a bunch of nested portal pages to appear.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/134/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/134/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/134/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=134&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/09/26/using-rails-to-serve-different-content-to-humans-and-robots/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/09/26/using-rails-to-serve-different-content-to-humans-and-robots/</feedburner:origLink></item>
		<item>
		<title>Finding and fixing mass assignment problems in Rails applications</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/7XZfSXccsmE/</link>
		<comments>http://blog.mhartl.com/2008/09/21/finding-and-fixing-mass-assignment-problems-in-rails-applications/#comments</comments>
		<pubDate>Mon, 22 Sep 2008 01:14:11 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Insoshi]]></category>
		<category><![CDATA[Ruby on Rails]]></category>
		<category><![CDATA[mass assignment]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=91</guid>
		<description><![CDATA[Last week I received an email from Eric Chapweske (of Slantwise Design and the Rail Spikes blog) alerting me to mass assignment vulnerabilities in the Insoshi social network source code.  (See my post on mass assignment for a quick review of the concept, and don&#8217;t miss Eric&#8217;s mass assignment article for a more thorough treatment.) [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=91&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>Last week I received an email from Eric Chapweske (of <a href="http://slantwisedesign.com/">Slantwise Design</a> and the <a href="http://railspikes.com/">Rail Spikes blog</a>) alerting me to mass assignment vulnerabilities in the <a href="http://insoshi.com/">Insoshi social network</a> source code.  (See my <a href="http://blog.insoshi.com/2008/09/21/mass-assignment-in-rails-applications/">post on mass assignment</a> for a quick review of the concept, and don&#8217;t miss <a href="http://railspikes.com/2008/9/22/is-your-rails-application-safe-from-mass-assignment">Eric&#8217;s mass assignment article</a> for a more thorough treatment.)  I quickly set to work fixing the problems, and within a few hours of receiving the email I&#8217;d pushed out a patched version to the <a href="http://github.com/insoshi/insoshi/tree">Insoshi GitHub repository</a>.  Since the process was so instructive, and since mass assignment vulnerabilities are so common, I thought I&#8217;d share some of the details of what it took to fix them. </p>
<h4>Fixing the models and controllers</h4>
<p>The first step in solving mass assignment problems is to find them, so I whipped up a little <a href="http://github.com/mhartl/find_mass_assignment/tree/master"><tt>find_mass_assignment</tt> plugin</a> to make it easier:</p>
<pre>$ script/plugin install git://github.com/mhartl/find_mass_assignment.git</pre>
<p>(You&#8217;ll need Git and Rails 2.1 or later for this to work.)</p>
<p>This defines a Rake task to find mass assignment vulnerabilities.  (It works by searching through the controllers for likely mass assignment and then looking in the models to see if they <em>don&#8217;t</em> define <tt>attr_accessible</tt>.)  Let&#8217;s run it on the buggy Insoshi code and see what we get:</p>
<pre>$ rake find_mass_assignment

/path/to/app/controllers/activities_controller.rb
    46      @activity = Activity.new(params[:event])
    68        if @activity.update_attributes(params[:event])

/path/to/app/controllers/comments_controller.rb
    20      @comment = parent.comments.new(params[:comment].

/path/to/app/controllers/messages_controller.rb
    50      @message = Message.new(:parent_id    =&gt; original_message.id,
    61      @message = Message.new(params[:message].merge(:sender =&gt; current_person,

/path/to/app/controllers/photos_controller.rb
    39      @photo = Photo.new(params[:photo].merge(person_data))
    61        if @photo.update_attributes(:primary =&gt; true)

/path/to/app/controllers/posts_controller.rb
    59        if @post.update_attributes(params[:post])
    157          post = @topic.posts.new(params[:post].merge(:person =&gt; current_person))
    159          post = @blog.posts.new(params[:post])

/path/to/app/controllers/topics_controller.rb
    28      @topic = @forum.topics.new(params[:topic].merge(:person =&gt; current_person))
    44        if @topic.update_attributes(params[:topic])</pre>
<p>Yikes!  That&#8217;s a lot of problems.  How do we squash all these bugs?</p>
<p>One of the vulnerable models is the Post model, which is the base class for the ForumPost and BlogPost models.  We&#8217;ll use the ForumPost model as our example.  First we disable <tt>attr_accessible</tt> in the Post model, since we want to force all the derived classes to redefine it:</p>
<p><tt>app/models/post.rb</tt></p>
<pre class="brush: ruby;">
class Post &lt; ActiveRecord::Base
  include ActivityLogger
  has_many :activities, :foreign_key =&gt; &quot;item_id&quot;, :dependent =&gt; :destroy
  attr_accessible nil
end
</pre>
<p>Then we set <tt>attr_accessible</tt> in the ForumPost model to allow only the post body to be set by mass assignment:</p>
<p><tt>app/models/forum_post.rb</tt></p>
<pre class="brush: ruby;">
class ForumPost &lt; Post
  .
  .
  .

  attr_accessible :body

  belongs_to :topic,  :counter_cache =&gt; true
  belongs_to :person, :counter_cache =&gt; true

  validates_presence_of :body, :person
  validates_length_of :body, :maximum =&gt; 5000
  .
  .
  .
end
</pre>
<p>Then in the Posts controller we update</p>
<pre class="brush: ruby;">
  post = @topic.posts.build(params[:post].merge(:person =&gt; current_person))
</pre>
<p>to set the person attribute explicitly:</p>
<pre class="brush: ruby;">
  post = @topic.posts.build(params[:post])
  post.person = current_person
</pre>
<h4>Bypassing <tt>attr_accessible</tt></h4>
<p>This fixes the controller action, but unfortunately the corresponding RSpec specs fail.  Having a good test suite proved invaluable in fixing the mass assignment problems, but the tests use mass assignment themselves, and much of that code fails.  For example, here is part of the Post spec:</p>
<p><tt>spec/models/post_spec.rb</tt></p>
<pre class="brush: ruby;">
describe ForumPost do

  before(:each) do
    @post = topics(:one).build(:body =&gt; &quot;Hey there&quot;,
                               :person =&gt; people(:quentin))
  end
  .
  .
  .
end
</pre>
<p>This fails because of the attempt to set the <tt>person</tt> attribute by mass assignment.  We <em>could</em> fix this as in the controller:</p>
<pre class="brush: ruby;">
describe ForumPost do

  before(:each) do
    @post = topics(:one).build(:body =&gt; &quot;Hey there&quot;)
    @post.person = people(:quentin)
  end
  .
  .
  .
end
</pre>
<p>Unfortunately, the tests are <em>riddled</em> with this sort of code, and it&#8217;s a nightmare to make all such changes by hand.  Moreover, inside the tests we simply don&#8217;t care about mass assignment vulnerabilities, so making a bunch of cumbersome changes is particularly annoying.  Luckily, there&#8217;s a nice solution; after searching for a bit, I found an <a href="http://pastie.textmate.org/104042">inspiring Pastie</a>, which led me to open up <tt>ActiveRecord::Base</tt> and add some <em>unsafe</em> methods to create Active Record objects that bypass <tt>attr_accessible</tt>:  </p>
<p><tt>config/initializers/unsafe_build_and_create.rb</tt></p>
<pre class="brush: ruby;">
class ActiveRecord::Base

  # Build and create records unsafely, bypassing attr_accessible.
  # These methods are especially useful in tests and in the console.

  def self.unsafe_build(attrs)
    record = new
    record.unsafe_attributes = attrs
    record
  end

  def self.unsafe_create(attrs)
    record = unsafe_build(attrs)
    record.save
    record
  end

  def self.unsafe_create!(attrs)
    unsafe_build(attrs).save!
  end

  def unsafe_attributes=(attrs)
    attrs.each do |k, v|
      send(&quot;#{k}=&quot;, v)
    end
  end
end
</pre>
<p>(By putting this file in the <tt>config/initializers/</tt> directory, we ensure that the additions will be loaded automatically as part of the Rails environment.)</p>
<p>With these methods in hand, we still have to update the tests by hand, but the edits are much simpler (and many can be done by search-and-replace):</p>
<pre class="brush: ruby;">
describe ForumPost do

  before(:each) do
    @post = topics(:one).unsafe_build(:body =&gt; &quot;Hey there&quot;,
                                      :person =&gt; people(:quentin))
  end
  .
  .
  .
end
</pre>
<p>We can use these methods in the controllers, too, of course, but if we do, the word &#8220;unsafe&#8221; serves as a constant reminder that we&#8217;d better be <em>really</em> sure we want to bypass <tt>attr_accessible</tt>.</p>
<p>After making all the fixes, running our Rake task shows only one potentially vulnerable model:</p>
<pre>$ rake find_mass_assignment
/Users/mhartl/rails/insoshi_core/app/controllers/photos_controller.rb
    40      @photo = Photo.new(params[:photo].merge(person_data))
    62        if @photo.update_attributes(:primary =&gt; true)
</pre>
<p>Checking the Photo model, we see that it defines <tt>attr_protected</tt> instead of <tt>attr_accessible</tt> (and explains why):</p>
<p><tt>app/models/photo.rb</tt></p>
<pre class="brush: ruby;">
class Photo &lt; ActiveRecord::Base
  include ActivityLogger
  UPLOAD_LIMIT = 5 # megabytes

  # attr_accessible is a nightmare with attachment_fu, so use
  # attr_protected instead.
  attr_protected :id, :person_id, :parent_id, :created_at, :updated_at
  .
  .
  .
end
</pre>
<p>With that, we&#8217;re done, and our application is secure.  Huzzah!</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/91/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/91/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/91/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/91/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/91/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/91/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/91/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/91/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/91/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/91/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=91&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/09/21/finding-and-fixing-mass-assignment-problems-in-rails-applications/feed/</wfw:commentRss>
		<slash:comments>9</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/09/21/finding-and-fixing-mass-assignment-problems-in-rails-applications/</feedburner:origLink></item>
		<item>
		<title>Mass assignment in Rails applications</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/PL6pkual-GY/</link>
		<comments>http://blog.mhartl.com/2008/09/21/mass-assignment-in-rails-applications/#comments</comments>
		<pubDate>Mon, 22 Sep 2008 01:13:54 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Ruby on Rails]]></category>
		<category><![CDATA[mass assignment]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=82</guid>
		<description><![CDATA[This is a brief review of mass assignment in Rails.  See the follow-up post on Finding and fixing mass assignment problems in Rails applications for some more tips on how to find and fix mass assignment problems.
We&#8217;ll begin with a simple example.  Suppose an application has a User model that looks like this:

# [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=82&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>This is a brief review of mass assignment in Rails.  See the follow-up post on <a href="http://blog.insoshi.com/2008/09/21/finding-and-fixing-mass-assignment-problems-in-rails-applications/">Finding and fixing mass assignment problems in Rails applications</a> for some more tips on how to find and fix mass assignment problems.</p>
<p>We&#8217;ll begin with a simple example.  Suppose an application has a User model that looks like this:</p>
<pre class="brush: ruby;">
# == Schema Information
# Table name: users
#
#  id                         :integer(11)     not null, primary key
#  email                      :string(255)
#  name                       :string(255)
#  password                   :string(255)
#  admin                      :boolean(1)      not null
class User &lt; ActiveRecord::Base
  validates_presence_of :email, :password
  validates_uniqueness_of :email
  .
  .
  .
end
</pre>
<p>Note the presence of an <tt>admin</tt> boolean to identify administrative users.  With this model, the Users controller might have this standard update code:</p>
<pre class="brush: ruby;">
  def update
    @user = User.find(params[:id])

    respond_to do |format|
      if @user.update_attributes(params[:user])
        flash[:notice] = 'User was successfully updated.'
        format.html { redirect_to(@user) }
      else
        format.html { render :action =&gt; &quot;edit&quot; }
      end
    end
  end
</pre>
<p>This works fine, but note that the line</p>
<pre class="brush: ruby;">
  if @user.update_attributes(params[:user])
</pre>
<p>performs an update to the <tt>@user</tt> object through the <tt>params</tt> hash, assigning <em>all</em> the <tt>@user</tt> attributes at once&mdash;that is, as a <em>mass assignment</em>.</p>
<p>The problem with mass assignment is that some malicious [cr|h]acker might write a script to PUT something like <tt>name=New+Name&amp;admin=1</tt>, thereby adding himself as an administrative user!  This would be a Bad Thing&trade;. The standard solution to this problem is to use <tt>attr_accessible</tt> in the model to declare explicitly the attributes that can be modified by mass assignment.  To protect our User model, for example, we would write</p>
<pre class="brush: ruby;">
class User &lt; ActiveRecord::Base

  attr_accessible :email, :name, :password

  validates_presence_of :email, :password
  validates_uniqueness_of :email
  .
  .
  .
end
</pre>
<p>Since <tt>:admin</tt> isn&#8217;t included in the <tt>attr_accessible</tt> argument list, the User model&#8217;s <tt>admin</tt> attribute is safe from unwanted modification.</p>
<p>This seems simple enough, but the rub is that <em>remembering</em> to protect against mass assignment is difficult.  Using mass assignment doesn&#8217;t affect the normal operations of the site, so it&#8217;s hard to notice the problem.  Moreover, although you could shut off mass assignment globally, often there are many models that are used internally and never get modified directly by a web interface.  Not being able to use mass assignment for these models is inconvenient, and manually making all attributes <tt>attr_accessible</tt> is cumbersome and error-prone.  So, what&#8217;s a Rails developer to do?  </p>
<p>Spurred by an email from Eric Chapweske of <a href="http://slantwisedesign.com/">Slantwise Design</a>, I recently <a href="http://blog.insoshi.com/2008/09/21/finding-and-fixing-mass-assignment-problems-in-rails-applications/">audited the Insoshi social network for mass assignment vulnerabilities</a>.  Doing this manually was annoying, so in the process I developed a simple plugin to find likely vulnerabilities automatically, by searching through the controllers for likely mass assignment and then looking in the models to see if they <em>didn&#8217;t</em> define <tt>attr_accessible</tt>.  The result is a list of potential trouble spots.</p>
<p>To use the <a href="http://github.com/mhartl/find_mass_assignment/tree/master"><tt>find_mass_assignment</tt> plugin</a>, simply install it from GitHub as follows:</p>
<pre>$ script/plugin install git://github.com/mhartl/find_mass_assignment.git</pre>
<p>(You&#8217;ll need Git and Rails 2.1 or later for this to work.)  The plugin defines a Rake task to find mass assignment vulnerabilities; running it on the example Users controller from above would yield the following:</p>
<pre>$ rake find_mass_assignment

/path/to/app/controllers/users_controller.rb
  5  if @user.update_attributes(params[:user])</pre>
<p>This tells us that line 5 in the Users controller has a likely mass assignment vulnerability.  </p>
<p>The <tt>find_mass_assignment</tt> plugin doesn&#8217;t fix mass assignment problems automatically, but by making it more convenient to find them I hope it can significantly improve the odds that they will be caught (and fixed!) quickly.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/82/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/82/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/82/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/82/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/82/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/82/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/82/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/82/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/82/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/82/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=82&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/09/21/mass-assignment-in-rails-applications/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/09/21/mass-assignment-in-rails-applications/</feedburner:origLink></item>
		<item>
		<title>A security issue with Rails secret session keys</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/JrcDTR7xnE4/</link>
		<comments>http://blog.mhartl.com/2008/08/15/a-security-issue-with-rails-secret-session-keys/#comments</comments>
		<pubDate>Sat, 16 Aug 2008 04:53:51 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Git]]></category>
		<category><![CDATA[Insoshi]]></category>
		<category><![CDATA[Ruby on Rails]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=54</guid>
		<description><![CDATA[Like most projects that use Rails 2.1, the Insoshi source code ships with a &#8220;secret&#8221; string (which lives in environment.rb) needed for the new cookie-based sessions.  Recently, an alert observer noted that this raises a security issue in Insoshi sessions: the secret key is currently the same for all Insoshi installations, which opens the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=54&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>Like most projects that use Rails 2.1, the Insoshi source code ships with a &#8220;secret&#8221; string (which lives in <tt>environment.rb</tt>) needed for the new <a href="http://ryandaigle.com/articles/2007/2/21/what-s-new-in-edge-rails-cookie-based-sessions">cookie-based sessions</a>.  Recently, an <a href="http://github.com/trevorturk">alert observer</a> noted that this raises a security issue in Insoshi sessions: the secret key is currently the same for all Insoshi installations, which opens the sessions up to attack (as noted in <a href="http://groups.google.com/group/rubyonrails-core/browse_thread/thread/4d43c1fa2485f3e3">this discussion thread</a>).  This problem is not unique to Insoshi; it affects essentially any Rails application installed from source.</p>
<p>Part of the reason this problem isn&#8217;t more widely known is that projects generated using the <tt>rails</tt> script automatically receive a unique security string.  The way we&#8217;ve fixed the secret string problem at Insoshi involves piggybacking on the mechanism Rails already has for generating such strings, by replacing the hard-coded string with a file read:</p>
<p><tt>config/environment.rb</tt></p>
<p><em>Before:</em></p>
<pre class="brush: ruby;">

config.action_controller.session = {
    :session_key =&gt; '_instant_social_session',
    :secret      =&gt; '63143b62...8522327'
  }
</pre>
<p><em>After:</em></p>
<pre class="brush: ruby;">
.
.
.
require File.join(File.dirname(__FILE__), 'boot')
require 'rails_generator/secret_key_generator'

Rails::Initializer.run do |config|
  .
  .
  .
  # Your secret key for verifying cookie session data integrity.
  # If you change this key, all old sessions will become invalid!
  # Make sure the secret is at least 30 characters and all random,
  # no regular words or you'll be exposed to dictionary attacks.
  secret_file = File.join(RAILS_ROOT, &quot;secret&quot;)
  if File.exist?(secret_file)
    secret = File.read(secret_file)
  else
    secret = Rails::SecretKeyGenerator.new(&quot;insoshi&quot;).generate_secret
    File.open(secret_file, 'w') { |f| f.write(secret) }
  end
  config.action_controller.session = {
    :session_key =&gt; '_instant_social_session',
    :secret      =&gt; secret
  }
  .
  .
  .
</pre>
<p>(N.B.  The session key <tt>_instant_social_session</tt> is a hint about the origins of the name <em>Insoshi</em>.) In place of  a hard-coded string, the updated code uses the contents of a secret file, if it exists; otherwise, it makes a new string using the same machinery as the <tt>rails</tt> script (included with the line <tt>require 'rails_generator/secret_key_generator'</tt>) and writes it to the secret file.</p>
<p>It&#8217;s important at this point to prevent our source code management tool from versioning the <tt>secret</tt> file, since the whole point of this exercise is to prevent the secret key from being distributed with the source code.  Using Git, this is trivial; we just add &#8216;secret&#8217; to our <tt>.gitignore</tt> file.  (<em>Note:</em> if you are running an application on multiple servers, you should copy the same secret file to each one to ensure that sessions will work with a load-balancer.) Everyone using the Insoshi source code should pull from our <a href="http://github.com/insoshi/insoshi/tree/master">GitHub repository</a> to get the update.</p>
<h4>Handling session expiration</h4>
<p>Unfortunately, the above steps don&#8217;t completely solve our problem.  The comments in <tt>environment.rb</tt> note that &#8220;If you change this key, all old sessions will become invalid!&#8221;  That&#8217;s not quite accurate; the old sessions don&#8217;t merely become invalid: they actually raise an exception, so users with active sessions will be met with your application&#8217;s error page, and a <tt>CGI::Session::CookieStore::TamperedWithCookie</tt> exception will show up in your application&#8217;s log file.  (The error page goes away if the user reloads the page in their browser, but there&#8217;s no way for them to know that.)  Serving up error pages to all those users isn&#8217;t very friendly behavior, and we&#8217;d like to catch the exception and show the page they&#8217;re trying to access instead.</p>
<p>This isn&#8217;t as simple as it seems, because the exception gets raised deep inside the Rails internals.  We can figure out where by running in development mode, where the stack trace looks something like this:</p>
<pre>CGI::Session::CookieStore::TamperedWithCookie in HomeController#index 

vendor/rails/actionpack/lib/action_controller/session/cookie_store.rb:144:in `unmarshal'
vendor/rails/actionpack/lib/action_controller/session/cookie_store.rb:101:in `restore'
/usr/local/lib/ruby/1.8/cgi/session.rb:304:in `[]'
vendor/rails/actionpack/lib/action_controller/cgi_process.rb:136:in `session'
vendor/rails/actionpack/lib/action_controller/cgi_process.rb:168:in `stale_session_check!'
vendor/rails/actionpack/lib/action_controller/cgi_process.rb:116:in `session'
.
.
.</pre>
<p>To catch the exception, we need to override the default restore method in <tt>cookie_store.rb</tt>.  To do <em>that</em>, we need to load our change before the application loads, and the easiest way to do this is with a plugin, which we can generate with a script:</p>
<pre>$ script/generate plugin catch_cookie_exception</pre>
<p>Once we edit a couple files, the solution is complete:</p>
<p><tt>vendor/plugins/catch_cookie_exception/init.rb</tt></p>
<pre class="brush: ruby;">
require 'catch_cookie_exception'
</pre>
<p><tt>vendor/plugins/catch_cookie_exception/lib/catch_cookie_exception.rb</tt></p>
<pre class="brush: ruby;">
require 'cgi'
require 'cgi/session'
class CGI::Session::CookieStore
  # Restore session data from the cookie.
  # This method overrides the one in
  # actionpack/lib/action_controller/session/cookie_store.rb
  # in order to handle the case of a &quot;tampered&quot; cookie more gracefully.
  # The issue is that changing the 'secret' in config/environment.rb
  # breaks all sessions in such a way that everyone gets an error page
  # the first time they revisit the site.  Catching the exception here
  # prevents this ugly behavior.
  # This is in a plugin so that it loads after Rails but before environment.rb.
  def restore
    @original = read_cookie
    @data = unmarshal(@original) || {}
  rescue CGI::Session::CookieStore::TamperedWithCookie
    logger = Logger.new(&quot;#{RAILS_ROOT}/log/#{RAILS_ENV}.log&quot;)
    logger.warn &quot;Caught TamperedWithCookie exception on #{Time.now}&quot;
    @data = {}
  end
end
</pre>
<p>Note that, since the exception could be the result of someone attacking the site by tampering with their cookies, we log the exception for future reference.</p>
<p>UPDATE: The  <a href="http://github.com/mhartl/catch_cookie_exception/tree/master"><tt>catch_cookie_exception</tt> plugin</a> is now available at GitHub.</p>
<h4>Acknowledgments</h4>
<p>Thanks again to Trevor Turk for alerting us to this issue.</p>
<img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/insoshi.wordpress.com/54/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/insoshi.wordpress.com/54/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/54/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/54/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/54/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=54&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/08/15/a-security-issue-with-rails-secret-session-keys/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/08/15/a-security-issue-with-rails-secret-session-keys/</feedburner:origLink></item>
		<item>
		<title>Running Rails tests with autotest (ZenTest) and RSpec</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/5uvlmPQHaU8/</link>
		<comments>http://blog.mhartl.com/2008/07/28/running-rails-tests-with-autotest-zentest-and-rspec/#comments</comments>
		<pubDate>Mon, 28 Jul 2008 20:12:59 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[RSpec]]></category>
		<category><![CDATA[Ruby on Rails]]></category>
		<category><![CDATA[autotest]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=36</guid>
		<description><![CDATA[I recently ran into a problem with autotest (ZenTest) after upgrading to Rails 2.1 and RSpec 1.4.1.  Solving it was annoying, so I hope I can save others some trouble. Here’s the problem:
With RSpec, autotest hangs
Before the upgrade, I could run my specs just fine using the plain autotest command, but after the upgrade [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=36&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>I recently ran into a problem with autotest (ZenTest) after <a href="http://blog.insoshi.com/2008/07/03/a-rails-21-case-study-upgrading-the-insoshi-social-networking-platform/">upgrading to Rails 2.1</a> and RSpec 1.4.1.  Solving it was annoying, so I hope I can save others some trouble. Here’s the problem:</p>
<h4>With RSpec, autotest hangs</h4>
<p>Before the upgrade, I could run my specs just fine using the plain <tt>autotest</tt> command, but after the upgrade autotest just hangs:</p>
<pre>$ autotest
loading autotest/rails</pre>
<p>This is on a system running Mac OS X Tiger (10.4), Rails 2.1.0, RSpec 1.4.1, and ZenTest 3.10.0.  Strangely, my friend Long could run autotest fine on a virtually identical system (so you may not run into this problem), but for me this only increased the frustration. After much hand-wringing (and a lot of Google searching), I finally found a <a href="http://railsforum.com/viewtopic.php?id=19546">Rails Forum post</a> with a solution:</p>
<pre>$ RSPEC=true autotest</pre>
<p>Then autotest runs normally.</p>
<h4>Restoring the old RSpec/autotest behavior</h4>
<p>To get the old behavior, you can include the <tt>RSPEC</tt> variable in your environment rather than putting it explicitly on the command line. For example, on a system running bash, export the <tt>RSPEC</tt> variable as follows:</p>
<p>file: <tt>~/.bashrc</tt></p>
<pre class="brush: xml;">export RSPEC=true</pre>
<p>Then source it:</p>
<pre>$ . ~/.bashrc</pre>
<p>Now autotest should run as before:</p>
<pre>$ autotest</pre>
<p>Voil&agrave; (I hope)!</p>
<p><strong>UPDATE</strong>: Since making this post, I&#8217;ve learned that RSpec now ships with a program called <tt>autospec</tt> that solves the same problem; just run</p>
<pre>$ autospec</pre>
<p>and the specs should run as expected.</p>
<img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/insoshi.wordpress.com/36/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/insoshi.wordpress.com/36/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/36/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/36/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/36/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/36/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/36/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/36/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/36/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/36/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/36/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/36/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=36&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/07/28/running-rails-tests-with-autotest-zentest-and-rspec/feed/</wfw:commentRss>
		<slash:comments>23</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/07/28/running-rails-tests-with-autotest-zentest-and-rspec/</feedburner:origLink></item>
		<item>
		<title>Searching a Ruby on Rails application with Sphinx and Ultrasphinx</title>
		<link>http://feedproxy.google.com/~r/mhartl/~3/QncwA8cfZ9I/</link>
		<comments>http://blog.mhartl.com/2008/07/17/searching-a-ruby-on-rails-application-with-sphinx-and-ultrasphinx/#comments</comments>
		<pubDate>Thu, 17 Jul 2008 23:46:29 +0000</pubDate>
		<dc:creator>mhartl</dc:creator>
				<category><![CDATA[Ferret]]></category>
		<category><![CDATA[Insoshi]]></category>
		<category><![CDATA[Ruby on Rails]]></category>
		<category><![CDATA[Sphinx]]></category>
		<category><![CDATA[Ultrasphinx]]></category>

		<guid isPermaLink="false">http://insoshi.wordpress.com/?p=16</guid>
		<description><![CDATA[We recently switched the Insoshi social networking platform from a Ferret search engine to Sphinx (and Ultrasphinx), due to the well-known problems encountered with Ferret and due to our own experience of its instability on the Insoshi developer site.  (Sphinx is currently running on our demo site, and anyone who wants the Sphinx-enabled source [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=16&subd=insoshi&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>We recently switched the <a href="http://insoshi.com/">Insoshi social networking platform</a> from a Ferret search engine to <a href="http://www.sphinxsearch.com/">Sphinx</a> (and <a href="http://blog.evanweaver.com/files/doc/fauna/ultrasphinx/classes/Ultrasphinx.html">Ultrasphinx</a>), due to the well-known <a href="http://groups.google.com/group/rubyonrails-deployment/browse_thread/thread/980fe7cb20cb97dd">problems encountered with Ferret</a> and due to our own experience of its instability on the <a href="http://dogfood.insoshi.com/">Insoshi developer site</a>.  (Sphinx is currently running on our <a href="http://demo.insoshi.com/">demo site</a>, and anyone who wants the Sphinx-enabled source can grab edge Insoshi as described in the <a href="http://blog.insoshi.com/2008/07/03/a-rails-21-case-study-upgrading-the-insoshi-social-networking-platform/">Rails 2.1 upgrade post</a>.  We&#8217;ll merge it into the master branch within a couple weeks.)</p>
<p>The switch did not always go smoothly, and there are several gotchas that I thought might be helpful to discuss in case other people run into them. I&#8217;ve also included some material on using Ultrasphinx, since its documentation is a bit sparse.  For pedagogical purposes, I&#8217;ve simplified the Insoshi source slightly for this discussion; you don&#8217;t have to be familiar with the Insoshi codebase to follow this post.  (N.B. The actual production code contains a trick for dealing with more advanced filtering requirements, which will probably be the subject of a future post.)</p>
<h4>Installing Sphinx</h4>
<p>The first step, naturally enough, is to install Sphinx.  You can get the latest and greatest version at the <a href="http://sphinxsearch.com/downloads.html">Sphinx download page</a>.  (This blog post uses version 0.9.8, which was released just a couple of days before this post was written.)  Download the source, and then install it as follows:</p>
<pre>$ tar zxf sphinx-0.9.8.tar.gz
$ cd  sphinx-0.9.8
$ ./configure --with-pgsql
$ make
$ sudo make install</pre>
<p>The configure step ensures that Sphinx gets compiled with PostgreSQL support (MySQL comes for free).  We&#8217;ve had trouble getting all the Postgres stuff to work properly, but it doesn&#8217;t hurt to have it. If you&#8217;d rather omit the Postgres support, just use <tt>./configure</tt> by itself.</p>
<h4>Installing Ultrasphinx</h4>
<p>The second step is to install the Ultrasphinx plugin, which has one gem dependency:</p>
<pre>$ sudo gem install chronic</pre>
<p>The installation itself is trickier than it sounds; although there are plenty of tutorials that tell you how to do it, as far as I can tell they don&#8217;t work.  I tried a couple of different tacks, both of which failed.  First, I tried</p>
<pre>$ svn export svn://rubyforge.org/var/svn/fauna/ultrasphinx/trunk vendor/plugins/ultrasphinx
Export complete.</pre>
<p>The only problem is, this didn&#8217;t do anything; there was literally no change to my working copy.  I then tried a plugin install:</p>
<pre>$ script/plugin install svn://rubyforge.org/var/svn/fauna/ultrasphinx/trunk
Export complete.</pre>
<p>Still nothing.  After some time flailing about, I finally found a <a href="http://jamesgolick.com/2008/3/3/plugins-i-ve-known-and-loved-3-ultrasphinx">James on Software Sphinx/Ultrasphinx post</a>, which suggested cloning his GitHub fork of Ultrasphinx.  That worked at first, but later on I encountered a clash with the latest version of will_paginate:</p>
<pre>WillPaginate: You are using a paginated collection of class
Ultrasphinx::Search which conforms to the old API of WillPaginate::Collection
by using `page_count`, while the current method name is `total_pages`. Please
upgrade yours or 3rd-party code that provides the paginated collection.</pre>
<p>Luckily, with some judicious Googling I was able to find a second repository at GitHub, whose most recent commit as of this writing is <a href="http://github.com/DrMark/ultrasphinx/commit/cbe4ef9b00c59e3670e60965595a7c9aa164f914">updating the code to work with the latest will_paginate</a>, which certainly looked promising.  And, indeed, it worked beautifully, so I&#8217;m happy to recommend it:</p>
<pre>$ git clone git://github.com/DrMark/ultrasphinx.git vendor/plugins/ultrasphinx
$ rm -rf vendor/plugins/ultrasphinx/.git</pre>
<p>(This is one of the many reasons GitHub rocks; if the &#8220;official&#8221; version of a plugin is unavailable or out of date, you still might be able to find an updated fork on GitHub.)</p>
<h4>Configuring Ultrasphinx</h4>
<p>To configure Ultrasphinx, I followed the config instructions at the main Ultrasphinx site:</p>
<pre>Next, copy the examples/default.base file to RAILS_ROOT/config/ultrasphinx/default.base.
This file sets up the Sphinx daemon options such as port, host, and index location.</pre>
<p>Since many of the Insoshi fields allow HTML, the search results are better if we strip HTML tags first:</p>
<p><tt>config/ultrasphinx/default.base</tt></p>
<pre>index
{
  .
  .
  .
  # HTML-specific options
  html_strip = 1
}</pre>
<p>N.B. This is a replacement for the older <tt>strip_html</tt> syntax, used inside the <tt>source</tt> section:</p>
<p><tt>config/ultrasphinx/default.base</tt></p>
<pre>source
{
  # Individual SQL source options
  sql_ranged_throttle = 0
  sql_range_step = 5000
  sql_query_post =
  strip_html = 1
}</pre>
<p>If you get a warning like</p>
<pre>WARNING: key 'strip_html' is deprecated in config/ultrasphinx/development.conf line 24;
use 'html_strip (per-index)' instead.</pre>
<p>just remove the <tt>strip_html</tt> line and put an <tt>html_strip</tt> line in its place (taking care to put it in the <tt>index</tt> section of the configuration file).</p>
<h4>Bootstrapping Ultrasphinx</h4>
<p>Now we&#8217;re ready to fire up Ultrasphinx, which uses Sphinx to build up a search index of our database:</p>
<pre>$ rake ultrasphinx:bootstrap</pre>
<p>There&#8217;s just one hitch: many people (including me) get an error at this stage:</p>
<pre>dyld: Library not loaded: /usr/local/mysql/lib/mysql/libmysqlclient.15.dylib
  Referenced from: /usr/local/bin/indexer
  Reason: image not found</pre>
<p>I found a <a href="http://freelancing-gods.com/posts/sphinx_quick_fix">solution</a> using the canonical &#8220;Google the error message&#8221; method.  There&#8217;s something screwy with the location of the MySQL libraries, but it&#8217;s nothing a little symlink couldn&#8217;t fix:</p>
<pre>$ sudo ln -s /usr/local/mysql/lib /usr/local/mysql/lib/mysql</pre>
<h4>Testing Sphinx and Ultrasphinx</h4>
<p>In principle, things are working now under the hood; we just need to add in some code to our models and controllers to execute the searches.  I prefer test-driven development, though, so the next priority is to get Sphinx and Ultrasphinx working in a test environment.</p>
<p>It&#8217;s important to stop the Ultrasphinx daemon, which might be running in development mode if you used <tt>rake ultrasphinx:bootstrap</tt> above:</p>
<pre>$ rake ultrasphinx:daemon:stop</pre>
<p>Then make a test-specific configuration file:</p>
<p><tt>config/ultrasphinx/test.base</tt></p>
<pre>source
{
  # Individual SQL source options
  sql_ranged_throttle = 0
  sql_range_step = 999999999
  sql_query_post =
}
.
.
.
index
{
  .
  .
  .
  # HTML-specific options
  html_strip = 1
}</pre>
<p>The line <tt>sql_range_step = 999999999</tt> here is key.  The <tt>sql_range_step</tt> variable controls how much Ultrasphinx increases the ids of the rows as it indexes; by default, it&#8217;s 5000, but Insoshi uses <a href="http://m.onkey.org/2007/10/26/fixtures-go-foxy">foxy fixtures</a>, which often create objects with <em>huge</em> ids.  As a result, the indexing step can take a long time (several minutes), even for a tiny test database.  Setting <tt>sql_range_step</tt> to a larger step size solves the problem.</p>
<p>With that done, we&#8217;re ready to fire things up:</p>
<pre>$ rake ultrasphinx:bootstrap RAILS_ENV=test</pre>
<p>One problem we run into is that the Sphinx test daemon might not always be running, so it would be nice to skip the search tests (or specs) if this is the case.  For example, suppose that we have a Searches controller (whose <tt>index</tt> action will handle searches).  Here is a skeleton for the Searches controller specs that runs only when Sphinx is running:</p>
<p><tt>spec/controllers/searches_controller_spec.rb</tt></p>
<pre class="brush: ruby;">
# Return a list of system processes.
def processes
  process_cmd = case RUBY_PLATFORM
                when /djgpp|(cyg|ms|bcc)win|mingw/ then 'tasklist /v'
                when /solaris/                     then 'ps -ef'
                else
                  'ps aux'
                end
  `#{process_cmd}`
end

# Return true if the search daemon is running.
def testing_search?
  processes.include?('searchd')
end

describe SearchesController do
  .

  .
  .
end if testing_search?
</pre>
<p>(A blog post on <a href="http://stephencelis.com/archive/2008/4/testing-with-ultrasphinx">testing with Ultrasphinx</a> proved useful in this context.)</p>
<h4>Writing the first tests</h4>
<p>OK, now we&#8217;re ready to write some concrete tests. Some basic tests (using RSpec) might look like these:</p>
<p><tt>spec/controllers/searches_controller_spec.rb</tt></p>
<pre class="brush: ruby;">
describe SearchesController do

  describe &quot;Person searches&quot; do

    it &quot;should search by name&quot; do
      get :index, :q =&gt; &quot;quentin&quot;, :model =&gt; &quot;Person&quot;
      assigns(:results).should == [people(:quentin)].paginate
    end

    it &quot;should search by description&quot; do
      get :index, :q =&gt; &quot;I'm Quentin&quot;, :model =&gt; &quot;Person&quot;
      assigns(:results).should == [people(:quentin)].paginate
    end
  end
end if testing_search?
</pre>
<p>Here we&#8217;ve passed a <tt>model</tt> parameter in anticipation of using a single action to search multiple models.</p>
<p>The specs fail, of course:</p>
<pre>$ script/spec spec/controllers/searches_controller_spec.rb
2 examples, 2 failures</pre>
<p>Apart from the <tt>if testing_search?</tt> clause, there&#8217;s nothing here beyond vanilla RSpec, so in what follows I won&#8217;t bother showing any more specs.</p>
<h4><tt>Person</tt>: Basic indexing</h4>
<p>Now we&#8217;re ready for some basic searching. Suppose we have a Person model with name and description fields, which we want to enable for searching.  We need the <a href="http://blog.evanweaver.com/files/doc/fauna/ultrasphinx/classes/ActiveRecord/Base.html#M000001"><tt>is_indexed</tt> method from Ultrasphinx</a>:</p>
<p><tt>app/models/person.rb</tt></p>
<pre class="brush: ruby;">
class Person &lt; ActiveRecord::Base
  is_indexed :fields =&gt; [ 'name', 'description' ]
  .
  .
  .
end
</pre>
<p>Then a sample Searches controller index might look like this:</p>
<p><tt>app/controllers/searches_controller.rb</tt></p>
<pre class="brush: ruby;">
def index
  query = params[:q].strip
  page  = params[:page] || 1
  model = params[:model]
  filters = {}
  @search = Ultrasphinx::Search.new(:query =&gt; query,
                                    :page =&gt; page,
                                    :class_names =&gt; model,
                                    :filters =&gt; filters)
  @search.run
  @results = @search.results
end
</pre>
<p>Note the use of a <tt>:page</tt> option; Ultrasphinx works with the <tt>will_paginate</tt> plugin out of the box.</p>
<p>A sample search box partial might look like this:</p>
<p><tt>app/views/searches/_box.html.erb</tt></p>
<pre class="brush: ruby;">
&lt;% form_tag searches_path, :method =&gt; :get do %&gt;
  &lt;fieldset&gt;
    &lt;%= text_field_tag :q, h(params[:q]), :maxlength =&gt; 50 %&gt;
    &lt;%= submit_tag &quot;Search&quot; %&gt;
    &lt;%= hidden_field_tag &quot;model&quot;, search_model %&gt;
  &lt;/fieldset&gt;
&lt;% end %&gt;
</pre>
<p>where <tt>search_model</tt> is just a helper that inspects <tt>params</tt> and returns the name of the model being searched.  (For example:</p>
<p><tt>app/helpers/searches_helper.rb</tt></p>
<pre class="brush: ruby;">
module SearchesHelper

  # Return the model to be searched based on params.
  def search_model
    return &quot;Person&quot;    if params[:controller] =~ /home/
    return &quot;ForumPost&quot; if params[:controller] =~ /forums/
    params[:model] || params[:controller].classify
  end
end
</pre>
<p>where <tt>params[:controller].classify</tt> automagically returns the string <tt>"Person"</tt> inside the People controller and <tt>"Message"</tt> inside the Messages controller.)</p>
<p>As long as the test database contains the appropriate user (in our case, Quentin from <tt>restful_authentication</tt>), the specs should pass once we reindex:</p>
<pre>$ rake ultrasphinx:bootstrap RAILS_ENV=test
$ script/spec spec/controllers/searches_controller_spec.rb
2 examples, 0 failures</pre>
<p>If they fail, chances are that either (1) there&#8217;s some rogue development daemon running or (2) we forgot to reindex the test database after changing a model.  If this happens, you can be extra paranoid by recycling everything:</p>
<pre>$ rake ultrasphinx:daemon:stop
$ rake ultrasphinx:bootstrap RAILS_ENV=test</pre>
<h4><tt>Message</tt>: Ultrasphinx with conditions and filtering</h4>
<p>One common task is to put a condition on a search result.  For example, suppose we have a Message model with a subject and content we want to index, but with &#8220;trashed&#8221; messages we want to exclude.  Suppose further that recipients trash messages by setting a <tt>recipient_deleted_at</tt> attribute in the Message model.  Untrashed messages would then have a <tt>NULL</tt> value for <tt>recipient_deleted_at</tt>:</p>
<p><tt>app/models/message.rb</tt></p>
<pre class="brush: ruby;">
class Message &lt; ActiveRecord::Base
  is_indexed :fields =&gt; [ 'subject', 'content', 'recipient_id' ],
             :conditions =&gt; &quot;recipient_deleted_at IS NULL&quot;
  .
  .
  .
end
</pre>
<p>Of course, when searching through messages for a particular person, we should only return messages actually sent to that person.  This is why we added the <tt>recipient_id</tt> to the index fields above; this way, we can use an Ultrasphinx <em>filter</em> to restrict the results appropriately in the Searches controller:</p>
<p><tt>app/controllers/searches_controller.rb</tt></p>
<pre class="brush: ruby;">
def index
  query = params[:q].strip
  page  = params[:page] || 1
  model = params[:model]
  filters = {}
  if model == &quot;Message&quot;
    # Restrict message results to those sent to the current person.
    filters['recipient_id'] = current_person.id
  end
  @search = Ultrasphinx::Search.new(:query =&gt; params[:q],
                                    :page =&gt; params[:page] || 1,
                                    :class_names =&gt; params[:model],
                                    :filters =&gt; filters)
  @search.run
  @results = @search.results
end
</pre>
<p>Of course, this requires an appropriately defined <tt>current_person</tt> object in line 8, which we assume is taken care of by the application&#8217;s authentication scheme.</p>
<h4><tt>ForumPost</tt>: Ultrasphinx with Single Table Inheritance (STI) and associations</h4>
<p>Our final example combines conditions with an <em>include</em>.  Insoshi has a ForumPost model that inherits from a Post base class (which is also used for blog posts) using Single Table Inheritance (STI).  We want to restrict forum searches to the body of forum posts, <em>excluding</em> blog posts.  We also want to include the topic name in searches, so that a post &#8220;Lorem ipsum&#8221; under topic &#8220;Foobar&#8221; will show up for both the queries &#8220;Lorem&#8221; and &#8220;Foobar&#8221;. We can achieve this by using a <tt>conditions</tt> clause on the STI type, while using an <tt>include</tt> for the topic association:</p>
<pre class="brush: ruby;">
class ForumPost &lt; Post
  is_indexed :fields =&gt; [ 'body' ],
             :conditions =&gt; &quot;type = 'ForumPost'&quot;,
             :include =&gt; [{:association_name =&gt; 'topic', :field =&gt; 'name'}]
  belongs_to :topic
  .
  .
  .
end
</pre>
<p>(If we leave out the <tt>type</tt> condition, Ultrasphinx happily indexes all the blog posts as well.  Rails then complains when trying to make a new ForumPost using a BlogPost id.)</p>
<p>With that, we&#8217;ve covered all our basic search needs.  As noted above, there&#8217;s one more advanced technique being used at Insoshi (handling searches on boolean attributes such as <tt>deactivated</tt>), which I&#8217;ll probably cover in a later post.  It&#8217;s also worth noting that, unlike Ferret, Sphinx doesn&#8217;t update the search index with every Active Record update; you need to update the index periodically with a cron job. Take a look at the <a href="http://blog.evanweaver.com/files/doc/fauna/ultrasphinx/files/DEPLOYMENT_NOTES.html">Ultrasphinx deployment notes</a> for more details.</p>
<h4>TextMate Footnotes and Ultrasphinx</h4>
<p>Finally, there&#8217;s a minor incompatibility between Ultrasphinx and the latest (Rails 2.1-compatible) TextMate Footnotes, which gives the following error (at least when using vendored Rails):</p>
<pre>activesupport/lib/active_support/dependencies.rb:275:in `load_missing_constant':
uninitialized constant Footnotes::Filter (NameError)</pre>
<p>This is because Ultrasphinx is looking for the Rails file <tt>initializer.rb</tt>, but instead it finds <tt>initializer.rb</tt> as defined by Footnotes.  The fix is to change &#8220;initializer&#8221; to something else (say, &#8220;loader&#8221;) everywhere; see <a href="http://github.com/mhartl/rails-footnotes/tree/mhartl">my fork of Footnotes at GitHub</a> for an example.</p>
<img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/insoshi.wordpress.com/16/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/insoshi.wordpress.com/16/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/insoshi.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/insoshi.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/insoshi.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/insoshi.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/insoshi.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/insoshi.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/insoshi.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/insoshi.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/insoshi.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/insoshi.wordpress.com/16/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.mhartl.com&blog=3368824&post=16&subd=insoshi&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://blog.mhartl.com/2008/07/17/searching-a-ruby-on-rails-application-with-sphinx-and-ultrasphinx/feed/</wfw:commentRss>
		<slash:comments>27</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/ffda7d145b83c4b118f982401f962ca6?s=96&amp;d=identicon&amp;r=G" medium="image">
			<media:title type="html">mhartl</media:title>
		</media:content>
	<feedburner:origLink>http://blog.mhartl.com/2008/07/17/searching-a-ruby-on-rails-application-with-sphinx-and-ultrasphinx/</feedburner:origLink></item>
	</channel>
</rss>
