<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">

<channel>
	<title>Engine Yard Blog</title>
	
	<link>http://www.engineyard.com/blog</link>
	<description />
	<lastBuildDate>Sat, 21 Nov 2009 00:43:05 +0000</lastBuildDate>
	<generator>http://wordpress.org/?v=2.8.4</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" href="http://www.engineyard.com/feed/" type="application/rss+xml" /><feedburner:feedFlare href="http://add.my.yahoo.com/rss?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://us.i1.yimg.com/us.yimg.com/i/us/my/addtomyyahoo4.gif">Subscribe with My Yahoo!</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsgator.com/ngs/subscriber/subext.aspx?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.newsgator.com/images/ngsub1.gif">Subscribe with NewsGator</feedburner:feedFlare><feedburner:feedFlare href="http://feeds.my.aol.com/add.jsp?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://o.aolcdn.com/favorites.my.aol.com/webmaster/ffclient/webroot/locale/en-US/images/myAOLButtonSmall.gif">Subscribe with My AOL</feedburner:feedFlare><feedburner:feedFlare href="http://www.bloglines.com/sub/http://www.engineyard.com/feed/" src="http://www.bloglines.com/images/sub_modern11.gif">Subscribe with Bloglines</feedburner:feedFlare><feedburner:feedFlare href="http://www.netvibes.com/subscribe.php?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.netvibes.com/img/add2netvibes.gif">Subscribe with Netvibes</feedburner:feedFlare><feedburner:feedFlare href="http://fusion.google.com/add?feedurl=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://buttons.googlesyndication.com/fusion/add.gif">Subscribe with Google</feedburner:feedFlare><feedburner:feedFlare href="http://www.pageflakes.com/subscribe.aspx?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.pageflakes.com/ImageFile.ashx?instanceId=Static_4&amp;fileName=ATP_blu_91x17.gif">Subscribe with Pageflakes</feedburner:feedFlare><feedburner:feedFlare href="http://www.plusmo.com/add?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://plusmo.com/res/graphics/fbplusmo.gif">Subscribe with Plusmo</feedburner:feedFlare><feedburner:feedFlare 
href="http://www.thefreedictionary.com/_/hp/AddRSS.aspx?http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://img.tfd.com/hp/addToTheFreeDictionary.gif">Subscribe with The Free Dictionary</feedburner:feedFlare><feedburner:feedFlare href="http://www.bitty.com/manual/?contenttype=rssfeed&amp;contentvalue=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.bitty.com/img/bittychicklet_91x17.gif">Subscribe with Bitty Browser</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsalloy.com/?rss=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.newsalloy.com/subrss3.gif">Subscribe with NewsAlloy</feedburner:feedFlare><feedburner:feedFlare href="http://www.live.com/?add=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://tkfiles.storage.msn.com/x1piYkpqHC_35nIp1gLE68-wvzLZO8iXl_JMledmJQXP-XTBOLfmQv4zhj4MhcWEJh_GtoBIiAl1Mjh-ndp9k47If7hTaFno0mxW9_i3p_5qQw">Subscribe with Live.com</feedburner:feedFlare><feedburner:feedFlare href="http://mix.excite.eu/add?feedurl=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://image.excite.co.uk/mix/addtomix.gif">Subscribe with Excite MIX</feedburner:feedFlare><feedburner:feedFlare href="http://download.attensa.com/app/get_attensa.html?feedurl=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.attensa.com/blogs/attensa/WindowsLiveWriter/BadgeredintoBadges_10C02/attensa_feed_button5.gif">Subscribe with Attensa for Outlook</feedburner:feedFlare><feedburner:feedFlare href="http://www.webwag.com/wwgthis.php?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.webwag.com/images/wwgthis.gif">Subscribe with Webwag</feedburner:feedFlare><feedburner:feedFlare href="http://www.podcastready.com/oneclick_bookmark.php?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.podcastready.com/images/podcastready_button.gif">Subscribe with Podcast Ready</feedburner:feedFlare><feedburner:feedFlare href="http://www.flurry.com/pushRssFeed.do?r=fb&amp;url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" 
src="http://www.flurry.com/images/flurry_rss_logo2.gif">Subscribe with Flurry</feedburner:feedFlare><feedburner:feedFlare href="http://www.wikio.com/subscribe?url=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.wikio.com/shared/img/add2wikio.gif">Subscribe with Wikio</feedburner:feedFlare><feedburner:feedFlare href="http://www.dailyrotation.com/index.php?feed=http%3A%2F%2Fwww.engineyard.com%2Ffeed%2F" src="http://www.dailyrotation.com/rss-dr2.gif">Subscribe with Daily Rotation</feedburner:feedFlare><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com" /><item>
		<title>The State of XML Parsing in Ruby (Circa 2009)</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/RsA0p4IhYK8/</link>
		<comments>http://www.engineyard.com/blog/2009/xml-parsing-in-ruby/#comments</comments>
		<pubDate>Fri, 20 Nov 2009 18:30:29 +0000</pubDate>
		<dc:creator>Nick Sieger</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[Nokogiri]]></category>
		<category><![CDATA[REXML]]></category>
		<category><![CDATA[XML]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2917</guid>
		<description><![CDATA[Although many of us wish we could consume the web through a magic programmer portal that shields us and our code from all the pointy angle brackets, the reality that is the legacy of HTML, Atom and RSS on the web leaves us little choice but to soldier on. So let’s take a look at what Ruby-colored armor is available to us as we continue our quest to slay the XML dragons.]]></description>
			<content:encoded><![CDATA[<p>It’s almost the end of 2009, and I have to ask: are we through dealing with XML yet?</p>
<p>Although many of us wish we could consume the web through a magic programmer portal that shields us and our code from all the pointy angle brackets, the reality that is the legacy of HTML, Atom and RSS on the web leaves us little choice but to soldier on. So let’s take a look at what Ruby-colored armor is available to us as we continue our quest to slay the XML dragons.</p>
<h2 id="background">Background</h2>
<p>Historically, Ruby has had a number of options for dealing with structured markup, though oddly none have reached a solid consensus among Ruby developers as the “go to” library. The earliest available library seems to be Yoshida Masato’s <a href="http://www.yoshidam.net/Ruby.html#xmlparser"><code>XMLParser</code></a>, which wraps <a href="http://expat.sourceforge.net/">Expat</a> and was first released around the time that Expat <em>itself</em> was released, back in 1998. A pure Ruby parser by Jim Menard called <a href="http://nqxml.sourceforge.net/">NQXML</a> appeared in 2001, though it never matured to the level of a robust XML parser.</p>
<p>In late 2001, <a href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/24744">Matz expressed his desire for out of the box XML support</a>, but sadly, nothing appeared in Ruby’s standard library until 2003, when <a href="http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&amp;revision=3925">REXML was imported</a> for the <a href="http://svn.ruby-lang.org/repos/ruby/tags/v1_8_0/doc/NEWS">1.8.0 release</a>. After reading bike-shed discussions like <a href="http://blade.nagaokaut.ac.jp/cgi-bin/vframe.rb/ruby/ruby-talk/24931?24875-26245">this one on ruby-talk in November 2001</a>, or <a href="http://web.archive.org/web/20061023014435/wiki.rubygarden.org/Ruby/page/show/XMLinRuby">this wayback-machine page from the old RubyGarden wiki</a>, it’s not hard to see why. Meanwhile, other language runtimes, such as <a href="http://docs.python.org/library/markup.html">Python</a> and <a href="https://jaxp-sources.dev.java.net/nonav/docs/api/">Java</a>, moved along and built solid, acceptable foundations, making Ruby’s omission seem more glaring.</p>
<p>But all was not lost: Ruby has always had a quality without a name that made it a great language for distilling an API. All that was needed was an infusion of interest and talent in Ruby, and a few more experiments and iterations.</p>
<p>Fast forward to the present time, and all those chips have fallen. We’ve seen evolution from <a href="http://www.germane-software.com/software/rexml/">REXML</a> to <a href="http://libxml.rubyforge.org/">libxml-ruby</a> to <a href="http://wiki.github.com/hpricot/hpricot">Hpricot</a>, and finally to <a href="http://nokogiri.org/">Nokogiri</a>. So, is the XML landscape on Ruby so dire? Certainly not, as you’ll see by the end of this article! While the standard library support for XML hasn’t progressed beyond REXML yet, state-of-the-art solutions are a few keystrokes away.</p>
<h2 id="xml_apis">XML APIs</h2>
<p>A big part of what makes XML such a pain to work with is the APIs. We Rubyists tend to have an especially low tolerance for friction in API design, and we really feel it when we work with XML. If XML is just a tree structure, why isn’t navigating it as simple and elegant as traversing a Ruby <code>Enumerable</code>?</p>
<p>The canonical example of API craptasticism is undoubtedly the <a href="http://www.w3.org/TR/DOM-Level-2-Core/">W3C DOM API</a>. For proof, observe the meteoric rise of <a href="http://jquery.com">jQuery</a> in the JavaScript world. While it would be easy to fill an entire article with criticisms regarding the DOM, <a href="http://www.artima.com/intv/dom.html">it’s been done before</a>. (Incidentally, read the <a href="http://www.artima.com/intv/schema.html">whole series of interviews with Elliotte Rusty Harold</a> for a series of insights on API design, schema evolution, and more.)</p>
<p>Instead, we’ll take a brief exploratory tour of some Ruby XML APIs using code examples. Though some of the examples may seem trivially short, don’t underestimate their power. Conciseness and readability are Ruby’s gifts to the library authors and they’re being put to good use.</p>
<p>The libraries we’ll use for comparison are REXML, Nokogiri, and JAXP, <a href="http://en.wikipedia.org/wiki/Java_API_for_XML_Processing">Java’s XML parsing APIs</a> (via JRuby).</p>
<h2 id="parsing">Parsing</h2>
<p>The simplest possible thing to do in XML is to hand the library some XML and get back a document.</p>
<p><em>REXML</em></p>
<pre>require 'rexml/document'
document = REXML::Document.new(xml)</pre>
<p><em>Nokogiri</em></p>
<pre>require 'nokogiri'
document = Nokogiri::XML(xml)</pre>
<p>Both REXML and Nokogiri more or less get this right. What’s also nice is that they both transparently accept either an IO-like object or a string. Contrast this to Java:</p>
<p><em>JAXP/JRuby</em></p>
<pre>factory = javax.xml.parsers.DocumentBuilderFactory.newInstance
factory.namespace_aware = true # unfortunately, important!
parser = factory.newDocumentBuilder
# String
document = parser.parse(xml)
# IO
document = parser.parse(xml.to_inputstream)</pre>
<p>In that familiar Java style, the JAXP approach forces you to choose from many options and write more code for the happy path. JRuby helps you a little bit by converting a Ruby string into a Java string, but needs a little help with intent for converting an <code>IO</code> to a Java <code>InputStream</code>.</p>
<h2 id="xpath">XPath</h2>
<p>Now that we’ve got a document object, let’s query it via XPath, assuming the underlying format is an Atom feed. Here is the code to grab the entries’ titles and store them as an array of strings:</p>
<p><em>REXML</em></p>
<pre>elements = REXML::XPath.match(document.root, "//atom:entry/atom:title/text()",
                              "atom" =&gt; "http://www.w3.org/2005/Atom")
titles = elements.map {|el| el.value }</pre>
<p><em>Nokogiri</em></p>
<pre>elements = document.xpath("//atom:entry/atom:title/text()",
                          "atom" =&gt; "http://www.w3.org/2005/Atom")
titles = elements.map {|e| e.to_s}</pre>
<p>Again, both REXML and Nokogiri clock in at similar code sizes, but subtle differences begin to emerge. Nokogiri’s use of <code>#xpath</code> as an instance method on the document object feels more natural as a way of drilling down for further detail. Also, note that both APIs return DOM objects for the text, so we need to take one more step to convert them to pure Ruby strings. Here, Nokogiri’s use of the standard <code>String#to_s</code> method is more intuitive; <code>REXML::Text</code>’s version returns the raw text without the entities replaced.</p>
<p>Unfortunately, doing XPath in Java gets a bit more complicated. First we need to construct an <code>XPath</code> object. At least JRuby helps us a bit here–we can create an instance of the <code>NamespaceContext</code> interface completely in Ruby, and omit the methods we don’t care about.</p>
<p><em>JAXP/JRuby</em></p>
<pre>xpath = javax.xml.xpath.XPathFactory.newInstance.newXPath
ns_context = Object.new
def ns_context.getNamespaceURI(prefix)
  {"atom" =&gt; "http://www.w3.org/2005/Atom"}[prefix]
end
xpath.namespace_context = ns_context</pre>
<p>Next, we evaluate the expression and construct the array titles:</p>
<p><em>JAXP/JRuby</em></p>
<pre>nodes = xpath.evaluate("//atom:entry/atom:title/text()",
                       document, javax.xml.xpath.XPathConstants::NODESET)
titles = []
0.upto(nodes.length-1) do |i|
  titles &lt;&lt; nodes.item(i).node_value
end</pre>
<p>That last bit where we need to externally iterate the DOM API is particularly un-Ruby-like. With JRuby we can mix in some methods to the NodeList class:</p>
<p><em>JAXP/JRuby</em></p>
<pre>module org::w3c::dom::NodeList
  include Enumerable
  def each
    0.upto(length - 1) do |i|
      yield item(i)
    end
  end
end</pre>
<p>And replace the external iteration with a more natural internal one:</p>
<p><em>JAXP/JRuby</em></p>
<pre>titles = nodes.map {|e| e.node_value}</pre>
<p>This kind of technique tends to become a fairly common occurrence when coding Ruby to Java libraries in JRuby. Fortunately Ruby makes it simple to hide away the ugliness in the Java APIs!</p>
<h2 id="walking_the_dom">Walking the DOM</h2>
<p>Say we’d like to explore the DOM. Both REXML and Nokogiri provide multiple ways of doing this, with parent/child/sibling navigation methods. They also each sport a recursive descent method, which is quite convenient.</p>
<p><em>REXML</em></p>
<pre>titles = []
document.root.each_recursive do |elem|
  titles &lt;&lt; elem.text.to_s if elem.name == "title"
end</pre>
<p><em>Nokogiri</em></p>
<pre>titles = []
document.root.traverse do |elem|
  titles &lt;&lt; elem.content if elem.name == "title"
end</pre>
<p>Needless to say, Java’s DOM API has no such convenience method, so we have to write one. But again, JRuby makes it easy to Rubify the code. Note that our <code>#traverse</code> method makes use of our <code>Enumerable</code>-ization of NodeList above as well.</p>
<p><em>JAXP/JRuby</em></p>
<pre>module org::w3c::dom::Node
  def traverse(&amp;blk)
    blk.call(self)
    child_nodes.each do |e|
      e.traverse(&amp;blk)
    end
  end
end

titles = []
document.traverse do |elem|
  titles &lt;&lt; elem.text_content if elem.node_name == "title"
end</pre>
<h2 id="pull_parsing">Pull parsing</h2>
<p>All three libraries have a pull parser (also called a stream parser or reader) as well. Pull parsers are efficient because they behave like a cursor scrolling through the document, but usually result in more verbose code because of the need to implement a small state machine on top of lower-level XML events. They are best employed on very large documents where it’s impractical to store the entire DOM tree in memory at once.</p>
<p><em>REXML</em></p>
<pre>parser = REXML::Parsers::PullParser.new(xml_stream)
titles = []
text = ''
grab_text = false
parser.each do |event|
  case event.event_type
  when :start_element
    grab_text = true if event[0] == "title"
  when :text
    text &lt;&lt; event[1] if grab_text
  when :end_element
    if event[0] == "title"
      titles &lt;&lt; text
      text = ''
      grab_text = false
    end
  end
end</pre>
<p><em>Nokogiri</em></p>
<pre>reader = Nokogiri::XML::Reader(xml_stream)
titles = []
text = ''
grab_text = false
reader.each do |elem|
  if elem.name == "title"
    if elem.node_type == 1  # start element?
      grab_text = true
    else # elem.node_type == 15  # end element?
      titles &lt;&lt; text
      text = ''
      grab_text = false
    end
  elsif grab_text &amp;&amp; elem.node_type == 3 # text?
    text &lt;&lt; elem.value
  end
end</pre>
<p>(Aside to the Nokogiri team: where are the reader node type constants?)</p>
<p><em>JAXP/JRuby</em></p>
<pre>include javax.xml.stream.XMLStreamConstants
factory = javax.xml.stream.XMLInputFactory.newInstance
reader = factory.createXMLStreamReader(xml_stream.to_inputstream)
titles = []
text = ''
grab_text = false
while reader.has_next
  case reader.next
  when START_ELEMENT
    grab_text = true if reader.local_name == "title"
  when CHARACTERS
    text &lt;&lt; reader.text if grab_text
  when END_ELEMENT
    if reader.local_name == "title"
      titles &lt;&lt; text
      text = ''
      grab_text = false
    end
  end
end</pre>
<p>Not surprisingly, all three pull parser examples end up looking very similar. The subtleties of the pull parser APIs end up getting blurred in the loops and conditionals. Only write this code when you have to.</p>
<h2 id="performance">Performance</h2>
<p>At the end of the day, it comes down to performance, doesn’t it? Although the topic of Ruby XML parser performance <a href="http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html">has been discussed before</a>, I thought it would be instructive to do another round of comparisons with JRuby and Ruby 1.9 thrown into the mix.</p>
<p><em>System Configuration</em></p>
<ul>
<li>Mac OS X 10.5 on a MacBook Pro 2.53 GHz Core 2 Duo</li>
<li>Ruby 1.8.6p287</li>
<li>Ruby 1.9.1p243</li>
<li>JRuby 1.5.0.dev (rev c7b3348) on Apple JDK 5 (32-bit)</li>
<li>Nokogiri 1.4.0</li>
<li>libxml2 2.7.3</li>
</ul>
<p>Here are results comparing Nokogiri and Hpricot on the three implementations, along with the JAXP version, which only runs on JRuby (smaller is better).</p>
<p><img class="aligncenter" title="XML Benchmarks 1" src="http://eyweb-images.s3.amazonaws.com/xmlbench.001.png" alt="" width="520" height="396" /></p>
<p>The REXML results were over an order of magnitude slower, so it&#8217;s easier to view them on a separate graph. Note the number of iterations here is 100 vs. 1000 for the results above.</p>
<p style="text-align: center;"><img class="aligncenter" title="XML Benchmarks 2" src="http://eyweb-images.s3.amazonaws.com/xmlbench.002.png" alt="" width="375" height="310" /></p>
<p>While these results don’t paint a <em>complete</em> picture of XML parser performance, they should give you enough of a guideline to make a decision on which parser to use once you take portability and readability into account. In summary:</p>
<ul>
<li>Use REXML when your parsing needs are minimal and you want the widest portability (across all implementations) with the smallest install footprint.</li>
<li>Use JRuby with the JAXP APIs for portability across any operating system that supports the Java platform (including Google AppEngine).</li>
<li>Use Nokogiri for everything else. It’s the fastest implementation, and produces the most programmer-friendly code of all Ruby XML parsers to date.</li>
</ul>
<p>(As a footnote, we on the Nokogiri and JRuby teams are looking for community help to <a href="http://www.serabe.com/2009/08/26/final-status-update/">further develop the pure-Java backend for Nokogiri</a> so that AppEngine and other JVM deployment scenarios that don’t allow loading native code can benefit from Nokogiri’s awesomeness. Please leave a comment or <a href="http://xircles.codehaus.org/lists/user@jruby.codehaus.org">contact the JRuby team on the mailing list</a> if you’re interested.)</p>
<p>The <a href="http://github.com/nicksieger/xmlbench">source code for this article is available</a> if you’d like to examine the code or run the benchmarks yourself. Keep an eye on the Engine Yard blog for an upcoming post on Nokogiri, and as always, leave questions and thoughts in the comments!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&amp;id=2917&amp;type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/RsA0p4IhYK8" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/xml-parsing-in-ruby/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/xml-parsing-in-ruby/</feedburner:origLink></item>
		<item>
		<title>Key-Value Stores in Ruby: The Wrap Up</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/e2UY7TVYCCM/</link>
		<comments>http://www.engineyard.com/blog/2009/key-value-stores-in-ruby-the-wrap-up/#comments</comments>
		<pubDate>Tue, 17 Nov 2009 18:00:17 +0000</pubDate>
		<dc:creator>Kirk Haines</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[couchdb]]></category>
		<category><![CDATA[JavaScript]]></category>
		<category><![CDATA[key-value stores]]></category>
		<category><![CDATA[MongoDB]]></category>
		<category><![CDATA[Ruby]]></category>
		<category><![CDATA[S3]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2541</guid>
		<description><![CDATA[This last article in our key-value series will briefly cover a few interesting topics that could each have had full articles of their own. This means that if they seem interesting to you, follow the links that I provide to get more information on them. Lastly, I'll wrap up by introducing Moneta, written by Yehuda Katz, which provides a unified API for a wide variety of different Key-Value Stores. If you want to write code that allows the user to choose the store to use, you'll want to pay attention to Moneta.]]></description>
			<content:encoded><![CDATA[<p>This last article in <a href="http://www.engineyard.com/blog/2009/key-value-stores-in-ruby/">our key-value series</a> will briefly cover a few interesting topics that could each have had full articles of their own. This means that if they seem interesting to you, follow the links that I provide to get more information on them. Lastly, I&#8217;ll wrap up by introducing <a href="http://github.com/wycats/moneta/">Moneta</a>, written by Yehuda Katz, which provides a unified API for a wide variety of different Key-Value Stores. If you want to write code that allows the user to choose the store to use, you&#8217;ll want to pay attention to Moneta.</p>
<p>The difficult part of discussing Key-Value Stores <em>today</em> is that it&#8217;s a product area seeing rapid development and constant evolution. There are more interesting stores and libraries available than can easily be covered, even in a series like this. I could probably be writing posts every two weeks into next year without running out of subjects. So, alas, many things must be left <em>un</em>discussed or <em>under</em>discussed. But let&#8217;s move on to the topics we <em>can</em> cover&#8230;</p>
<h2>CouchDB</h2>
<p>The first great Key-Value Store that isn&#8217;t going to get its own article is <a href="http://couchdb.apache.org/">CouchDB</a>.  Apache&#8217;s CouchDB is a document-oriented database, like <a href="http://www.engineyard.com/blog/2009/mongodb-a-light-in-the-darkness-key-value-stores-part-5/">MongoDB</a>.  It, however, exposes a RESTful JSON-based API that you address with a built-in HTTP interface. Like MongoDB, it offers a schema-free data store. CouchDB offers solid, built-in replication, and uses JavaScript as its query language. It is a powerful tool.</p>
<p>There are several Ruby libraries which can be used to facilitate using CouchDB. In the examples below, I have used <a href="http://github.com/jchris/couchrest">CouchRest</a>, which is based on CouchDB&#8217;s own <a href="http://svn.apache.org/repos/asf/incubator/couchdb/trunk/share/www/script/couch.js">couch.js</a> library:</p>
<div>
<pre>require 'rubygems'
require 'couchrest'
require 'yaml'

DBH = CouchRest.database!('exercise-log')

response = DBH.save_doc({
  :date =&gt; Time.now,
  :activity =&gt; ARGV[0],
  :duration =&gt; ARGV[1]})

stored_record = DBH.get(response['id'])
puts "Stored:\n#{stored_record.to_yaml}"</pre>
</div>
<div>
<pre>wyhaines$ ruby /tmp/couch1.rb
Stored:
--- !map:CouchRest::Document
duration: "97:34"
_rev: 1-eb6f6e3a3e2eae0cd99f3fcbc63d29d6
_id: 0d9e71f44b3e0d3a2013c282bbccb5a0
activity: pedaling
date: 2009/11/12 21:07:45 +0000</pre>
</div>
<p>As with MongoDB, one can store any set of keys/values together as a document in CouchDB, and then retrieve it later.  CouchRest returns a response from the server that contains an <code>id</code> field, which can be used to retrieve the record that was just stored.</p>
<p>For more complex queries of the document store, one can use views.  Views have a lot of power, because they are ultimately defined using JavaScript, but they don&#8217;t lend themselves to easy ad-hoc manipulation of the database.</p>
<div>
<pre>DBH.save_doc({
  "_id" =&gt; "_design/query",
  :views =&gt; {
    :allkeys =&gt; {
      :map =&gt; "function(doc) { for (var word in doc) { if (!word.match(/^_/)) emit(word,doc[word])}}"
    }
  }
})</pre>
</div>
<p>That inserts a view into the database that will be identified by <code>query/allkeys</code>.  What a view does is defined by the JavaScript code  it contains.  Once a view is inserted into CouchDB, using it is simple:</p>
<div>
<pre>puts DBH.view('query/allkeys').to_yaml</pre>
</div>
<p>That particular function was lifted shamelessly from the CouchRest README, and just has a couple terms renamed to make it a little more clear. The output:</p>
<div>
<pre>---
total_rows: 3
rows:
- id: 0d9e71f44b3e0d3a2013c282bbccb5a0
  value: pedaling
  key: activity
- id: 0d9e71f44b3e0d3a2013c282bbccb5a0
  value: 2009/11/12 21:07:45 +0000
  key: date
- id: 0d9e71f44b3e0d3a2013c282bbccb5a0
  value: "97:34"
  key: duration
offset: 0</pre>
</div>
<p>This is really just the tip of the iceberg with CouchDB/CouchRest; there&#8217;s a wealth of functionality. CouchDB views are implemented with map/reduce capability, which means you can use them to crunch some pretty complex problems on your data. Additionally, CouchRest provides a <code>CouchRest::ExtendedDocument</code>, which your own classes can inherit from. This lets you  easily create a Ruby model for your data, which is then transparently stored inside CouchDB.</p>
<div>
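<pre>class Exercise &lt; CouchRest::ExtendedDocument
  use_database DBH

  property :activity
  property :date
  property :duration
end

Exercise.new(:activity =&gt; "running", :date =&gt; Time.now, :duration =&gt; "23:44")</pre>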
</div>
<p>Dig into the CouchDB and CouchRest documentation if this looks interesting to you.</p>
<h2>S3</h2>
<p>I just wanted to briefly mention <a href="http://aws.amazon.com/s3/">Amazon&#8217;s Simple Storage Service</a>. It is, fundamentally, a simple HTTP accessible Key-Value Store that Amazon has turned into a service.  Requests to S3 will have higher latency than requests to a locally hosted data store (and its <a href="http://www.engineyard.com/blog/2009/rails-in-the-wild-5-client-side-performance-observations/">response latency can be high too</a>), but if you want a simple, robust store that will scale to as much data as you have to push at it, you might seriously consider S3.</p>
<h2>Moneta</h2>
<p><a href="http://github.com/wycats/moneta/">Moneta</a> is a unified interface to a variety of different key-value type data stores. That is, the same code can be run against a variety of different backing stores, and it will just work. Moneta supports the following stores as of this posting:</p>
<ul>
<li>Basic File Store</li>
<li>BerkeleyDB</li>
<li>CouchDB</li>
<li>DataMapper</li>
<li>File store for xattr</li>
<li>In-memory store</li>
<li>Memcache store</li>
<li>Redis</li>
<li>S3</li>
<li>SDBM</li>
<li>Tokyo</li>
<li>Xattrs in a file system</li>
</ul>
<p>Consider this example, which, again, uses CouchDB:</p>
<div>
<pre>require 'rubygems'
require 'yaml'
require 'moneta'
require 'moneta/couch'

cache = Moneta::Couch.new(:db =&gt; 'football')

cache['1a_final'] = {
  :where =&gt; 'Laramie; War Memorial Stadium',
  :when =&gt; "11:30 MST",
  :who =&gt; "Southeast Cyclones &amp; Lingle-Ft. Laramie Doggers",
  :prediction =&gt; "SE Cyclones by 14"}

puts cache['1a_final'].inspect</pre>
</div>
<div>
<pre>wyhaines$ ruby /tmp/moneta1.rb
---
- prediction: SE Cyclones by 14
  when: 11:30 MST
  who: Southeast Cyclones &amp; Lingle-Ft. Laramie Doggers
  where: Laramie; War Memorial Stadium</pre>
</div>
<p>It works, very simply.  If I want to change the code to use something else, like a file based store, it&#8217;s as simple as changing one line:</p>
<div>
<pre>--- couch.rb    2009-11-19 15:00:07.000000000 -0700
+++ file.rb     2009-11-19 15:01:12.000000000 -0700
@@ -1,9 +1,9 @@
 require 'rubygems'
 require 'yaml'
 require 'moneta'
-require 'moneta/couch'
+require 'moneta/file'

-cache = Moneta::Couch.new(:db =&gt; 'football')
+cache = Moneta::File.new(:path =&gt; '/tmp/football')

 cache['1a_final'] = {
   :where =&gt; 'Laramie; War Memorial Stadium',</pre>
</div>
<p>The rest of the code works without alteration.  The Moneta API is designed to be very similar to that of <code>Hash</code>.  It has a limited feature set, but the features it provides work identically across all of the supported platforms. For example, it doesn&#8217;t currently support iteration or partial matches. If your Key-Value Store needs are simple and you want something that can work with whatever store your <em>users</em> want to use, definitely check out Moneta; it&#8217;s a well written tool.</p>
<p>With that, we&#8217;ve reached the end of this series. It&#8217;s been fun to explore the unique features of each of these different approaches to the problem of a non-SQL key-value type data store, as well as the threads that unify them. I hope that I&#8217;ve exposed you to new and useful tools.</p>
<p>The landscape of Key-Value Stores is changing rapidly, so it is difficult to stay fully informed all the time. For instance, just a couple days ago there was a blog post implementing a <a href="http://legitimatesounding.com/blog/NoSQL_meet_SQL.html">SQL front end for CouchDB</a>. It&#8217;s done in Perl, but all it would take is an interested person and a little time, and you could have it in Ruby, too.</p>
<p>If you use a Key-Value Store system, or plan to, keep your eyes open for new developments, because you can bet that someone else will have something interesting next week or next month that may change the landscape again. As always, leave feedback in the comments, and thanks for reading!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2541&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/e2UY7TVYCCM" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/key-value-stores-in-ruby-the-wrap-up/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/key-value-stores-in-ruby-the-wrap-up/</feedburner:origLink></item>
		<item>
		<title>Rails Roadshow Coming Home!</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/xoA5HCLXicQ/</link>
		<comments>http://www.engineyard.com/blog/2009/rails-roadshow-coming-home/#comments</comments>
		<pubDate>Tue, 17 Nov 2009 00:13:35 +0000</pubDate>
		<dc:creator>Leah Silber</dc:creator>
				<category><![CDATA[Events]]></category>
		<category><![CDATA[Rails Performance]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2920</guid>
		<description><![CDATA[The Roadshow was so fantastic that we decided we ought to do it one more time—this time, here at home, in San Francisco. I'm happy to announce the sixth iteration of the Rails Performance in the Cloud Roadshow, set for Wednesday, December 2nd, bright and early in the day.]]></description>
			<content:encoded><![CDATA[<p><a href="http://www.railsroadshow.com"><img class="alignnone" title="Rails Roadshow" src="http://eyweb-images.s3.amazonaws.com/roadshow.jpg" alt="" width="500" height="114" /></a></p>
<p>Earlier this month, the Engine Yard crew took a quick trip around the states, stopping in five different cities to talk about <a href="http://railsroadshow.com/">Rails Performance in the Cloud</a>. We visited Boston, Chicago, Austin, Los Angeles and Seattle, and met with a great crowd of technologists in each. We were joined by some of our favorite technology partners, who talked about cloud computing and their different performance-related products.</p>
<p>The Roadshow was a fantastic success; attendee feedback was overwhelmingly positive, and on our end, it was great to get out and meet folks in their home cities. Our partners enjoyed meeting existing customers, and for those who <em>weren&#8217;t</em> customers, we were able to present a uniquely well-rounded view of the ecosystem.</p>
<p>On to the point: the Roadshow was <em>so</em> fantastic that we decided we ought to do it one more time—this time, here at home, in San Francisco. <strong>I&#8217;m happy to announce the sixth iteration of the <a href="http://railsroadshow.com/">Rails Performance in the Cloud Roadshow</a></strong>, set for Wednesday, December 2nd, bright and early in the day.</p>
<p>We&#8217;ll be hosting right here at Engine Yard Headquarters in the SOMA part of San Francisco, and will be joined by our partners at New Relic, Pivotal Labs, Soasta and CVSDude. We&#8217;ll go from 8:15 in the morning until noon, and of course, breakfast <em>will</em> be served.</p>
<p>Space is limited, so <a href="http://railsroadshow.com/location-sf.html">reserve your seat today</a>!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2920&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/xoA5HCLXicQ" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/rails-roadshow-coming-home/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/rails-roadshow-coming-home/</feedburner:origLink></item>
		<item>
		<title>Programming Contest! And the Challenge is…Measure Rails Momentum</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/SYFUmVSBGBI/</link>
		<comments>http://www.engineyard.com/blog/2009/programming-contest-and-the-challenge-is-measure-rails-momentum/#comments</comments>
		<pubDate>Fri, 13 Nov 2009 01:25:50 +0000</pubDate>
		<dc:creator>Michael Mullany</dc:creator>
				<category><![CDATA[Contests]]></category>
		<category><![CDATA[contest]]></category>
		<category><![CDATA[Rails]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2855</guid>
		<description><![CDATA[We announced the Worst App Server Ever (WASE) contest last week, as the second in a series of Engine Yard programming contests. Since then, we&#8217;ve heard lots about your efforts to put a basic twitterbot together, and now the time has come to describe the challenge computation. Remember: you have until 5 p.m. Monday to complete [...]]]></description>
			<content:encoded><![CDATA[<p>We announced the <a href="http://www.engineyard.com/blog/2009/win-a-motorola-droid-programming-contest-worst-app-server-technology-ever/">Worst App Server Ever (WASE) contest</a> last week, as the second in a series of Engine Yard programming contests. Since then, we&#8217;ve heard lots about your efforts to put a <a href="http://github.com/dougal/wase_endpoint/">basic twitterbot</a> together, and now the time has come to describe the challenge computation. Remember: you have until 5 p.m. Monday to complete your calculations!</p>
<p><span style="color: #ff0000;">[UPDATE: Challenge calculation submissions should be in the form of a RETWEET to @engineyard of the first message in your wase program from your home twitter account. The final WASEpoint in your program listing should be @eycontest (so we can measure who finished first). You can <a href="http://www.engineyard.com/contests/wase/">register your wasepoints here</a>.]</span></p>
<p>People often talk about the momentum of intangible things, but it&#8217;s always been pretty much impossible to define. No Longer! In this contest, you&#8217;ll be measuring the momentum of Rails. How will you accomplish this, you ask?</p>
<p>Well, since Rails <em>is</em> its community, if we can measure the mass, speed and direction of the Rails community, then (clearly!) we can establish its momentum. For the purposes of this contest, and to make things easy, we will use as a rough proxy that the Rails community is everyone following <a href="http://www.twitter.com/dhh">@dhh</a> on twitter.</p>
<p>You must perform the following tasks to establish Rails momentum:</p>
<p>1) Establish the mass of the Rails community: defined as looking up the locations of all <a href="http://www.twitter.com/dhh">@dhh</a> followers from Twitter, geo-coding their locations, and then multiplying the number of people in each location by the average body mass of an adult in that country.</p>
<p>2) Establish the current location of the Rails community: defined by taking the locations and body weights from step 1, and calculating the community&#8217;s center of mass (its centroid).</p>
<p>3) Establish the speed and direction of the Rails community: defined by taking its current location (calculated in Step 2) and comparing it to the origin of Rails. Rails 1.0 was released in Chicago on December 14th, 2005, so in the approximately 1,430 days since then, its location has moved. This means we can calculate its average speed and direction over the last four years.</p>
<p>Although we will expect the answer in the form of JSON object properties, the answer (in free text) to the contest would look like:</p>
<ul>
<li>Rails Momentum is 15 metric tons per hour with a bearing of 120 degrees. Its current location is latitude 38.898748° and longitude -77.037684°.</li>
</ul>
<h2>Guidelines and Suggestions for Implementors</h2>
<p>Please refer back to our <a href="http://www.engineyard.com/blog/2009/win-a-motorola-droid-programming-contest-worst-app-server-technology-ever/">earlier post</a> for rules on how your entry must be structured and submitted. This description goes into details on the format of the input data and provides suggestions for WASEpoints and tips on what to avoid. A significant meta-challenge is agreeing on intermediate object formats with your fellow contestants! We have not specified what they should be.</p>
<h3>1) Calculating the mass of the Rails community</h3>
<p>The input data set has an array of all the twitter IDs currently following <a href="http://www.twitter.com/dhh">@dhh</a>. Twitter limits your API access to 150 calls per hour, unless you are a Twitter white-listed developer—so there is a clear opportunity for a white-listed developer to create a popular WASEpoint. If you&#8217;re <em>not</em> a white-listed developer, be prepared to collaborate with others to pool your twitter requests via WASEpoint chaining. We will make an exception to the standard WASEpoint rules here on &#8220;no state at a WASEpoint&#8221; and allow people to create WASEpoints for others that provide a cached location lookup for the location property of each follower.</p>
<p>Only about 50% of twitter user profiles have standard formatted locations in the form of [City, State], [State, City], [State], [City, Country], [Country, City] or [Country]. Location data that lacks this format should be discarded, and the corresponding followers ignored. There&#8217;s an opportunity here to write a WASEpoint that cleans and standardizes this data-set for others to use. We do <em>not</em> expect you to geocode imprecise location tags such as &#8220;The Midwest&#8221; or &#8220;Somewhere in the Clouds&#8221;.</p>
<p>There are many REST-accessible geocoding services on the web. Both Yahoo and Google have REST APIs and there are other responsive geocoding services with Ruby clients. We include a list of standardized countries and the average adult bodyweight as part of the input dataset for use in calculations. (It was actually quite surprising to see how many sources we had to go to to get this data, and it&#8217;s mostly only accurate for OECD countries—most countries simply contain weights based on an average BMI.) Bodyweights are in kilograms. This is another module of work that would make a nice WASEpoint.</p>
<h3>2) Calculating the current location</h3>
<p>Calculating a center of mass for a sphere like the earth would basically put Rails in the center of the earth, but we didn&#8217;t want to imply that Rails has gone to hell. Instead you are allowed to assume a flat-earth when calculating the centroid of the community, and the center of this flat mapping should be at Chicago&#8217;s longitude and the equator&#8217;s latitude. This latitude/longitude is provided as part of the input data set.</p>
<p>This puts the &#8220;edge&#8221; of this map approximately in western China, so India is to the &#8220;east&#8221; of Chicago, but Bangladesh would be &#8220;west&#8221; of Chicago. In this map, the center of mass for a developer in India and a developer in Bangladesh would not be on their border, it would be in the Caribbean. There is a clear opportunity here to write two WASEpoints: one that geocodes correctly and one that calculates centroids correctly.</p>
<p><img class="alignnone size-full wp-image-2885" title="Programming contest sample world map" src="http://eyweb-images.s3.amazonaws.com/contest_map.png" alt="Programming contest sample world map" width="510" height="398" /></p>
<p>(If anyone wants to tackle calculating a true 3D centroid with a projection back to the nearest point on the earth&#8217;s surface that would certainly be a strong candidate for third prize (best WASEpoint))</p>
<h3>3) Establishing the speed and direction</h3>
<p>Rails 1.0 was announced in Chicago (lat 41 54 long 87 39) on Dec 14th, 2005. If you establish that the center of the community is now in (for example) Albany, New York (lat 42 45 long 73 48), then it has traveled approximately 711 kilometers in 1,430 days, giving it an average land velocity of about 0.25 km/hr (unladen), and a final bearing of about 80°. For this part of the calculation we DO expect you to consider the earth spherical, but there are a number of handy math guides on the internets to help you calculate bearings from latitude and longitude data. This conversion alone could make a nice WASEpoint for others to use.</p>
<h2>The Input Data Object</h2>
<p>We have posted the input data for the challenge on the web. We&#8217;re giving you two JSON files in UTF-8 format.</p>
<ul>
<li><a href="http://assets.engineyard.com/wase/TwitterList.json">TwitterList.json</a> = An array of twitter IDs of everyone following <a href="http://www.twitter.com/dhh">@dhh</a>.</li>
</ul>
<ul>
<li><a href="http://assets.engineyard.com/wase/WeightbyCountry.json">WeightbyCountry.json</a> = An array of Countries with their Bodyweight Values.</li>
</ul>
<h2>The Output Data Object</h2>
<ul>
<li>We will accept output data in whatever data format your final WASEpoint emits, but it must be obvious to us what the momentum number is and what the bearing is. We would suggest the following json format however (with sample data):</li>
</ul>
<p>{"RailsMomentum":{"Momentum":{"kgs per meter per second":15},"location":[{"latitude":333333,"longitude":333333}],"bearing":{"degrees":36,"minutes":45,"seconds":45}}}</p>
<p>Simple enough, right? Well, no one ever said it was going to be easy! In fact, we tried to structure this so you&#8217;d have to compete as part of a team! Good luck—we can&#8217;t wait to see how it goes!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2855&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/SYFUmVSBGBI" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/programming-contest-and-the-challenge-is-measure-rails-momentum/feed/</wfw:commentRss>
		<slash:comments>7</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/programming-contest-and-the-challenge-is-measure-rails-momentum/</feedburner:origLink></item>
		<item>
		<title>Using the Rubygems Bundler for Your App</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/tffXyG_DiBo/</link>
		<comments>http://www.engineyard.com/blog/2009/using-the-rubygems-bundler-for-your-app/#comments</comments>
		<pubDate>Thu, 12 Nov 2009 18:00:54 +0000</pubDate>
		<dc:creator>Sam Merritt</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[bundler]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2701</guid>
		<description><![CDATA[The new Rubygems bundler makes managing your application&#8217;s gem dependencies easy. And for applications with many components, it makes separating components&#8217; dependencies easy too.
Let&#8217;s start off with a simple, two-part application. Part 1 is a Sinatra app that puts JSON-serialized messages into an AMQP queue. Part 2 is a daemon that consumes those messages.
Here&#8217;s what a Gemfile for this [...]]]></description>
			<content:encoded><![CDATA[<p>The new <a href="http://github.com/wycats/bundler">Rubygems bundler</a> makes managing your application&#8217;s gem dependencies easy. And for applications with many components, it makes separating components&#8217; dependencies easy too.</p>
<p>Let&#8217;s start off with a simple, two-part application. Part 1 is a Sinatra app that puts JSON-serialized messages into an AMQP queue. Part 2 is a daemon that consumes those messages.</p>
<p>Here&#8217;s what a Gemfile for this application might look like:</p>
<pre>gem 'json'
gem 'sinatra'
source 'http://gems.github.com'
gem 'famoseagle-carrot', :require_as =&gt; 'carrot'

gem 'eventmachine'
gem 'amqp'</pre>
<p>The Sinatra app starts off with these lines:</p>
<pre># This makes sure the bundled gems are in our $LOAD_PATH
require File.expand_path(File.join(File.dirname(__FILE__), 'vendor', 'gems', 'environment'))

# This actually requires the bundled gems
Bundler.require_env

class MyApp &lt; Sinatra::Base
  # stuff</pre>
<p>The daemon starts like this:</p>
<pre># This makes sure the bundled gems are in our $LOAD_PATH
require File.expand_path(File.join(File.dirname(__FILE__), 'vendor', 'gems', 'environment'))

# This actually requires the bundled gems
Bundler.require_env

AMQP.start do
  # stuff</pre>
<p>However, this loads too many gems. The Sinatra app synchronously publishes its messages using carrot, so it doesn&#8217;t need the EventMachine gem or the AMQP gem. Likewise, the daemon doesn&#8217;t serve HTTP requests, so it doesn&#8217;t need Sinatra, and it&#8217;s processing messages asynchronously using EventMachine, so it doesn&#8217;t need Carrot.</p>
<p>This isn&#8217;t the end of the world for this set of gems; it just makes all the Ruby processes use a little more memory than they otherwise might. However, if the Sinatra app uses one gem that defines <code>Array#foo</code> one way, and the daemon uses another gem that defines <code>Array#foo</code> an entirely <em>different</em> way, then each component must require only its own dependencies.</p>
<p>Fortunately, the bundler makes this really easy: just change the Gemfile to look like this:</p>
<pre>gem 'json'

only :app do
  gem 'sinatra'
  source 'http://gems.github.com'
  gem 'famoseagle-carrot', :require_as =&gt; 'carrot'
end

only :daemon do
  gem 'eventmachine'
  gem 'amqp'
end</pre>
<p>In the Sinatra app, change <code>Bundler.require_env</code> to <code>Bundler.require_env(:app)</code>. This loads JSON, Sinatra, and Carrot, but not EventMachine or AMQP.</p>
<p>Similarly, in the daemon, <code>Bundler.require_env</code> becomes <code>Bundler.require_env(:daemon)</code>.</p>
<p>But wait, it gets better!</p>
<p>Bundler.require_env can be called more than once with a different environment name each time. Consider tests (which, of course, you wrote first, right?): you want to run them. Now your Gemfile might look like this:</p>
<pre>gem 'json'

only :app do
  gem 'sinatra'
  source 'http://gems.github.com'
  gem 'famoseagle-carrot', :require_as =&gt; 'carrot'
end

only :daemon do
  gem 'eventmachine'
  gem 'amqp'
end

only :test do
  gem 'rspec'
  gem 'webrat'
end</pre>
<p>Then, in your spec, you start like this:</p>
<pre>require 'my_app'       # load the Sinatra app so we can test it
Bundler.require_env(:test)    # get rspec and webrat in here

describe "the main page" do
  # stuff</pre>
<p>Now each part of your app has its dependencies neatly contained. Each part only gets what it needs, and the different dependencies can&#8217;t step on each others&#8217; toes.</p>
<p>Sweet.</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2701&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/tffXyG_DiBo" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/using-the-rubygems-bundler-for-your-app/feed/</wfw:commentRss>
		<slash:comments>12</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/using-the-rubygems-bundler-for-your-app/</feedburner:origLink></item>
		<item>
		<title>Ruby Week Startup Crawl!</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/qM-QGJ86QFo/</link>
		<comments>http://www.engineyard.com/blog/2009/ruby-week-startup-crawl/#comments</comments>
		<pubDate>Wed, 11 Nov 2009 21:30:41 +0000</pubDate>
		<dc:creator>Leah Silber</dc:creator>
				<category><![CDATA[Events]]></category>
		<category><![CDATA[RubyConf]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2813</guid>
		<description><![CDATA[

Next week is going to be quite the week for Ruby in San Francisco, with QCon, RubyConf and JRubyConf. Engine Yard is excited to be participating in all three events, but there&#8217;s even more to tell you about!
With so many Rubyists in town, it only made sense to find a way to have you all [...]]]></description>
			<content:encoded><![CDATA[<p style="margin: 0.0px 0.0px 0.0px 0.0px; font: 12.0px Helvetica;">
<p><a href="http://www.startupcrawl.com"><img class="alignleft" style="border: 1px solid black;" title="Startup Crawl" src="http://eyweb-images.s3.amazonaws.com/startup_crawl.jpg" alt="Startup Crawl" width="310" height="155" /></a></p>
<p><span style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;">Next week is going to be <em>quite</em> the week for Ruby in San Francisco, with QCon, RubyConf and JRubyConf. Engine Yard is excited to be participating in all three events, but there&#8217;s even more to tell you about!</span></p>
<p>With so many Rubyists in town, it only made sense to find a way to have you all over for drinks and socializing. As such, we&#8217;ve signed on to participate in the RubyConf week <a href="http://www.startupcrawl.com/" target="_blank">Startup Crawl</a>. We&#8217;re working with our friends over at <a href="http://www.scribd.com/" target="_blank">Scribd</a>, along with a host of other local startups (<a href="http://www.zendesk.com/">Zendesk</a>, <a href="https://www.yammer.com/" target="_blank">Yammer</a>, <a href="http://www.justin.tv/" target="_blank">Justin.tv</a> and more!) and opening our doors to everyone.</p>
<p>The Crawl will take place Friday evening, November 20th; there will be buses leaving from the RubyConf hotel starting at 5:30 and making trips back and forth until 11 p.m. We&#8217;ll have food, drinks, giveaways, and of course Engine Yard Cloud demos for all.</p>
<p>If you&#8217;re planning on taking the bus, be sure to sign up at <a href="http://www.startupcrawl.com/" target="_blank">the Startup Crawl site</a>, so we can be sure there&#8217;s enough transportation for everyone. Otherwise, feel free to walk in any time after 5:30—we&#8217;d love to meet you!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2813&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/qM-QGJ86QFo" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/ruby-week-startup-crawl/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/ruby-week-startup-crawl/</feedburner:origLink></item>
		<item>
		<title>Nginx Security Vulnerability: SSL Man in the Middle Attack</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/q2rgjX4q204/</link>
		<comments>http://www.engineyard.com/blog/2009/nginx-security-vulnerability-ssl-man-in-the-middle-attack/#comments</comments>
		<pubDate>Tue, 10 Nov 2009 17:30:35 +0000</pubDate>
		<dc:creator>Michael Mullany</dc:creator>
				<category><![CDATA[News]]></category>
		<category><![CDATA[nginx]]></category>
		<category><![CDATA[security vulnerability]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2804</guid>
		<description><![CDATA[A security vulnerability in all versions of nginx (as well as several other web servers) has been reported. Attackers can exploit this vulnerability by intercepting SSL sessions and compromising encryption key renegotiation via a plaintext injection, allowing the attacker to read the plaintext of the SSL session. A patch has been released for this vulnerability.
Engine [...]]]></description>
			<content:encoded><![CDATA[<p>A <a onclick="javascript:pageTracker._trackPageview('/outbound/article/seclists.org');" href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2009-3555">security vulnerability in all versions of nginx</a> (as well as several other web servers) has been reported. Attackers can exploit this vulnerability by intercepting SSL sessions and compromising encryption key renegotiation via a plaintext injection, allowing the attacker to read the plaintext of the SSL session. A <a href="http://sysoev.ru/nginx/patch.cve-2009-3555.txt">patch</a> has been released for this vulnerability.</p>
<p>Engine Yard customers have already been contacted via email about this issue. For Engine Yard Cloud customers, this patch will be automatically applied the next time you perform a deploy. All other customers should open a support ticket so that you can arrange an appropriate maintenance window with support.</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2804&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/q2rgjX4q204" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/nginx-security-vulnerability-ssl-man-in-the-middle-attack/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/nginx-security-vulnerability-ssl-man-in-the-middle-attack/</feedburner:origLink></item>
		<item>
		<title>Rails Roadshow Recap</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/67u-udqFY7g/</link>
		<comments>http://www.engineyard.com/blog/2009/rails-roadshow-recap/#comments</comments>
		<pubDate>Thu, 05 Nov 2009 22:05:11 +0000</pubDate>
		<dc:creator>Abheek Anand</dc:creator>
				<category><![CDATA[Cloud]]></category>
		<category><![CDATA[Events]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2788</guid>
		<description><![CDATA[It's been a whirlwind few weeks, but the Rails Performance in the Cloud Roadshow is complete, and we're all back home! We had a great time getting out and meeting customers and prospects, and, of course, talking about cloud performance. If you missed it, or just want a refresher, the three Engine Yard slide decks are now available.

Attendee James McElhiney beat me to the punch with his review, but I still wanted to take a few minutes to talk about how things went.]]></description>
			<content:encoded><![CDATA[<p>It&#8217;s been a whirlwind few weeks, but the Rails Performance in the Cloud Roadshow is complete, and we&#8217;re all back home! We had a great time getting out and meeting customers and prospects, and, of course, talking about cloud performance. If you missed it, or just want a refresher, <a href="http://www.engineyard.com/community/railsroadshow">the Engine Yard Cloud slide deck is now available</a>.</p>
<p>Attendee <a href="http://www.bitwelder.com/?p=21">James McElhiney</a> beat me to the punch with his review, but I still wanted to take a few minutes to talk about how things went.</p>
<p>We started with Boston, and then moved to Chicago, Austin, Los Angeles and Seattle. Our partners Amazon Web Services, CVSDude, New Relic and Soasta joined us for the trip, meeting with various customers, developers and managers. The crowd was varied,  highly technical, and despite the early-morning start, had great energy.</p>
<p>The Roadshow was all about performance, and between the presentations and audience questions, we covered close to everything you&#8217;d need to know to meet and exceed your performance goals. Some specifics:</p>
<p>Tom Mornini and I presented Engine Yard&#8217;s value proposition, along with a live demo of the <a href="http://www.engineyard.com/products/cloud">Engine Yard Cloud</a> platform. Tom focused on end-user performance and the very significant impact it can have on business metrics, while I went into detail on some performance metrics that should be relevant to all rails developers, highlighting how Engine Yard Cloud can help users scale in the cloud.</p>
<p>Next <a href="http://newrelic.com/">New Relic</a> walked us through RPM, and how they use it internally to monitor, well, RPM. Self-referential coolness aside, it was a great way to see how a tool can help you identify the bottlenecks in your application, while still remaining easy to set up and maintain. When the first audience question started with &#8220;I&#8217;m signing up for your service even as I ask this&#8230;&#8221; we knew things were on the right track.</p>
<p><a href="http://cvsdude.com/">CVSDude</a>&#8217;s Willie Wang presented on enterprise-grade source control management, and how their solutions can help achieve agility in deployments. This is a key component of performance optimization—agile deployments enable us to fix performance problems faster and deliver features to customers more effectively. With Git support in their near-term roadmap, CVSDude was a great addition for our Rails audience.</p>
<p>Many of the attendees were customers of Amazon AWS, and Mike Culver&#8217;s presentations described the value proposition of AWS very well. The Amazon team had a busy week—with newly announced <a href="http://aws.typepad.com/aws/2009/10/introducing-rds-the-amazon-relational-database-service-.html">services</a>, <a href="http://aws.typepad.com/aws/2009/10/two-new-ec2-instance-types-additional-memory.html">instance types</a> and a <a href="http://aws.typepad.com/aws/2009/10/amazon-ec2-now-an-even-better-value.html">price reduction</a> (which we at Engine Yard immediately <a href="http://www.engineyard.com/blog/2009/announcement-engine-yard-cloud-price-reduction/">passed along to our customers</a>). As expected, a lot of the questions were around their new announcements, and Mike explained how each of these increased the value proposition of cloud computing even more.</p>
<p>Finally, <a href="http://soasta.com/">Soasta</a>&#8217;s Dave Murphy had a great presentation on load testing using Soasta Cloud Test. Some of his numbers were particularly impressive—seeing them use cloud infrastructure to deploy multi-million user load tests on live production environments really brought home the value of using the cloud model. Tools like CloudTest, in conjunction with Engine Yard, can make it trivial for users to test deployments that would have been impossible to test before.</p>
<p>For me personally, the roadshow was a great way to meet customers and developers and spend cycles understanding their needs better. The biggest takeaway for me was that customers really care about scalability, but realize that they add most value by focusing on user happiness. This was exactly the philosophy we used while designing Engine Yard Cloud—we want you to continue to focus on features that add value to your products, and leave all your scalability, performance and agility needs to the Engine Yard Cloud platform.</p>
<p>If you missed the Roadshow, be sure to register for our upcoming <a href="http://engineyard.cmail3.com/t/y/l/uydkdh/kjiythgi/r">Engine Yard Cloud Demo Webinar</a>; I&#8217;ll be walking through some of the topics covered at the Roadshow and taking questions.</p>
<p>We look forward to visiting more cities soon. Until then, enjoy the presentations, and send us your comments and feedback!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2788&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/67u-udqFY7g" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/rails-roadshow-recap/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/rails-roadshow-recap/</feedburner:origLink></item>
		<item>
		<title>Win a Motorola DROID Programming Contest: “Worst App Server Technology Ever”</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/eWWKRwy-OAY/</link>
		<comments>http://www.engineyard.com/blog/2009/win-a-motorola-droid-programming-contest-worst-app-server-technology-ever/#comments</comments>
		<pubDate>Tue, 03 Nov 2009 15:30:26 +0000</pubDate>
		<dc:creator>Michael Mullany</dc:creator>
				<category><![CDATA[Contests]]></category>
		<category><![CDATA[actors]]></category>
		<category><![CDATA[contest]]></category>
		<category><![CDATA[dataflow]]></category>
		<category><![CDATA[twitter]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2710</guid>
		<description><![CDATA[The goal of this contest is to collaborate with your other contestants to build the &#8220;worst app server ever&#8221; (WASE) , and use it to complete one or more challenge computations. The challenge computation(s) and their input data-set(s) will be announced and posted next week on Thursday, November 12. The contest will remain open until [...]]]></description>
			<content:encoded><![CDATA[<p>The goal of this contest is to collaborate with your other contestants to build the &#8220;worst app server ever&#8221; (WASE) , and use it to complete one or more challenge computations. The challenge computation(s) and their input data-set(s) will be announced and posted next week on Thursday, November 12. The contest will remain open until Monday, November 16th at 6pm PST.  Winners will be announced within the following week.</p>
<p>(<span style="color: #ff9900;">Update: </span>We think the rule-set below is now complete, but we still welcome any suggestions or tweaks that you might have.)</p>
<p>There will be three prizes.</p>
<ul>
<li>The first prize (a Motorola DROID and $1,000 of Engine Yard Cloud credit) goes to the person who completes our challenge task correctly first.</li>
<li>Second prize (a DROID and $500 of cloud credit) goes to the person who builds the most popular WASE endpoint (the one used the most often in the most submissions).</li>
<li>Third prize (a DROID) goes to the &#8220;best&#8221; WASE endpoint written in Ruby (as determined by us). The contest DROIDS are full price, non-contract-linked, US models.</li>
</ul>
<h2>How WASE Works</h2>
<p>Why is WASE the world&#8217;s worst app server technology? Well, instead of a sane message bus like AMQP, WASE uses Twitter as its message bus. Instead of a proper message router, WASE uses a list of twitter accounts as its program listing. And instead of encapsulating data with each message, WASE messages only contain a reference to JSON objects or arrays at input and output location(s) specified by a bit.ly.</p>
<h2>&#8220;This Sounds Like the World&#8217;s Worst App Server. Tell Me More.&#8221;</h2>
<p>Well here is an example of how a sample computation might work. Let&#8217;s say @engineyard wants to take a JSON file containing an array of names, and get a list of  the top quartile of names after sorting the array. We know that there are two Twitter accounts (which we will henceforth call WASEpoints) @ey-sort and @ey-firsthalf that can be useful to us. We know @ey-sort takes an input data set, sorts it and outputs the result. We also know that @ey-firsthalf takes an input data set, and outputs the first &#8220;half&#8221; of the dataset. To perform a computation, we set up URI&#8217;s for the program listing, the input data and the output data, and kick off the computation with an appropriate message. (For those of you with dataflow or actor-based programming experience, WASE should look like a vague, but disreputable cousin.)</p>
<p>So let&#8217;s go through the message flow:</p>
<p>We&#8217;ll put our program listing at:</p>
<p><a href="http://www.engineyard.com/top-quartile-sorted-list.json">www.engineyard.com/top-quartile-sorted-list.json</a>,  (bit.ly/7yQK6) whose body contents are a JSON array:</p>
<p>["@ey-sort", "@ey-firsthalf", "@ey-firsthalf", "@engineyard"]</p>
<p>We put our input data here:  <a href="http://www.engineyard.com/unsortedmegalist.json">www.engineyard.com/unsortedmegalist.json</a> (bit.ly/3kl0xs)</p>
<p>And set up a location for our output data here:  <a href="http://www.engineyard.com/top25percentofmymegalist.json">www.engineyard.com/top25percentofmymegalist.json</a> (bit.ly/2uhGcl)</p>
<p>Or to summarize the <a href="http://code.google.com/p/bitly-api/wiki/ApiDocumentation">bit.ly&#8217;s</a>,</p>
<p>Program listing: bit.ly/7yQK6 (read with a http: GET)</p>
<p>Output location: bit.ly/2uhGcl (written with a http: PUT)</p>
<p>Input location: bit.ly/3kl0xs (read with a http: GET)</p>
<p>To perform the computation, we&#8217;d simply send the following twitter message from our @engineyard account: @ey-sort #wase, 0, bit.ly/7yQK6, 1256850843, bit.ly/2uhGcl, bit.ly/3kl0xs</p>
<p>So the message format of a WASE message is: [WASEpoint], [WASE hashtag] [Program Counter (0 initially)], [Program listing URI], [<a href="http://www.unixtimestamp.com/index.php">Unix Timestamp</a>], [Output URI] [,Input URI (optional)] [, Input URI 2 (optional)]</p>
<p>In the case of this computation, the message and computation sequence would look like:</p>
<ul>
<li>@engineyard sends: &#8220;@ey-sort #wase, 0, bit.ly/7yQK6, 1256850843, bit.ly/2uhGcl, bit.ly/3kl0xs&#8221;</li>
</ul>
<p>&#8230;. @ey-sort reads the message from @engineyard in its twitter list and parses the message. First it GETs the program listing from bit.ly/7yQK6, GETS the input data set from bit.ly/3kl0xs, sorts it, PUTS the output to bit.ly/2uhGcl, then looks for the 0+1 WASEpoint in the program listing (@ey-firsthalf) and then..</p>
<ul>
<li>@ey-sort sends: &#8220;@ey-firsthalf #wase, 1, bit.ly/7yQK6, 1256850875, bit.ly/2uhGcl&#8221;</li>
</ul>
<p>&#8230;. @ey-firsthalf reads the message from @ey-sort in its twitter list and parses the message. First it GETs the program listing from bit.ly/7yQK6, GETS the input data set from bit.ly/2uhGcl, halves it, then PUTS the output to bit.ly/2uhGcl,  looks for the 1+1 WASEpoint in the program listing (@ey-firsthalf) and then..</p>
<ul>
<li>@ey-firsthalf sends (to itself): &#8220;@ey-firsthalf #wase, 2, bit.ly/7yQK6, 1256850885, bit.ly/2uhGcl&#8221;</li>
</ul>
<p>&#8230; etc. &#8230;</p>
<ul>
<li>@ey-firsthalf sends: &#8220;@engineyard #wase, 3, bit.ly/7yQK6, 1256850899, bit.ly/2uhGcl&#8221;</li>
</ul>
<p>&#8212; finally @engineyard receives this message with the pointer to the final location of output data.</p>
<p>A few new things here. There&#8217;s a program counter that tells the WASEpoint where in the program listing the computation is, and there&#8217;s a Unix timestamp (could be useful for discarding messages that get held up in the twitterverse?). If no input URI is specified, then the WASEpoint should use the Output URI as both Input and Output locations. One restriction that we will enforce for the contest is that a WASEPOINT MAY NOT DECREMENT A PROGRAM COUNTER: to avoid infinite looping. (<span style="color: #ff9900;">Update: </span>And a WASEpoint must conserve the program listing and output URI&#8217;s from the input to the output message.)</p>
<p>Hey, maybe we should have some basic error handling. Hmm, let&#8217;s say @ey-firsthalf is expecting a standard JSON object but the input data fails to parse properly. Let&#8217;s have it send the following message:</p>
<ul>
<li>@ey-firsthalf sends: &#8220;@engineyard, #wase, -1, bit.ly/7yQK6, 1256850885, bit.ly/2uhGcl&#8221;</li>
</ul>
<p>So the error message structure is: [<span style="color: #ff9900;">Update: </span>first WASEpoint in program listing], [WASE hashtag], [Negative of Program Counter], [Program listing URI], [Unix Timestamp], [Output URI] [, Input URI (optional)] [, Input URI 2 (optional)]</p>
<p>Note that there are no type declarations in the message format because the only data-type supported by WASE are JSON objects and arrays.</p>
<h2>What are Guidelines for the Contest?</h2>
<p>Apart from the message format and data guidelines above &#8212; here are additional guidelines:</p>
<p>1. Each contestant may register no more than 5 WASE endpoints/twitter accounts. WASEpoints must be <a href="http://www.engineyard.com/contests/wase/">registered here</a> to be eligible for use. Your WASEpoints must follow <a href="http://twitter.com/eycontest">@eycontest</a>. This is also where you should go to pick and choose good WASEpoints for constructing your app. Each contest entry must use a minimum of 10 WASEpoints from at least four separate contestants, where each WASEpoint performs functionally significant data operations. <span style="color: #ff0000;">You must supply your own Output URI</span>!</p>
<p>2. Please do not submit WASEpoints whose twitter accounts you do not own :-) We do not want to encourage the business of spamming Ashton Kutcher with mysterious messages. We will test each submitted WASEpoint with a DM to make sure they are legitimate.</p>
<p>3. Source code for your WASEpoint must be posted to a public repository (e.g. codaset, github, sourceforge, kenai) for other contestants to inspect :-) If observed behavior deviates from the posted code (aka you have filed a prank WASEpoint), then your entry and all your WASEpoints will be removed from the registered list.</p>
<p>4. A WASEpoint must not store state, and may not rely on any state data other than the input data (of course, it&#8217;s easy to generate a private data set programmatically, but this will also be considered state). Trivial WASEpoints (e.g. identity) will be disqualified, although triviality is hard to define, you know it when you see it.</p>
<p>5. You must use bit.ly as your URL shortener (to make everyone&#8217;s job building parsers easier—and bit.ly has a http: interface).</p>
<p>7. <span style="color: #ff0000;">UPDATE: </span><span style="color: #ff0000;">Challenge calculation submissions must be in the form of a RETWEET to @engineyard</span> <span style="color: #ff0000;">of the first message in your WASE program listing from your home twitter account. The final WASEpoint in your program listing should be @eycontest. </span>You must be following @engineyard with your home account in order to enter.</p>
<p>8. We must be able to reproduce your computation using your program listing and our own output URI.</p>
<p>9. We STRONGLY encourage people to write their WASEpoints in Ruby, but we&#8217;ll also accept Perl, Scala and Python. Although, be prepared for people avoiding your WASEpoint since the common denominator among people reading this blog is the fact that they know Ruby!</p>
<p>10. We may alter these guidelines along the way, based on your input and feedback, although the spirit and philosophy of them will remain.</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2710&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/eWWKRwy-OAY" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/win-a-motorola-droid-programming-contest-worst-app-server-technology-ever/feed/</wfw:commentRss>
		<slash:comments>28</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/win-a-motorola-droid-programming-contest-worst-app-server-technology-ever/</feedburner:origLink></item>
		<item>
		<title>Distlockrun: Lockrun for Your Cloud</title>
		<link>http://feedproxy.google.com/~r/engineyard/~3/yMLdZiUAWEU/</link>
		<comments>http://www.engineyard.com/blog/2009/distlockrun-lockrun-for-your-cloud/#comments</comments>
		<pubDate>Fri, 30 Oct 2009 18:40:13 +0000</pubDate>
		<dc:creator>Sam Merritt</dc:creator>
				<category><![CDATA[Technology]]></category>
		<category><![CDATA[Lockrun]]></category>

		<guid isPermaLink="false">http://www.engineyard.com/blog/?p=2636</guid>
		<description><![CDATA[Lockrun is a handy little utility for ensuring you don&#8217;t run two of the same cron job (or other task) at the same time on one machine. It&#8217;s especially handy when the cron job in question has a widely varying duration. Lockrun was written by Steve Friedl and initially released in 2006.
However, when you have [...]]]></description>
			<content:encoded><![CDATA[<p><a href="http://unixwiz.net/tools/lockrun.html">Lockrun is a handy little utility</a> for ensuring you don&#8217;t run two of the same cron job (or other task) at the same time on one machine. It&#8217;s especially handy when the cron job in question has a widely varying duration. Lockrun was written by Steve Friedl and <a href="http://blog.unixwiz.net/2006/06/new_tool_lockru.html">initially released in 2006</a>.</p>
<p>However, when you have jobs that use resources from multiple machines, lockrun isn&#8217;t adequate. Consider a cron job that builds a local index of a bunch of NFS-mounted files. You don&#8217;t want more than one consumer of that NFS volume to index it at a time, otherwise performance will be degraded.</p>
<p>Enter <a href="http://github.com/smerritt/distlockrun">distlockrun</a>: it works similarly to lockrun except that it talks to a central server for mutual exclusion instead of locking a file.</p>
<p>First of all, you need the lock server running. It&#8217;s really lightweight, so you can run it on any machine you&#8217;ve already got. For example, on Engine Yard Cloud, it should be run on the database master in a screen session.</p>
<p>Start the server like so: <code>distlockrun-server</code></p>
<p>By default, it starts on port 7890, but that can be changed with the <code>--port</code> option.</p>
<p>Then, on the client, run your job as follows: <code>distlockrun --server {server's hostname} big-expensive-indexer.rb</code></p>
<p>If there&#8217;s already a big-expensive-indexer.rb running, then distlockrun will just exit. Otherwise, it runs the job and then tells the lock server that it&#8217;s finished.</p>
<p>That&#8217;s all it takes to get cluster-wide mutual exclusion; short but sweet!</p>
<img src="http://www.engineyard.com/blog/?ak_action=api_record_view&id=2636&type=feed" alt="" /><img src="http://feeds.feedburner.com/~r/engineyard/~4/yMLdZiUAWEU" height="1" width="1"/>]]></content:encoded>
			<wfw:commentRss>http://www.engineyard.com/blog/2009/distlockrun-lockrun-for-your-cloud/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		<feedburner:origLink>http://www.engineyard.com/blog/2009/distlockrun-lockrun-for-your-cloud/</feedburner:origLink></item>
	</channel>
</rss>
