<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" gd:etag="W/&quot;C0ADQHc-eyp7ImA9WhRUEU8.&quot;"><id>tag:blogger.com,1999:blog-11265228</id><updated>2012-01-20T20:56:11.953-08:00</updated><title>tech</title><subtitle type="html" /><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>128</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/blogspot/hfRE" /><feedburner:info xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" uri="blogspot/hfre" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry 
gd:etag="W/&quot;CEAHQ3w9fCp7ImA9WhRVFEU.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-1305721713631645500</id><published>2012-01-13T11:25:00.001-08:00</published><updated>2012-01-13T11:25:32.264-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-13T11:25:32.264-08:00</app:edited><title>Unix Shell : set commands on files</title><content type="html">&lt;a href="http://www.catonmat.net/blog/set-operations-in-unix-shell/"&gt;http://www.catonmat.net/blog/set-operations-in-unix-shell/&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-1305721713631645500?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/1305721713631645500/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=1305721713631645500" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/1305721713631645500?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/1305721713631645500?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2012/01/unix-shell-set-commands-on-files.html" title="Unix Shell : set commands on files" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CUcMRn4zeip7ImA9WhRWF0Q.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-1464172807750339305</id><published>2012-01-05T11:50:00.001-08:00</published><updated>2012-01-05T11:51:27.082-08:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2012-01-05T11:51:27.082-08:00</app:edited><title>Std C++ library - linking static / dynamic pitfalls</title><content type="html">&lt;a href="http://www.trilithium.com/johan/2005/06/static-libstdc/"&gt;http://www.trilithium.com/johan/2005/06/static-libstdc/&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-1464172807750339305?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/1464172807750339305/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=1464172807750339305" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/1464172807750339305?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/1464172807750339305?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2012/01/std-c-library-linking-static-dynamic.html" title="Std C++ library - linking static / dynamic pitfalls" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CkYDQXoyeSp7ImA9WhRREEQ.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-4220573357759096089</id><published>2011-11-23T15:10:00.000-08:00</published><updated>2011-11-23T15:16:10.491-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-23T15:16:10.491-08:00</app:edited><title>HDFS namenode won't start after disk full error</title><content type="html">&lt;div class="separator" style="clear: both; 
text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-UZoNVQ3xlAc/Ts195pO6QaI/AAAAAAAAASo/BC6tJYhMHlE/s1600/hadoop-logo.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-UZoNVQ3xlAc/Ts195pO6QaI/AAAAAAAAASo/BC6tJYhMHlE/s1600/hadoop-logo.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;If you have trouble restarting the NameNode in a Hadoop cluster after a disk full error, if you don't mind losing some data, you can do the following to get it back up.&lt;br /&gt;
&lt;br /&gt;
Find the 'edits' file in the Hadoop dfs.name.dir/current directory and overwrite it with this byte sequence:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; color: #000000; background-color: #eee;font-size: 12px;border: 1px dashed #999999;line-height: 14px;padding: 5px; overflow: auto; width: 100%"&gt;&lt;code&gt;printf &amp;quot;\xff\xff\xff\xee\xff&amp;quot; &amp;gt; edits
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
After that, you should be able to start hadoop. Credit &lt;a href="http://lucene.472066.n3.nabble.com/Help-can-t-start-namenode-after-disk-full-error-td3055667.html"&gt;here&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-4220573357759096089?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/4220573357759096089/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=4220573357759096089" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/4220573357759096089?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/4220573357759096089?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/11/hadoop-namenode-wont-start-after-disk.html" title="HDFS namenode won't start after disk full error" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-UZoNVQ3xlAc/Ts195pO6QaI/AAAAAAAAASo/BC6tJYhMHlE/s72-c/hadoop-logo.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DkICSHg6cCp7ImA9WhRTFE4.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-7189234551009794425</id><published>2011-11-04T12:20:00.000-07:00</published><updated>2011-11-04T12:22:49.618-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-04T12:22:49.618-07:00</app:edited><title>Bash - prevent multiple copies of script from 
running</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;Since each bash command spawns its own process, we can't lock files to achieve single-copy running semantics. Why? Because file locks are per process and they are automatically cleared when the process dies. Thus it is pointless to expect a Linux command to hold a lock on a file: as soon as that command returns, the file is automatically unlocked, defeating the purpose of the lock completely!&lt;br /&gt;
&lt;br /&gt;
One easy way to prevent multiple copies from running is to find a Linux command that atomically performs an operation and reports whether that operation succeeded. This command must fail when it is attempted a second time. The command to make a directory - mkdir - is one such command.&lt;br /&gt;
&lt;br /&gt;
So the script could try to mkdir a particular directory - let's call this the lock directory. If it fails, we don't start. Now if it works, we must remove the lock directory when the script ends so that the script can run again. We do this using the trap command - trap will make sure a given command will execute when the script exits at any point.&lt;br /&gt;
&lt;br /&gt;
Here is the code:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed #999999; color: black; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;#!/bin/bash                                                                                                                                                                                        
mkdir /tmp/locka 2&amp;gt;/dev/null || {
    exit
}
trap "rmdir /tmp/locka" EXIT
#script work, the sleep 10 below is to test this
#without having a real script.
sleep 10

&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-7189234551009794425?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/7189234551009794425/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=7189234551009794425" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/7189234551009794425?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/7189234551009794425?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/11/bash-prevent-multiple-copies-of-script.html" title="Bash - prevent multiple copies of script from running" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s72-c/bash.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DUcGR3g4fCp7ImA9WhdSGEQ.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-5659818983142722806</id><published>2011-07-28T16:44:00.000-07:00</published><updated>2011-07-28T16:50:26.634-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-28T16:50:26.634-07:00</app:edited><title>Java : write binary data to a mysql out file</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://1.bp.blogspot.com/-mKOauULovVc/TjH1u5-n5-I/AAAAAAAAAR8/8F8AiJEUB-I/s1600/JDOx120.gif" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-mKOauULovVc/TjH1u5-n5-I/AAAAAAAAAR8/8F8AiJEUB-I/s1600/JDOx120.gif" /&gt;&lt;/a&gt;&lt;/div&gt;I had the need to generate - within Java code - a mysql out file with both text and binary data. The binary data is for some content that has been gzipped and stored as a blob in a mysql table. While it is trivial to write binary data to a blob field directly using JDO, for performance reasons, we had to use the "load infile" approach. Thus the first step was to create an outfile.&lt;br /&gt;
&lt;br /&gt;
Here is the function that would convert binary data to a form that can be written to an out file. It follows the algorithm implemented by mysql for its "SELECT INTO outfile" functionality as described &lt;a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"&gt;here&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed #999999; color: black; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;    public static byte[] getEscapedBlob(byte[] blob) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        for (int i=0; i&amp;lt;blob.length; i++) {
            if (blob[i]=='\t' || blob[i]=='\n' || blob[i]=='\\') {
                bos.write('\\');
                bos.write(blob[i]);
            } else if (blob[i] == 0) {
                bos.write('\\');
                bos.write('0');
            } else {
                bos.write(blob[i]);
            }
        }
        return bos.toByteArray();
    }
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
This is how you would use this function to generate a mysql outfile.&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed #999999; color: black; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;                //gen infile for mysql
                byte[] out = getEscapedBlob(data);
                BufferedOutputStream f = new BufferedOutputStream(new FileOutputStream("/path/to/data.csv")) ;
                String nonBlobFields = "\\N\t10\t20100301\t18\t1102010\t2010-03-01 00:00:00\t";
                byte[] nonBlobData = nonBlobFields.getBytes("UTF-8");
                f.write(nonBlobData, 0, nonBlobData.length);
                f.write(out, 0, out.length);
                f.write('\n');
                f.close();

&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
This writes some integer data followed by the blob data to the outfile, which can then be loaded back using "LOAD INFILE".&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-5659818983142722806?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/5659818983142722806/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=5659818983142722806" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5659818983142722806?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5659818983142722806?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/07/java-write-binary-data-to-mysql-out.html" title="Java : write binary data to a mysql out file" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-mKOauULovVc/TjH1u5-n5-I/AAAAAAAAAR8/8F8AiJEUB-I/s72-c/JDOx120.gif" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DkQNQnw8eip7ImA9WhdSEko.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-6746133981027424561</id><published>2011-07-21T11:53:00.000-07:00</published><updated>2011-07-21T11:53:13.272-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-21T11:53:13.272-07:00</app:edited><title>Ubuntu : Install packages on a cluster of machines</title><content type="html">&lt;div class="separator" style="clear: both; 
text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;Sometimes, you have a cluster of machines where some packages need to be installed. It would be nice to be able to automate this so that you could do everything from a single terminal. We have seen how &lt;a href="http://thushw.blogspot.com/2010/12/bash-one-liner-find-process-start-time.html"&gt;a command can be run on multiple machines from a single terminal&lt;/a&gt; before. This only works if you have password-less ssh set up between the machine that you are running the command from and the cluster on which you want the command to actually run. The only aspect that makes this a little harder for installing software is that you need to be root to install packages and ssh keys are not generally set-up for root.&lt;br /&gt;
&lt;br /&gt;
However, there is an option -S that you can provide sudo that will make sudo read the password from stdin. We can use this combined with the bash loop to come up with a one liner that would install a package across a cluster of machines.&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed #999999; color: black; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;for m in m1 m2 m3 m4 ; do echo $m; ssh $m "echo password | sudo -S apt-get -y install curl" ; done
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
The -S option makes sure that the command will not prompt you for a password or complain about a missing tty. The -y option for apt-get prevents it from prompting you prior to the install.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-6746133981027424561?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/6746133981027424561/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=6746133981027424561" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6746133981027424561?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6746133981027424561?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/07/ubuntu-install-packages-on-cluster-of.html" title="Ubuntu : Install packages on a cluster of machines" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s72-c/bash.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DUcAQnc4fSp7ImA9WhdTF0g.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-6566232479913609530</id><published>2011-07-15T11:20:00.000-07:00</published><updated>2011-07-15T12:10:43.935-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-15T12:10:43.935-07:00</app:edited><title>Mac / Microsoft Excel / newlines (\r \n)</title><content 
type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-OfDBuYe_w6U/SaRw5Ah_znI/AAAAAAAAAEA/emTYFYmy984/s1600/applelin.JPG" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="200" src="http://3.bp.blogspot.com/-OfDBuYe_w6U/SaRw5Ah_znI/AAAAAAAAAEA/emTYFYmy984/s200/applelin.JPG" width="165" /&gt;&lt;/a&gt;&lt;/div&gt;It is frequently the case that the business department hands over Excel files to the engineering department for some type of data processing. The first step here is to convert these to a proper comma-separated text file (csv).&lt;br /&gt;
&lt;br /&gt;
If you are doing this conversion using Microsoft Excel on a Mac, you'll note that the resulting file does not have Unix-style newlines. A Unix new line is the 0x0a character, also written as \n. What Excel produces is the 0x0d character, also written as \r.&lt;br /&gt;
&lt;br /&gt;
Most Linux commands do not recognize \r as a line ending. There are several ways to convert the \r characters to proper Linux-style line endings. Using the vi editor is a common method. However, there is also the issue that if the Excel spreadsheet has blank columns, Excel sometimes insists on writing a possibly large number of \r characters at the end of the&amp;nbsp; csv file. The vi method would write a newline for each of these \r characters, and that is not ideal.&lt;br /&gt;
&lt;br /&gt;
Instead, you could use this perl one-liner to accomplish both goals: turn all \r into \n, except for the trailing \r characters, which are removed:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed #999999; color: black; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;perl -ne 's/([^\r])\r/$1\n/g; s/\r//g; print;'  imported.csv
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
The regular expression replaces any non \r character followed by \r with the non \r character followed by a \n. Since the trailing \r characters do not match this pattern, they are thus ignored. The second regexp removes these \r characters.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-6566232479913609530?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/6566232479913609530/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=6566232479913609530" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6566232479913609530?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6566232479913609530?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/07/mac-microsoft-excel-newlines-r-n.html" title="Mac / Microsoft Excel / newlines (\r \n)" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-OfDBuYe_w6U/SaRw5Ah_znI/AAAAAAAAAEA/emTYFYmy984/s72-c/applelin.JPG" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;A04CQ309eSp7ImA9WhZaGUo.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-1432656030019631097</id><published>2011-07-06T12:17:00.000-07:00</published><updated>2011-07-06T12:19:22.361-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-07-06T12:19:22.361-07:00</app:edited><title>Linux Shell, HUP 
and process status on logout</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;It used to be the case that all processes a user started were killed by the shell upon logout. Not any more, as recent experiments with Ubuntu 10.04 show.&lt;br /&gt;
&lt;br /&gt;
The shell can be configured to send a HUP signal to its children when the shell exits. This is controlled by the huponexit shell option as explained in the bash man page:&lt;br /&gt;
&lt;br /&gt;
&lt;i&gt;If the huponexit shell option has been set with shopt, bash sends a SIGHUP to all jobs when an interactive login shell exits.&lt;/i&gt;&lt;br /&gt;
&lt;br /&gt;
Determine the setting of huponexit with:&lt;br /&gt;
&lt;br /&gt;
shopt huponexit&lt;br /&gt;
&lt;br /&gt;
If it is "off", then processes started by the user will remain running after logout. This setting makes it easier to start a long running process simply from within the shell, without invoking a screen and without having to wrap the process in nohup.&lt;br /&gt;
&lt;br /&gt;
&lt;a href="http://serverfault.com/questions/115999/if-i-launch-a-background-process-and-then-log-out-will-it-continue-to-run"&gt;Here&lt;/a&gt; is a discussion on the issue. &lt;br /&gt;
&lt;br /&gt;
However, this setting seems to &lt;a href="http://sshmenu.sourceforge.net/faq/"&gt;cause problems&lt;/a&gt; for interactive sessions when a new user could start referring to an old user's now invalid processes.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-1432656030019631097?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/1432656030019631097/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=1432656030019631097" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/1432656030019631097?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/1432656030019631097?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/07/linux-shell-hup-and-process-status-on.html" title="Linux Shell, HUP and process status on logout" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s72-c/bash.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CEEDR3c6cCp7ImA9WhZaEEk.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-5552033308041400129</id><published>2011-06-23T13:54:00.000-07:00</published><updated>2011-06-25T15:57:56.918-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-25T15:57:56.918-07:00</app:edited><title>Asynchronous UDP server using Java 
NIO</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-4W78f7H59p0/St5PoQPJYJI/AAAAAAAAAGA/dB0W9ywuFM0/s1600/nio.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-4W78f7H59p0/St5PoQPJYJI/AAAAAAAAAGA/dB0W9ywuFM0/s1600/nio.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;UDP is a light-weight protocol as compared to TCP. When the data transmitted is small (in hundreds of bytes), and an occasional loss of data is not critical, UDP can be used to improve throughput of the program.&lt;br /&gt;
&lt;br /&gt;
The native sockets library (C) provides the epoll function - available on Linux 2.6.x kernels - that can be used for both TCP and UDP sockets. In an &lt;a href="http://thushw.blogspot.com/2009/11/java-callback-api-for-epoll-building-on.html"&gt;earlier post&lt;/a&gt;, I described a framework that can be used to implement an asynchronous client that connects to multiple servers using TCP. I found several code examples that described how Java NIO can be used for this purpose. It turns out that it is even simpler to write a NIO server for UDP.&lt;br /&gt;
&lt;br /&gt;
I would not recommend writing a UDP server if the request/response cannot be transmitted in a single UDP packet or if a packet has a dependency on an earlier packet. UDP packets can arrive out of order and the headers have no sequence numbers to enable re-ordering. If you want to handle reordering, you will be implementing what TCP provides for this purpose and it is probably a better idea to stick with TCP.&lt;br /&gt;
&lt;br /&gt;
The following program does well when the request/response fits in a single UDP packet. &lt;a href="http://stackoverflow.com/questions/1098897/what-is-the-largest-safe-udp-packet-size-on-the-internet"&gt;512 bytes is generally considered the safe maximum size&lt;/a&gt; and the DNS protocol mandates a maximum packet size of 512 bytes when it uses UDP.&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed #999999; color: black; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;public class ASyncUDPSvr {
    static int BUF_SZ = 1024;

    class Con {
        ByteBuffer req;
        ByteBuffer resp;
        SocketAddress sa;

        public Con() {
            req = ByteBuffer.allocate(BUF_SZ);
        }
    }

    static int port = 8340;
    /**
     * Runs the server: binds a non-blocking UDP channel to {@code port},
     * registers it with a selector for reads, and then loops forever,
     * alternating the key between read interest and write interest so that
     * each received datagram is answered once the channel is writable.
     *
     * I/O errors inside the loop are reported on stderr and the loop keeps
     * going; an I/O error during setup is reported and ends the method. The
     * selector and channel are closed on every exit path so that a failed
     * bind or register does not leave the socket open.
     */
    private void process() {
        Selector selector = null;
        DatagramChannel channel = null;
        try {
            selector = Selector.open();
            channel = DatagramChannel.open();
            InetSocketAddress isa = new InetSocketAddress(port);
            channel.socket().bind(isa);
            channel.configureBlocking(false);
            // A UDP server has one socket for all clients, so a single key
            // (and a single attached Con) carries the state for every request.
            SelectionKey clientKey = channel.register(selector, SelectionKey.OP_READ);
            clientKey.attach(new Con());
            while (true) {
                try {
                    selector.select();
                    Iterator selectedKeys = selector.selectedKeys().iterator();
                    while (selectedKeys.hasNext()) {
                        try {
                            SelectionKey key = (SelectionKey) selectedKeys.next();
                            // The selected-key set is not cleared by the selector;
                            // remove the key or it is reported again next time.
                            selectedKeys.remove();

                            if (!key.isValid()) {
                              continue;
                            }

                            if (key.isReadable()) {
                                read(key);
                                // Reply is staged; wait until the socket can take it.
                                key.interestOps(SelectionKey.OP_WRITE);
                            } else if (key.isWritable()) {
                                write(key);
                                // Reply handled; go back to waiting for requests.
                                key.interestOps(SelectionKey.OP_READ);
                            }
                        } catch (IOException e) {
                            System.err.println("glitch, continuing... " +(e.getMessage()!=null?e.getMessage():""));
                        }
                    }
                } catch (IOException e) {
                    System.err.println("glitch, continuing... " +(e.getMessage()!=null?e.getMessage():""));
                }
            }
        } catch (IOException e) {
            System.err.println("network error: " + (e.getMessage()!=null?e.getMessage():""));
        } finally {
            // Release the socket and the selector; a failure while closing
            // leaves nothing further to clean up, so it is only ignored here.
            if (channel != null) {
                try {
                    channel.close();
                } catch (IOException e) {
                    // ignored: already shutting down
                }
            }
            if (selector != null) {
                try {
                    selector.close();
                } catch (IOException e) {
                    // ignored: already shutting down
                }
            }
        }
    }

    /**
     * Receives one datagram from the channel behind {@code key}, prints its
     * payload, and stages the reply on the attached Con.
     *
     * The request buffer is cleared before each receive. Without that its
     * position keeps advancing across requests and, once the buffer is full,
     * every later datagram is silently discarded by receive(). Only the bytes
     * actually received are decoded, not the whole backing array.
     *
     * @param key selection key whose channel is the DatagramChannel and whose
     *            attachment is the Con holding request/response state
     * @throws IOException if the receive or the encoding of the reply fails
     */
    private void read(SelectionKey key) throws IOException {
        DatagramChannel chan = (DatagramChannel)key.channel();
        Con con = (Con)key.attachment();
        // Make the full buffer available again for this datagram.
        con.req.clear();
        con.sa = chan.receive(con.req);
        if (con.sa == null) {
            // Non-blocking receive returns null when no datagram was
            // available; there is no request to print or answer.
            return;
        }
        // flip() sets the limit to the number of bytes received and rewinds.
        con.req.flip();
        System.out.println(new String(con.req.array(), con.req.arrayOffset(), con.req.remaining(), "UTF-8"));
        con.resp = Charset.forName( "UTF-8" ).newEncoder().encode(CharBuffer.wrap("send the same string"));
    }

    /**
     * Sends the staged reply to the peer recorded by the preceding read.
     *
     * If the read produced no datagram there is no peer address (or no reply)
     * to use; in that case nothing is sent, so the unchecked exception that
     * send() would raise cannot take down the select loop.
     *
     * @param key selection key whose channel is the DatagramChannel and whose
     *            attachment is the Con holding the reply and the peer address
     * @throws IOException if the send fails
     */
    private void write(SelectionKey key) throws IOException {
        DatagramChannel chan = (DatagramChannel)key.channel();
        Con con = (Con)key.attachment();
        if (con.sa == null || con.resp == null) {
            // Nothing was received, so there is nobody to answer.
            return;
        }
        chan.send(con.resp, con.sa);
    }

    /**
     * Program entry point: creates a server instance and runs its loop.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        new ASyncUDPSvr().process();
    }
}
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
When dealing with small data sizes that fit in one packet, clearly if the NIO interface signals us that data is available to be read, then all the data must be available. Thus the protocol does not need to worry about accumulating network data in buffers. We still do need an object that is tied to each client connection as the reading and writing happen in two distinct parts of the code.&lt;br /&gt;
&lt;br /&gt;
First, after establishing our UDP socket locally on the server, we signal NIO that the socket is ready for reads. When NIO wakes us up - via the select() call - we can immediately read the full request made by the client. At this point, we form our response but do not want to write it back to the network right away, as the kernel buffers may be full and the write may block. So, we store the response on the object attached to the client connection (via the SelectionKey object), signal NIO that we are now ready to write and go back to our select() loop.&lt;br /&gt;
&lt;br /&gt;
Next when NIO wakes us up from the select() call, we can proceed to write. Again since the data fits in one packet, we know that the send() call need not be retried, and all data will be sent.&lt;br /&gt;
&lt;br /&gt;
However, the nature of UDP does not provide the advantages TCP provides in epoll() mode. A UDP server does not provide a separate socket for each new client. Thus the epoll selector always has just the single socket. Each new client sends its datagrams to the single UDP receive buffer of the server.&lt;br /&gt;
&lt;br /&gt;
A threaded server without the use of epoll() might be more advantageous. Each thread could wait on the single server socket, using a receive() call. The kernel will ensure that only one thread wakes up from the receive() call. I hope to use such an implementation and measure both designs.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-5552033308041400129?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/5552033308041400129/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=5552033308041400129" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5552033308041400129?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5552033308041400129?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/06/asynchronous-udp-server-using-java-nio.html" title="Asynchronous UDP server using Java NIO" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-4W78f7H59p0/St5PoQPJYJI/AAAAAAAAAGA/dB0W9ywuFM0/s72-c/nio.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DE8EQX8zeCp7ImA9WhZUF0k.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-6483890612686312847</id><published>2011-06-10T16:00:00.000-07:00</published><updated>2011-06-10T16:00:00.180-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-06-10T16:00:00.180-07:00</app:edited><title>Java splitting an empty string</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-uTUV5k8P0yU/SUgnAftQ0NI/AAAAAAAAAC0/T6uDw6gfnUY/s1600/cheeserjk7.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="155" src="http://2.bp.blogspot.com/-uTUV5k8P0yU/SUgnAftQ0NI/AAAAAAAAAC0/T6uDw6gfnUY/s200/cheeserjk7.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;Splitting an empty string results in an array whose single element is an empty string - not intuitive. The expected result is either a null array or a zero-length array.&lt;br /&gt;
&lt;br /&gt;
Perl:&lt;br /&gt;
&lt;br /&gt;
$$$:~$ perl -e '@x=split(/ /, ""); $s=@x;print "$s\n"'&lt;br /&gt;
0&lt;br /&gt;
&lt;br /&gt;
Python:&lt;br /&gt;
$$$:~$ python -c 'list="".split();l=len(list);print(l)'&lt;br /&gt;
0&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-6483890612686312847?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/6483890612686312847/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=6483890612686312847" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6483890612686312847?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6483890612686312847?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/06/java-splitting-empty-string.html" title="Java splitting an empty string" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-uTUV5k8P0yU/SUgnAftQ0NI/AAAAAAAAAC0/T6uDw6gfnUY/s72-c/cheeserjk7.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;A0cAQHczeCp7ImA9WhZUFkg.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-6943362180833376454</id><published>2011-06-09T15:36:00.000-07:00</published><updated>2011-06-09T15:37:21.980-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-09T15:37:21.980-07:00</app:edited><title>/dev/urandom does not generate correct multi-byte sequences</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" 
imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s1600/bash.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;If you use /dev/urandom with "tr" to generate random strings, you may have a problem if &lt;a href="http://lists.freebsd.org/pipermail/freebsd-bugs/2010-January/038142.html"&gt;your shell uses a multi-byte locale&lt;/a&gt;. Upon encountering illegal bytes, tr will complain with "tr: Illegal byte sequence".&lt;br /&gt;
&lt;br /&gt;
Setting LC_CTYPE=C before tr does the trick:&lt;br /&gt;
&lt;br /&gt;
cat /dev/urandom| LC_CTYPE=C tr -dc 'a-zA-Z0-9'&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-6943362180833376454?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/6943362180833376454/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=6943362180833376454" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6943362180833376454?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6943362180833376454?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/06/devurandom-does-not-generate-correct.html" title="/dev/urandom does not generate correct multi-byte sequences" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-WX8hWIhf8l4/TDkTGvqJ1sI/AAAAAAAAAMM/DiZzzibt3e4/s72-c/bash.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CEcNQ387fip7ImA9WhZWGU8.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-6242907664120197119</id><published>2011-05-20T13:03:00.000-07:00</published><updated>2011-05-20T13:08:12.106-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-20T13:08:12.106-07:00</app:edited><title>Some web servers are in love with 30X redirects</title><content type="html">Here is a URL that redirects via 30X response headers no less than 10 times:&lt;br /&gt;
&lt;br /&gt;
http://join.scoreondemand.com/strack/MTAwNC45LjQ0LjQ0LjI5LjAuMC4wLjA/scoreondemand/64/0/Default.aspx&lt;br /&gt;
&lt;br /&gt;
The evidence:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; color: #000000; background-color: #eee;font-size: 12px;border: 1px dashed #999999;line-height: 14px;padding: 5px; overflow: auto; width: 100%"&gt;&lt;code&gt;mpire@seaxoaff01:~$ curl -I http://join.scoreondemand.com/strack/MTAwNC45LjQ0LjQ0LjI5LjAuMC4wLjA/scoreondemand/64/0/Default.aspx
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:56:12 GMT
Cneonction: close
Location: http://join.scoreondemand.com/track/MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w/Default.aspx?switched=1&amp;amp;strack=0
ScoreTracker: scash04
Content-Type: text/html
Set-Cookie: NSC_tdpsfdbti-obut-80=ffffffff090a1f1e45525d5f4f58455e445a4a423660;Version=1;Max-Age=3600;path=/;httponly

mpire@seaxoaff01:~$ curl -I &amp;quot;http://join.scoreondemand.com/track/MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w/Default.aspx?switched=1&amp;amp;strack=0&amp;quot;
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:56:31 GMT
Set-Cookie: PHPSESSID=rbra31g59vc4me30bbofgei7d1; path=/
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Pragma: no-cache
nnCoection: close
Set-Cookie: nats=MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w; expires=Mon, 30-May-2011 19:56:31 GMT; path=/; domain=scoreondemand.com
Set-Cookie: nats_cookie=No%2BReferring%2BURL; expires=Mon, 30-May-2011 19:56:31 GMT; path=/; domain=scoreondemand.com
Set-Cookie: nats_unique=MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w; expires=Sat, 21-May-2011 19:56:31 GMT; path=/; domain=scoreondemand.com
Set-Cookie: nats_sess=726064a93aca6b9d49d72dd57f477c57; expires=Sun, 28-Aug-2011 19:56:31 GMT; path=/; domain=scoreondemand.com
Location: http://www.scoreondemand.com/Default.aspx?nats=MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w&amp;amp;switched=1&amp;amp;strack=0
ScoreTracker: scash01
Content-Type: text/html
Set-Cookie: NSC_tdpsfdbti-obut-80=ffffffff090a1f1d45525d5f4f58455e445a4a423660;Version=1;Max-Age=3600;path=/;httponly

mpire@seaxoaff01:~$ curl -I &amp;quot;http://www.scoreondemand.com/Default.aspx?nats=MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w&amp;amp;switched=1&amp;amp;strack=0&amp;quot;
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:56:47 GMT
X-AspNet-Version: 2.0.50727
Location: http://join.eboobstore.com/strack/MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w/eboobstore/64/0/apple/
Set-Cookie: ASP.NET_SessionId=xhivgbmef0bqm1uhihz1r455; path=/; HttpOnly
Set-Cookie: SVOD1=UserID=11133628&amp;amp;SessionID=1bM0125818hnmy5CAG9P; expires=Thu, 18-Aug-2011 19:56:47 GMT; path=/
Set-Cookie: NATS=MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w; expires=Thu, 18-Aug-2011 19:56:47 GMT; path=/
Cache-Control: private
Content-Type: text/html; charset=utf-8
Content-Length: 206

mpire@seaxoaff01:~$ curl -I &amp;quot;http://join.eboobstore.com/strack/MTAwNC42NC40Ny40Ny4yOS4wLjAuMC4w/eboobstore/64/0/apple/&amp;quot;
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:57:13 GMT
Cneonction: close
Location: http://join.eboobstore.com/track/MTAwNC42NC41MC41MC4yOS4wLjAuMC4w/apple/?switched=1&amp;amp;strack=0
ScoreTracker: scash04
Content-Type: text/html; charset=UTF-8
Set-Cookie: NSC_tdpsfdbti-obut-80=ffffffff090a1f1e45525d5f4f58455e445a4a423660;Version=1;Max-Age=3600;path=/;httponly

mpire@seaxoaff01:~$ curl -I &amp;quot;http://join.eboobstore.com/track/MTAwNC42NC41MC41MC4yOS4wLjAuMC4w/apple/?switched=1&amp;amp;strack=0&amp;quot;
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:57:39 GMT
Set-Cookie: PHPSESSID=utrmur64derb0n2onaotf08640; path=/
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Pragma: no-cache
nnCoection: close
Set-Cookie: nats=MTAwNC42NC41MC41MC4yOS4wLjAuMC4w; expires=Mon, 30-May-2011 19:57:39 GMT; path=/; domain=eboobstore.com
Set-Cookie: nats_cookie=No%2BReferring%2BURL; expires=Mon, 30-May-2011 19:57:39 GMT; path=/; domain=eboobstore.com
Set-Cookie: nats_unique=MTAwNC42NC41MC41MC4yOS4wLjAuMC4w; expires=Sat, 21-May-2011 19:57:39 GMT; path=/; domain=eboobstore.com
Set-Cookie: nats_sess=a7568ec181d54a6316d6452656565dba; expires=Sun, 28-Aug-2011 19:57:39 GMT; path=/; domain=eboobstore.com
Location: http://www.eboobstore.com/apple/?nats=MTAwNC42NC41MC41MC4yOS4wLjAuMC4w&amp;amp;switched=1&amp;amp;strack=0
ScoreTracker: scash01
Content-Type: text/html; charset=UTF-8
Set-Cookie: NSC_tdpsfdbti-obut-80=ffffffff090a1f1d45525d5f4f58455e445a4a423660;Version=1;Max-Age=3600;path=/;httponly

mpire@seaxoaff01:~$ curl -I &amp;quot;http://www.eboobstore.com/apple/?nats=MTAwNC42NC41MC41MC4yOS4wLjAuMC4w&amp;amp;switched=1&amp;amp;strack=0&amp;quot;
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:58:46 GMT
Location: http://www.eboobstore.com/urlmunge/munger/nats=MTAwNC42NC41MC41MC4yOS4wLjAuMC4w&amp;amp;switched=1&amp;amp;strack=0_URL_apple/
Content-Type: text/html; charset=UTF-8

mpire@seaxoaff01:~$ curl -I &amp;quot;http://www.eboobstore.com/urlmunge/munger/nats=MTAwNC42NC41MC41MC4yOS4wLjAuMC4w&amp;amp;switched=1&amp;amp;strack=0_URL_apple/&amp;quot;
HTTP/1.1 302 Found
Date: Fri, 20 May 2011 19:58:58 GMT
Set-Cookie: PHPSESSID=33hcivt9bgl7o6qv5bpfv7aun4; path=/; domain=eboobstore.com
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Pragma: no-cache
Location: http://www.eboobstore.com/apple
ScoreTracker: web04
Content-Type: text/html; charset=UTF-8

mpire@seaxoaff01:~$ curl -I &amp;quot;http://www.eboobstore.com/apple&amp;quot;
HTTP/1.1 301 Moved Permanently
Date: Fri, 20 May 2011 19:59:10 GMT
Location: http://eboobstore.com/apple/
Content-Type: text/html; charset=UTF-8

mpire@seaxoaff01:~$ curl -I &amp;quot;http://eboobstore.com/apple/&amp;quot;
HTTP/1.1 301 Moved Permanently
Date: Fri, 20 May 2011 19:59:48 GMT
Location: http://www.eboobstore.com/apple/
Content-Type: text/html; charset=UTF-8

mpire@seaxoaff01:~$ curl -I &amp;quot;http://www.eboobstore.com/apple/&amp;quot;
HTTP/1.1 200 OK
Date: Fri, 20 May 2011 20:00:30 GMT
ScoreTracker: web06
Content-Type: text/html; charset=UTF-8

&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-6242907664120197119?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/6242907664120197119/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=6242907664120197119" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6242907664120197119?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/6242907664120197119?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/05/some-web-servers-are-in-love-with-30x.html" title="Some web servers are in love with 30X redirects" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DU4CR3w-cCp7ImA9WhZTGUw.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-451596702167318356</id><published>2011-03-23T14:58:00.000-07:00</published><updated>2011-03-23T15:06:06.258-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-23T15:06:06.258-07:00</app:edited><title>Insanely compressed html files</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="https://lh4.googleusercontent.com/-jf8rhlmwb6U/TMNwmUDbxiI/AAAAAAAAANE/Ubtmb_S-uWo/s1600/gzip.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" 
src="https://lh4.googleusercontent.com/-jf8rhlmwb6U/TMNwmUDbxiI/AAAAAAAAANE/Ubtmb_S-uWo/s1600/gzip.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;Today, I discovered a URL that sent some insanely compressed content. The compressed content was sent by the server using Content-Encoding: gzip and Transfer-encoding: chunked. The compressed size of the content was 2,921,925 bytes and it decompressed to 1,004,263,982 bytes. The decompressed content was roughly 344 times the size of the compressed content.&lt;br /&gt;
&lt;br /&gt;
This caused certain things to go wrong in the production process. I had set a limit of a few Megs on all fetches and had assumed that a single fetch could not be more than a few Megs. This was the first time I have seen such a huge decompression rate. This caused a subsequent file mapping to fail due to inadequate memory.&lt;br /&gt;
&lt;br /&gt;
The downloaded content suggested why this would compress so well. The URL was http://www.jeltel.com.au/news.php There seems to be a dynamically generated part on this URL. If you examine its source, you will see a marker like this:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;&amp;lt;!-- JELTEL_CONTENT_BEGIN --&amp;gt;
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
Content after that seems dynamically generated. You will find markup like this:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;&amp;lt;h2&amp;gt;&amp;lt;/h2&amp;gt; - &amp;lt;br/&amp;gt;&amp;lt;h4&amp;gt;... &amp;lt;a href=""&amp;gt;read more&amp;lt;/a&amp;gt;&amp;lt;/h4&amp;gt;
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
On this particular instance, there was an unusually large amount of fake content generated. The downloaded file had just 33 lines, but the last long line was a huge repeating pattern of :&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;&amp;lt;a href=""&amp;gt;read more&amp;lt;/a&amp;gt;&amp;lt;/h4&amp;gt;&amp;lt;br/&amp;gt;&amp;lt;br/&amp;gt;&amp;lt;h2&amp;gt;&amp;lt;/h2&amp;gt; - &amp;lt;br/&amp;gt;&amp;lt;h4&amp;gt;... 
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
This would of course compress well.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-451596702167318356?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/451596702167318356/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=451596702167318356" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/451596702167318356?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/451596702167318356?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/03/insanely-compressed-html-files.html" title="Insanely compressed html files" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://lh4.googleusercontent.com/-jf8rhlmwb6U/TMNwmUDbxiI/AAAAAAAAANE/Ubtmb_S-uWo/s72-c/gzip.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CkEGQnw6cCp7ImA9WhZTE08.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-8122097066920833098</id><published>2011-03-16T16:48:00.000-07:00</published><updated>2011-03-16T17:10:23.218-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-16T17:10:23.218-07:00</app:edited><title>Linux sort : bug with , separator and confusing period?</title><content type="html">&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida 
Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;user@host:~/$ echo -e "alan,20,3,0\ngeorge,5,0,0\nalice,3,5,0\ndora,4,0.9,5" | sort -n -k 2 -t ,
dora,4,0.9,5
alice,3,5,0
george,5,0,0
alan,20,3,0&lt;/code&gt;user@host:~/$ &lt;/pre&gt;&lt;br /&gt;
The line with "dora" as the first term should be printed after "alice" and before "george", as we are asking sort to sort on the second column. The 3rd column value of "0.9" seems to confuse sort on this.&lt;br /&gt;
&lt;br /&gt;
This is not a bug in sort but due to the &lt;a href="http://www.gnu.org/software/coreutils/faq/"&gt;locale setting&lt;/a&gt; on different operating systems.&lt;br /&gt;
&lt;br /&gt;
On the above link, look for "Sort does not sort in normal order".&lt;br /&gt;
&lt;br /&gt;
Setting LC_ALL=C sorts as expected:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; color: #000000; background-color: #eee;font-size: 12px;border: 1px dashed #999999;line-height: 14px;padding: 5px; overflow: auto; width: 100%"&gt;&lt;code&gt;user@host:~/$ echo -e &amp;quot;alan,20,3,0\ngeorge,5,0,0\nalice,3,5,0\ndora,4,0.9,5&amp;quot; &amp;#124; LC_ALL=C sort -n -k 2 -t ,
alice,3,5,0
dora,4,0.9,5
george,5,0,0
alan,20,3,0
user@host:~/$ 

&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-8122097066920833098?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/8122097066920833098/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=8122097066920833098" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8122097066920833098?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8122097066920833098?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/03/linux-sort-bug-with-separator-and.html" title="Linux sort : bug with , separator and confusing period?" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>3</thr:total></entry><entry gd:etag="W/&quot;AkMBQ3k6fip7ImA9Wx9aGEo.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-8924959807601485706</id><published>2011-03-11T13:20:00.000-08:00</published><updated>2011-03-11T13:20:52.716-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-11T13:20:52.716-08:00</app:edited><title>A useful, scriptable way to remove offending known_hosts keys</title><content type="html">You can use &lt;a href="http://jeremy.baumont.org/blog/?p=9"&gt;ssh-keygen -R&lt;/a&gt; to remove invalid keys from the known_hosts file. This becomes useful if the host names are hashed in the file. 
The default in Ubuntu/Lucid is to hash the host names.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-8924959807601485706?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/8924959807601485706/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=8924959807601485706" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8924959807601485706?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8924959807601485706?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/03/useful-scriptable-way-to-remove.html" title="A useful, scriptable way to remove offending known_hosts keys" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;D0cFSX09fSp7ImA9Wx9bFEw.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-8217762207361448683</id><published>2011-02-22T14:41:00.000-08:00</published><updated>2011-02-22T14:50:18.365-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-02-22T14:50:18.365-08:00</app:edited><title>Java code to tail -N a text file</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-4W78f7H59p0/St5PoQPJYJI/AAAAAAAAAGA/dB0W9ywuFM0/s1600/nio.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" 
src="http://3.bp.blogspot.com/-4W78f7H59p0/St5PoQPJYJI/AAAAAAAAAGA/dB0W9ywuFM0/s1600/nio.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;This code allows you to go over the last &lt;i&gt;N&lt;/i&gt; lines of a specified  file. It has a "head" method, which simply allows you to go over the  file from the beginning.&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;import java.io.*;
import java.nio.channels.FileChannel;
import java.nio.CharBuffer;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.NoSuchElementException;&lt;/code&gt;

public class MMapFile {

&amp;nbsp;&amp;nbsp;&amp;nbsp; public class MMapIterator implements Iterator&amp;lt;String&amp;gt; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; private int offset;

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; public MMapIterator(int offset) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; this.offset = offset;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; public boolean hasNext() {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return offset &amp;lt; cb.limit();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; public String next() {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ByteArrayOutputStream sb = new ByteArrayOutputStream();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (offset &amp;gt;= cb.limit())
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; throw new NoSuchElementException();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for (; offset &amp;lt; cb.limit(); offset++) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; byte c = (cb.get(offset));
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (c == '\n') {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; offset++;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; break;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (c != '\r') {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sb.write(c);
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; try {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return sb.toString("UTF-8");
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; } catch (UnsupportedEncodingException e) {}
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return sb.toString();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; public void remove() {

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp; }


&amp;nbsp;&amp;nbsp;&amp;nbsp; private ByteBuffer cb;
&amp;nbsp;&amp;nbsp;&amp;nbsp; long size;
&amp;nbsp;&amp;nbsp;&amp;nbsp; private long numLines = -1;
&amp;nbsp;&amp;nbsp;&amp;nbsp; public MMapFile(String file) throws FileNotFoundException, IOException {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; FileChannel fc = new FileInputStream(new File(file)).getChannel();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; size = fc.size();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp; public long getNumLines() {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (numLines != -1) return numLines;&amp;nbsp; //cache number of lines
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; long cnt = 0;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for (int i=0; i &amp;lt;size; i++) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (cb.get(i) == '\n')
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt++;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; numLines = cnt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return cnt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp; public Iterator&amp;lt;String&amp;gt; tail(long lines) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; long cnt=0;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; long i=0;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for (i=size-1; i&amp;gt;=0; i--) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (cb.get((int)i) == '\n') {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt++;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (cnt == lines+1)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; break;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return new MMapIterator((int)i+1);
&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp; public Iterator&amp;lt;String&amp;gt; head() {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; return new MMapIterator(0);
&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp; static public void main(String[] args) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; try {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Iterator&amp;lt;String&amp;gt; it = new MMapFile("/test.txt").head();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; while (it.hasNext()) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; System.out.println(it.next());
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; } catch (Exception e) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; System.out.println();

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; try {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Iterator&amp;lt;String&amp;gt; it = new MMapFile("/test.txt").tail(2);
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; while (it.hasNext()) {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; System.out.println(it.next());
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; } catch (Exception e) {

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; System.out.println();

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; try {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; System.out.println("lines: "+new MMapFile("/test.txt").getNumLines());
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; } catch (Exception e) {

&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp;&amp;nbsp; }

}
&lt;/pre&gt;&lt;br /&gt;
The technique is simply to map the file into memory using java.nio.channels.FileChannel.map from the Java NIO library and then read the file data directly from the mapped buffer, as if it were an in-memory byte array.&lt;br /&gt;
&lt;br /&gt;
For the "tail" function, we walk backwards through the mapped bytes, counting newlines. The MMapIterator class then conveniently provides a way to iterate over the lines once we have found the starting offset.&lt;br /&gt;
&lt;br /&gt;
There is a point where care must be taken in the MMapIterator.next() implementation. That is making sure that bytes are converted to the appropriate string encoding. We use "UTF-8" but if you are dealing with a different encoding in the input file, this should be changed.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-8217762207361448683?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/8217762207361448683/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=8217762207361448683" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8217762207361448683?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8217762207361448683?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/02/java-code-to-tail-n-text-file.html" title="Java code to tail -N a text file" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-4W78f7H59p0/St5PoQPJYJI/AAAAAAAAAGA/dB0W9ywuFM0/s72-c/nio.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CEYERXcyeCp7ImA9Wx9bE04.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-5805064846925792963</id><published>2011-02-21T15:45:00.000-08:00</published><updated>2011-02-21T15:48:24.990-08:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2011-02-21T15:48:24.990-08:00</app:edited><title>Java : all 8-bit data cannot be cast to char type</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-uTUV5k8P0yU/SUgnAftQ0NI/AAAAAAAAAC0/T6uDw6gfnUY/s1600/cheeserjk7.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="155" src="http://2.bp.blogspot.com/-uTUV5k8P0yU/SUgnAftQ0NI/AAAAAAAAAC0/T6uDw6gfnUY/s200/cheeserjk7.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;If you have a byte and want to make a String, a simple cast to a char will only work if you are dealing with 7-bit ASCII. If that byte could be extended ASCII, the cast will not encode to the correct character code.&lt;br /&gt;
&lt;br /&gt;
This is the sure way to get all 8-bit characters represented in Strings:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;new String(new byte[]{c}, "ISO-8859-1")&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
This is to do with the default encoding used by the JVM which is likely not "ISO-8859-1". On Linux, it is likely to be "UTF-8" and it is "MacRoman" on Snow Leopard (Mac).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-5805064846925792963?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/5805064846925792963/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=5805064846925792963" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5805064846925792963?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5805064846925792963?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/02/java-all-8-bit-data-cannot-be-cast-to.html" title="Java : all 8-bit data cannot be cast to char type" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-uTUV5k8P0yU/SUgnAftQ0NI/AAAAAAAAAC0/T6uDw6gfnUY/s72-c/cheeserjk7.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;AkIFSXwyeSp7ImA9WhZaFEs.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-4836101236966765139</id><published>2011-02-18T14:40:00.000-08:00</published><updated>2011-06-30T14:15:18.291-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-30T14:15:18.291-07:00</app:edited><title>Running HBase - some issues to be aware of</title><content 
type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://hbase.apache.org/images/hbase_logo_med.gif" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://hbase.apache.org/images/hbase_logo_med.gif" /&gt;&lt;/a&gt;&lt;/div&gt;I want to take a moment to note down a few issues I had with setting up a distributed HBase environment in case it helps someone else.&lt;br /&gt;
&lt;br /&gt;
First, I set up Hadoop from the &lt;a href="http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-append/"&gt;0.20 append branch&lt;/a&gt; as described &lt;a href="http://hbase.apache.org/notsoquick.html"&gt;here&lt;/a&gt;. I used two machines where the first machine was the master and both machines were used as slaves. This is a &lt;a href="http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-multi-node-cluster/"&gt;guide I used&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; color: #000000; background-color: #eee;font-size: 12px;border: 1px dashed #999999;line-height: 14px;padding: 5px; overflow: auto; width: 100%"&gt;&lt;code&gt;mkdir ~/hadoop-0.20-append
cd ~/hadoop-0.20-append
svn co https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20-append/ .
ant jar
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
At the end of this, you will have the Hadoop jar file in the ~/hadoop-0.20-append/build directory.&lt;br /&gt;
&lt;br /&gt;
The first mistake I made was to use the IP address of the name node for fs.default.name in the conf/core-site.xml file. There is a bug in the Hadoop 0.20 release that &lt;a href="https://issues.apache.org/jira/browse/HADOOP-5191"&gt;prevents the use of an IP address&lt;/a&gt; in this context.&lt;br /&gt;
&lt;br /&gt;
Interestingly, the basic HDFS shell commands (e.g. get, ls) worked with the IP address being used for fs.default.name. The problem only cropped up after I set up HBase and tried to use the HBase shell.&lt;br /&gt;
&lt;br /&gt;
To set up HBase, I followed the steps outlined &lt;a href="http://hbase.apache.org/notsoquick.html"&gt;here&lt;/a&gt;. &lt;br /&gt;
&lt;br /&gt;
Before I discovered the IP related issue, I encountered an error that showed I was not following the steps faithfully enough. While HBase ships with a version of Hadoop from presumably the 0.20 append branch, it was not identical to the version I built from the 0.20 append branch. As stated in the documentation, I then copied the Hadoop jar I built over the jar shipped with HBase.&lt;br /&gt;
&lt;br /&gt;
Next, I ran into the IP issue. Generally, changing fs.default.name and restarting the Hadoop cluster is not enough in such cases, as certain data has already been written to the HDFS name and data directories and any mismatch triggers further "namespace mismatch" errors. Thus, before changing fs.default.name, I removed the directories specified by dfs.name.dir and dfs.data.dir. In the case of dfs.data.dir, I had to remove it on both slaves. Then I changed the IP over to the machine name, formatted the name node and restarted the Hadoop cluster.&lt;br /&gt;
&lt;br /&gt;
It still was not over. This time there was the issue of these machine names not being in DNS. They were simply names assigned to the machines and were not registered in the domain name system that the machines use to communicate with one another. Thus, I edited the /etc/hosts file on both machines and added appropriate entries to allow each box to resolve the machine names to IP addresses. &lt;br /&gt;
&lt;br /&gt;
After that, I could create a table and insert rows into it as explained &lt;a href="http://hbase.apache.org/quickstart.html#shell_exercises"&gt;here&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
The next step was to programmatically create a table and add rows to it. I adapted the example from &lt;a href="http://bitsofinfo.wordpress.com/2009/10/12/hbase-examples-os-x-maven-howto/"&gt;here&lt;/a&gt;. By default, the programming API finds the HBase configuration files via the classpath. Thus, I added the path to the hbase/conf directory to the classpath to get the program to work. Alternatively, you could use &lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;org.apache.hadoop.hbase.HBaseConfiguration.addHbaseResources(org.apache.hadoop.conf.Configuration) &lt;/code&gt;&lt;/pre&gt;which would have you use &lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;org.apache.hadoop.conf.Configuration.addResources(org.apache.hadoop.fs.Path)&lt;/code&gt;&lt;/pre&gt;to add the paths to individual configuration files.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-4836101236966765139?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/4836101236966765139/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=4836101236966765139" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/4836101236966765139?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/4836101236966765139?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/02/running-hbase-some-issues-to-be-aware.html" title="Running HBase - some issues to be aware of" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DkMAQXY8eCp7ImA9Wx9UFEg.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-2934948525309104577</id><published>2011-02-11T12:00:00.000-08:00</published><updated>2011-02-11T12:00:40.870-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-02-11T12:00:40.870-08:00</app:edited><title>Debugging a most curious hang 
/ spin</title><content type="html">Here is the stack trace from a JVM that shows 1/4 core of the machine being 100% utilized.&lt;br /&gt;
&lt;br /&gt;
2011-02-11 14:48:05&lt;br /&gt;
Full thread dump Java HotSpot(TM) Server VM (19.0-b09 mixed mode):&lt;br /&gt;
&lt;br /&gt;
"pool-23-thread-4" prio=10 tid=0x29e81800 nid=0x50dd waiting for monitor entry [0x2b83f000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: BLOCKED (on object monitor)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.TagType.getTagAt(TagType.java:667)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Tag.parseAllgetNextTag(Tag.java:631)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Tag.parseAll(Tag.java:607)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Source.fullSequentialParse(Source.java:609)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.handleRedirects(HTMLTagExtractorUsingJerichoParser.java:217)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.parse(HTMLTagExtractorUsingJerichoParser.java:51)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.add(LinksProcessor.java:222)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.run(LinksProcessor.java:77)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Thread.run(Thread.java:662)&lt;br /&gt;
&lt;br /&gt;
"pool-23-thread-3" prio=10 tid=0x29e7f400 nid=0x50dc waiting on condition [0x2c355000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: RUNNABLE&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.Arrays.copyOfRange(Arrays.java:3209)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.String.&amp;lt;init&amp;gt;(String.java:215)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.StringBuilder.toString(StringBuilder.java:430)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTag.getStartDelimiter(StartTag.java:600)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTag.getNext(StartTag.java:660)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTag.getEndTag(StartTag.java:777)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTag.getEndTagInternal(StartTag.java:566)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTag.getElement(StartTag.java:167)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:327)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Source.getChildElements(Source.java:721)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getParentElement(Element.java:282)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.getRedirectURL(HTMLTagExtractorUsingJerichoParser.java:232)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.handleRedirects(HTMLTagExtractorUsingJerichoParser.java:219)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.parse(HTMLTagExtractorUsingJerichoParser.java:51)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.add(LinksProcessor.java:222)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.run(LinksProcessor.java:77)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Thread.run(Thread.java:662)&lt;br /&gt;
&lt;br /&gt;
"pool-23-thread-2" prio=10 tid=0x29e82400 nid=0x50db waiting for monitor entry [0x28cf6000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: BLOCKED (on object monitor)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.ArrayList.&amp;lt;init&amp;gt;(ArrayList.java:112)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.ArrayList.&amp;lt;init&amp;gt;(ArrayList.java:119)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:309)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getChildElements(Element.java:335)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Source.getChildElements(Source.java:721)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Element.getParentElement(Element.java:282)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.getRedirectURL(HTMLTagExtractorUsingJerichoParser.java:232)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.handleRedirects(HTMLTagExtractorUsingJerichoParser.java:219)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.parse(HTMLTagExtractorUsingJerichoParser.java:51)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.add(LinksProcessor.java:222)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.run(LinksProcessor.java:77)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Thread.run(Thread.java:662)&lt;br /&gt;
&lt;br /&gt;
"pool-23-thread-1" prio=10 tid=0x29e7cc00 nid=0x50da waiting for monitor entry [0x2a8e8000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: BLOCKED (on object monitor)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.LinkedList.&amp;lt;init&amp;gt;(LinkedList.java:78)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Attributes.construct(Attributes.java:109)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Attributes.construct(Attributes.java:78)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTagType.parseAttributes(StartTagType.java:672)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.StartTagTypeGenericImplementation.constructTagAt(StartTagTypeGenericImplementation.java:132)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.TagType.getTagAt(TagType.java:674)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Tag.parseAllgetNextTag(Tag.java:631)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Tag.parseAll(Tag.java:607)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at net.htmlparser.jericho.Source.fullSequentialParse(Source.java:609)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.handleRedirects(HTMLTagExtractorUsingJerichoParser.java:217)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at HTMLTagExtractorUsingJerichoParser.parse(HTMLTagExtractorUsingJerichoParser.java:51)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.add(LinksProcessor.java:222)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at LinksProcessor.run(LinksProcessor.java:77)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Thread.run(Thread.java:662)&lt;br /&gt;
&lt;br /&gt;
"Low Memory Detector" daemon prio=10 tid=0x2efac400 nid=0x47dd runnable [0x00000000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: RUNNABLE&lt;br /&gt;
&lt;br /&gt;
"CompilerThread1" daemon prio=10 tid=0x2efaa000 nid=0x47dc waiting on condition [0x00000000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: RUNNABLE&lt;br /&gt;
&lt;br /&gt;
"CompilerThread0" daemon prio=10 tid=0x2efa8000 nid=0x47db waiting on condition [0x00000000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: RUNNABLE&lt;br /&gt;
&lt;br /&gt;
"Signal Dispatcher" daemon prio=10 tid=0x2efa6800 nid=0x47da waiting on condition [0x00000000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: RUNNABLE&lt;br /&gt;
&lt;br /&gt;
"Finalizer" daemon prio=10 tid=0x2ef96400 nid=0x47d9 in Object.wait() [0x2ec65000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: WAITING (on object monitor)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Object.wait(Native Method)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - waiting on &amp;lt;0x35c53470&amp;gt; (a java.lang.ref.ReferenceQueue$Lock)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:118)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c53470&amp;gt; (a java.lang.ref.ReferenceQueue$Lock)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:134)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:159)&lt;br /&gt;
&lt;br /&gt;
"Reference Handler" daemon prio=10 tid=0x2ef94c00 nid=0x47d8 in Object.wait() [0x2ece6000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: WAITING (on object monitor)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Object.wait(Native Method)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - waiting on &amp;lt;0x35c53448&amp;gt; (a java.lang.ref.Reference$Lock)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.Object.wait(Object.java:485)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:116)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c53448&amp;gt; (a java.lang.ref.Reference$Lock)&lt;br /&gt;
&lt;br /&gt;
"main" prio=10 tid=0x091c2800 nid=0x47d2 waiting for monitor entry [0xb6b22000]&lt;br /&gt;
&amp;nbsp;&amp;nbsp; java.lang.Thread.State: BLOCKED (on object monitor)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.TermBuffer.toTerm(TermBuffer.java:122)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.SegmentTermEnum.term(SegmentTermEnum.java:169)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.TermInfosReader.get(TermInfosReader.java:233)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.TermInfosReader.get(TermInfosReader.java:179)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.SegmentTermDocs.seek(SegmentTermDocs.java:57)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.DocumentsWriter.applyDeletes(DocumentsWriter.java:1002)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c98398&amp;gt; (a org.apache.lucene.index.DocumentsWriter)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.DocumentsWriter.applyDeletes(DocumentsWriter.java:958)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c98398&amp;gt; (a org.apache.lucene.index.DocumentsWriter)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.applyDeletes(IndexWriter.java:5207)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c98198&amp;gt; (a org.apache.lucene.index.IndexWriter)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.doFlushInternal(IndexWriter.java:4370)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c98198&amp;gt; (a org.apache.lucene.index.IndexWriter)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.doFlush(IndexWriter.java:4209)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c98198&amp;gt; (a org.apache.lucene.index.IndexWriter)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.flush(IndexWriter.java:4200)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.prepareCommit(IndexWriter.java:4078)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.commit(IndexWriter.java:4151)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; - locked &amp;lt;0x35c99188&amp;gt; (a java.lang.Object)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at org.apache.lucene.index.IndexWriter.commit(IndexWriter.java:4124)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at IndexerHelper.indexLinks(IndexerHelper.java:137)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at indexLinks(Gauntlet.java:330)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at battle(Gauntlet.java:353)&lt;br /&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp; at main(Gauntlet.java:435)&lt;br /&gt;
&lt;br /&gt;
"VM Thread" prio=10 tid=0x2ef92400 nid=0x47d7 runnable &lt;br /&gt;
&lt;br /&gt;
"GC task thread#0 (ParallelGC)" prio=10 tid=0x091c9c00 nid=0x47d3 runnable &lt;br /&gt;
&lt;br /&gt;
"GC task thread#1 (ParallelGC)" prio=10 tid=0x091cb400 nid=0x47d4 runnable &lt;br /&gt;
&lt;br /&gt;
"GC task thread#2 (ParallelGC)" prio=10 tid=0x091cc800 nid=0x47d5 runnable &lt;br /&gt;
&lt;br /&gt;
"GC task thread#3 (ParallelGC)" prio=10 tid=0x091ce000 nid=0x47d6 runnable &lt;br /&gt;
&lt;br /&gt;
"VM Periodic Task Thread" prio=10 tid=0x091dd000 nid=0x47de waiting on condition &lt;br /&gt;
&lt;br /&gt;
The main thread has issued a cancellation to its children and proceeded to commit the Lucene buffers the child threads have been filling. However, none of the child threads seem to be in the part of the code where Lucene buffers are being modified. And the Lucene commit() does not finish. It is moving along as I can see the index being updated in the file system, but for some reason it is spinning.&lt;/init&gt;&lt;/init&gt;&lt;/init&gt;&lt;/init&gt;&lt;br /&gt;
&lt;br /&gt;
The other 4 child threads seem to be spinning. Somehow, not all 4 cores are being used; just one core is 100% utilized.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;top - 15:00:19 up 22 days, 23:49,&amp;nbsp; 2 users,&amp;nbsp; load average: 1.00, 1.00, 0.94
Tasks: 121 total,&amp;nbsp;&amp;nbsp; 1 running, 120 sleeping,&amp;nbsp;&amp;nbsp; 0 stopped,&amp;nbsp;&amp;nbsp; 0 zombie
Cpu0&amp;nbsp; :&amp;nbsp; 0.0%us,&amp;nbsp; 0.0%sy,&amp;nbsp; 0.0%ni,100.0%id,&amp;nbsp; 0.0%wa,&amp;nbsp; 0.0%hi,&amp;nbsp; 0.0%si,&amp;nbsp; 0.0%st
Cpu1&amp;nbsp; :&amp;nbsp; 0.0%us,&amp;nbsp; 0.0%sy,&amp;nbsp; 0.0%ni,100.0%id,&amp;nbsp; 0.0%wa,&amp;nbsp; 0.0%hi,&amp;nbsp; 0.0%si,&amp;nbsp; 0.0%st
Cpu2&amp;nbsp; :&amp;nbsp; 0.3%us,&amp;nbsp; 0.0%sy,&amp;nbsp; 0.0%ni, 99.7%id,&amp;nbsp; 0.0%wa,&amp;nbsp; 0.0%hi,&amp;nbsp; 0.0%si,&amp;nbsp; 0.0%st
Cpu3&amp;nbsp; :100.0%us,&amp;nbsp; 0.0%sy,&amp;nbsp; 0.0%ni,&amp;nbsp; 0.0%id,&amp;nbsp; 0.0%wa,&amp;nbsp; 0.0%hi,&amp;nbsp; 0.0%si,&amp;nbsp; 0.0%st
Mem:&amp;nbsp;&amp;nbsp; 3872764k total,&amp;nbsp; 3791916k used,&amp;nbsp;&amp;nbsp;&amp;nbsp; 80848k free,&amp;nbsp;&amp;nbsp; 121416k buffers
Swap:&amp;nbsp; 3906552k total,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 9192k used,&amp;nbsp; 3897360k free,&amp;nbsp; 1228492k cached

&amp;nbsp; PID USER&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; PR&amp;nbsp; NI&amp;nbsp; VIRT&amp;nbsp; RES&amp;nbsp; SHR S %CPU %MEM&amp;nbsp;&amp;nbsp;&amp;nbsp; TIME+&amp;nbsp; COMMAND&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
18385 user &amp;nbsp;&amp;nbsp;&amp;nbsp; 20&amp;nbsp;&amp;nbsp; 0 2388m 2.2g&amp;nbsp; 10m S&amp;nbsp; 100 58.8&amp;nbsp;&amp;nbsp; 2225:32 java -Xmx2G -Xss512k ...
26819 user &amp;nbsp;&amp;nbsp;&amp;nbsp; 20&amp;nbsp;&amp;nbsp; 0&amp;nbsp; 2548 1196&amp;nbsp; 904 R&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp; 0.0&amp;nbsp;&amp;nbsp; 0:00.09 top&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-2934948525309104577?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/2934948525309104577/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=2934948525309104577" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/2934948525309104577?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/2934948525309104577?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/02/debugging-most-curious-hang-spin.html" title="Debugging a most curious hang / spin" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" 
src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DkcERHgzeyp7ImA9Wx9VFk0.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-2316285274627615444</id><published>2011-02-01T15:46:00.000-08:00</published><updated>2011-02-01T15:46:45.683-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-02-01T15:46:45.683-08:00</app:edited><title>beware of using Lucene NIOFSDirectory from a thread pool</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_ePuWmGHSaXg/St5PoQPJYJI/AAAAAAAAAGA/frR2BzQO46U/s1600/nio.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/St5PoQPJYJI/AAAAAAAAAGA/frR2BzQO46U/s1600/nio.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;NIOFSDirectory does not handle a Thread.interrupt() well. If interrupted this way during I/O, it is known to throw a ClosedByInterruptException.&lt;br /&gt;
&lt;br /&gt;
I was using a thread pool (from the java.util.concurrent package) and noticed that terminating the thread pool could result in the ClosedByInterruptException being thrown.&lt;br /&gt;
&lt;br /&gt;
The warning is provided in the &lt;a href="http://lucene.apache.org/java/3_0_3/api/core/org/apache/lucene/store/NIOFSDirectory.html"&gt;new documentation&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
You would still be ok if you could wait for the thread pool to finish its tasks. You could use ExecutorService.shutdown() followed by ExecutorService.awaitTermination() and these calls would not cause the concurrent package to interrupt the NIOFSDirectory implementation.&lt;br /&gt;
&lt;br /&gt;
The problem crops up if you have to resort to a ExecutorService.shutdownNow() as that would try canceling the already queued up tasks, which would end up interrupting the NIOFSDirectory code.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-2316285274627615444?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/2316285274627615444/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=2316285274627615444" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/2316285274627615444?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/2316285274627615444?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/02/beware-of-using-lucene-niofsdirectory.html" title="beware of using Lucene NIOFSDirectory from a thread pool" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_ePuWmGHSaXg/St5PoQPJYJI/AAAAAAAAAGA/frR2BzQO46U/s72-c/nio.jpg" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CEYASH0_cSp7ImA9Wx9XGEQ.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-7257322804099566742</id><published>2011-01-12T20:15:00.001-08:00</published><updated>2011-01-12T20:15:49.349-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-01-12T20:15:49.349-08:00</app:edited><title>bad gzip data from web servers</title><content 
type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_ePuWmGHSaXg/TS58aTWwrQI/AAAAAAAAAOQ/QLRIC8AfESA/s1600/Screen+shot+2011-01-12+at+8.14.12+PM.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="316" src="http://4.bp.blogspot.com/_ePuWmGHSaXg/TS58aTWwrQI/AAAAAAAAAOQ/QLRIC8AfESA/s320/Screen+shot+2011-01-12+at+8.14.12+PM.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-7257322804099566742?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/7257322804099566742/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=7257322804099566742" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/7257322804099566742?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/7257322804099566742?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2011/01/bad-gzip-data-from-web-servers.html" title="bad gzip data from web servers" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_ePuWmGHSaXg/TS58aTWwrQI/AAAAAAAAAOQ/QLRIC8AfESA/s72-c/Screen+shot+2011-01-12+at+8.14.12+PM.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry 
gd:etag="W/&quot;C0cARH0_fCp7ImA9Wx9QFks.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-3689651061740514694</id><published>2010-12-29T14:08:00.000-08:00</published><updated>2010-12-29T14:17:25.344-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-12-29T14:17:25.344-08:00</app:edited><title>bash one liner : find process start time</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_ePuWmGHSaXg/TDkTGvqJ1sI/AAAAAAAAAMM/6pH6HmblNgI/s1600/bash.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/_ePuWmGHSaXg/TDkTGvqJ1sI/AAAAAAAAAMM/6pH6HmblNgI/s1600/bash.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;Sometimes you want to find the start time of a process. You might want to check the start time of a particular process running on many Linux servers. Rather than logging into each machine, we can use password-less ssh to do this from one machine. But first, we need to craft the bash one liner to do this on one particular machine.&lt;br /&gt;
&lt;br /&gt;
Say you want to find out the start time of a process called 'foobar'. This is what you could do:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;ps auxww | grep foobar | grep -v '/bin/sh' | grep -v grep | tr -s '\t' ' ' | cut -f 9 -d ' '
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
Notice I use "grep -v" to eliminate certain processes that are not relevant. I omit the process that starts foobar ("/bin/sh"). I also omit the "grep" command we are using from the output. If your process is not started explicitly by the shell, you need not do the former, but filtering out "grep" is always useful.&lt;br /&gt;
&lt;br /&gt;
The interesting parts are toward the end of the command line. We are looking for the 9th column, which has the "start time" of the process. However, since the "ps" output may have multiple whitespace characters separating the columns, we need to collapse each run of them into a single separator. Here I have chosen to use the "tr" command to convert tabs to spaces and squeeze repeated spaces into a single space, so that "cut" can split on a single space.&lt;br /&gt;
&lt;br /&gt;
Now that we have this handy command, suppose you are tasked with checking the process start time across a dozen or more machines. It is simple enough to wrap the command in a nice one-liner bash loop:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;for m in host1 host2 host3 host4 ; do echo $m; ssh $m "ps auxww | grep foobar | grep -v '/bin/sh' | grep -v grep | tr -s '\t' ' ' | cut -f 9 -d ' '"  ; done
&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-3689651061740514694?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/3689651061740514694/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=3689651061740514694" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/3689651061740514694?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/3689651061740514694?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2010/12/bash-one-liner-find-process-start-time.html" title="bash one liner : find process start time" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_ePuWmGHSaXg/TDkTGvqJ1sI/AAAAAAAAAMM/6pH6HmblNgI/s72-c/bash.jpg" height="72" width="72" /><thr:total>1</thr:total></entry><entry gd:etag="W/&quot;CEQERH07eCp7ImA9Wx9RGEo.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-5428088857843601188</id><published>2010-12-20T11:03:00.000-08:00</published><updated>2010-12-20T11:11:45.300-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-12-20T11:11:45.300-08:00</app:edited><title>python : don't  use sys.exit() inside signal handlers</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://1.bp.blogspot.com/_ePuWmGHSaXg/TQ-qaDESIZI/AAAAAAAAAOE/M_SAFYf7HbA/s1600/python-logo.gif" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/_ePuWmGHSaXg/TQ-qaDESIZI/AAAAAAAAAOE/M_SAFYf7HbA/s1600/python-logo.gif" /&gt;&lt;/a&gt;&lt;/div&gt;It is common to want to exit the program on handling a kill signal. But you should probably not use the standard sys.exit() function for this. Instead use the os._exit() function.&lt;br /&gt;
&lt;br /&gt;
The reason is that Python implements sys.exit() by raising an exception (SystemExit) in the stack frame that was executing at the time the kill signal was received by the interpreter. If the kill signal was received inside a try/except block with a bare "except:" clause, which also catches SystemExit, control will be given back to this block, and this is probably not what you intended.&lt;br /&gt;
&lt;br /&gt;
This happened to me on an automated script last night, and since I wasn't aware of this feature of sys.exit(), it puzzled me a bit. The logs showed that the script was stopping, but then it kept continuing from the point where the kill interrupted it.&lt;br /&gt;
&lt;br /&gt;
Here is the relevant part of the log:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;running update table set somedate="2010-12-20 10:34:27" where id=4329
running update table set somedate="2010-12-20 10:34:27" where id=4330
Stopping as requested..
commiting mysql buffers
stopped
failed: update table set somedate="2010-12-20 10:34:27" where id=4330
running update table set somedate="2010-12-20 10:34:27" where id=4346
failed: update table set somedate="2010-12-20 10:34:27" where id=4346

&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
Notice how the script just carried on from the point of interruption, and how everything fails after the failed stop. The failures are due to the cleanup done in the signal handler: the db connection has been closed.&lt;br /&gt;
&lt;br /&gt;
Here is the stack trace at the point where the signal was received (I could get this by doing another "kill", as the try/except logic was particularly long and the script was still stuck there; you might not be so lucky!):&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;Traceback (most recent call last):
  File "/path/to/script.py", line 165, in &amp;lt;module&amp;gt;
    exec_retry(cursor,mysql,1)
  File "/path/to/script.py", line 72, in exec_retry
    time.sleep(secs)
  File "/path/to/script.py", line 59, in kill_handler
    conn.commit()
_mysql_exceptions.OperationalError: (2006, 'MySQL server has gone away')

&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
This is the code where the signal was received, specifically within the time.sleep() call:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;def exec_retry(cursor, cmd, secs):
    retries=0
    while retries&amp;lt;2:
        try:
            return cursor.execute(cmd)
        except:
            retries+=1
            time.sleep(secs)
    print "failed: %s" % cmd
    return 0


&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-5428088857843601188?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/5428088857843601188/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=5428088857843601188" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5428088857843601188?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/5428088857843601188?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2010/12/python-dont-use-sysexit-inside-signal.html" title="python : don't  use sys.exit() inside signal handlers" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_ePuWmGHSaXg/TQ-qaDESIZI/AAAAAAAAAOE/M_SAFYf7HbA/s72-c/python-logo.gif" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;DEUGSX8zfCp7ImA9Wx9RGEo.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-4225140173261742634</id><published>2010-12-10T11:54:00.000-08:00</published><updated>2010-12-20T12:17:08.184-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-12-20T12:17:08.184-08:00</app:edited><title>use perl BEGIN / END blocks for summations</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://2.bp.blogspot.com/_ePuWmGHSaXg/TQ-5vRfBy6I/AAAAAAAAAOI/pdXcYwflSyw/s1600/Programming-republic-of-perl.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/_ePuWmGHSaXg/TQ-5vRfBy6I/AAAAAAAAAOI/pdXcYwflSyw/s1600/Programming-republic-of-perl.png" /&gt;&lt;/a&gt;&lt;/div&gt;Various Perl one-liners are very useful in data manipulation. the "-ne" mode in perl allows the command specified to be run over each line of stdin. However, if you want to do a summation and only print the final tally, you can make use of the BEGIN / END blocks in Perl. Initialize the counter in the BEGIN block, print the sum in the END block.&lt;br /&gt;
&lt;br /&gt;
Say there is a file of numbers called "nums", with each number separated by a newline, and we want to sum the numbers:&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;cat nums | perl -ne 'BEGIN{$s=0;} chomp; $s+=$_; END {print "$s\n"}'
&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-4225140173261742634?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/4225140173261742634/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=4225140173261742634" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/4225140173261742634?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/4225140173261742634?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2010/12/use-perl-begin-end-blocks-for.html" title="use perl BEGIN / END blocks for summations" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_ePuWmGHSaXg/TQ-5vRfBy6I/AAAAAAAAAOI/pdXcYwflSyw/s72-c/Programming-republic-of-perl.png" height="72" width="72" /><thr:total>0</thr:total></entry><entry gd:etag="W/&quot;CUEMRXk-fSp7ImA9Wx9TFUk.&quot;"><id>tag:blogger.com,1999:blog-11265228.post-8767789059911060725</id><published>2010-11-23T12:14:00.000-08:00</published><updated>2010-11-23T12:21:24.755-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-23T12:21:24.755-08:00</app:edited><title>bash one liners to setup password-less SSH</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://1.bp.blogspot.com/_ePuWmGHSaXg/TDkTGvqJ1sI/AAAAAAAAAMM/6pH6HmblNgI/s1600/bash.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/_ePuWmGHSaXg/TDkTGvqJ1sI/AAAAAAAAAMM/6pH6HmblNgI/s1600/bash.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;ssh user@host "cat &amp;gt;&amp;gt; .ssh/authorized_keys2" &amp;lt; .ssh/id_rsa.pub
&lt;/code&gt;&lt;/pre&gt;&lt;br /&gt;
This appends the local machine's SSH public key to the authorized_keys2 file on the remote machine, so that from then on the local machine can ssh to the remote without a password.&lt;br /&gt;
&lt;br /&gt;
If you are setting up multiple machines this way, this one-liner is faster than having to ssh into each remote to update its authorized_keys2 file.&lt;br /&gt;
&lt;br /&gt;
You could build on this to set up multiple machines with a single command:&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;pre style="background-color: #eeeeee; border: 1px dashed rgb(153, 153, 153); color: black; font-family: Andale Mono,Lucida Console,Monaco,fixed,monospace; font-size: 12px; line-height: 14px; overflow: auto; padding: 5px; width: 100%;"&gt;&lt;code&gt;for m in host1 host2 host3 host4; do ssh user@$m "cat &amp;gt;&amp;gt; .ssh/authorized_keys2" &amp;lt; .ssh/id_rsa.pub; done&lt;/code&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/11265228-8767789059911060725?l=thushw.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://thushw.blogspot.com/feeds/8767789059911060725/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=11265228&amp;postID=8767789059911060725" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8767789059911060725?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/11265228/posts/default/8767789059911060725?v=2" /><link rel="alternate" type="text/html" href="http://thushw.blogspot.com/2010/11/bash-one-liners-to-setup-password-less.html" title="bash one liners to setup password-less SSH" /><author><name>thushara</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="24" src="http://3.bp.blogspot.com/_ePuWmGHSaXg/SLcEM2cOYCI/AAAAAAAAABw/CiAaWps_KW0/s1600-R/n744060995_1382099_3757.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_ePuWmGHSaXg/TDkTGvqJ1sI/AAAAAAAAAMM/6pH6HmblNgI/s72-c/bash.jpg" height="72" width="72" /><thr:total>1</thr:total></entry></feed>

