<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;D0YNQnkyfSp7ImA9WhVUGEw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732</id><updated>2012-05-23T16:19:53.795-07:00</updated><category term="parallel computing" /><category term="courses" /><category term="ai" /><category term="causality" /><category term="basketball" /><category term="dannys_predictions" /><category term="books" /><category term="data structure" /><category term="challenge problem" /><category term="lawyers" /><category term="toronto" /><category term="methodology" /><category term="art" /><category term="analytics" /><category term="ranking" /><category term="algorithms" /><category term="uncertainty" /><category term="memorization" /><category term="hadoop" /><category term="classification" /><category term="linear_programming" /><category term="psychology" /><category term="online marketing" /><category term="pain machine" /><category term="taxes" /><category term="netflix" /><category term="data analysis" /><category term="scipy" /><category term="schools" /><category term="sports" /><category term="rapidminer" /><category term="scrabble" /><category term="probability" /><category term="c++" /><category term="talent" /><category term="dynamic algorithms" /><category term="computation" /><category term="displaying code" /><category term="san francisco" /><category term="career choice" /><category term="success" /><category term="APIs" /><category term="public_relations" /><category term="incentives" /><category term="rationality" 
/><category term="controversies" /><category term="social networks" /><category term="summer school" /><category term="nearest neighbors" /><category term="buildings" /><category term="theoretical computer science" /><category term="march_madness" /><category term="conversation starters" /><category term="the_webs" /><category term="statistics" /><category term="mcmc" /><category term="chess" /><category term="MAP_inference" /><category term="conferences" /><category term="google" /><category term="randomness" /><category term="auctions" /><category term="nutsandbolts" /><category term="structured prediction" /><category term="advertising" /><category term="military" /><category term="the real world" /><category term="monte" /><category term="beginners" /><category term="data visualization" /><category term="image_processing" /><category term="python" /><category term="biology" /><category term="lake oswego rental" /><category term="public transportation" /><category term="scott turner" /><category term="max_product_belief_propagation" /><category term="belief propagation" /><category term="code" /><category term="football" /><category term="horse racing" /><category term="learning" /><category term="artificial intelligence" /><category term="science" /><category term="linux" /><category term="computational complexity" /><category term="logistic regression" /><category term="protocol_buffers" /><category term="math" /><category term="recommendation systems" /><category term="emacs" /><category term="research" /><category term="robotics" /><category term="bayesian models" /><category term="programming" /><category term="politics" /><category term="sympy" /><category term="videos" /><category term="graduate school" /><category term="slice sampling" /><category term="matrix factorization" /><category term="graphical_models" /><category term="distributed computing" /><category term="databases" /><category term="seo" /><category term="economics" /><category 
term="blogger" /><category term="computer vision" /><category term="web2.0" /><category term="constraint_satisfaction" /><category term="george" /><category term="web_security" /><category term="twitter" /><category term="regularization" /><category term="history" /><category term="gambling" /><category term="machine learning" /><category term="data" /><category term="markets" /><category term="sociology" /><category term="energy use" /><category term="medicine" /><title>This Number Crunching Life</title><subtitle type="html">Randomness in the world with a smattering of other randomness</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://blog.smellthedata.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>168</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/ThisNumberCrunchingLife" /><feedburner:info uri="thisnumbercrunchinglife" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry 
gd:etag="W/&quot;DUIDRXY7eyp7ImA9WhVQF08.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7857345903016628650</id><published>2012-04-06T08:15:00.005-07:00</published><updated>2012-04-06T08:52:54.803-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-04-06T08:52:54.803-07:00</app:edited><title>Final 2012 Full-Bracket Results</title><content type="html">&lt;p&gt;Hopefully everyone had a chance to watch the exciting game between Kentucky and Kansas this past Monday. This post only covers the results of the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/9198"&gt;full tournament bracket&lt;/a&gt; and not the second chance Sweet Sixteen bracket.&lt;/p&gt;
&lt;p&gt;
Here are the full standings, including ESPN analysts (E) and my own picks.
&lt;table&gt;
&lt;tr&gt;&lt;td&gt;TheMatrixFactorizer&lt;/td&gt;&lt;td&gt;127&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Jay Bilas (E)&lt;/td&gt;&lt;td&gt;126&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Lee's picks&lt;/td&gt;&lt;td&gt;124&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;The Pain Machine&lt;/td&gt;&lt;td&gt;122&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Baseline&lt;/td&gt;&lt;td&gt;120&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Danny's Dangerous Picks&lt;/td&gt;&lt;td&gt;117&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;By The Numbers&lt;/td&gt;&lt;td&gt;104&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Dick Vitale (E)&lt;/td&gt;&lt;td&gt;102&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Obama&lt;/td&gt;&lt;td&gt;102&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Predict the Madness&lt;/td&gt;&lt;td&gt;99&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Ryan Boesch&lt;/td&gt;&lt;td&gt;98&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;TheSentinel&lt;/td&gt;&lt;td&gt;86&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;AJsMadness&lt;/td&gt;&lt;td&gt;73&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;machine_learning_first_try&lt;/td&gt;&lt;td&gt;45&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;
&lt;/p&gt;
&lt;p&gt;Great contest this year and congratulations to this year's winner, TheMatrixFactorizer! It not only won the full-bracket contest, it also squeezed past ESPN analyst Jay Bilas by a point. Once again, machines triumph over humans in our contest. I, for one, welcome our new March Madness predicting robot overlords.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7857345903016628650?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/dFEJZ3d5SX_ObalSKagq0LLEmNs/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/dFEJZ3d5SX_ObalSKagq0LLEmNs/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/dFEJZ3d5SX_ObalSKagq0LLEmNs/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/dFEJZ3d5SX_ObalSKagq0LLEmNs/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/EAp88gWALVM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7857345903016628650/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7857345903016628650" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7857345903016628650?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7857345903016628650?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/EAp88gWALVM/final-2012-full-bracket-results.html" title="Final 2012 Full-Bracket Results" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/04/final-2012-full-bracket-results.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MHRHY7eyp7ImA9WhVRE0o.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-980048127593656605</id><published>2012-03-21T18:19:00.003-07:00</published><updated>2012-03-21T18:23:55.803-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-21T18:23:55.803-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="scott turner" /><category scheme="http://www.blogger.com/atom/ns#" 
term="march_madness" /><title>Round 2 Update + Upset Analysis</title><content type="html">&lt;i&gt;Here's another great guest post from &lt;a href="http://netprophetblog.blogspot.ca/"&gt;Scott Turner&lt;/a&gt;, our #1 Machine March Madness guest poster.  Great analysis -- thanks Scott!  If you want more where this came from, check out &lt;a href="http://netprophetblog.blogspot.ca/"&gt;his blog&lt;/a&gt;.&lt;/i&gt;
&lt;br/&gt;&lt;br/&gt;
On my blog &lt;a href="http://netprophetblog.blogspot.com/2012/03/upset-review.html"&gt;here&lt;/a&gt; I took a closer look at how the Pain Machine predicts upsets in the tournament and how effective it was this year.&amp;nbsp; I thought it might be interesting to look at how the top competitors in the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/9198"&gt;Machine Madness&lt;/a&gt; contest predicted upsets.&amp;nbsp; I put together the following table with the competitors across the top and an X in every cell where they predicted an upset.&amp;nbsp; Boxes are green for correct predictions and red for incorrect predictions.&amp;nbsp; The final row(s) in the table shows the scores &amp;amp; possible scores for each competitor.
&lt;br/&gt;&lt;br/&gt;
&lt;table border="1"&gt;&lt;tbody&gt;
&lt;tr&gt;       &lt;th style="background-color: white;"&gt;Game&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Pain Machine&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Predict the Madness&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Sentinel&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Danny's
Conservative
Picks &lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;AJ's Madness&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Matrix Factorizer&lt;/th&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Texas over Cincy&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Texas over FSU&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;WVU over Gonzaga&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Purdue over St. Mary's&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;NC State over SDSU&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;South Florida over Temple&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;New Mexico over Louisville&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Virginia over Florida&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Colorado State over Murray State&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Vandy over Wisconsin&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Wichita State over Indiana&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Murray State over Marquette&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: white;"&gt;Upset Prediction Rate&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;43%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;25%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;33%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;0%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;25%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;29%&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: white;"&gt;Current Score&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;42&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;43&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;42&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;41&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;41&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;39&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: white;"&gt;Possible Points&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;166&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;155&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;166&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;161&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;137&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;163&lt;/td&gt;     &lt;/tr&gt;
&lt;/tbody&gt; &lt;/table&gt;
&lt;br/&gt;&lt;br/&gt;
(I'm not counting #9 over #8 as an upset.  That's why Danny has only 41 points; he predicted a #9 over #8 upset that did not happen.)
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;So what do you think? &lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
One thing that jumps out immediately is that the competitors predicted many more upsets this year than in past years.&amp;nbsp; Historically we'd expect around 7-8 upsets in the first two rounds.&amp;nbsp; Last year the average number of upsets was about 2 (discounting the Pain Machine and LMRC).&amp;nbsp; The Pain Machine is forced to predict this many, but this year the Matrix Factorizer also predicts 7, and Predict the Madness and AJ's Madness predict 4.&amp;nbsp; From what I can glean from the model descriptions, none of these models (other than the Pain Machine) force a certain level of upsets.&amp;nbsp; 
&lt;br/&gt;&lt;br/&gt;
Monte's model ("Predict the Madness") seems to use only statistical inputs, and not any strength measures, or strength of competition measures.&amp;nbsp; This sort of model will value statistics over strength of schedule, and so you might see it making upset picks that would not agree with the team strengths (as proxied by seeds).
&lt;br/&gt;&lt;br/&gt;
The Sentinel uses a Monte Carlo type method to predict games, so rather than always produce the most likely result, it is only most likely to produce the most likely result.&amp;nbsp; (If that makes sense :-)&amp;nbsp; The model can be tweaked by choosing how long to run the Monte Carlo simulation.&amp;nbsp; With a setting of 50 it seems to produce about half the expected number of upsets.
&lt;br/&gt;&lt;br/&gt;
Danny's Dangerous Picks are anything but; it is by far the most conservative of the competitors.&amp;nbsp; The pick of Murray State over Marquette suggests that Danny's asymmetric loss function component might have led to his model undervaluing strength of schedule.
&lt;br/&gt;&lt;br/&gt;
AJ's Madness model seems to employ a number of hand-tuned weights for different components of the prediction formula.&amp;nbsp; That may account for the predicted upsets, including the somewhat surprising CSU over Murray State prediction.
&lt;br/&gt;&lt;br/&gt;
The Matrix Factorizer has two features that might lead to a high upset rate.&amp;nbsp; First, there's an asymmetric reward for getting a correct pick, which might skew towards upsets.&amp;nbsp; Secondly, Jasper optimized his model parameters based upon the results of previous tournaments, so that presumably built in a bias towards making some upset picks.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;What's interesting about the actual upsets?&lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
First, Texas over Cincy and Purdue over St. Mary's were consensus picks (excepting Danny's Conservative Picks). &amp;nbsp; This suggests that these teams really were mis-seeded.&amp;nbsp; Purdue vs. St. Mary's is the classic trap seeding problem for humans -- St. Mary's has a much better record, but faced much weaker competition.&amp;nbsp; Texas came very close to beating Cincinnati -- they shot 16% in the first half and still tied the game up late -- which would have made the predictors 2-0 on consensus picks.
&lt;br/&gt;&lt;br/&gt;
Second, the predictors agreed on few of the other picks.&amp;nbsp; Three predictors liked WVU over Gonzaga, and the Pain Machine and the Matrix Factorizer agreed on two other games.&amp;nbsp; Murray State over Marquette is an interesting pick -- another classic trap pick for a predictor that undervalues strength of schedule -- and both Danny's predictor and the Matrix Factorizer "fell" for this pick.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;So how did the predictors do?&lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
The Pain Machine was by far the best, getting 43% of its upset predictions correct.&amp;nbsp; Sentinel was next at 33%.&amp;nbsp; Perhaps not coincidentally, these two predictors have the most possible points remaining. 
&lt;br/&gt;&lt;br/&gt;
In terms of scoring, the Baseline is ahead of all the predictors, so none came out ahead (so far) due to their predictions.&amp;nbsp; The PM and Sentinel do have a slight edge in possible points remaining over the Baseline.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;So who will win?&lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
The contest winner will probably come down to predicting the final game correctly.&amp;nbsp; There's a more interesting spread of champion predictions than I expected -- particularly given the statistical dominance of Kentucky.&amp;nbsp;
&lt;br/&gt;&lt;br/&gt;
If Kentucky wins, the likely winner will be the Baseline or Danny.&amp;nbsp; If Kansas wins, the Pain Machine will likely win unless Wisconsin makes it to the Final Four, in which case AJ should win.&amp;nbsp; If Michigan State wins, then the Sentinel will likely win.&amp;nbsp; And finally, if Ohio State wins, then Predict the Madness should win.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-980048127593656605?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/1zUJHBXdLoY2ymACeZgkLwlpz2M/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1zUJHBXdLoY2ymACeZgkLwlpz2M/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/1zUJHBXdLoY2ymACeZgkLwlpz2M/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/1zUJHBXdLoY2ymACeZgkLwlpz2M/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/djPqu6OWlF8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/980048127593656605/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=980048127593656605" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/980048127593656605?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/980048127593656605?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/djPqu6OWlF8/round-2-update-upset-analysis.html" title="Round 2 Update + Upset Analysis" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/round-2-update-upset-analysis.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A04NR3k6eCp7ImA9WhVREk0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7335697462621838289</id><published>2012-03-19T19:03:00.006-07:00</published><updated>2012-03-19T19:19:56.710-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-19T19:19:56.710-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" 
/><title>Second Chance Competition Announcement</title><content type="html">For all of you who didn't get your algorithms finished in time, and for all of the original competitors who'd like a fresh start, we're pleased to announce this year's "second chance" Sweet 16 contest.
&lt;br/&gt;&lt;br/&gt;
This one will be run a little bit differently.  For machines, &lt;a href="http://blog.smellthedata.com/2012/03/data-usage-clarification.html"&gt;the rules are all still the same&lt;/a&gt;.  The difference is that there will now be a pool of human competitors in the mix -- Facebook friends and fans of our sponsor, &lt;a href="http://tarlowknee.com/minimally-invasive-knee-replacement/"&gt;a knee doctor who likes robots&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
The prize pool for the second chance tournament will be $50 and $25 gift certificates for first and second place, respectively, and they will go to the top two entrants, whether they be human or computer.
&lt;br/&gt;&lt;br/&gt;
If you want to participate as a human, you need to add &lt;a href="https://www.facebook.com/pages/Advanced-Knee-Care/#!/stefan.tarlow"&gt;Doctor Tarlow&lt;/a&gt; on Facebook and look for his announcement there.  For those who wish to enter an algorithm, here are the instructions:
&lt;ul&gt;
&lt;li&gt;Send me email at dannytarlow+MarchMadness@gmail.com with your team name, along with a short description of your approach.  (If you entered the main competition and haven't significantly changed your algorithm, just send me a mail saying you're in for the Sweet 16.)  Also, consider joining &lt;a href="http://groups.google.com/group/machine-march-madness"&gt;the Google group&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;Follow this link, and enter your picks before 7PM EST on Thursday.  &lt;b&gt;When entering your bracket name, add "[C]" before your name, to indicate you are a computer entrant:&lt;/b&gt;&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t2/register/joinprivategroup_assign_team?GID=9372&amp;P=robotsvshumans"&gt;http://tournament.fantasysports.yahoo.com/t2/register/joinprivategroup_assign_team?GID=9372&amp;P=robotsvshumans&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

That's it!  Good luck to all the algorithmic competitors out there.  I hope we can pull out a victory over those pesky humans.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7335697462621838289?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/atXTvLLoBKirkkruyQYz9fiBnDY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/atXTvLLoBKirkkruyQYz9fiBnDY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/atXTvLLoBKirkkruyQYz9fiBnDY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/atXTvLLoBKirkkruyQYz9fiBnDY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/oHz8g_vUfuE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7335697462621838289/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7335697462621838289" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7335697462621838289?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7335697462621838289?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/oHz8g_vUfuE/second-chance-competition-announcement.html" title="Second Chance Competition Announcement" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/second-chance-competition-announcement.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEcCQXgyfip7ImA9WhVREUU.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-8636834382672840040</id><published>2012-03-19T10:57:00.006-07:00</published><updated>2012-03-19T11:34:20.696-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-19T11:34:20.696-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" 
term="monte" /><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>"Predict the Madness" by Monte McNair</title><content type="html">&lt;i&gt;This is a guest post by Monte McNair, the man behind team "Predict the Madness," which is the leader of the machine competitors after the second round.&lt;/i&gt;&lt;br/&gt;&lt;br/&gt;
Developing a system to fill out the best NCAA Tournament bracket is composed of two parts: matchup prediction and bracket optimization.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;MATCHUP PREDICTION&lt;/b&gt;&lt;br/&gt;
The first thing to do is come up with a method to predict the likelihood of one team beating another. Since we only care about advancement, I want a system that produces a percentage as opposed to a point spread or something else. Therefore, I use a logistic regression with the outcome of games being the dependent variable. For the variables, I use the location of the game, metrics for the team's offense and defense, and metrics of the team's opponents' averages for both offense and defense. The NCAA Tournament is played at all neutral sites, but since I'm training on all games, I want to know how important playing at home is so that I can strip this out for neutral site games. The reason to use components of a team's offense and defense as opposed to simply points is that the different components that contribute to points have varying levels of reliability. As KenPom figured out this year, for example, defensive 3P% is extremely unreliable. My model takes this into account and weights it less than it would be if we used its influence on points against. By breaking it down, we let the model determine which factors are most reliable in predicting future performance.
&lt;br/&gt;&lt;br/&gt;
The main thing we care about is that the model does a good job of predicting future games. Instead of waiting for future games, however, we can just use out of sample games. I took about 1/3 of our games and made them training games and left the other 2/3 as testing games. One thing I did that may be different from most is that I used all of a team's games for the season except for the game in question to create their profile. For example, say North Carolina played Duke on January 7th in one of my training games. For North Carolina's profile, I used stats from all of their games before AND after January 7th. I'm not sure what other systems do but I think they might use all games (without excluding the game in question) or perhaps just games PRIOR to the game in question. In any case, after training the model, I can test it against the out of sample games I set aside for testing. I divided up all the test games into 100 buckets ordered by their predicted win percentage and compared it to the actual win percentage in those games. As we can see, the buckets are closely aligned meaning the predictions are fairly accurate.
&lt;br/&gt;&lt;br/&gt;
&lt;a href="http://3.bp.blogspot.com/-ldrKvY6vLKk/T2d0h12ypzI/AAAAAAAABLs/QTUr6T_8BEI/s1600/Prediction%2BBuckets.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 291px;" src="http://3.bp.blogspot.com/-ldrKvY6vLKk/T2d0h12ypzI/AAAAAAAABLs/QTUr6T_8BEI/s400/Prediction%2BBuckets.png" border="0" alt=""id="BLOGGER_PHOTO_ID_5721669976338900786" /&gt;&lt;/a&gt;
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;BRACKET OPTIMIZATION&lt;/b&gt;&lt;br/&gt;
The next thing to do is to take our matchup predictions and maximize our expected points based on the scoring system we are presented with. While this is most beneficial when scoring systems provide bonuses for picking upsets or some other unique scoring, it can still be helpful in basic scoring systems and is better than simply advancing winners round by round.
&lt;br/&gt;&lt;br/&gt;
As an example, take Louisville and New Mexico, the 4 and 5 seeds in the West region. My model predicts New Mexico as the favorite in a game against Louisville, projected to win 51.2% of the time. Both are favored in their 1st round matchups as well, so if we were to simply advance them both, we'd then choose New Mexico to advance over Louisville in the 2nd round. However, New Mexico has a tougher 1st round opponent in Long Beach State than Louisville does against Davidson. In the table below, we see that New Mexico wins just 65% against LBSU while Louisville wins 75% of the time against Davidson. This is enough to make it more likely that Louisville advances to the Sweet 16 than New Mexico, despite UNM being the better team.
&lt;br/&gt;&lt;br/&gt;
&lt;table&gt;
&lt;tr&gt;&lt;td/&gt; &lt;td&gt;1st&lt;/td&gt;  &lt;td&gt;2nd&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;New Mexico&lt;/td&gt; &lt;td&gt;64.9%&lt;/td&gt; &lt;td&gt;37.2%&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Louisville&lt;/td&gt; &lt;td&gt;75.3%&lt;/td&gt; &lt;td&gt;40.7%&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;
&lt;br/&gt;
New Mexico over Louisville: 51.2%
&lt;br/&gt;&lt;br/&gt;
In a basic scoring system, this rarely comes into play and when it does, it provides little benefit. But it still is best to be accurate if you can.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-8636834382672840040?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/ZtzDoNpktjj6zhtQIyaMwbRTXuA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZtzDoNpktjj6zhtQIyaMwbRTXuA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/ZtzDoNpktjj6zhtQIyaMwbRTXuA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/ZtzDoNpktjj6zhtQIyaMwbRTXuA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/OLo8bXe-WvQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/8636834382672840040/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=8636834382672840040" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8636834382672840040?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8636834382672840040?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/OLo8bXe-WvQ/predict-madness-by-monte-mcnair.html" title="&quot;Predict the Madness&quot; by Monte McNair" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-ldrKvY6vLKk/T2d0h12ypzI/AAAAAAAABLs/QTUr6T_8BEI/s72-c/Prediction%2BBuckets.png" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/predict-madness-by-monte-mcnair.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;CU8NQXg_cSp7ImA9WhVREEg.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-1196472112115966752</id><published>2012-03-17T13:20:00.013-07:00</published><updated>2012-03-17T23:58:10.649-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-17T23:58:10.649-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Machine March Madness: Round 1 Update</title><content type="html">As usual, the first round was full of upsets, with two of the #2 ranked teams falling.  None of our competitors predicted either of those upsets, but they are still putting on a respectable performance.  Here are details of each competitor's entry, along with the current performance.&lt;br/&gt;&lt;br/&gt;
The favorites at this point look like "The Matrix Factorizer" and "The Pain Machine".  Both did quite well in the first round, and both have 7/8 elite eight teams still surviving, along with all 4/4 final four teams still alive.&lt;br/&gt;

&lt;hr&gt;
&lt;b&gt;The Matrix Factorizer&lt;/b&gt; &lt;br/&gt;&lt;br/&gt;
Jasper&lt;br/&gt;&lt;br/&gt;
I modified Danny's starter code in two ways: First, I added an asymmetric component to the
loss function, so the model is rewarded for getting the prediction correct 
even if the absolute predicted scores are wrong.  Second, I changed the regularization
so that latent vectors are penalized for deviating from the global average over latent
vectors, rather than being penalized for being far from 0.  This can be interpreted as
imposing a basic hierarchical prior.
&lt;br/&gt;&lt;br/&gt;
I then ran a search over model parameters (e.g., latent dimension, regularization strength, parameter that
trades off the two parts of the loss function) to find the setting that did best on number of correct
predictions made in the past 5 years' tournaments.
&lt;br/&gt;&lt;br/&gt;
24 of 33 Correct, 25 Pts, 171 Pts Possible&lt;br/&gt;

&lt;hr&gt;
&lt;b&gt;The Pain Machine&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;

Scott Turner&lt;br/&gt;&lt;br/&gt;

Methodology: Linear regression on a number of statistics, including strength ratings to predict MOV (Margin of Victory).  Some modifications for tournament use, particularly to force a likely number of upsets.
&lt;br/&gt;&lt;br/&gt;

23 of 33 Correct, 24 Pts, 170 Pts Possible&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;TheSentinel&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
 
Chuck Dickens&lt;br/&gt;&lt;br/&gt;
 
Methodology:  Using Ken Pomeroy's Pythag formula to rate teams, then calculated the actual game probabilities with the log5 formula. 
 
Used a random number generator to determine outcome of games.  This provided some randomness which created a few interesting upsets. 
Simulate the tournament 50 times and record each team's probability to reach subsequent rounds.
Step through each round of the bracket choosing winners based on the team that had a higher probability to win that round.
 &lt;br/&gt;&lt;br/&gt;
I found that running the simulation 50 times gave me the most variability in the final four, running the simulation more than 100 times gave me a bracket that had almost no upsets and most all of the higher seeded teams progressed through the tournament.
&lt;br/&gt;&lt;br/&gt;
23 of 33 Correct, 24 Pts, 172 Pts Possible

&lt;hr&gt;

&lt;b&gt;Baseline&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Always pick the higher seed. &lt;br/&gt;&lt;br/&gt;

23 of 33 Correct, 24 Pts, 168 Pts Possible
&lt;hr&gt;

&lt;b&gt;Ryan's Picks&lt;/b&gt;  &lt;br/&gt;&lt;br/&gt;
Ryan&lt;br/&gt;&lt;br/&gt;

For each season (e.g. 2006-2007) I have enumerated the teams and 
compiled the scores of the games into a matrix S. For example, if team 
1 beat team 2 with a score of 82-72 then S12=82 and S21=72. Ideally, 
each team would play every other team at least once, but this is 
obviously not the case so the matrix S is sparse. Using the method 
proposed by George Dahl, I define vectors o and d which correspond to 
each team's offensive and defensive ability. The approximation to the 
matrix S is then just the outer product od' (for example 
(od')_12=o1d2=S12est). This is a simple rank one approximation for the 
matrix. If each team played each other at least once then the matrix S 
would be dense and the vectors o and d could be found by finding the 
SVD of S (see &lt;a href="http://www.stanford.edu/~boyd/ee263/notes/low_rank_approx.pdf"&gt;http://www.stanford.edu/~boyd/ee263/notes/low_rank_approx.pdf&lt;/a&gt;). 
Because this is not the case, we instead define a matrix P that 
represents which teams played that season. For example, P12=P21=1 if 
teams 1 and 2 played a game. Now the problem stated by George can be 
expressed compactly as, "minimize ||P.*(o*d')-S||_F". Here, '.*' 
represents the Hadamard product and ||.||_F is the Frobenius norm. In 
this form, it is easy to see that, for constant vector o and variable 
vector d, this is a convex problem. Also, for constant vector d and 
variable vector o this is a convex problem. Therefore, by solving a 
series of convex problems, alternating the vector variable between o 
and d, the problem converges rapidly in about 5 to 10 steps (see 
"Nonnegative Matrix Factorizations" code here &lt;a href="http://cvxr.com/cvx/examples/"&gt;http://cvxr.com/cvx/examples/&lt;/a&gt;). &lt;br/&gt;&lt;br/&gt;

See &lt;a href="http://groups.google.com/group/machine-march-madness/browse_thread/thread/f0efd87778bdbca7"&gt;this post&lt;/a&gt; for more details.&lt;br/&gt;&lt;br/&gt;

23 of 33 Correct, 24 Pts &lt;br/&gt;&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;Danny's Dangerous Picks&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;

I started with the basic matrix factorization approach from my starter code, then I added small neural networks that applied a transformation to the base latent vectors based on whether the team was playing at home, away, or in the tournament.  These transformation vectors were learned based on season and tournament performance of teams from other years.  I split the data into 5 cross-validation sets, and looked for hyperparameter settings that did best on tournament prediction in past years. Like Jon, I also added an asymmetric component to the loss function.
&lt;br/&gt;&lt;br/&gt;
Interestingly (disappointingly), after finding the setting of parameters that did best on past data, my method made some pretty conservative predictions for this year, predicting only 3 upsets.&lt;br/&gt;&lt;br/&gt;

22 of 33 Correct, 23 Pts, 165 Pts Possible

&lt;hr&gt;

&lt;b&gt;Predict the Madness&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Monte McNair &lt;br/&gt;&lt;br/&gt;
Methodology: To determine the probability of any matchup (Team 1 beating Team 2), I use a logistic regression using statistics for offense/defense of team and team's opponents plus location, dependent variable is outcome of the game. To select bracket, I use a program to calculate the best possible bracket by maximizing number of points based on scoring system, this correctly accounts for situations where simply advancing favored teams round by round would fail.&lt;br/&gt;&lt;br/&gt;

22 of 33 Correct, 23 Pts, 157 Pts Possible&lt;br/&gt;&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;AJ's Madness&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
AJ Diliberto&lt;br/&gt;&lt;br/&gt;
 
The methodology is that I selected various stats and gave weight to those that I feel are important, such as points for and against, offensive rebounds, and turnover margin. I also factored in whether they were from one of the big conferences, the level of experience and success the coach has had, and then overlaid the formula with a strength of schedule formula that would reduce certain teams' scores based on how good or bad the competition was that they played to get those stats. 
&lt;br/&gt;&lt;br/&gt;
  22 of 33 Correct, 23 Pts, 139 Pts Possible&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;Machine Learning First Try&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Joe Gilbert &lt;br/&gt;&lt;br/&gt;

My methodology is as follows:&lt;br/&gt;
1. Develop a matrix that contains only 2011 scores (done using your data)&lt;br/&gt;
2. Develop a matrix that contains all of your teams and generate columns for averages over all players in 2011:  minutes played, FT attempted/made, 3P attempted/made (done), rebounds, turnovers, fouls (again using your data)&lt;br/&gt;
3.  Use machine learning, specifically a traditional Forest algorithm to predict each team's score for each game based on the 2011 data only&lt;br/&gt;
4.  Select the winner for each round and repeat step 3 for the next round to determine the next winners&lt;br/&gt;
Currently, the algorithm predicted the first round modeling each team's score as an "Away" team since they are all technically on the road.  I think I may change it so that the scores are based on a mean value of the model for an Away team and Home team because currently it is predicting LIU Brooklyn over MSU in the 1st round...if it comes true then so be it.&lt;br/&gt;&lt;br/&gt;

20 of 33 Correct, 21 Pts, 91 Pts Possible
&lt;hr&gt;

&lt;b&gt;By The Numbers&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
  Tim Jacobs &lt;br/&gt;&lt;br/&gt;

Methodology:&lt;br/&gt;
I took the data so generously provided, trained a couple of neural networks on the past performance, then used average away performance for each team to predict performance in the tourney.  The networks are training as I type.&lt;br/&gt;&lt;br/&gt;

17 of 33 Correct, 18 Pts, 166 Pts Possible

&lt;hr&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-1196472112115966752?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/FmgnEDf6eu0rbxUJqhR4vERyJvo/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/FmgnEDf6eu0rbxUJqhR4vERyJvo/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/FmgnEDf6eu0rbxUJqhR4vERyJvo/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/FmgnEDf6eu0rbxUJqhR4vERyJvo/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/XfcxMuQhFBo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/1196472112115966752/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=1196472112115966752" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1196472112115966752?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1196472112115966752?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/XfcxMuQhFBo/machine-march-madness-round-1-update.html" title="Machine March Madness: Round 1 Update" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/machine-march-madness-round-1-update.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQHQHc8fSp7ImA9WhVSF04.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6195570397128382413</id><published>2012-03-14T07:06:00.003-07:00</published><updated>2012-03-14T07:12:11.975-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-14T07:12:11.975-07:00</app:edited><title>Data Usage Clarification</title><content 
type="html">&lt;p&gt;I just realized that the &lt;a href="http://groups.google.com/group/machine-march-madness/browse_thread/thread/5ea550b0ceff52e6"&gt;data rules and usage discussion&lt;/a&gt; happened on the Google Group and not everyone may have read it. Similarly, a clarification on &lt;a href="http://groups.google.com/group/machine-march-madness/browse_thread/thread/68be7d1f0a6b240"&gt;hand-tweaking&lt;/a&gt;.&lt;/p&gt;
&lt;p&gt;Basically, no human judgment data should enter your model except for your decisions on how to build the model and hyper-parameters for that model. Also, if you do use data that we did not provide, please let us know and please make it available to all the other competitors so that they might have the opportunity to use it as well.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-6195570397128382413?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/sbGQV99u-CtpwzLLrRn-VVWcPxY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/sbGQV99u-CtpwzLLrRn-VVWcPxY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/sbGQV99u-CtpwzLLrRn-VVWcPxY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/sbGQV99u-CtpwzLLrRn-VVWcPxY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/HjDygs9a4aM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6195570397128382413/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6195570397128382413" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6195570397128382413?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6195570397128382413?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/HjDygs9a4aM/data-usage-clarification.html" title="Data Usage Clarification" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/data-usage-clarification.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQFRHw-eyp7ImA9WhVSFko.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-2426953320198026060</id><published>2012-03-13T14:27:00.003-07:00</published><updated>2012-03-13T14:31:55.253-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-13T14:31:55.253-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Fast Company Article</title><content type="html">&lt;a 
href="http://www.fastcompany.com/user/david-holmes"&gt;David Holmes&lt;/a&gt; over at &lt;a href="http://www.fastcompany.com/"&gt;Fast Company&lt;/a&gt; wrote a nice article about our Machine March Madness contest:&lt;br/&gt;
&lt;a href="http://www.fastcompany.com/1824382/march-madness-ncaa-tournament-predictions-algorithms"&gt;http://www.fastcompany.com/1824382/march-madness-ncaa-tournament-predictions-algorithms&lt;/a&gt;

&lt;br/&gt;&lt;br/&gt;
Thanks David!
&lt;br/&gt;&lt;br/&gt;
To everybody else: I hope you're hard at work on your algorithm.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-2426953320198026060?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/tYN9EtPoU2973NgKPoc8g8yMQHc/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tYN9EtPoU2973NgKPoc8g8yMQHc/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/tYN9EtPoU2973NgKPoc8g8yMQHc/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/tYN9EtPoU2973NgKPoc8g8yMQHc/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/ULVuydh1Bps" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/2426953320198026060/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=2426953320198026060" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2426953320198026060?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2426953320198026060?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/ULVuydh1Bps/fast-company-article.html" title="Fast Company Article" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/fast-company-article.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEIEQHcycSp7ImA9WhVSFkU.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-8615127815074076292</id><published>2012-03-13T07:56:00.009-07:00</published><updated>2012-03-13T16:48:21.999-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-13T16:48:21.999-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Prizes and deadline 
reminder</title><content type="html">Now is the time to make a final push for getting your &lt;a href="http://blog.smellthedata.com/2012/02/machine-march-madness-2012.html"&gt;Machine March Madness&lt;/a&gt; algorithms tuned and running smoothly.  Remember, submissions are due before tip-off of the first game on Thursday, but you probably want to get them in a little early, just to be safe.
&lt;br/&gt;&lt;br/&gt;
I'm also pleased to announce the prizes: for the main competition, the winning algorithm's owner will get a $50 Amazon or Apple gift certificate, while second place will get a $25 one.
&lt;br/&gt;&lt;br/&gt;
Also, for the "second chance" Sweet 16 contest, we will be hosting a humans versus computers contest, with our field of computers competing against Facebook friends and fans of our sponsor, &lt;a href="http://tarlowknee.com/minimally-invasive-knee-replacement/"&gt;a knee doctor who is into robotic-assisted surgery&lt;/a&gt;.
The prize pool for the second chance tournament will also be $50/$25 gift certificates, but the prizes could go either to a human or computer.
&lt;br/&gt;&lt;br/&gt;
If you want to participate as a human, you need to add &lt;a href="https://www.facebook.com/pages/Advanced-Knee-Care/#!/stefan.tarlow"&gt;Doctor Tarlow&lt;/a&gt; on Facebook, but if you're reading this blog, hopefully you'll enter an algorithm and participate on our team instead.
&lt;br/&gt;&lt;br/&gt;
The human team has chosen the name, "Dr. T's Robot Powers".  We'll need to come up with something better for our computer team.  Ideas are welcome in the comments.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-8615127815074076292?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/LnUZ6X6w23F3MJ5oluX0O4v_a4E/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LnUZ6X6w23F3MJ5oluX0O4v_a4E/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/LnUZ6X6w23F3MJ5oluX0O4v_a4E/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LnUZ6X6w23F3MJ5oluX0O4v_a4E/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/nttKpd_m2S4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/8615127815074076292/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=8615127815074076292" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8615127815074076292?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8615127815074076292?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/nttKpd_m2S4/prizes-and-deadline-reminder.html" title="Prizes and deadline reminder" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/prizes-and-deadline-reminder.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUBQ3g5cSp7ImA9WhVSFUQ.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7112423992500248435</id><published>2012-03-12T14:45:00.008-07:00</published><updated>2012-03-12T15:10:52.629-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-12T15:10:52.629-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="scott turner" /><category 
scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>How to pick upsets?</title><content type="html">&lt;i&gt;&lt;a href="http://netprophetblog.blogspot.com/"&gt;Scott Turner&lt;/a&gt; writes...&lt;/i&gt;
&lt;blockquote&gt;Doing well in a tournament picking contest probably comes down to picking the right upsets.  Anyone can pick the higher seeds to win.
&lt;br/&gt;&lt;br/&gt;
Define an upset as a lower seed beating a higher seed, and ignore upsets where there's only 1 step differential (i.e., a #9 beating a #8).  If my math from last year is correct, the upset rate in the tournament is around 22%.  Half those upsets happen in the first round, about 7.
&lt;br/&gt;&lt;br/&gt;
Some recent thoughts about upsets:
&lt;br/&gt;&lt;br/&gt;
&lt;a href="http://harvardsportsanalysis.wordpress.com/2012/03/12/predicting-ncaa-tournament-upsets-the-importance-of-turnovers-and-rebounding/"&gt;http://harvardsportsanalysis.wordpress.com/2012/03/12/predicting-ncaa-tournament-upsets-the-importance-of-turnovers-and-rebounding/&lt;/a&gt; &lt;br/&gt;
&lt;a href="http://courtsideanalyst.wordpress.com/2012/03/12/two-potential-ncaa-upset-picks-with-supporting-math/"&gt;http://courtsideanalyst.wordpress.com/2012/03/12/two-potential-ncaa-upset-picks-with-supporting-math/&lt;/a&gt; &lt;br/&gt;
&lt;a href="http://www.teamrankings.com/blog/ncaa-basketball/why-you-should-ignore-the-seeds-when-filling-out-your-2012-ncaa-brackets"&gt;http://www.teamrankings.com/blog/ncaa-basketball/why-you-should-ignore-the-seeds-when-filling-out-your-2012-ncaa-brackets&lt;/a&gt;
&lt;br/&gt;&lt;br/&gt;
I leave it to Danny / Lee to turn this into a blog posting :-)
&lt;/blockquote&gt;
My response...
&lt;br/&gt;&lt;br/&gt;
From a machine learning perspective, I think Scott raises an interesting issue here.  Let me rephrase the problem a little more abstractly, to more clearly get at the crux of the issue.  Suppose that some oracle were to come down and tell us that exactly 15 of the games in this year's March Madness tournament will be upsets.  How should this affect our prediction strategy?
&lt;br/&gt;&lt;br/&gt;
There are probably two natural answers:
&lt;ul&gt;
&lt;li&gt;Don't change anything.  I have my prediction for each game, and I think it's going to lead to the greatest number of correct predictions.&lt;/li&gt;
&lt;li&gt;Make my base predictions, but go back and find the games that I'm most uncertain about, and flip predictions until I am predicting exactly 15 upsets.&lt;/li&gt;
&lt;/ul&gt;

Actually, these both are reasonable strategies, but they say something different about the objective function that we are optimizing with our picks.  If the goal is to just get as many games right as possible, and we believe our model captures all of the information we have about the outcome of the games (and we believe the game outcomes are statistically independent), then the first strategy will still maximize the expected number of games that we will get correct.  However, by making this choice, assuming our model isn't predicting 15 upsets already, then we've eliminated ourselves from contention for the $5 million prize that Yahoo offers to anybody who picks the perfect bracket.
&lt;br/&gt;&lt;br/&gt;
So if the goal is to win the $5 million prize and you believe the oracle, then the right strategy is to pick the 15 upsets that the model thinks are most likely.
&lt;br/&gt;&lt;br/&gt;
However, while both of these strategies make some sense, they both seem too extreme.  Perhaps the more natural objective should be to ensure that we win this year's &lt;a href="http://blog.smellthedata.com/2012/02/machine-march-madness-2012.html"&gt;Machine March Madness prediction contest&lt;/a&gt;.  If that's our goal, what's the best strategy?  What if we had the predictions from all of the competitors for past years, and I told you that this year's field was going to be drawn from a similar set of competitors?
&lt;br/&gt;&lt;br/&gt;
See Scott's &lt;a href="http://netprophetblog.blogspot.com/2012/03/its-upsetting.html"&gt;picks for most likely upsets over at his blog.&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7112423992500248435?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/nXteQnwIRFIvWhwB6fHk_KKUSbY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/nXteQnwIRFIvWhwB6fHk_KKUSbY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/nXteQnwIRFIvWhwB6fHk_KKUSbY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/nXteQnwIRFIvWhwB6fHk_KKUSbY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/e0MPSEZ2FLI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7112423992500248435/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7112423992500248435" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7112423992500248435?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7112423992500248435?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/e0MPSEZ2FLI/how-to-pick-upsets.html" title="How to pick upsets?" 
/><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/how-to-pick-upsets.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04FQH87fCp7ImA9WhVSFUw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-987183390113650731</id><published>2012-03-11T18:26:00.003-07:00</published><updated>2012-03-11T18:31:51.104-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-11T18:31:51.104-07:00</app:edited><title>2012 Contest Registration</title><content type="html">&lt;p&gt;In order to facilitate in the contest, we will be using Yahoo! again for you to enter your bracket entries. Please do the following to register your team and participate in the contest:
&lt;ol&gt;
&lt;li&gt;Send an e-mail to "leezen+marchmadness" at gmail to provide your: team name, team member names, and a brief description of your methodology.&lt;/li&gt;
&lt;li&gt;Enter your picks in the Yahoo! &lt;a href="http://tournament.fantasysports.yahoo.com/t1/register/joinprivategroup_assign_team?GID=9198&amp;P=robotsvshumans"&gt;tournament group&lt;/a&gt; with the entry name being your team name.&lt;/li&gt;
&lt;li&gt;Watch the tournament with your friends and have fun!&lt;/li&gt;
&lt;/ol&gt;
&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-987183390113650731?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/Z4L7__dXW_MipOsymRy23ZMeVIk/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Z4L7__dXW_MipOsymRy23ZMeVIk/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/Z4L7__dXW_MipOsymRy23ZMeVIk/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/Z4L7__dXW_MipOsymRy23ZMeVIk/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/LVjdMHkuevg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/987183390113650731/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=987183390113650731" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/987183390113650731?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/987183390113650731?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/LVjdMHkuevg/yahoo-group.html" title="2012 Contest Registration" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/yahoo-group.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0IHRX4_fCp7ImA9WhVSFUw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7809167936687637131</id><published>2012-03-11T18:15:00.002-07:00</published><updated>2012-03-11T18:25:34.044-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-11T18:25:34.044-07:00</app:edited><title>Data for 2012</title><content type="html">&lt;p&gt;Selection Sunday! What a day! First we have a great post by Scott Turner on using RapidMiner. 
Then, the Selection Committee has &lt;a href="http://espn.go.com/mens-college-basketball/tournament/2012/story/_/id/7673645/ncaa-tournament-kentucky-wildcats-syracuse-orange-north-carolina-tar-heels-michigan-st-spartans-top-seeds"&gt;set the seeding&lt;/a&gt;. Now, it's &lt;b&gt;YOUR&lt;/b&gt; turn to predict who will win the 2012 NCAA Tournament.&lt;/p&gt;
&lt;p&gt;
There are two files you can download:
&lt;ul&gt;
&lt;li&gt;&lt;a href="https://docs.google.com/open?id=0BysperLdI86MbnFqZ3B2T0dUM2F2UTA1MzY2V0hfdw"&gt;Scores for all games 2006-2011 seasons&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://docs.google.com/open?id=0BysperLdI86MdTg2M1JyUkxRME9jMGZUT1Q5T2JKQQ"&gt;Player-level data for all games 2006-2011 seasons&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
The data includes everything from the beginning of the 2006 season up to and including the March 11, 2012 games. Please let us know if you find any issues with the data. One known issue is that some scores in the first file do not match the scores if you were to add up all the player scores from the player-level data. This is due to the fact that data we crawled is occasionally inconsistent in this regard and might be off by a few points.
&lt;/p&gt;
&lt;p&gt;The data format is as before for both files, except that the aggregate game data is now tab-separated. Please see &lt;a href="http://blog.smellthedata.com/2011/03/aggregate-game-results.html"&gt;aggregate game data schema&lt;/a&gt; and &lt;a href="http://blog.smellthedata.com/2011/03/selection-sunday-today.html"&gt;player-level data schema&lt;/a&gt; for details. Good luck!&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7809167936687637131?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/vhPVHlKO0sMG-AWWS7jMsR8Ra1A/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/vhPVHlKO0sMG-AWWS7jMsR8Ra1A/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/vhPVHlKO0sMG-AWWS7jMsR8Ra1A/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/vhPVHlKO0sMG-AWWS7jMsR8Ra1A/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/H7HmRZ-ZgTM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7809167936687637131/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7809167936687637131" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7809167936687637131?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7809167936687637131?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/H7HmRZ-ZgTM/data-for-2012.html" title="Data for 2012" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/data-for-2012.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8AQnw7fip7ImA9WhVSFEQ.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-1803882667758473517</id><published>2012-03-11T12:15:00.004-07:00</published><updated>2012-03-11T12:24:03.206-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-11T12:24:03.206-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="scott turner" /><category scheme="http://www.blogger.com/atom/ns#" term="rapidminer" /><category 
scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Using RapidMiner to Predict March Madness</title><content type="html">&lt;i&gt;This is a guest post by Dr. Scott Turner, who won the Machine March Madness prediction contest last year, and who was the co-winner of the Sweet 16 contest from two years ago.  If you like this post, check out his great blog all about algorithmic prediction of NCAA basketball: &lt;a href="http://netprophetblog.blogspot.com/"&gt;http://netprophetblog.blogspot.com/&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
Dr. Turner has a Ph.D. in Artificial Intelligence from UCLA. His dissertation subject was a program called MINSTREL that told stories about King Arthur and his knights, as a way to explore issues in creativity and storytelling. Since obtaining his Ph.D. in 1993, Dr. Turner has worked for the Aerospace Corporation, where he advises the nation's space programs on software and systems engineering issues. &lt;/i&gt;
&lt;br/&gt;&lt;br/&gt;
Danny &amp;amp; Lee asked me to contribute a guest post as part of the Machine Madness contest.  I started writing a posting about using RapidMiner as part of a prediction workflow, but unfortunately I became overwhelmed with other tasks and wasn't able to finish it.&amp;nbsp; I had given up on finishing it when I realized that anyone entering the Machine Madness contest at this late date might well appreciate a tool that could make creating the routine parts of building a predictive model very fast.&amp;nbsp; So I quickly finished it up and hope it will prove helpful to someone.&amp;nbsp; Readers who are expert data miners won't find much here, but I hope that it might be useful to the interested amateur who knows more about basketball (football, baseball, etc.) than about statistics and data mining and wants to put in a quick entry. 
&lt;br/&gt;&lt;br/&gt;
I will assume that you have some program or method for generating the statistics or ratings you want to use to predict games and that you've saved those results as an Excel file.&amp;nbsp; (These might just be season averages of the statistics Danny &amp;amp; Lee are providing.)&amp;nbsp; As a tool RapidMiner is not well-suited for this part of the problem; its strengths are in pulling the predictive value out of those statistics rather than generating them.&amp;nbsp; (Or perhaps I should say that it's not well-suited as I understand it.&amp;nbsp; I wouldn't be surprised to learn that it has useful features in this area that I don't know about.)&amp;nbsp; The Excel file should have one line for each game, with columns for the team names, statistics, ratings, and scores.
&lt;br/&gt;&lt;br/&gt;
The next step is to download and install RapidMiner.&amp;nbsp; You can do that &lt;a href="http://rapid-i.com/content/view/26/84/"&gt;here&lt;/a&gt;.&amp;nbsp; The "community edition" of RapidMiner is completely free.&amp;nbsp; (I like free.)&amp;nbsp; There's a user forum &lt;a href="http://forum.rapid-i.com/"&gt;here&lt;/a&gt; where questions usually get a fairly quick response.
&lt;br/&gt;&lt;br/&gt;
Once you've installed, start up RapidMiner.&amp;nbsp; You'll see this:&amp;nbsp; 

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-mlOccj6QQ8w/T1ku4V5lq_I/AAAAAAAAGTs/OzxI1mHmdZw/s1600/Image1.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-mlOccj6QQ8w/T1ku4V5lq_I/AAAAAAAAGTs/OzxI1mHmdZw/s1600/Image1.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
RapidMiner has three default perspectives: Design, Results, and Welcome.&amp;nbsp; It starts up in Welcome.&amp;nbsp; Switch to Design by clicking on the icon that looks like a pencil writing in a notebook, from the View menu, or by hitting F8.&amp;nbsp; The Design view looks like this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-kUEI6RJsHWM/T1kvd8seywI/AAAAAAAAGT0/zqxF7hKxuNw/s1600/Image2.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-kUEI6RJsHWM/T1kvd8seywI/AAAAAAAAGT0/zqxF7hKxuNw/s1600/Image2.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
The blank central area is the canvas where you'll graphically build your RapidMiner process.&amp;nbsp; The left-side has a menu of Operators as well as Repositories (where processes are stored).&amp;nbsp; The right-side has details about the current operator (Just a blank "Process" in this case because we haven't added anything yet.)
&lt;br/&gt;&lt;br/&gt;
To start, let's read in our Excel file of game data.&amp;nbsp; In the list of Operators on the left-side of the RapidMiner window, you'll see a folder labeled "Import".&amp;nbsp; Clicking on that reveals sub-folders labeled "Data," "Models", and so on.&amp;nbsp; Click on the Data folder and you'll see a list of operators.&amp;nbsp; "Read Excel" should be near the top.&amp;nbsp; Click and drag that operator onto the blank area in the middle of the screen and release.&amp;nbsp; You'll see this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-OJdYE9b0CB4/T1kxKpP6V1I/AAAAAAAAGT8/LSEsWoTJ7h0/s1600/Image3.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-OJdYE9b0CB4/T1kxKpP6V1I/AAAAAAAAGT8/LSEsWoTJ7h0/s1600/Image3.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
There are a couple of things to note.&amp;nbsp; First, RapidMiner has automatically drawn a connection from the output of this process (the little semi-circle node on the right of the box) to the right edge of the workspace.&amp;nbsp; Anything going out to that edge will show up in the Results view when the process is executed.&amp;nbsp; Second, the message window at the bottom of the workspace shows an error.&amp;nbsp; It is complaining "The mandatory parameter "excel file" is undefined."
&lt;br/&gt;&lt;br/&gt;
To fix this, look to the right-side.&amp;nbsp; You'll see that it is now showing the details for the highlighted "Read Excel" operator.&amp;nbsp; Just below there you'll see a button for an "Import Configuration Wizard" and then some input boxes for the various parameters for this operator, including the "excel file" parameter being complained about.&amp;nbsp; There's also a description/help box for the operator below the parameters section.
&lt;br/&gt;&lt;br/&gt;
Use the "Import Configuration Wizard" to find your Excel file and prepare it to be read in.&amp;nbsp; The wizard does some basic data checking, so you may discover a problem in your file at this point.&amp;nbsp; Here's what the final step of the wizard looks like for my sample data:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-zW2Q5lnFIao/T1kzQcYSwfI/AAAAAAAAGUE/Ulq9PssJSlg/s1600/Image5.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-zW2Q5lnFIao/T1kzQcYSwfI/AAAAAAAAGUE/Ulq9PssJSlg/s1600/Image5.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
There are 8 columns to my data:&amp;nbsp; name, score, TrueSkill mean, and home winning percentage.&amp;nbsp; (The TrueSkill mean is a rating system.&amp;nbsp; You can read more about it &lt;a href="http://netprophetblog.blogspot.com/2011/04/trueskill.html"&gt;here&lt;/a&gt;.)&amp;nbsp; These will be the inputs to my prediction model.
&lt;br/&gt;&lt;br/&gt;
To run a process in RapidMiner, you click the right-facing blue triangle button near the top of the window.&amp;nbsp; Right now our process isn't very interesting -- it just reads in the Excel file and sends it to the Results -- but let's run it and see what happens.&amp;nbsp; You may be asked to save your model and whether you want to switch to the Results view.&amp;nbsp; For both questions you can save a default answer, which is handy.&amp;nbsp; When you switch to the Results view you'll see something like this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-KXVkVbgr8K4/T1k0yAseHkI/AAAAAAAAGUM/hitcdfAL4bc/s1600/Image6.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-KXVkVbgr8K4/T1k0yAseHkI/AAAAAAAAGUM/hitcdfAL4bc/s1600/Image6.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
The data you read in creates an "Example Set" and this window is showing you the Meta Data View for the data set.&amp;nbsp; In my case, the data set has 3699 examples (games), and for each attribute in the examples, the window shows the Role, Name, Type, Statistics, Range and Missings.&amp;nbsp; There's some interesting stuff here -- for example, home teams scored between 28 and 124 points in this season.&amp;nbsp; A home team scored only 28 points?!&amp;nbsp; That's pretty intriguing.
&lt;br/&gt;&lt;br/&gt;
Let's follow up.&amp;nbsp; Click on the "Data View" checkbutton and then on the Hscore column to look at the actual data sorted by home team's score:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/--IKQZAB7CW0/T1k1uyuDV6I/AAAAAAAAGUU/ulQdYBHS8u4/s1600/Image7.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/--IKQZAB7CW0/T1k1uyuDV6I/AAAAAAAAGUU/ulQdYBHS8u4/s1600/Image7.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
Apparently that 28 point performance was put in by &lt;a href="http://rivals.yahoo.com/ncaa/basketball/recap?gid=201202150515"&gt;SMU against UAB&lt;/a&gt;.&amp;nbsp; That had to be fun to watch! You can do some interesting data analysis with the Plot View and Advanced Chart options here, but let's continue on with building a process.
&lt;br/&gt;&lt;br/&gt;
Switch back to the Design view&amp;nbsp; and let's work on conditioning the data.&amp;nbsp; In many cases, there are problems in the input data -- such as missing values -- that will corrupt your prediction models.&amp;nbsp; RapidMiner provides a number of operators for fixing these sorts of problems.&amp;nbsp; Let's work on fixing missing values.&amp;nbsp; In the Design View on the Operators tab on the left part of the screen you'll see a search box.&amp;nbsp; This is handy for finding operators by name.&amp;nbsp; Type "missing" into the Search box and you should see this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-6nriugTEFWc/T1yyCrTWHMI/AAAAAAAAGUc/oRIDmamkGtQ/s1600/Image8.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-6nriugTEFWc/T1yyCrTWHMI/AAAAAAAAGUc/oRIDmamkGtQ/s1600/Image8.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;

Click on the "Replace Missing Values" operator, drag it onto the canvas in the middle of the screen and drop it.&amp;nbsp; You'll now have this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-ziPjTFRwfco/T1yyZ7P2YZI/AAAAAAAAGUk/p5mWS1PqMkM/s1600/Image9.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-ziPjTFRwfco/T1yyZ7P2YZI/AAAAAAAAGUk/p5mWS1PqMkM/s1600/Image9.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
You'll see that RapidMiner is complaining of an error in our process: we don't have an input to the Replace Missing Values operator.&amp;nbsp; We want to connect the output of our Excel file to the input of this operator.&amp;nbsp; To do this, we left click on the output of the Read Excel operator, and drag the resulting orange line to the input of the Replace Missing Values operator and release.&amp;nbsp; This causes a pop-up box asking if we really want to disconnect the current output connection or not.&amp;nbsp; Allow RapidMiner to disconnect the port and you should have this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-3tVYTcU92mY/T1yzQ_euqYI/AAAAAAAAGUs/H9dMZYb4YjU/s1600/Image10.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-3tVYTcU92mY/T1yzQ_euqYI/AAAAAAAAGUs/H9dMZYb4YjU/s1600/Image10.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
And that's all you need to do:&amp;nbsp; Add operators and hook them together into a process.&amp;nbsp; By default, the Replace Missing Values operator replaces all missing values with the average value for that attribute.&amp;nbsp; That's fine for now, so we'll leave it as is.
&lt;br/&gt;&lt;br/&gt;
One very important step we need to take is to create a "label".&amp;nbsp; The label is the attribute that we're trying to predict.&amp;nbsp; In our case, we'll be trying to predict the winner of the game: "Home" or "Away".&amp;nbsp; We don't actually have that in our input data, so we'll need to create a new attribute and set it to be our label.
&lt;br/&gt;&lt;br/&gt;
To do this, find the "Generate Attributes" operator and the "Set Role" operator and modify your process to look like this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-tPzY8xl-JEo/T1y0oUE9J2I/AAAAAAAAGU0/aPPEEox3m3k/s1600/Image11.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-tPzY8xl-JEo/T1y0oUE9J2I/AAAAAAAAGU0/aPPEEox3m3k/s1600/Image11.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Now click on the "Generate Attributes" operator.&amp;nbsp; On the right you'll see a button labeled "function descriptions" and "Edit List(0)".&amp;nbsp; Click on this to bring up a view that will let us define a new attribute in our data set.
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-PYRjJ37xmUE/T1y1Tkb-5MI/AAAAAAAAGU8/UPyVX4rW718/s1600/Image12.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-PYRjJ37xmUE/T1y1Tkb-5MI/AAAAAAAAGU8/UPyVX4rW718/s1600/Image12.png" /&gt;&lt;/a&gt;&lt;/div&gt;

This is fairly simple to use.&amp;nbsp; We type in a name for our new attribute in the left-hand column and then an expression for calculating it in the right hand column.&amp;nbsp; We can use any existing attribute in our expression, and if you click on the calculator icon, it will bring up a tool to help create expressions.&amp;nbsp; In our case, we want to create a new attribute called "winner" that has the value "Home" if the home team scored more than the Away team, and "Away" otherwise.&amp;nbsp; The expression to do this is 'if(Hscore&amp;gt;Ascore,"Home","Away")':
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-o_8sbIyws3U/T1y2QR--HfI/AAAAAAAAGVE/VTPfvA0Iiuk/s1600/Image13.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-o_8sbIyws3U/T1y2QR--HfI/AAAAAAAAGVE/VTPfvA0Iiuk/s1600/Image13.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
And that's it for creating the new attribute.&amp;nbsp; Now we need to set the Role of this attribute to "label" so that our models will know what we're trying to predict.&amp;nbsp; To do this, click on the Set Role operator and in the right-side pane, select our new attribute from the drop-down box next to Name, and "label" from the drop-down box next to "target role":

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-N4RCbmdhIBE/T1y3QQC35sI/AAAAAAAAGVM/IL5qtNhrtQw/s1600/Image14.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-N4RCbmdhIBE/T1y3QQC35sI/AAAAAAAAGVM/IL5qtNhrtQw/s1600/Image14.png" /&gt;&lt;/a&gt;&lt;/div&gt;

We're almost ready to start modeling, but let's check to make sure we've added the "winner" attribute correctly.&amp;nbsp; Hit the run button to run the process and let's look at the output in the Results view:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-pNZoduA7kow/T1y4A8fAPqI/AAAAAAAAGVU/79g448DVlmk/s1600/Image15.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-pNZoduA7kow/T1y4A8fAPqI/AAAAAAAAGVU/79g448DVlmk/s1600/Image15.png" /&gt;&lt;/a&gt;&lt;/div&gt;

At the top of the results (colored light yellow because of its role as "label") we see the new attribute "winner".&amp;nbsp; In this data set, the Home team won almost twice as often as the Away team.&amp;nbsp; If you click on the Data View button, you can check a few games to make sure the calculation is correct:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-LTH_093-Gm4/T1y4f0v3sAI/AAAAAAAAGVc/qlJWyVW0liI/s1600/Image16.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-LTH_093-Gm4/T1y4f0v3sAI/AAAAAAAAGVc/qlJWyVW0liI/s1600/Image16.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Looks good, so let's go back to Design View and train a model.&amp;nbsp; Switch back to the Design View and find the k-NN model, drag it into the process and connect it up to look like this:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-m7ygxJEKPyo/T1y9y5_HvMI/AAAAAAAAGWM/qOBbd7upM84/s1600/Image22.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-m7ygxJEKPyo/T1y9y5_HvMI/AAAAAAAAGWM/qOBbd7upM84/s1600/Image22.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-fyvhEtRUECU/T1y5IC71eoI/AAAAAAAAGVk/QblvG3Woass/s1600/Image17.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;/a&gt;&lt;/div&gt;
&amp;nbsp; 
Along the right side you can see the parameters for the k-NN operator.&amp;nbsp; Change "k" to 3.&amp;nbsp; We're almost ready to create a model, but we need to add one last step.&amp;nbsp; Right now the input data to our model includes the scores of both teams.&amp;nbsp; It isn't very hard to predict who will win the game if we know who scored the most points :-) so we'll need to remove that information from our examples.&amp;nbsp; To do this, we need an operator called "Select Attributes".&amp;nbsp; Drop this into our process between "Set Role" and "k-NN".

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-rxzz_4ZteY0/T1y_hTVRykI/AAAAAAAAGWc/jVl5vgsQBcQ/s1600/Image24.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-rxzz_4ZteY0/T1y_hTVRykI/AAAAAAAAGWc/jVl5vgsQBcQ/s1600/Image24.png" /&gt;&lt;/a&gt;&lt;/div&gt;


Highlight the new operator, and on the right side, set the "attribute filter type" to subset and then click on "Select Attributes".&amp;nbsp; That will bring up this dialog:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-2epMjqz031E/T1y7Rms2h5I/AAAAAAAAGV8/eS95NlNA_Rk/s1600/Image20.png" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-2epMjqz031E/T1y7Rms2h5I/AAAAAAAAGV8/eS95NlNA_Rk/s1600/Image20.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Now  we simply select attributes we want to include from the left side and  use the green arrow to move them to the right side.&amp;nbsp; We want to leave  out the Hscore, Ascore and Date attributes.

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-RKxv6h8Drmo/T1y75mVZMgI/AAAAAAAAGWE/RWGRI9_68m8/s1600/Image21.png" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-RKxv6h8Drmo/T1y75mVZMgI/AAAAAAAAGWE/RWGRI9_68m8/s1600/Image21.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Save this and we're now ready to run the process to create a model.&amp;nbsp;&amp;nbsp; Hit the Run button and you should see results that look like this:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-j-j40vSEbIg/T1zAI6w-2xI/AAAAAAAAGWk/9Qp73apLM4c/s1600/Image25.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-j-j40vSEbIg/T1zAI6w-2xI/AAAAAAAAGWk/9Qp73apLM4c/s1600/Image25.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Great, we created a model!&amp;nbsp; But how good is it?&amp;nbsp; We don't have any idea.&amp;nbsp; To figure that out, we need to apply the model and then measure its performance.&amp;nbsp; Let's do that.
&lt;br/&gt;&lt;br/&gt;
Switch back to the Design View, find the "Apply Model" and "Performance (Classification)" operators, and add them to your process after the k-NN operator like so:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-E2p8tYAFTGs/T1zBj-cqQ4I/AAAAAAAAGWs/c_bCzWczMC4/s1600/Image26.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-E2p8tYAFTGs/T1zBj-cqQ4I/AAAAAAAAGWs/c_bCzWczMC4/s1600/Image26.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Note that the model output of the k-NN operator goes into the model input for the Apply Model operator, and the example set output goes into the unlabeled input.&amp;nbsp; The labeled output of Apply Model goes into the labeled input of the Performance operator, and the performance output of that operator goes out the right-hand side of our process.
&lt;br/&gt;&lt;br/&gt;
Run this, and you should get a Results View that looks something like this:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-WyoRhjThU9Y/T1zCNsuaq9I/AAAAAAAAGW0/Pas0ujmYS2w/s1600/Image27.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-WyoRhjThU9Y/T1zCNsuaq9I/AAAAAAAAGW0/Pas0ujmYS2w/s1600/Image27.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Wow, 83% accuracy predicting the winner of the game -- pretty good!&amp;nbsp; Good enough to win the Machine Madness contest?&amp;nbsp; Who can say? :-)
&lt;br/&gt;&lt;br/&gt;
This illustrates the basics of using RapidMiner for prediction.&amp;nbsp; RapidMiner has a wealth of features and options, and there are many improvements you can make to the simple process flow I've illustrated above.&amp;nbsp; But hopefully this has given you enough guidance to get started, and good luck!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-1803882667758473517?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/70Sqg8CEpYN5wKnHdStA6LUglcU/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/70Sqg8CEpYN5wKnHdStA6LUglcU/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/70Sqg8CEpYN5wKnHdStA6LUglcU/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/70Sqg8CEpYN5wKnHdStA6LUglcU/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/VIH_T5m__lk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/1803882667758473517/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=1803882667758473517" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1803882667758473517?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1803882667758473517?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/VIH_T5m__lk/using-rapidminer-to-predict-march.html" title="Using RapidMiner to Predict March Madness" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-mlOccj6QQ8w/T1ku4V5lq_I/AAAAAAAAGTs/OzxI1mHmdZw/s72-c/Image1.jpg" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/using-rapidminer-to-predict-march.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;DUQAQno9eCp7ImA9WhVTFEg.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6025240714062715649</id><published>2012-02-28T11:26:00.003-08:00</published><updated>2012-02-28T11:29:03.460-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-28T11:29:03.460-08:00</app:edited><title>Preliminary Aggregate Data</title><content type="html">&lt;p&gt;For those of you who want to play with just aggregate game result data, I've posted an &lt;a href="https://docs.google.com/open?id=0BysperLdI86MQmFjSTdUUk1TT2FLQlN3OEhWSVN4UQ"&gt;updated version&lt;/a&gt; that you can play with. The format is the same as described in &lt;a href="http://blog.smellthedata.com/2011/03/aggregate-game-results.html"&gt;a previous post&lt;/a&gt;: date, home team, away team, home score, away score, and whether or not the home team won.&lt;/p&gt;
&lt;p&gt;This data covers the 2006 season through 2/26/2012 and, as with the player-level data, will be updated on Selection Sunday to reflect the most up to date information.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-6025240714062715649?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/7DFA2NYCymoFH_yJuWqsUHmRCnM/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/7DFA2NYCymoFH_yJuWqsUHmRCnM/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/7DFA2NYCymoFH_yJuWqsUHmRCnM/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/7DFA2NYCymoFH_yJuWqsUHmRCnM/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/oGD03eFOD84" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6025240714062715649/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6025240714062715649" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6025240714062715649?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6025240714062715649?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/oGD03eFOD84/preliminary-aggregate-data.html" title="Preliminary Aggregate Data" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/preliminary-aggregate-data.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYDSX8-cCp7ImA9WhVTE0Q.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-544540249661372932</id><published>2012-02-27T17:17:00.002-08:00</published><updated>2012-02-27T17:22:58.158-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-27T17:22:58.158-08:00</app:edited><title>Preliminary 2011 Season Data</title><content type="html">&lt;p&gt;In addition to data from the 2006-2010 seasons shared publicly via &lt;a 
href="https://docs.google.com/open?id=0BysperLdI86MNjA3ZWIzNDUtNTE1NC00MWMzLTlmZmMtOGRiMjQwYjhkM2Q0"&gt;Google Docs&lt;/a&gt;,
we've published some preliminary data for the 2011 season. This uses the same format as past seasons' data and spans the beginning of the 2011 season through 2/26.
&lt;ul&gt;&lt;li&gt;&lt;a href="https://docs.google.com/open?id=0BysperLdI86MMUgyYXlNTHJUQmVWUGljbTlob1dOZw"&gt;Preliminary 2011 data&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt;
&lt;/p&gt;
&lt;p&gt;
After Selection Sunday (March 11th), we will publish an updated set of data for the 2011 season. Please let us know if you find any problems with the preliminary data.
&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-544540249661372932?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/_lMS-nwM8j1_178KjIVxKUzuE_k/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/_lMS-nwM8j1_178KjIVxKUzuE_k/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/_lMS-nwM8j1_178KjIVxKUzuE_k/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/_lMS-nwM8j1_178KjIVxKUzuE_k/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/Vc_Re9FM2zI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/544540249661372932/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=544540249661372932" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/544540249661372932?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/544540249661372932?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/Vc_Re9FM2zI/preliminary-2011-season-data.html" title="Preliminary 2011 Season Data" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/preliminary-2011-season-data.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0YFQHgzcCp7ImA9WhVTE0s.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7679628154578982041</id><published>2012-02-27T09:34:00.005-08:00</published><updated>2012-02-27T09:51:51.688-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-27T09:51:51.688-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Machine March Madness 2012: Starter Code</title><content 
type="html">I've started a github repository for the 2012 March Madness competition, to which I've committed some python code that I worked on over the weekend:&lt;br/&gt;
&lt;a href="https://github.com/dtarlow/Machine-March-Madness"&gt;https://github.com/dtarlow/Machine-March-Madness&lt;/a&gt;
&lt;br/&gt;&lt;br/&gt;
Here, you can find code that parses data from previous seasons, constructs the past brackets, and learns a few different models based on past data.  More details are in the &lt;a href="https://github.com/dtarlow/Machine-March-Madness/blob/master/README"&gt;README&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
I will post in more detail about the models once I get them working a bit better, but I encourage you to take a look at the high level structure in &lt;a href="https://github.com/dtarlow/Machine-March-Madness/blob/master/learn_synthetic.py"&gt;learn_synthetic.py&lt;/a&gt; and &lt;a href="https://github.com/dtarlow/Machine-March-Madness/blob/master/model.py"&gt;model.py&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
I've brainstormed a bunch of TODOs at the bottom of the &lt;a href="https://github.com/dtarlow/Machine-March-Madness/blob/master/README"&gt;README&lt;/a&gt;, so if you'd like to jump in and work on some of those, please do.  Or feel free to go off in your own direction.
&lt;br/&gt;&lt;br/&gt;
For detailed discussions of the code, questions, or bug reports/fixes, head on over to the &lt;a href="http://groups.google.com/group/machine-march-madness"&gt;official Google group&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7679628154578982041?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/KlGF9x37ruNcdBN7Fl81dS7BQPM/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KlGF9x37ruNcdBN7Fl81dS7BQPM/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/KlGF9x37ruNcdBN7Fl81dS7BQPM/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/KlGF9x37ruNcdBN7Fl81dS7BQPM/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/w2CVGHsgHNQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7679628154578982041/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7679628154578982041" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7679628154578982041?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7679628154578982041?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/w2CVGHsgHNQ/machine-march-madness-2012-starter-code.html" title="Machine March Madness 2012: Starter Code" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/machine-march-madness-2012-starter-code.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D08HQX0_fCp7ImA9WhVTEk0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-3012166986868072230</id><published>2012-02-25T13:33:00.003-08:00</published><updated>2012-02-25T13:37:10.344-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-25T13:37:10.344-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" 
term="march_madness" /><title>Google group for March Madness competition...</title><content type="html">... &lt;a href="http://groups.google.com/group/machine-march-madness"&gt;here&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
We'll use the Google group for discussion of issues related to rules, but other posts are fair game: maybe you're looking for somebody to team up with, or maybe you want to brainstorm modeling ideas, etc.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-3012166986868072230?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/mVN5zKQKe6WGO_tyQ5NZ6qjblWs/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mVN5zKQKe6WGO_tyQ5NZ6qjblWs/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/mVN5zKQKe6WGO_tyQ5NZ6qjblWs/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/mVN5zKQKe6WGO_tyQ5NZ6qjblWs/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/FwPo_3u4s88" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/3012166986868072230/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=3012166986868072230" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3012166986868072230?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3012166986868072230?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/FwPo_3u4s88/google-group-for-march-madness.html" title="Google group for March Madness competition..." 
/><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/google-group-for-march-madness.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkECSXY-cCp7ImA9WhVTEEw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-739121887555351790</id><published>2012-02-23T07:09:00.002-08:00</published><updated>2012-02-23T09:37:48.858-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-23T09:37:48.858-08:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Machine March Madness 2012</title><content type="html">Every year, the NCAA College Basketball season ends with a tournament of 64 teams.
Humans around the US (but also elsewhere in the world) fill in brackets with predictions
of the outcome, enter pools, and wait excitedly for the results.
&lt;br/&gt;&lt;br/&gt;
College basketball is a streaky and fairly high variance game, so there are many chances
for an underdog to make a run deep into the tournament.  We see this often -- for example, last year's
tournament featured a final four made up of 3, 4, 8, and 11 seeds -- leading to the
colloquial tournament name, "March Madness".
&lt;br/&gt;&lt;br/&gt;
So without further ado, it is my pleasure to announce that this year, 
this blog, in conjunction with 
&lt;a href="http://blog.smellthedata.com/2010/03/march-madness-algorithm-contest-your.html"&gt;commissioner Lee&lt;/a&gt;, will host another 
&lt;a href="http://newscientist.com/blogs/onepercent/2011/03/software-to-predict-march-madn.html"&gt;"Machine March Madness" contest&lt;/a&gt;.  The big idea is simple:
using data from this season and from past seasons 
(which we will provide -- e.g., past data here: 
&lt;a href="http://blog.smellthedata.com/2011/03/selection-sunday-today.html"&gt;full&lt;/a&gt; and
&lt;a href="http://blog.smellthedata.com/2011/03/aggregate-game-results.html"&gt;simple&lt;/a&gt;), build
a computer system that fills out a bracket, then pit yourself against the field
of silicon competition.  You can see posts from last season's tournament 
&lt;a href="http://blog.smellthedata.com/search/label/march_madness"&gt;here&lt;/a&gt;, and
some press coverage 
&lt;a href="http://newscientist.com/blogs/onepercent/2011/03/software-to-predict-march-madn.html"&gt;here&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
More details are coming soon, including information about prizes.  For now, you can do a few things.
&lt;br/&gt;
&lt;ol&gt;
&lt;li&gt;Download the past data (&lt;a href="http://blog.smellthedata.com/2011/03/selection-sunday-today.html"&gt;full&lt;/a&gt; and
&lt;a href="http://blog.smellthedata.com/2011/03/aggregate-game-results.html"&gt;simple&lt;/a&gt;), and start thinking about how you'd model the tournament.
To get some starter ideas, I recommend 
&lt;a href="http://blog.smellthedata.com/2011/02/thoughts-on-modeling-basketball.html"&gt;this timeless post&lt;/a&gt;
by 
&lt;a href="http://www.cs.toronto.edu/~gdahl/"&gt;George Dahl&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;Let us know in the comments if there is any other data that you would like to use.  The rule we
have is that all systems must be built using the same data, but we're open to suggestions about 
what this data is.&lt;/li&gt;
&lt;li&gt;Get started!&lt;/li&gt;
&lt;/ol&gt;
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;Update:&lt;/b&gt; Here's &lt;a href="http://www.quora.com/What-past-data-would-be-useful-for-predicting-the-results-of-this-years-NCAA-college-basketball-March-Madness-tournament-using-machine-learning"&gt;a question about additional data to use, posted on Quora.&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-739121887555351790?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/6rvah4KKnjORff9tnp0Wt8wJk68/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/6rvah4KKnjORff9tnp0Wt8wJk68/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/6rvah4KKnjORff9tnp0Wt8wJk68/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/6rvah4KKnjORff9tnp0Wt8wJk68/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/gDyRqCcLXVs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/739121887555351790/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=739121887555351790" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/739121887555351790?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/739121887555351790?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/gDyRqCcLXVs/machine-march-madness-2012.html" title="Machine March Madness 2012" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/machine-march-madness-2012.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0YNQnY7eCp7ImA9WhVUGEw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-5855682003026300474</id><published>2011-08-11T13:58:00.000-07:00</published><updated>2012-05-23T16:19:53.800-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-05-23T16:19:53.800-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mcmc" /><category 
scheme="http://www.blogger.com/atom/ns#" term="slice sampling" /><category scheme="http://www.blogger.com/atom/ns#" term="machine learning" /><category scheme="http://www.blogger.com/atom/ns#" term="bayesian models" /><title>Testing Intuitions about Markov Chain Monte Carlo: Do I have a bug?</title><content type="html">For one project I've been working on recently, I'm using a Markov Chain Monte Carlo (MCMC) method known as &lt;a href="http://en.wikipedia.org/wiki/Slice_sampling"&gt;slice sampling&lt;/a&gt;.   There are some good tutorials, examples, and implementations out there (e.g., by &lt;a href="http://homepages.inf.ed.ac.uk/imurray2/teaching/09mlss/"&gt;Iain Murray&lt;/a&gt; or &lt;a href="http://www.cs.toronto.edu/~radford/slice.software.html"&gt;Radford Neal&lt;/a&gt;), but for various reasons, I wanted to implement my own version.
&lt;br/&gt;&lt;br/&gt;
Now, debugging MCMC algorithms is somewhat troublesome, due to their random nature and the fact that chains just sometimes &lt;a href="http://en.wikipedia.org/wiki/Markov_chain_mixing_time"&gt;mix slowly&lt;/a&gt;, but there are some good ways to be pretty sure that you get things right.  For example, the &lt;a href="http://qed.econ.queensu.ca/pub/faculty/ferrall/quant/papers/04_04_29_geweke.pdf"&gt;Geweke method&lt;/a&gt; is highly regarded as _the_ method to make sure you're getting it right.  So this exercise is not actually really about debugging.  It's more about testing intuitions about the behavior of a sampler.
&lt;br/&gt;&lt;br/&gt;
With that out of the way, on to the question:&lt;br/&gt;
I implemented my sampler, initialized it with small random numbers for the parameters, and set it running on a simple test model (which I'm intentionally not describing in detail).  One high level statistic that is relevant to look at is the (log) probability of samples versus iteration of the sampler, so I made that plot.  It looks like this:&lt;br/&gt;
&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-2XrtQfyRofc/TkRGTue1a4I/AAAAAAAABEI/PXCViZzuSvo/s1600/prob_trace_for_blog.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 300px;" src="http://2.bp.blogspot.com/-2XrtQfyRofc/TkRGTue1a4I/AAAAAAAABEI/PXCViZzuSvo/s400/prob_trace_for_blog.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5639709938083523458" /&gt;&lt;/a&gt;

This plot looks a bit surprising.  Upon initialization, the sampler moves directly to regions of space that have very low probability (remember, this is a _log_ probability*), and it appears to just keep going to exponentially less and less probable regions.  The point of a sampler is that it should spend an amount of time in a state in proportion to the state's probability.  And this sampler is making a beeline to a state that is e^-600 times less probable than where it started.
&lt;br/&gt;&lt;br/&gt;
So here's the question: do I have a bug?  In other words, if you were my supervisor and I came to you with this plot, would you dismiss it and send me back to debugging?  If not, explain how this could possibly make sense.
&lt;br/&gt;&lt;br/&gt;
I'll post my answer sometime in the next couple days.

&lt;br/&gt;&lt;br/&gt;
* I'm leaving out constants, so the graph would be shifted down (but wouldn't change shape or scale) if I were including all the constants.

&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;Update:&lt;/b&gt; This is long overdue, but Iain Murray nailed it in the comments:
&lt;blockquote&gt;No there isn’t (necessarily) a bug. This type of plot is very easily produced with valid code: e.g. by slice sampling a unit spherical Gaussian distribution in D=5000 dimensions and initializing at an atypical point of high-probability (much closer to the origin than sqrt(D) away). Simple Metropolis and slice samplers can’t change the log-prob by more than ≈1 per iteration, so large log-prob changes are slow and painful. Intelligent proposals, reparameterizations, or auxiliary variable methods can improve matters. This is a nice illustration that initializing with an optimizer (without some form of early stopping) can be a bad idea!&lt;/blockquote&gt;

In fact, I was using Matlab's built-in slice sampler, along with a zero-mean, spherical, many thousand-dimensional Gaussian distribution for the likelihood, and initializing near the mode (0).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-5855682003026300474?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/FKgl8KitwMIrYV2jNVfNV4avK3A/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/FKgl8KitwMIrYV2jNVfNV4avK3A/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/FKgl8KitwMIrYV2jNVfNV4avK3A/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/FKgl8KitwMIrYV2jNVfNV4avK3A/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/Nk5lsS-_Als" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/5855682003026300474/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=5855682003026300474" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/5855682003026300474?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/5855682003026300474?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/Nk5lsS-_Als/testing-intuitions-about-markov-chain.html" title="Testing Intuitions about Markov Chain Monte Carlo: Do I have a bug?" 
/><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-2XrtQfyRofc/TkRGTue1a4I/AAAAAAAABEI/PXCViZzuSvo/s72-c/prob_trace_for_blog.png" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/08/testing-intuitions-about-markov-chain.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkAHQ3g8fip7ImA9WhZRFEs.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-3862217420283787892</id><published>2011-04-10T11:37:00.001-07:00</published><updated>2011-04-10T11:38:52.676-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-10T11:38:52.676-07:00</app:edited><title>Crawling Code</title><content type="html">&lt;p&gt;One of the contestants requested that I upload the code to crawl the boxscores. I have done so and it is available on github: &lt;a href="https://github.com/leezen/boxscore-crawler"&gt;https://github.com/leezen/boxscore-crawler&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;Note that Yahoo changed its format starting around March 10th and the code uses the flag to get the old boxscore format. It is unclear how long this option will remain available from Yahoo.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-3862217420283787892?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/LYaT4MpdtvER4NrxPRBFy-IVeHs/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LYaT4MpdtvER4NrxPRBFy-IVeHs/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/LYaT4MpdtvER4NrxPRBFy-IVeHs/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/LYaT4MpdtvER4NrxPRBFy-IVeHs/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/c8LkgiOWabU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/3862217420283787892/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=3862217420283787892" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3862217420283787892?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3862217420283787892?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/c8LkgiOWabU/crawling-code.html" title="Crawling Code" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/04/crawling-code.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MAR38zfip7ImA9WhVTEk0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6994575445129067438</id><published>2011-04-10T10:42:00.002-07:00</published><updated>2012-02-25T14:37:26.186-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-25T14:37:26.186-08:00</app:edited><title>2011 Predictive Analytics Challenge Winner</title><content type="html">&lt;p&gt;We knew it would be a machine, but we didn't know which one until UConn's victory ensured The 
Pain Machine the title as &lt;b&gt;winner&lt;/b&gt; of the 2011 March Madness Predictive Analytics Challenge! Congratulations to Scott Turner and his entry on the victory -- his second in a row! He will be receiving a $25 gift certificate to Amazon.com. It doesn't sound like he'll be resting on his laurels as he's started a &lt;a href="http://netprophetblog.blogspot.com/"&gt;blog&lt;/a&gt; that will detail further development of his system.&lt;/p&gt;
&lt;p&gt;Thank you to all the participants and entrants this year. We would love to know how you thought the contest went, how we can improve for next year, and any other feedback you might have! We look forward to your participation again next year!&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-6994575445129067438?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/c-a2tonJGZWhbe8yV_CvLPeXGSA/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/c-a2tonJGZWhbe8yV_CvLPeXGSA/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/c-a2tonJGZWhbe8yV_CvLPeXGSA/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/c-a2tonJGZWhbe8yV_CvLPeXGSA/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/8pldI17CpS8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6994575445129067438/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6994575445129067438" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6994575445129067438?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6994575445129067438?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/8pldI17CpS8/2011-predictive-analytics-challenge.html" title="2011 Predictive Analytics Challenge Winner" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/04/2011-predictive-analytics-challenge.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUIBRng5fip7ImA9WhVSFUo.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6555941734949898522</id><published>2011-03-27T16:20:00.001-07:00</published><updated>2012-03-12T10:32:37.626-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-12T10:32:37.626-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>2011 Algorithmic March 
Madness: Machines Lock in Victory over Humans</title><content type="html">Well that was an exciting and surprising weekend of basketball.  &lt;a href="http://scores.espn.go.com/ncb/recap?gameId=310850057"&gt;(8)Butler beat (2)Florida in overtime&lt;/a&gt;, and &lt;a href="http://sports.espn.go.com/ncb/recap?gameId=310862305"&gt;(11)Virginia Commonwealth handily took care of (1)Kansas&lt;/a&gt;, eliminating the last remaining #1 seed.  Rounding out the Final Four are &lt;a href="http://scores.espn.go.com/ncb/boxscore?gameId=310850041"&gt;(3)UConn&lt;/a&gt; and &lt;a href="http://sports.espn.go.com/ncb/recap?gameId=310860153"&gt;(4)Kentucky&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
March Madness is usually good for some Cinderella stories, but this Final Four seems particularly improbable.  There are no #1 seeds remaining (this has been the case 3 times in March Madness history, according to the TV announcers), and never before have teams seeded as low as #11 and #8 met in a Final Four game.
&lt;br/&gt;&lt;br/&gt;
None of the entries in our second annual &lt;a href="http://blog.smellthedata.com/2011/03/official-2011-march-madness-predictive.html"&gt;March Madness Algorithm Challenge&lt;/a&gt; saw this amount of madness coming.  Two entrants correctly predicted that (3)UConn would make the Final Four (Team Delete Kernel and The Pain Machine), but no other algorithms or baselines got any Final Four teams correct.  In fact, the only entry that still has a chance at more points is The Pain Machine, which has UConn winning one more game.
&lt;br/&gt;&lt;br/&gt;
So the question of which algorithm will win the contest is still not settled: a UConn victory on April 2, and The Pain Machine walks home with the prize; a UConn loss, and Team Delete Kernel is our winner.
&lt;br/&gt;&lt;br/&gt;
What &lt;b&gt;is&lt;/b&gt; settled at this point is that a machine will claim victory over the human-aided competition.  The human baselines include our commissioner Lee's bracket; the Higher Seed bracket (where the human intervention came via the committee that chose seeds); and the &lt;a href="http://fivethirtyeight.blogs.nytimes.com/2011/03/14/how-we-made-our-n-c-a-a-picks/"&gt;Nate Silver baseline&lt;/a&gt;, which was a part-human, part-computer effort.
&lt;br/&gt;&lt;br/&gt;
To give you an idea of the potentially winning methodologies, Scott Turner (The Pain Machine) describes his approach &lt;a href="http://blog.smellthedata.com/2011/03/dr-scott-turner-on-march-madness-pain.html"&gt;here&lt;/a&gt;, and Kevin Lee (Team Delete Kernel) based his model on the method described &lt;a href="http://blog.smellthedata.com/2011/03/algorithms-2011-march-madness.html"&gt;here&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
So it's premature to congratulate a winner yet, but let me tritely say that &lt;a href="http://www.youtube.com/watch?v=Skfw282fJak&amp;feature=related"&gt;I, for one, welcome our new March Madness algorithm overlords&lt;/a&gt;.
&lt;pre&gt;
ENTRANT                         R1  R2  R3  R4  R5  Winner      Pts  Possible
Team Delete Kernel              23  20  16  8   -   Ohio St.    67   67
Human (Lee)*                    25  18  16  0   -   Kansas      59   59
Baseline (Higher Seed)*         25  20  12  0   -   Ohio St.    57   57
The Pain Machine                19  18  8   8   -   Kansas      53   69
Baseline (Nate Silver)*         25  20  8   0   -   Ohio St.    53   53
InItToWinIt                     22  20  8   0   -   Kansas      50   50
Baseline (TrueSkill)            26  18  4   0   -   Ohio St.    48   48
Danny's Dangerous Picks         22  16  8   0   -   Duke        46   46
Baseline (LRMC)                 25  16  4   0   -   Ohio St.    45   45
DukeRepeats                     23  16  4   0   -   Duke        43   43
Point Differential Centrality   23  16  4   0   -   Ohio St.    43   43
dirknbr1                        23  8   4   0   -   Ohio St.    35   35

* Denotes human involvement.
&lt;/pre&gt;

You can see the full brackets here (Update: actually, it looks like Yahoo took them down).  Also, there is a second, Sweet 16 contest that we haven't mentioned lately.  Stay tuned for an update on that front.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-6555941734949898522?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/riDZhubzh5HXcQ4N8McRXcH1r5c/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/riDZhubzh5HXcQ4N8McRXcH1r5c/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/riDZhubzh5HXcQ4N8McRXcH1r5c/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/riDZhubzh5HXcQ4N8McRXcH1r5c/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/jCb_32ZJPGk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6555941734949898522/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6555941734949898522" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6555941734949898522?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6555941734949898522?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/jCb_32ZJPGk/2011-algorithmic-march-madness-machines.html" title="2011 Algorithmic March Madness: Machines Lock in Victory over Humans" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/03/2011-algorithmic-march-madness-machines.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0IESHo9cSp7ImA9WhZSFk0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-2319680094058083921</id><published>2011-03-25T21:59:00.000-07:00</published><updated>2011-03-31T15:11:49.469-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-31T15:11:49.469-07:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Algorithmic March Madness: Elite 8 Update</title><content type="html">The Sweet 16 saw some major upsets, with two of the remaining three number one seeds falling.  Perhaps most impressive was Arizona's (5 seed) 93-77 thumping of Duke (1 seed).  In the Elite 8, we have the following seeds: 4, 2, 5, 3, 1, 11, 8, 2.
&lt;br/&gt;&lt;br/&gt;
No entrants predicted that &lt;span style="text-decoration: line-through;"&gt;Baylor&lt;/span&gt; Butler (8 seed) or VCU (11 seed) would have made it this far, but we're starting to see the first signs of weakness in the Higher Seed and Nate Silver baselines.  At this point, our Commissioner's human-chosen baseline bracket is tied for first with Team Delete Kernel's algorithmically chosen bracket.  Don't count out InItToWinIt or The Pain Machine quite yet, though.  I haven't worked through all the scenarios, but if Kansas wins it all and Arizona beats UConn, InItToWinIt has a good shot at the title.  If UConn makes it to the final game, The Pain Machine is looking strong.  And if Kentucky wins it all, Team Delete Kernel looks to have at least a share of the title locked up.  Interestingly, I haven't been able to find a potential outcome where any baseline other than Lee's human-chosen bracket has a chance at the title.  So after a tough start, the algorithmically-chosen brackets are making their move!
&lt;br/&gt;&lt;br/&gt;
Currently, here are the standings:
&lt;pre&gt;
ENTRANT                         R1  R2  R3  R4  R5  Winner      Pts  Possible
Human (Lee)                     25  18  16  -   -   Kansas      59   131
Team Delete Kernel              23  20  16  -   -   Ohio St.    59   91
Baseline (Higher Seed)          25  20  12  -   -   Ohio St.    57   81
Baseline (Nate Silver)          25  20  8   -   -   Ohio St.    53   77
InItToWinIt                     22  20  8   -   -   Kansas      50   106
Baseline (TrueSkill)            26  18  4   -   -   Ohio St.    48   48
Danny's Dangerous Picks         22  16  8   -   -   Duke        46   70
The Pain Machine                19  18  8   -   -   Kansas      45   125
Baseline (LRMC)                 25  16  4   -   -   Ohio St.    45   69
DukeRepeats                     23  16  4   -   -   Duke        43   67
Point Differential Centrality   23  16  4   -   -   Ohio St.    43   51
dirknbr1                        23  8   4   -   -   Ohio St.    35   59
&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-2319680094058083921?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/_pkHUw-2ANnPAAGidn5tdXsIUp8/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/_pkHUw-2ANnPAAGidn5tdXsIUp8/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/_pkHUw-2ANnPAAGidn5tdXsIUp8/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/_pkHUw-2ANnPAAGidn5tdXsIUp8/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/rUiI0ci3wdM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/2319680094058083921/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=2319680094058083921" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2319680094058083921?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2319680094058083921?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/rUiI0ci3wdM/algorithmic-march-madness-elite-8.html" title="Algorithmic March Madness: Elite 8 Update" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>4</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/03/algorithmic-march-madness-elite-8.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkEAR3o5cCp7ImA9WhZTF0Q.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-3388203502224514452</id><published>2011-03-22T05:49:00.000-07:00</published><updated>2011-03-22T05:57:26.428-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-22T05:57:26.428-07:00</app:edited><title>Sweet Sixteen Bracket</title><content 
type="html">&lt;p&gt;If you did not have a chance to get your entry in for the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/55350/standings"&gt;full 63-pick&lt;/a&gt; tournament, you still have a chance to make picks in the condensed 15-pick format for our &lt;a href="http://tournament.fantasysports.yahoo.com/t2/group/11005"&gt;Sweet Sixteen Bracket&lt;/a&gt;! The next set of games begins Thursday night (March 24) Eastern time, so you will need to submit your brackets by then. If you already entered the original bracket, you have been invited to the new one. If you are new and would like to participate, please e-mail leezen+MarchMadness at gmail.&lt;/p&gt;
&lt;p&gt;Currently, in the full bracket prediction contest, Team Delete Kernel and InItToWinIt are sitting in the top half of the pack. Don't count The Pain Machine out yet, though: it can still score a possible 153 points, which is more than TrueSkill has available, even though TrueSkill currently ranks third.&lt;/p&gt;
&lt;p&gt;P.S. Sorry I did not get around to posting competitor profiles this weekend.&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-3388203502224514452?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/6SvkcRKsjOR1vxlUZMsur7yJUhM/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/6SvkcRKsjOR1vxlUZMsur7yJUhM/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/6SvkcRKsjOR1vxlUZMsur7yJUhM/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/6SvkcRKsjOR1vxlUZMsur7yJUhM/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/zOnBnvFCejE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/3388203502224514452/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=3388203502224514452" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3388203502224514452?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3388203502224514452?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/zOnBnvFCejE/sweet-sixteen-bracket.html" title="Sweet Sixteen Bracket" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/03/sweet-sixteen-bracket.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0IDSXcycSp7ImA9WhZTF0k.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7922890455666465805</id><published>2011-03-21T13:38:00.001-07:00</published><updated>2011-03-21T14:06:18.999-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-21T14:06:18.999-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>After Round 2: Points from Upsets</title><content type="html">You can see the 
current standings &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/55350/standings"&gt;here&lt;/a&gt;.
  At the moment, 3 of the top 5 (including the top 2) entries used some sort of human judgement in their algorithm: picking based on Higher Seed (tied for 1st) is the judgement of the seeding committee; the Nate Silver (tied for 1st) baseline uses seed information along with several other power ratings, some of which are human based, and some of which are computer based; and the Lee bracket (tied for 4th) was filled out by Lee with no computer assistance.  The top two computer entries are the TrueSkill algorithm (3rd) that Scott Turner implemented and the Delete Kernel (tied for 4th) entry from Kevin, who built his entry based on the simplest &lt;a href="http://blog.smellthedata.com/2011/03/algorithms-2011-march-madness.html"&gt;1D probabilistic matrix factorization model&lt;/a&gt; that I wrote about previously (and &lt;a href="http://blog.smellthedata.com/2011/03/march-madness-predictions-code.html"&gt;released code&lt;/a&gt; for).
&lt;br/&gt;&lt;br/&gt;
I think the take-away at this point is that the real winner right now is whoever decided on the seeding of teams.  The Higher Seed bracket is tied for first place, and the strength of the other brackets mostly comes from how closely their picks matched the higher seed.  Yes, there have been a lot of upsets, including some big ones, and the entrants did indeed pick some upsets, but the entrants didn't generally pick the right upsets.
&lt;br/&gt;&lt;br/&gt;
Here's an alternative way of looking at results that reveals this.  I took the point totals for each contestant and split off the contribution to the point total that came from picking an upset.  For the two rounds, I report "A/B" where B is the total number of points the entrant earned in the Yahoo bracket, and A is the number of points that came from predicting upsets.  I define "upset points" to be points gained from a correct prediction, where the winning team is not the best ranked seed that could possibly have made it to that point.  So even though Richmond (12) was the favorite over Morehead (13), predicting Richmond making it to the Sweet 16 would give a contestant 2 "upset points", because the best ranked seed that could have made it to that point was Louisville (4).  Here are the results:
&lt;pre&gt;
Points from upsets

TEAM                   R1      R2     Total
Delete Kernel:        4/23    2/20    6/43
InItToWinIt:          2/22    2/20    4/42
Human (Lee):          1/25    2/18    3/43
Point Differential:   3/23    0/16    3/39
Silver:               3/25    0/20    3/45
LRMC:                 3/25    0/16    3/41
Dirknbr1:             0/23    2/8     2/31
Danny:                2/22    0/16    2/38
TrueSkill:            2/26    0/18    2/44
The Pain Machine:     0/19    0/18    0/37
Higher Seed:          0/25    0/20    0/45
&lt;/pre&gt;

Under this evaluation measure (which is admittedly not what anybody was trying to optimize), the completely computer-based models are doing better.  Perhaps the real take-away at the moment, though, is that predicting upsets is hard!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7922890455666465805?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/M7LKsseN1ULK_dp-8PIZ4wX2wfY/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/M7LKsseN1ULK_dp-8PIZ4wX2wfY/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/M7LKsseN1ULK_dp-8PIZ4wX2wfY/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/M7LKsseN1ULK_dp-8PIZ4wX2wfY/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/fScK8yxGhSo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7922890455666465805/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7922890455666465805" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7922890455666465805?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7922890455666465805?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/fScK8yxGhSo/after-round-2-points-from-upsets.html" title="After Round 2: Points from Upsets" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/03/after-round-2-points-from-upsets.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08NSHo8fip7ImA9WhZTFU0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7332313013121914281</id><published>2011-03-18T21:32:00.001-07:00</published><updated>2011-03-18T21:44:59.476-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-18T21:44:59.476-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" 
/><title>Current Standings</title><content type="html">&lt;p&gt;I've opened up the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/55350/"&gt;Yahoo Group&lt;/a&gt; for anyone who wants to see the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/55350/standings"&gt;standings&lt;/a&gt; and the submitted brackets.     &lt;br /&gt;    &lt;br /&gt;Remember that if you were not able to submit a bracket in time for the full tournament, you will have a second chance as we will run a Sweet Sixteen bracket as well. The Sweet Sixteen does not start until 3/24, so you have almost a week to tweak and get something working before then!     &lt;br /&gt;    &lt;br /&gt;The first round of play is over and after 32 completed games, the baseline predictors are doing very well against the algorithms. I have pasted the full standings below (I realized after last year's tournament that Yahoo does not preserve the brackets and standings, so I will try to do a better job keeping track of them on the blog this year). 
&lt;/p&gt;  &lt;table border="0" cellspacing="0" cellpadding="2" width="686"&gt;&lt;tbody&gt;     &lt;tr&gt;       &lt;td valign="top" width="55"&gt;Rank&lt;/td&gt;        &lt;td valign="top" width="200"&gt;Bracket&lt;/td&gt;        &lt;td valign="top" width="40"&gt;R1&lt;/td&gt;        &lt;td valign="top" width="40"&gt;R2&lt;/td&gt;        &lt;td valign="top" width="40"&gt;R3&lt;/td&gt;        &lt;td valign="top" width="40"&gt;R4&lt;/td&gt;        &lt;td valign="top" width="60"&gt;Semis&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Finals&lt;/td&gt;        &lt;td valign="top" width="60"&gt;Points&lt;/td&gt;        &lt;td valign="top" width="60"&gt;Possible&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;1&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2584939" target="_blank"&gt;Baseline (TrueSkill)&lt;/a&gt; &lt;/td&gt;        &lt;td valign="top" width="40"&gt;24&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;24&lt;/td&gt;        &lt;td valign="top" width="60"&gt;185&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;2&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/292559" target="_blank"&gt;Baseline (Higher Seed)&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;23&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td 
valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;23&lt;/td&gt;        &lt;td valign="top" width="60"&gt;184&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;2&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2838499" target="_blank"&gt;Baseline (Nate Silver)&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;23&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;23&lt;/td&gt;        &lt;td valign="top" width="60"&gt;184&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;2&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2831273" target="_blank"&gt;Baseline (LRMC)&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;23&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;23&lt;/td&gt;        &lt;td valign="top" width="60"&gt;182&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;2&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2650527" target="_blank"&gt;Human (Lee)&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;23&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td 
valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Kansas&lt;/td&gt;        &lt;td valign="top" width="60"&gt;23&lt;/td&gt;        &lt;td valign="top" width="60"&gt;182&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;6&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/675902" target="_blank"&gt;Point Differential Centrality&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;22&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;22&lt;/td&gt;        &lt;td valign="top" width="60"&gt;153&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;7&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2577954" target="_blank"&gt;Danny's Dangerous Picks&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;21&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Duke&lt;/td&gt;        &lt;td valign="top" width="60"&gt;21&lt;/td&gt;        &lt;td valign="top" width="60"&gt;182&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;7&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a 
href="http://tournament.fantasysports.yahoo.com/t1/924842" target="_blank"&gt;DukeRepeats&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;21&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Duke&lt;/td&gt;        &lt;td valign="top" width="60"&gt;21&lt;/td&gt;        &lt;td valign="top" width="60"&gt;180&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;7&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/383577" target="_blank"&gt;dirknbr1&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;21&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;21&lt;/td&gt;        &lt;td valign="top" width="60"&gt;168&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;10&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2581754" target="_blank"&gt;Team Delete Kernel&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;20&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Ohio St.&lt;/td&gt;        &lt;td valign="top" width="60"&gt;20&lt;/td&gt;        &lt;td 
valign="top" width="60"&gt;181&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;10&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1847646" target="_blank"&gt;InItToWinIt&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;20&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Kansas&lt;/td&gt;        &lt;td valign="top" width="60"&gt;20&lt;/td&gt;        &lt;td valign="top" width="60"&gt;165&lt;/td&gt;     &lt;/tr&gt;      &lt;tr&gt;       &lt;td valign="top" width="55"&gt;12&lt;/td&gt;        &lt;td valign="top" width="200"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/413450" target="_blank"&gt;The Pain Machine&lt;/a&gt;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;17&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="40"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="60"&gt;&amp;nbsp;&lt;/td&gt;        &lt;td valign="top" width="90"&gt;Kansas&lt;/td&gt;        &lt;td valign="top" width="60"&gt;17&lt;/td&gt;        &lt;td valign="top" width="60"&gt;174&lt;/td&gt;     &lt;/tr&gt;   &lt;/tbody&gt;&lt;/table&gt;  &lt;p&gt;I will be posting introductions of the various contenders over the course of this weekend. In the meantime, cheer on the algorithms... or human baselines - the higher seed and &amp;quot;Lee&amp;quot; - if you fear the machines! 
Enjoy the exciting set of games coming this weekend!&lt;/p&gt;  &lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/1107147718367558732-7332313013121914281?l=blog.smellthedata.com' alt='' /&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href="http://feedads.g.doubleclick.net/~a/eTU-Vksm6wIITlbUuFKYNBdnptE/0/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/eTU-Vksm6wIITlbUuFKYNBdnptE/0/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;br/&gt;
&lt;a href="http://feedads.g.doubleclick.net/~a/eTU-Vksm6wIITlbUuFKYNBdnptE/1/da"&gt;&lt;img src="http://feedads.g.doubleclick.net/~a/eTU-Vksm6wIITlbUuFKYNBdnptE/1/di" border="0" ismap="true"&gt;&lt;/img&gt;&lt;/a&gt;&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/9t97e8gOgTA" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7332313013121914281/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7332313013121914281" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7332313013121914281?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7332313013121914281?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/9t97e8gOgTA/current-standings_18.html" title="Current Standings" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2011/03/current-standings_18.html</feedburner:origLink></entry></feed>

