<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;A0cARnk4fSp7ImA9WhBbEk0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732</id><updated>2013-05-10T11:04:07.735-07:00</updated><category term="parallel computing" /><category term="courses" /><category term="ai" /><category term="causality" /><category term="basketball" /><category term="dannys_predictions" /><category term="books" /><category term="data structure" /><category term="challenge problem" /><category term="lawyers" /><category term="toronto" /><category term="methodology" /><category term="art" /><category term="analytics" /><category term="ranking" /><category term="algorithms" /><category term="uncertainty" /><category term="memorization" /><category term="hadoop" /><category term="classification" /><category term="linear_programming" /><category term="psychology" /><category term="online marketing" /><category term="pain machine" /><category term="taxes" /><category term="netflix" /><category term="data analysis" /><category term="scipy" /><category term="schools" /><category term="sports" /><category term="rapidminer" /><category term="scrabble" /><category term="probability" /><category term="c++" /><category term="talent" /><category term="dynamic algorithms" /><category term="computation" /><category term="displaying code" /><category term="san francisco" /><category term="career choice" /><category term="success" /><category term="APIs" /><category term="public_relations" /><category 
term="incentives" /><category term="rationality" /><category term="controversies" /><category term="social networks" /><category term="summer school" /><category term="nearest neighbors" /><category term="buildings" /><category term="theoretical computer science" /><category term="march_madness" /><category term="conversation starters" /><category term="the_webs" /><category term="statistics" /><category term="mcmc" /><category term="chess" /><category term="MAP_inference" /><category term="conferences" /><category term="google" /><category term="randomness" /><category term="auctions" /><category term="nutsandbolts" /><category term="structured prediction" /><category term="advertising" /><category term="military" /><category term="the real world" /><category term="monte" /><category term="beginners" /><category term="data visualization" /><category term="image_processing" /><category term="python" /><category term="biology" /><category term="lake oswego rental" /><category term="public transportation" /><category term="scott turner" /><category term="max_product_belief_propagation" /><category term="belief propagation" /><category term="code" /><category term="football" /><category term="horse racing" /><category term="learning" /><category term="artificial intelligence" /><category term="science" /><category term="linux" /><category term="computational complexity" /><category term="logistic regression" /><category term="protocol_buffers" /><category term="math" /><category term="recommendation systems" /><category term="emacs" /><category term="research" /><category term="robotics" /><category term="bayesian models" /><category term="programming" /><category term="politics" /><category term="sympy" /><category term="videos" /><category term="graduate school" /><category term="slice sampling" /><category term="matrix factorization" /><category term="graphical_models" /><category term="distributed computing" /><category term="databases" /><category term="seo" 
/><category term="economics" /><category term="blogger" /><category term="computer vision" /><category term="web2.0" /><category term="constraint_satisfaction" /><category term="george" /><category term="web_security" /><category term="twitter" /><category term="regularization" /><category term="history" /><category term="gambling" /><category term="machine learning" /><category term="data" /><category term="markets" /><category term="sociology" /><category term="energy use" /><category term="medicine" /><title>This Number Crunching Life</title><subtitle type="html">Randomness in the world with a smattering of other randomness</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://blog.smellthedata.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>179</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/ThisNumberCrunchingLife" /><feedburner:info uri="thisnumbercrunchinglife" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><entry 
gd:etag="W/&quot;DkAMSHszcCp7ImA9WhBWFU4.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-3200650323761024968</id><published>2013-04-09T12:13:00.000-07:00</published><updated>2013-04-09T12:13:09.588-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-09T12:13:09.588-07:00</app:edited><title>Congratulations to the Machine March Madness Winner</title><content type="html">Well, after another exciting March Madness tournament, Louisville emerged as the winner of March Madness, and Ryan Boesch emerged as the winner of Machine March Madness, with his algorithm beating out the field of 22 other machine competitors and all the human baselines.  Congratulations, Ryan!  
&lt;br/&gt;&lt;br/&gt;
I asked him a few questions, which he answers below:&lt;br/&gt;&lt;br/&gt;

&lt;b&gt;1. What inspired you to compete in the Machine March Madness competition?&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;

Last year I finished a class on Convex Optimization during the winter quarter and was planning to take a Machine Learning class in the spring quarter. I was looking for a project to apply what I had learned. I saw this competition and submitted a last minute bracket. &lt;br/&gt;&lt;br/&gt;
 

&lt;b&gt;2. What do you attribute your win to?  What is your model best at?&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;

The win was of course very lucky. Basketball games are random in nature, so finding which model is actually the best would require many years of tournaments. One tournament is not statistically significant. &lt;br/&gt;&lt;br/&gt;

There is nothing particularly special about my model. I used &lt;a href="http://blog.smellthedata.com/2009/03/data-driven-march-madness-predictions.html"&gt;Danny's model&lt;/a&gt;, only I fit the parameters using convex optimization instead of batched gradient descent. &lt;br/&gt;&lt;br/&gt;


&lt;b&gt;3. What do you think the most promising direction(s) towards improving your model would be?&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Most Promising: My current model simply matches teams and sees which has the higher predicted score. It doesn't account for the difficulty of previously played games in the tournament. For example, say team 1 has a 51% chance to win the first round and also 51% chance to win the second round against team 2. If team 2 has a 95% chance of winning the first round then they are more likely to make it to round 3 even though they only have a 49% chance to beat team 1 in the second round. This is taken into account in &lt;a href="http://www.nytimes.com/interactive/2013/03/18/sports/ncaabasketball/nate-bracket.html"&gt;Nate Silver's picks&lt;/a&gt; for example.&lt;br/&gt;&lt;br/&gt;

Second Most Promising: When in a pool with other competitors the goal is no longer to maximize your expected score, but instead to maximize your expected chance of winning. These two optimizations do not always result in the same picks. I may consider taking this into account in future years. I found &lt;a href="http://dehn.slu.edu/research/papers/pools.pdf"&gt;this paper&lt;/a&gt; on Nate Silver's blog which analyzes this idea.&lt;br/&gt;&lt;br/&gt;
 

&lt;b&gt;4. What advice would you give to future competitors?&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Be wary of overfitting your model. &lt;br/&gt;&lt;br/&gt;
 

&lt;b&gt;5. What would you change about the competition in future years?&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
We should try to get out and advertise for the competition earlier and to a broader audience to maximize participation. &lt;br/&gt;&lt;br/&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/eu6sjaG_ITQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/3200650323761024968/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=3200650323761024968" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3200650323761024968?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/3200650323761024968?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/eu6sjaG_ITQ/congratulations-to-machine-march.html" title="Congratulations to the Machine March Madness Winner" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/04/congratulations-to-machine-march.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8FR3kyeSp7ImA9WhBWEUQ.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-1752808028092964802</id><published>2013-04-05T13:30:00.000-07:00</published><updated>2013-04-05T13:30:16.791-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-04-05T13:30:16.791-07:00</app:edited><title>Machine March Madness Final Four Outlook</title><content type="html">&lt;i&gt;Once again, we have a guest post by Scott Turner, our 
local Machine March Madness competitor and analyst, who also runs &lt;a href="http://netprophetblog.blogspot.com/"&gt;http://netprophetblog.blogspot.com/&lt;/a&gt;.  Thanks for another great post, Scott!&lt;/i&gt;
&lt;br/&gt;&lt;br/&gt;

The Machine March Madness Contest finds itself in peculiar waters this year -- no doubt because the Tournament is in strange waters itself.  There is only a single team left alive from the twelve #1, #2, and #3 seeds. Going into the Final Four, only four brackets have their champion prediction left alive -- and all four have Louisville.  In fact, only one bracket has anyone left alive other than Louisville -- and that's "Tim J's Nets for Nets", who has Syracuse to the final game (but losing to Ohio State).  But despite the craziness in the Tournament this year, the top four predictors in the Machine March Madness Contest are in the top 5% of all brackets.
&lt;br/&gt;&lt;br/&gt;
If Louisville manages to win out, the champion will be "Ryan's Rank 1 Approximation" with 121 points.  He will beat out all the human competitors as well.
&lt;br/&gt;&lt;br/&gt;
If Louisville loses in the final game, the champion will be "Predict the Madness" (tied with "Danny's Dangerous Picks"), with "Ryan's Rank 1 Approximation" and my own Prediction Machine both a single point behind.
&lt;br/&gt;&lt;br/&gt;
If Louisville somehow loses to Wichita State and Syracuse beats Michigan, then "Tim J's Nets for Nets" will vault all the way from fifteenth into a tie for first with "Predict the Madness".  If Syracuse loses to Michigan, then "Predict the Madness" will win outright.
&lt;br/&gt;&lt;br/&gt;
It has certainly been a crazy year for the Tournament!&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/6Ehoukonfpg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/1752808028092964802/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=1752808028092964802" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1752808028092964802?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1752808028092964802?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/6Ehoukonfpg/machine-march-madness-final-four-outlook.html" title="Machine March Madness Final Four Outlook" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/04/machine-march-madness-final-four-outlook.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08ASXYzfSp7ImA9WhBXEkk.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-2695874696016295990</id><published>2013-03-25T15:07:00.002-07:00</published><updated>2013-03-25T15:17:28.885-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-25T15:17:28.885-07:00</app:edited><title>Upset Analysis by Scott Turner</title><content type="html">&lt;i&gt;This is a guest post by Scott Turner, who is a perennial Machine March Madness competitor, and who runs &lt;a 
href="http://netprophetblog.blogspot.com/"&gt;http://netprophetblog.blogspot.com/&lt;/a&gt;.&lt;/i&gt;
&lt;br/&gt;&lt;br/&gt;

&lt;style type="text/css"&gt;
table.tableizer-table {
    border: 1px solid #CCC; font-family: Arial, Helvetica, sans-serif;
    font-size: 12px;
} 
.tableizer-table td {
    padding: 4px;
    margin: 3px;
    border: 1px solid #ccc;
}
.tableizer-table th {
    background-color: #104E8B; 
    color: #FFF;
    font-weight: bold;
}
&lt;/style&gt;
There are 22 entries in the Machine Madness contest this year, so analyzing them is a much bigger task than in past years.&amp;nbsp; Nonetheless I dug through all the brackets and looked at all the first round upset predictions to see how well the machines did.&lt;br /&gt;
&lt;br /&gt;
&lt;h3&gt;
Correct Upset Predictions&lt;/h3&gt;
Interestingly enough, every first round upset was picked by at least two of the predictors except for Harvard -- which no one picked -- and Florida Gulf Coast, which only "Larry's Upsetting Picks" predicted.&amp;nbsp;&amp;nbsp; The only consensus upset pick was Minnesota over UCLA, which was predicted by exactly half of the predictors.&amp;nbsp; Iowa State also got broad support (40%) but none of the rest of the picks had more than 4 predictors in support.&amp;nbsp; Here's the full table of the upsets that occurred and who predicted them (stretch your window!):&lt;br /&gt;
&lt;br /&gt;
&lt;table class="tableizer-table"&gt;&lt;tbody&gt;
&lt;tr class="tableizer-firstrow"&gt;&lt;th&gt;Entry&lt;/th&gt;
                                        &lt;th&gt;Sum&lt;/th&gt;
                                        &lt;th&gt;Minnesota&lt;/th&gt;
                                        &lt;th&gt;Iowa St&lt;/th&gt;
                                        &lt;th&gt;Oregon&lt;/th&gt;
                                        &lt;th&gt;Wichita St.&lt;/th&gt;
                                        &lt;th&gt;Mississippi&lt;/th&gt;
                                        &lt;th&gt;Temple&lt;/th&gt;
                                        &lt;th&gt;California&lt;/th&gt;
                                        &lt;th&gt;La Salle&lt;/th&gt;
                                        &lt;th&gt;Fla GC&lt;/th&gt;
                                        &lt;th&gt;Harvard&lt;/th&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;*Danny's Dangerous Picks&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Andy's Astounding Bracket&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Ask me about my T-Rex&lt;/td&gt;
                                        &lt;td&gt;3&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Curtis Lehmann's Crazy Bracket&lt;/td&gt;
                                        &lt;td&gt;0&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Dan Tran's Dazzling Bracket&lt;/td&gt;
                                        &lt;td&gt;3&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Guess O'Bot 3000&lt;/td&gt;
                                        &lt;td&gt;0&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;K. V. Southwood's Fine Bracket&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Larry's upsetting picks&lt;/td&gt;
                                        &lt;td&gt;4&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;LA's Machine Mad Pick&lt;/td&gt;
                                        &lt;td&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Leon's Super Legendary Bracket&lt;/td&gt;
                                        &lt;td&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Marginal Madness&lt;/td&gt;
                                        &lt;td&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Mark's LR bracket&lt;/td&gt;
                                        &lt;td&gt;4&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;MatrixFactorizer&lt;/td&gt;
                                        &lt;td&gt;3&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;natebrix's Neat Bracket&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;noodlebot&lt;/td&gt;
                                        &lt;td&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Predict the Madness&lt;/td&gt;
                                        &lt;td&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Ryan's Rank 1 Approximation&lt;/td&gt;
                                        &lt;td&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Scott Turner's Prediction Mach&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;ScottyJ's Grand Bracket&lt;/td&gt;
                                        &lt;td&gt;0&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;The Rosenthal Fit&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;TheSentinel&lt;/td&gt;
                                        &lt;td&gt;2&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;Tim J's Nets for Nets&lt;/td&gt;
                                        &lt;td&gt;4&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #b6d7a8;"&gt;1&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                        &lt;td style="background-color: #ea9999;"&gt;&lt;/td&gt;
                                &lt;/tr&gt;
&lt;tr&gt;
                                        &lt;td&gt;&amp;nbsp;&amp;nbsp; &lt;b&gt;Average Correct:&lt;/b&gt;&lt;/td&gt;
                                        &lt;td&gt;&lt;b&gt;1.9&lt;/b&gt;&lt;/td&gt;
                                        &lt;td&gt;50%&lt;/td&gt;
                                        &lt;td&gt;41%&lt;/td&gt;
                                        &lt;td&gt;18%&lt;/td&gt;
                                        &lt;td&gt;18%&lt;/td&gt;
                                        &lt;td&gt;18%&lt;/td&gt;
                                        &lt;td&gt;14%&lt;/td&gt;
                                        &lt;td&gt;14%&lt;/td&gt;
                                        &lt;td&gt;9%&lt;/td&gt;
                                        &lt;td&gt;5%&lt;/td&gt;
                                        &lt;td&gt;0%&lt;/td&gt;
                                &lt;/tr&gt;
&lt;/tbody&gt;
                &lt;/table&gt;
&lt;br /&gt;
My conclusion here is that UCLA-Minnesota and Notre Dame-Iowa State were probably mis-seeded.&lt;br /&gt;
&lt;br /&gt;
UCLA-Minnesota is an interesting case in human psychology.&amp;nbsp; Minnesota lost 11 of its last 16 games, finished 8th in its conference and lost in the first game of the conference tournament, while UCLA won 11 of its last 16, won the Pac-12 regular season conference title, and lost in the title game of the conference tournament.&amp;nbsp; It's no wonder UCLA got a 6 seed and Minnesota an 11.&amp;nbsp; But in fact, Minnesota was playing against much better competition through the conference games, and most of its losses came to ranked opponents and/or on the road.&amp;nbsp; Machines understand the concept of a "good loss" much better than people.&amp;nbsp;&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
The Notre Dame-Iowa State mis-seeding wasn't so egregious.&amp;nbsp; This probably should have been an 8-9 matchup instead of a 7-10, in which case a win by Iowa State would have hardly been surprising.&lt;br /&gt;
&lt;br /&gt;
All of the rest of the games were probably true upsets.&lt;br /&gt;
&lt;br /&gt;
&lt;h3&gt;
Incorrect Upset Predictions &lt;/h3&gt;
Most of the predictors also made a number of incorrect upset predictions.&amp;nbsp; The most common result was one or two missed upsets (ten of the predictors), although six of the predictors made no missed upset predictions (primarily because they made mostly chalk predictions).&amp;nbsp; Here's the full table:&lt;br /&gt;
&lt;br /&gt;
&lt;table class="tableizer-table"&gt;
  &lt;tbody&gt;
&lt;tr class="tableizer-firstrow"&gt;&lt;th&gt;Entry&lt;/th&gt;&lt;th&gt;SUM&lt;/th&gt;&lt;th&gt;Colo&lt;/th&gt;&lt;th&gt;St.Mary&lt;/th&gt;&lt;th&gt;Cincy&lt;/th&gt;&lt;th&gt;Bucknell&lt;/th&gt;&lt;th&gt;Okla&lt;/th&gt;&lt;th&gt;Missou&lt;/th&gt;&lt;th&gt;Belmont&lt;/th&gt;&lt;th&gt;SDak&lt;/th&gt;&lt;th&gt;Davidson&lt;/th&gt;&lt;th&gt;Valpo&lt;/th&gt;&lt;th&gt;Iona&lt;/th&gt;&lt;th&gt;Villa&lt;/th&gt;&lt;th&gt;Akron&lt;/th&gt;&lt;th&gt;Montana&lt;/th&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;*Danny's Dangerous Picks&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Andy's Astounding Bracket&lt;/td&gt;
    &lt;td&gt;0&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Ask me about my T-Rex&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Curtis Lehmann's Crazy Bracket&lt;/td&gt;
    &lt;td&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Dan Tran's Dazzling Bracket&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Guess O'Bot 3000&lt;/td&gt;
    &lt;td&gt;3&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;K. V. Southwood's Fine Bracket&lt;/td&gt;
    &lt;td&gt;4&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Larry's upsetting picks&lt;/td&gt;
    &lt;td&gt;7&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;LA's Machine Mad Pick&lt;/td&gt;
    &lt;td&gt;0&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Leon's Super Legendary Bracket&lt;/td&gt;
    &lt;td&gt;8&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Marginal Madness&lt;/td&gt;
    &lt;td&gt;0&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Mark's LR bracket&lt;/td&gt;
    &lt;td&gt;4&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;MatrixFactorizer&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;natebrix's Neat Bracket&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;noodlebot&lt;/td&gt;
    &lt;td&gt;0&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Predict the Madness&lt;/td&gt;
    &lt;td&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Ryan's Rank 1 Approximation&lt;/td&gt;
    &lt;td&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Scott Turner's Prediction Mach&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;ScottyJ's Grand Bracket&lt;/td&gt;
    &lt;td&gt;0&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;The Rosenthal Fit &lt;/td&gt;
    &lt;td&gt;0&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;TheSentinel&lt;/td&gt;
    &lt;td&gt;2&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Tim J's Nets for Nets&lt;/td&gt;
    &lt;td&gt;4&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td style="background-color: #ea9999;"&gt;1&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
    &lt;td&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Average Missed:&lt;/td&gt;
    &lt;td&gt;2.1&lt;/td&gt;
    &lt;td&gt;45%&lt;/td&gt;
    &lt;td&gt;36%&lt;/td&gt;
    &lt;td&gt;23%&lt;/td&gt;
    &lt;td&gt;23%&lt;/td&gt;
    &lt;td&gt;18%&lt;/td&gt;
    &lt;td&gt;14%&lt;/td&gt;
    &lt;td&gt;14%&lt;/td&gt;
    &lt;td&gt;9%&lt;/td&gt;
    &lt;td&gt;9%&lt;/td&gt;
    &lt;td&gt;5%&lt;/td&gt;
    &lt;td&gt;5%&lt;/td&gt;
    &lt;td&gt;5%&lt;/td&gt;
    &lt;td&gt;5%&lt;/td&gt;
    &lt;td&gt;5%&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
As a general rule, the predictors that made the most correct upset picks also made the most incorrect upset picks.&amp;nbsp; Notably, "Larry's upsetting picks" made the incredible call of the FGCU upset (and also called the second-round upset) but made seven incorrect upset picks as well.&lt;br /&gt;
&lt;br /&gt;
There was almost a consensus (45%) on Colorado over Illinois.&amp;nbsp; That's an interesting contrast with the Minnesota pick -- Illinois should have benefited in most of the predictors from a tough B1G conference schedule, but many of the predictors thought Illinois was still vulnerable.&amp;nbsp; Illinois had a 16-point halftime lead in this game, but let it slip away and needed some late-game heroics to win, so this was certainly a reasonable prediction.&lt;br /&gt;
&lt;br /&gt;
St. Mary's over Memphis was another popular pick.&amp;nbsp; Memphis won by 2 when a last-second shot by St. Mary's missed, so this also seemed like a reasonable upset pick.&lt;br /&gt;
&lt;br /&gt;
&lt;h3&gt;
Upset Profits&lt;/h3&gt;
An important question is whether any of the predictors profited from their upset predictions -- that is, whether the points they gained from correct upset predictions were more than the points they lost from missed upsets.&amp;nbsp; In general, this is complex to calculate because we have to look at how the predictions affect the later rounds of the tournament.&amp;nbsp; But it's easy enough to look at just the first round scoring.&amp;nbsp; Here's the table:&lt;br /&gt;
&lt;br /&gt;
&lt;table class="tableizer-table"&gt;
&lt;tbody&gt;
&lt;tr class="tableizer-firstrow"&gt;&lt;th&gt;Entry&lt;/th&gt;&lt;th&gt;Upsets&lt;/th&gt;&lt;th&gt;Missed&lt;/th&gt;&lt;th&gt;Overall&lt;/th&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Andy's Astounding Bracket&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;The Rosenthal Fit &lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Ask me about my T-Rex&lt;/td&gt;&lt;td&gt;3&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Dan Tran's Dazzling Bracket&lt;/td&gt;&lt;td&gt;3&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;LA's Machine Mad Pick&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Marginal Madness&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;MatrixFactorizer&lt;/td&gt;&lt;td&gt;3&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;noodlebot&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;*Danny's Dangerous Picks&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Mark's LR bracket&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;natebrix's Neat Bracket&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Predict the Madness&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Ryan's Rank 1 Approximation&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Scott Turner's Prediction Mach&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;ScottyJ's Grand Bracket&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;TheSentinel&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Tim J's Nets for Nets&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Curtis Lehmann's Crazy Bracket&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;-1&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;K. V. Southwood's Fine Bracket&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;-2&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Guess O'Bot 3000&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;3&lt;/td&gt;&lt;td&gt;-3&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Larry's upsetting picks&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;7&lt;/td&gt;&lt;td&gt;-3&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Leon's Super Legendary Bracket&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;8&lt;/td&gt;&lt;td&gt;-7&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;br /&gt;
We see that a couple of the predictors ("The Rosenthal Fit" and "Andy's Astounding Bracket") came out two points positive, fifteen of the predictors gained one or zero points, and five of the predictors lost points.&amp;nbsp; Interestingly, both "The Rosenthal Fit" and "Andy's Astounding Bracket" made only two upset predictions and got both of them right -- and there was no overlap in their predictions.&amp;nbsp; Furthermore, neither of them predicted the "easiest" upset of Minnesota over UCLA. &lt;br /&gt;
&lt;h3&gt;
&amp;nbsp;&lt;/h3&gt;
&lt;h3&gt;
Conclusions&lt;/h3&gt;
None of the predictors performed very well at picking upsets, and there wasn't wide agreement on the upset picks.&amp;nbsp; The consensus would have selected only the Minnesota-UCLA upset and been +1 in scoring, but no individual predictor did that.&amp;nbsp; Most of the predictors did not hurt themselves with their upset picks (at least looking at only the first round), but none really saw significant benefit.&amp;nbsp; Given the potentially large downside of missing upset predictions, in future contests it wouldn't be an unreasonable strategy to force your predictor to make all chalk selections in the first round.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/CMFEhd8nqiU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/2695874696016295990/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=2695874696016295990" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2695874696016295990?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2695874696016295990?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/CMFEhd8nqiU/upset-analysis-by-scott-turner.html" title="Upset Analysis by Scott Turner" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/upset-analysis-by-scott-turner.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkINQ3k4cCp7ImA9WhBXEU4.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-4113238607848107489</id><published>2013-03-24T08:23:00.000-07:00</published><updated>2013-03-24T08:23:12.738-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-24T08:23:12.738-07:00</app:edited><title>Some Final Four Analysis...</title><content type="html">... over at Scott Turner's blog:
&lt;a href="http://netprophetblog.blogspot.co.uk/2013/03/machine-madness-some-final-four-analysis.html"&gt;http://netprophetblog.blogspot.co.uk/2013/03/machine-madness-some-final-four-analysis.html&lt;/a&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/SfnX2prhDEI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/4113238607848107489/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=4113238607848107489" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/4113238607848107489?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/4113238607848107489?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/SfnX2prhDEI/some-final-four-analysis.html" title="Some Final Four Analysis..." /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/some-final-four-analysis.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUYERHc8eCp7ImA9WhBQGUw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-2643959744519388627</id><published>2013-03-21T13:58:00.000-07:00</published><updated>2013-03-21T17:45:05.970-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-21T17:45:05.970-07:00</app:edited><title>Predicting March Madness by Jasper Snoek</title><content type="html">&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;Now that March Madness is officially underway, and the deadline to&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;submit new bracket predictions has passed, I'm ready to divulge the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;details of my super secret, possibly excessively advanced, march&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;madness prediction model. &amp;nbsp;For a few years now, there has been a&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;special "elite" pool to predict march madness. &amp;nbsp;The twist is that all&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;the predictions have to be made by a computer algorithm - no humans&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;allowed. &amp;nbsp;This means we can't use seed information, predictions from&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;experts or the &lt;a href="http://www.youtube.com/watch?v=7vKX6PLjYS0"&gt;POTUS's executive insight&lt;/a&gt;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;. &amp;nbsp;Instead, we predict&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;based only on data (my model uses only scores). &amp;nbsp;This is the second&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;year that I am entering an algorithm. 
&amp;nbsp;My entry from last year, which&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;won the pool and beat the vast majority of humans in the Yahoo&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;challenge, is being used as a baseline. &amp;nbsp;This means I have to submit&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;something more sophisticated this year to stay on top.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;&lt;u&gt;The model:&lt;/u&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;The Simple Version:&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;A few years ago, the world of machine learning (a subfield of&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;artificial intelligence that combines statistics, math and computer&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;science to get computers to learn and infer from data) was rocked by&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;the Netflix challenge. &amp;nbsp;Netflix offered a prize of a million dollars&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;to anyone who could beat their movie recommendation system by 10%.&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;One of the most powerful and surprisingly simple algorithms to come&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;out of that challenge was &lt;a href="http://www.cs.utoronto.ca/~amnih/papers/pmf.pdf"&gt;Probabilistic Matrix Factorization&lt;/a&gt; (PMF&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;). &amp;nbsp;The idea was that a&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;movie rating was a simple product of a set of hidden or 'latent'&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;factors pertaining to the movie and the user. 
&amp;nbsp;Although the factors&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;are not pre-defined, you could imagine that the model may learn one&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;factor for a movie that corresponds to the amount of action and then a&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;user would have a factor encoding how much they like action (and&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;similarly for e.g. romance). &amp;nbsp;We learn the model by adjusting these&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;factors to maximize the probability that the user would give the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;ratings that we can see. &amp;nbsp;To predict someone's rating for a given&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;movie they haven't seen yet, you just multiply their factors by the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;movie factors.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;Similarly to movie ratings we can create factors for basketball teams&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;to predict game scores. &amp;nbsp;Here the factors (again learned by the model)&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;could correspond to offensive skill and defensive capabilities. &amp;nbsp;This&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;was the basis of my model for last year. &amp;nbsp;There was a small twist in&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;that I altered the way that the model was learned - to focus only on&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;scores for which it predicted the wrong winner.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;This year my model is significantly more complex but builds on the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;same principles. &amp;nbsp;It has two levels of latent or hidden factors. &amp;nbsp;The&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;first encodes factors for each team - such as offensive skill,&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;defensive skill, etc. &amp;nbsp;The second layer combines team factors just&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;like in standard PMF, but instead of mapping directly to the scores&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;they map to a hidden representation that encodes the game. &amp;nbsp;My&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;reasoning is that the resulting score of a game is much more complex&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;than a product of simple factors pertaining to each team. &amp;nbsp; The&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;idea is that the game representation now encodes things like: will the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;game be close or will it be a blowout - will it be high scoring or a&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;defensive brawl? &amp;nbsp;From the game representation I have a mapping to the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;difference between the home team score and the away team score. 
&amp;nbsp;Now&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;this is where things get a little complicated. &amp;nbsp;Since there are&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;relatively only a small number of games in this season (just over&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;5000) and this model is already fairly complex, rather than directly&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;try to learn a function mapping from the game factors to the scores, I&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;model a distribution over all possible mappings. &amp;nbsp;The idea: given all&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;(infinite) reasonable mappings from factors representing the game to&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;scores, what is the most probable outcome? &amp;nbsp;To do this I use a&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;statistical model called a Gaussian process.&lt;/span&gt;&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; margin-left: 1em; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-k_5gX3kKSEs/UUtyjwRyDVI/AAAAAAAAAaM/yT-PH6JxCN0/s1600/2dteams.svg.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="301" src="http://3.bp.blogspot.com/-k_5gX3kKSEs/UUtyjwRyDVI/AAAAAAAAAaM/yT-PH6JxCN0/s400/2dteams.svg.jpg" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;The factors encoding teams.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;Now to learn the model:&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;div style="text-align: right;"&gt;
&lt;/div&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;I take all of the game scores from the past season. &amp;nbsp;For each game, I&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;tell the model which team is the home team, which is the away team,&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;and then adjust the team factors and game factors in order to maximize&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;the probability of the real score. &amp;nbsp;In order to choose the number of&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;factors at each step, I use a new automatic parameter tuning algorithm&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;a href="http://books.nips.cc/papers/files/nips25/NIPS2012_1338.pdf"&gt;I personally helped develop&lt;/a&gt; called Bayesian optimization.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;What do the factors look like?&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
Just to the right I have an example of the factors that are learned if I train the model using just two factors for the teams (for those of you in machine learning, these are the weights of the neural network) and I have plotted where each of the teams is in this factor space (along with their seeds). &amp;nbsp;You can see that the model is putting the better teams in the lower left and the worse teams near the top right. &amp;nbsp;It doesn't seem to fancy the odds of South Dakota... &amp;nbsp;I'll explain later why I call the model "Turducken".&lt;br /&gt;
&lt;br /&gt;
Below this I have a picture zoomed in on just the bottom left. &amp;nbsp;You can see that the powerhouses are all encoded in this region. &amp;nbsp;You can click on these images to zoom in. &amp;nbsp;Now you can see that two factors already encode quite a bit about which teams are better. &amp;nbsp;My model uses two hundred factors - so it is encoding something that is quite significantly more complex.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;div style="text-align: right;"&gt;
&lt;/div&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-zMIIh_EBpbI/UUtygxoyTTI/AAAAAAAAAaE/9zU8Csy6k3M/s1600/2dteams-zoomed.svg.jpg" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="301" src="http://4.bp.blogspot.com/-zMIIh_EBpbI/UUtygxoyTTI/AAAAAAAAAaE/9zU8Csy6k3M/s400/2dteams-zoomed.svg.jpg" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Zoomed in on the bottom left.&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
Below there is a picture of the factors learned to encode games. &amp;nbsp;There is a dot for each game which is colored by relative score. &amp;nbsp;So a 1 means that the home team wins by a lot and a 2 means that the home team loses by a lot ("a lot" here actually means about 50 points). &amp;nbsp;So the model takes the team factors on the right and multiplies them to get to the game factors below. &amp;nbsp;Then from the game factors it predicts by how much the home team will win or lose.&lt;br /&gt;
&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;What is this Bayesian optimization?&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;One really exciting area of machine learning that has advanced a lot&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;over the past year is related to how to build systems that work more&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;automatically. To really eke out the best performance, you usually&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;need an expert to sit and tweak a bunch of knobs, see what happens,&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;and repeat many times. It's really time-consuming and nearly&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;impossible for a non-expert (and even difficult for experts). But&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;there is work on automating this process, building a system to&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;automatically tune the knobs and decipher the results. &amp;nbsp;I am using&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;Bayesian optimization&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&amp;nbsp;that I left running overnight to&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;automatically determine how many factors to use for teams and for&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;games based on how well the model can predict the scores of 500 games&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;that I pulled out of the set of data that the model learns from. &amp;nbsp;The&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;procedure decided to use 200 factors per team and just two per game.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;In Machine Learning Speak:&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;The devil is of course in the details. &amp;nbsp;The model I am using is a&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;buzz-word powerhouse. &amp;nbsp;I call it a deep semi-parametric Bayesian&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;probabilistic matrix factorization that is optimized using Bayesian&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;optimization. &amp;nbsp;My fellow machine learning PhD friend, George Dahl,&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;calls it a "statistical Turducken". &amp;nbsp;It uses a neural network trained&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;with 'dropout' to perform a nonlinear probabilistic matrix&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;factorization into a latent space that encodes games. &amp;nbsp;A Gaussian&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;process mapping is then used to map from games to the score&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;difference. &amp;nbsp;The input to the neural network is a binary encoding of&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;which team is the home team (so the number of dimensions equals the&lt;/span&gt;&lt;br /&gt;
&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; text-align: right;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-xMKY3d9-MfQ/UUtrlKoWQpI/AAAAAAAAAZk/hxYsvzgATRo/s1600/2dgamefactors.svg.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="300" src="http://4.bp.blogspot.com/-xMKY3d9-MfQ/UUtrlKoWQpI/AAAAAAAAAZk/hxYsvzgATRo/s400/2dgamefactors.svg.jpg" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;An example of the factors learned by the model to encode 'games'. &amp;nbsp;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;number of teams) and then similarly a binary encoding of which team is&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;the away team. &amp;nbsp;So the input to the model is a numTeams x 2&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;dimensional binary encoding with two bits on. &amp;nbsp;This may seem wasteful,&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;but note that now the weights to be learned by the neural network&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;correspond exactly to latent factors pertaining to each team. &amp;nbsp;The&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;teams get different factors depending if they are home or away (as I&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;personally have no college basketball expertise, I have no idea if&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;this is a wise design choice). &amp;nbsp;The neural network maps these factors&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;into a hidden unit representation and then to a latent space. &amp;nbsp;From&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;the latent space I map using a Gaussian process with a squared&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;exponential kernel to score difference.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;The model is trained using backpropagation - from the marginal&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;likelihood of the Gaussian process I backpropagate error through the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;kernel of the GP to the weights of the neural network. &amp;nbsp;I use&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;stochastic gradient descent on randomly chosen minibatches of 250&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;games at a time and a 50% dropout rate on the hidden units of the&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;neural network. &amp;nbsp;I used Bayesian optimization on a validation set of&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;500 games to determine the number of hidden units in the neural&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;network (i.e. the number of factors in the PMF), the latent dimension&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;of the input to the GP and the number of epochs to train the model&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;for.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;b&gt;What did it predict?&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;You can check out the bracket that it predicted here:&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1597161"&gt;http://tournament.fantasysports.yahoo.com/t1/1597161&lt;/a&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;As of this writing, the model is 4/4 including a minor upset of Wichita over Pittsburgh.&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;br /&gt;
&lt;/span&gt; &lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;You can take a look at our pool here:&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;&lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/162045"&gt;http://tournament.fantasysports.yahoo.com/t1/group/162045&lt;/a&gt;&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;Interestingly, even though it doesn't know anything about the seeds,&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;it predicted the four number one seeds in the final four. &amp;nbsp;According to the turducken, Indiana is going all the way. &amp;nbsp;This is&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;pretty remarkable - the algorithm is in close agreement with some of&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;the top human basketball experts. &amp;nbsp;That is already a validation that&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;it is doing something reasonable. &amp;nbsp;There are not too many&lt;/span&gt;&lt;br /&gt;
&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;controversial predictions here, though it is predicting some upsets&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;(e.g. Notre Dame over Ohio St.). &amp;nbsp;It will be really exciting to see&amp;nbsp;&lt;/span&gt;&lt;span style="color: #222222; font-family: arial, sans-serif;"&gt;how it does as the next days play out!&lt;/span&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/vDTTzhFgXmo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/2643959744519388627/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=2643959744519388627" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2643959744519388627?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2643959744519388627?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/vDTTzhFgXmo/now-that-march-madness-is-officially.html" title="Predicting March Madness by Jasper Snoek" /><author><name>Jasper</name><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-k_5gX3kKSEs/UUtyjwRyDVI/AAAAAAAAAaM/yT-PH6JxCN0/s72-c/2dteams.svg.jpg" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/now-that-march-madness-is-officially.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;C0UHR3k9eyp7ImA9WhBQGUk.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7690562461452193678</id><published>2013-03-21T13:48:00.000-07:00</published><updated>2013-03-22T01:33:56.763-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-22T01:33:56.763-07:00</app:edited><title>The 2013 Machine March Madness Field</title><content type="html">
Thanks to everybody who entered this year's Machine March Madness competition.
Based on the descriptions of the approaches, it's clear that a
lot of hard work and ingenuity has gone into the contest.  I'm excited to
see how all the different approaches do.  &lt;br/&gt;&lt;br/&gt;

Below, you can see the competitors' descriptions of their approaches.
We'll also have some longer posts diving into more details 
coming up in the near future.  If there are any in particular that
you're itching to hear more about, leave a note in the comments.&lt;br/&gt;&lt;br/&gt;

If you have entered but not sent me a description of your approach yet,
please do. I'll update this post as more descriptions come in.&lt;br/&gt;&lt;br/&gt;

Without further ado, here is your 2013 Machine March Madness field!&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Marginal Madness&lt;br/&gt;
Kevin Swersky&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2909174"&gt;http://tournament.fantasysports.yahoo.com/t1/2909174&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

I'm using variational Bayesian matrix factorization with normal priors
on the latent factors, and Gaussian-inverse Wishart hyperpriors on the
hyperparameters of the priors. Inference is performed using mean-field
(no direct optimization of any model parameters is done). The entries
of the matrix are R(i,j) = P(team i beats team j) using the empirical
counts over the 2012-2013 season. I found that the brackets produced
using this were much more stable with respect to the number of factors
than any other representation. I used 20 factors, the number of which
was chosen based on squared error on 25% randomly held-out entries of
R. For my predictions, I just took the mean vectors and ignored any
uncertainty learned by the model. Ideally, I should have selected the
number of factors, or assessed the stability of the model by using the
variational lower bound, but I was lazy. To predict the final score, I
used gradient-boosted regression trees from scikit-learn on the
feature vectors produced by the factorization.
&lt;br/&gt;&lt;br/&gt;
-----------------------------------------------------------------------------
&lt;br/&gt;&lt;br/&gt;
Larry's Upsetting Picks&lt;br/&gt;
Laurent&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1398519"&gt;http://tournament.fantasysports.yahoo.com/t1/1398519&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

I'm using a PMF-based model and I'm also modelling several other
aspects such as teams' strength over time (both over a season and
across seasons) as well as conferences' strength. These different
aspects are combined linearly together to form a prediction.&lt;br/&gt;&lt;br/&gt;

I also tried using a team's winning percentage (both over the season
and over the last few games) but that didn't lead to an improvement.&lt;br/&gt;&lt;br/&gt;

On a technical note, I also noticed that in PMF instead of using the
raw score, using the difference in scores gives slightly increased
(winner determination) accuracy.&lt;br/&gt;&lt;br/&gt;


-----------------------------------------------------------------------------&lt;br/&gt;
&lt;br/&gt;
K. V. Southwood's Fine Bracket&lt;br/&gt;
K.V. Southwood&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/3003299"&gt;http://tournament.fantasysports.yahoo.com/t1/3003299&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

I created an ensemble model based on 3 individual models:&lt;br/&gt;&lt;br/&gt;

1) multiple linear regression model based on predicting the points margin&lt;br/&gt;&lt;br/&gt;

2) multiple linear regression model based on predicting offensive points scored&lt;br/&gt;&lt;br/&gt;

3) logistic regression model based on predicting win vs. loss&lt;br/&gt;&lt;br/&gt;


-----------------------------------------------------------------------------&lt;br/&gt;

Ryan's Rank 1 Approximation&lt;br/&gt;
Ryan B.&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1636526"&gt;http://tournament.fantasysports.yahoo.com/t1/1636526&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

Brief description of approach (same as last year):
For each season (e.g. 2006-2007) I have enumerated the teams and 
compiled the scores of the games into a matrix S. For example, if team 
1 beat team 2 with a score of 82-72 then S12=82 and S21=72. Ideally, 
each team would play every other team at least once, but this is 
obviously not the case so the matrix S is sparse. Using the method 
proposed by George Dahl, I define vectors o and d which correspond to 
each team's offensive and defensive ability. The approximation to the 
matrix S is then just the outer product od' (for example 
(od')_12=o1d2=S12est). This is a simple rank one approximation for the 
matrix. If each team played each other at least once then the matrix S 
would be dense and the vectors o and d could be found by finding the 
SVD of S (see http://www.stanford.edu/~boyd/ee263/notes/low_rank_approx.pdf). 
Because this is not the case, we instead define a matrix P that 
represents which teams played that season. For example, P12=P21=1 if 
teams 1 and 2 played a game. Now the problem stated by George can be 
expressed compactly as, "minimize ||P.*(o*d')-S||_F". Here, '.*' 
represents the Hadamard product and ||.||_F is the Frobenius norm. In 
this form, it is easy to see that, for constant vector o and variable 
vector d, this is a convex problem. Also, for constant vector d and 
variable vector o this is a convex problem. Therefore, by solving a 
series of convex problems, alternating the vector variable between o 
and d, the problem converges rapidly in about 5 to 10 steps (see 
"Nonnegative Matrix Factorizations" code here http://cvxr.com/cvx/examples/). 
From this point the problem is easily expanded to handle higher rank 
approximations. 
&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Scott Turner's Prediction Machine&lt;br/&gt;
Scott Turner&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1760363"&gt;http://tournament.fantasysports.yahoo.com/t1/1760363&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

Linear regression on a number of statistics, including strength ratings to
predict MOV (Margin of Victory). The basic model is used to predict game
outcomes throughout the year, but there are some modifications for the
Tournament.  Additions this year include a new metric for analyzing possible
upsets, an algorithm for forcing upset selections based upon the (predicted)
score required to win the pool, and some modifications for neutral-court and
tournament games.  More details at &lt;a href="http://netprophetblog.blogspot.com/"&gt;http://netprophetblog.blogspot.com/&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
-----------------------------------------------------------------------------&lt;br/&gt;

noodlebot&lt;br/&gt;
Joe&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2298853"&gt;http://tournament.fantasysports.yahoo.com/t1/2298853&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

See my blog post and project page.&lt;br/&gt;
&lt;a href="http://joenoodles.com/2013/02/ncaa-d1-basketball-db/"&gt;http://joenoodles.com/2013/02/ncaa-d1-basketball-db/&lt;/a&gt; &lt;br/&gt;
&lt;a href="https://github.com/jnu/ncaa"&gt;https://github.com/jnu/ncaa&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;

Danny's Dad (Human Baseline)&lt;br/&gt;
Danny's Dad.&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2664431"&gt;http://tournament.fantasysports.yahoo.com/t1/2664431&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

Literally, Danny's Dad's picks.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;

Obama's Bracket (Human Baseline)&lt;br/&gt;
Barack Obama&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1673628"&gt;http://tournament.fantasysports.yahoo.com/t1/1673628&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

The President's picks.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;

MatrixFactorizer&lt;br/&gt;
Jasper Snoek&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1597161"&gt;http://tournament.fantasysports.yahoo.com/t1/1597161&lt;/a&gt;&lt;br/&gt;&lt;br/&gt;

Probabilistic matrix factorization augmented with Gaussian Processes
and Bayesian optimization.  More details will be forthcoming
in a longer blog post (Update: &lt;a href="http://blog.smellthedata.com/2013/03/now-that-march-madness-is-officially.html"&gt;here&lt;/a&gt;).
&lt;br/&gt;&lt;br/&gt;
-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

LA's Machine Mad Pick&lt;br/&gt;
LeAnthony M.&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1647581"&gt;http://tournament.fantasysports.yahoo.com/t1/1647581&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

I used 2011 Final Four stats data rather than last year's, including RPI, Off
eff, turnovers, &amp; def eff. A fitness function of the final NCAA
tournament standings feeds into an evolving genetic program, giving me a final
equation. I feed this year's 64 teams into this equation to compute the final
standings of the 2013 tournament.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Predict the Madness &lt;br/&gt;
Monte McNair&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2002207"&gt;http://tournament.fantasysports.yahoo.com/t1/2002207&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

??? &lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

TheSentinel&lt;br/&gt;
Chuck&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2997354"&gt;http://tournament.fantasysports.yahoo.com/t1/2997354&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

Similar strategy as last year. Used Ken Pomeroy's Pythag ratings with the log5
calculation to determine probability of winning the game.&lt;br/&gt;&lt;br/&gt;

Used a Monte Carlo simulation at 65 iterations which provided a few interesting
upsets, Oregon over Oklahoma St. (I believe they were mis-seeded myself!).&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Danny's Dangerous Picks&lt;br/&gt;
Danny&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1421921"&gt;http://tournament.fantasysports.yahoo.com/t1/1421921&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

Developed a variant on probabilistic matrix factorization, where the scores
of a game are modeled as the output of a neural network that takes as input
a learned latent vector for each team as well as the elementwise product of the latent vectors for the
two teams.
Latent vectors are learned for each team for each season jointly with
the neural net parameters, which are shared across all
seasons from 2006-2007 through the present.  I used 5D latent vectors and a one
hidden layer neural net with 50 hidden units.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Human Bracket&lt;br/&gt;
Lee&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/3297751"&gt;http://tournament.fantasysports.yahoo.com/t1/3297751&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

The Commissioner's human bracket.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

The Rosenthal Fit&lt;br/&gt;
Jeffrey Rosenthal&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1666195"&gt;http://tournament.fantasysports.yahoo.com/t1/1666195&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

Details here:
&lt;a href="http://www.tsn.ca/story/?id=418503"&gt;http://www.tsn.ca/story/?id=418503&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Last Year's Winner (Baseline)&lt;br/&gt;
Jasper Snoek&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1644140"&gt;http://tournament.fantasysports.yahoo.com/t1/1644140&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

(The winning algorithm from last year, run on this year's data but otherwise
unmodified.  Entered as a baseline.) I modified Danny's starter code in two
ways: First, I added an asymmetric component to the loss function, so the model
is rewarded for getting the prediction correct even if the absolute predicted
scores are wrong. Second, I changed the regularization so that latent vectors
are penalized for deviating from the global average over latent vectors, rather
than being penalized for being far from 0. This can be interpreted as imposing a
basic hierarchical prior.&lt;br/&gt;&lt;br/&gt;

I then ran a search over model parameters (e.g., latent dimension,
regularization strength, parameter that trades off the two parts of the loss
function) to find the setting that did best on number of correct predictions
made in the past 5 years' tournaments.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Leon's Super Legendary Bracket&lt;br/&gt;
Leon&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1712730"&gt;http://tournament.fantasysports.yahoo.com/t1/1712730&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

Defensive efficiency vs Offensive efficiency; tie-breakers favored defense over
offense. Chose final score using season averages in wins/losses.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Tim J's Nets for Nets&lt;br/&gt;
Tim J.&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1546944"&gt;http://tournament.fantasysports.yahoo.com/t1/1546944&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

Based on full-season statistics for each team, I ran a discriminant analysis for
correlation with wins including seasons 2000-present.&lt;br/&gt;&lt;br/&gt;

Then I trained a neural network only on neutral location games, measuring both
performance in mean squared error and actual past year bracket scores from
2007-2012, and predicting the bracket for this year.&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

natebrix's Neat Bracket&lt;br/&gt;
Nate&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1931619"&gt;http://tournament.fantasysports.yahoo.com/t1/1931619&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

The method is a variation on Boyd Nation's Iterative Strength Rating that
incorporates margin of victory and weights late-season games more strongly. This
link has more:&lt;br/&gt;
&lt;a href="https://nathanbrixius.wordpress.com/2013/03/20/ncaa-tournament-prediction-model-2013/"&gt;https://nathanbrixius.wordpress.com/2013/03/20/ncaa-tournament-prediction-model-2013/&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;


-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;
Mark's LR bracket&lt;br/&gt;
Mark???&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/2504134"&gt;http://tournament.fantasysports.yahoo.com/t1/2504134&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

Logistic Regression???&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Ask me about my T-Rex &lt;br/&gt;
Zach Mayer&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1827557"&gt;http://tournament.fantasysports.yahoo.com/t1/1827557&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

???&lt;br/&gt;&lt;br/&gt;
-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

ScottyJ's Grand Bracket&lt;br/&gt;
???&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1876867"&gt;http://tournament.fantasysports.yahoo.com/t1/1876867&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

???&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Guess O'Bot 3000&lt;br/&gt;
???&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1646914"&gt;http://tournament.fantasysports.yahoo.com/t1/1646914&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

???&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Andy's Astounding Bracket&lt;br/&gt;&lt;br/&gt;
??? &lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1645698"&gt;http://tournament.fantasysports.yahoo.com/t1/1645698&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

???&lt;br/&gt;&lt;br/&gt;

-----------------------------------------------------------------------------&lt;br/&gt;&lt;br/&gt;

Dan Tran's Dazzling Bracket&lt;br/&gt;
???&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/1668480"&gt;http://tournament.fantasysports.yahoo.com/t1/1668480&lt;/a&gt; &lt;br/&gt;&lt;br/&gt;

???&lt;br/&gt;&lt;br/&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/Won5AJsmCro" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7690562461452193678/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7690562461452193678" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7690562461452193678?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7690562461452193678?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/Won5AJsmCro/the-2013-machine-march-madness-field.html" title="The 2013 Machine March Madness Field" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/the-2013-machine-march-madness-field.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQNSXo6eCp7ImA9WhBQGEU.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7357195047467587729</id><published>2013-03-21T09:46:00.001-07:00</published><updated>2013-03-21T09:46:38.410-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-21T09:46:38.410-07:00</app:edited><title>And we're off!</title><content type="html">The Machine March Madness competition has officially begun.  It looks like we ended up with 22(!) algorithmic submissions, and we have 3 human baselines.  You can see all the picks here: &lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/162045"&gt;http://tournament.fantasysports.yahoo.com/t1/group/162045&lt;/a&gt;
&lt;br/&gt;&lt;br/&gt; 
I'll put together a post of all the methods soon.  Note: if you are a competitor and have not emailed me a description of your method yet, please do so ASAP.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/Iv4nLtnlrLs" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7357195047467587729/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7357195047467587729" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7357195047467587729?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7357195047467587729?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/Iv4nLtnlrLs/and-were-off.html" title="And we're off!" 
/><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/and-were-off.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0QEQ3s_cCp7ImA9WhBQF0Q.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-5428508738900213935</id><published>2013-03-20T07:53:00.000-07:00</published><updated>2013-03-20T07:55:02.548-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-20T07:55:02.548-07:00</app:edited><title>Final Stretch for Machine March Madness Predictions</title><content type="html">We're in the home stretch for this 2013 Machine March Madness competition, and it looks like this is going to be an exciting year.  So far, 17 entrants have joined the Yahoo system, which makes it easily the most popular year yet, and I'm expecting at least a few more before the deadline tomorrow.  It also sounds like there are some pretty sophisticated approaches in the works, so I'm excited to see how it all plays out.  We should also have some fun baseline predictors to compare against, although if you have ideas about other non-algorithm brackets you'd like to see added, leave a note in the comments.
&lt;br/&gt;&lt;br/&gt;

Once the brackets are locked in, I'll put up a post describing each of the competitors' methodologies, and then we can ask some of the competitors to go into more detail.
&lt;br/&gt;&lt;br/&gt;
Finally, if you are planning on competing, please make sure to read the instructions on the &lt;a href="http://groups.google.com/group/machine-march-madness"&gt;Google group&lt;/a&gt; to see how to enter, and remember to send me a brief description of your method.  Good luck every person, and every machine.
&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/pq5WCVhGHu4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/5428508738900213935/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=5428508738900213935" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/5428508738900213935?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/5428508738900213935?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/pq5WCVhGHu4/final-stretch-for-machine-march-madness_20.html" title="Final Stretch for Machine March Madness Predictions" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/final-stretch-for-machine-march-madness_20.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEICQX0yeSp7ImA9WhBQF04.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6557224800020639284</id><published>2013-03-19T16:40:00.001-07:00</published><updated>2013-03-19T16:42:40.391-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-19T16:42:40.391-07:00</app:edited><title>March Madness Team Embeddings</title><content type="html">I went with a new approach to Machine March Madness predictions this year.  I won't go into the details right now, but here's a neat visualization that comes out of the algorithm.  
What you need to know is that I'm sticking with the basic &lt;a href="http://blog.smellthedata.com/2009/03/data-driven-march-madness-predictions.html"&gt;original idea&lt;/a&gt; of using latent real-valued descriptors for each team, but I'm abandoning the requirement that there are segregated offensive and defensive descriptors for each team.  Instead, the model this year represents each team with a set of numbers that can be used to explain both offensive and defensive performance.
&lt;br/&gt;&lt;br/&gt;
So I'll skip all of the details and jump straight to showing you what the model has learned from this year's regular season.  Below is a visualization of what happens when I ask the model to use two numbers to describe each team, then I plot the learned numbers as x and y coordinates on a standard plot.

&lt;a href="http://3.bp.blogspot.com/-WnFdPXC50VY/UUj14LidxdI/AAAAAAAABrc/3LM4_1MlKnI/s1600/2012_2013_2d_embeddings.png" imageanchor="1" &gt;&lt;img border="0" width="800" src="http://3.bp.blogspot.com/-WnFdPXC50VY/UUj14LidxdI/AAAAAAAABrc/3LM4_1MlKnI/s1600/2012_2013_2d_embeddings.png" /&gt;&lt;/a&gt;

&lt;br/&gt;&lt;br/&gt;

These results lose the easy interpretability as offensive and defensive strengths, but the model is such that teams in similar locations on the plot will typically be predicted to perform similarly.  To help with eyeballing the results, I've color coded 1 through 4 seeds: #1 seeds are blue, #2's are green, #3's are red, and #4's are magenta.


&lt;br/&gt;&lt;br/&gt;

I won't try too hard to explain what's going on, but it does seem to group the stronger teams in the lower and left parts of the plot, and the weaker teams in the upper and right parts.  Anybody notice any other interesting patterns?&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/AHE8Blc6pYk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6557224800020639284/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6557224800020639284" title="6 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6557224800020639284?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6557224800020639284?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/AHE8Blc6pYk/march-madness-team-embeddings.html" title="March Madness Team Embeddings" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-WnFdPXC50VY/UUj14LidxdI/AAAAAAAABrc/3LM4_1MlKnI/s72-c/2012_2013_2d_embeddings.png" height="72" width="72" /><thr:total>6</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/march-madness-team-embeddings.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;DE4HRnc6eip7ImA9WhBQEU0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-47785746140110327</id><published>2013-03-11T15:47:00.000-07:00</published><updated>2013-03-12T09:48:57.912-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-12T09:48:57.912-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Machine March Madness 2013.  Want to participate?</title><content type="html">Lee and I have been a bit busy this March Madness season and haven't organized the &lt;a href="http://blog.smellthedata.com/2012/02/machine-march-madness-2012.html"&gt;Machine March Madness competition&lt;/a&gt; as well as we usually do.
&lt;br/&gt;&lt;br/&gt;
Now it's March, and the tournament is beginning in a little over a week.
&lt;br/&gt;&lt;br/&gt;
This is a post to see if people would be interested in competing this year despite the short notice (update: there appears to be plenty of interest.  It's on).  &lt;a href="http://netprophetblog.blogspot.com/"&gt;Scott Turner&lt;/a&gt; has graciously volunteered to help out with data needs (update: &lt;a href="https://groups.google.com/group/machine-march-madness/browse_thread/thread/59e478fe9fccc463"&gt;data posted at the Google group&lt;/a&gt;), and there is still &lt;a href="http://blog.smellthedata.com/2012/02/machine-march-madness-2012-starter-code.html"&gt;starter code&lt;/a&gt; and &lt;a href="https://groups.google.com/group/machine-march-madness?pli=1"&gt;the Google group&lt;/a&gt; available as resources.
&lt;br/&gt;&lt;br/&gt;
If you'd be interested in competing, please drop me an email or leave a note in the comments.  Also, if you'd like to compete, but something is holding you back, please leave a note in the comments, and we'll see what we can do.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/of_oUV0--LU" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/47785746140110327/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=47785746140110327" title="20 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/47785746140110327?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/47785746140110327?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/of_oUV0--LU/machine-march-madness-2013-want-to.html" title="Machine March Madness 2013.  Want to participate?" 
/><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>20</thr:total><feedburner:origLink>http://blog.smellthedata.com/2013/03/machine-march-madness-2013-want-to.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D08HR3ozeip7ImA9WhNSGEs.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-4353210548704045705</id><published>2012-11-01T19:50:00.001-07:00</published><updated>2012-11-02T07:17:16.482-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-11-02T07:17:16.482-07:00</app:edited><title>Neural network successes</title><content type="html">The &lt;a href="http://learning.cs.toronto.edu/index.shtml?section=home"&gt;learning group&lt;/a&gt; at University of Toronto has had some great recent successes, both of which were powered by neural networks.  Many of you have probably seen the ImageNet results by   &lt;a href="http://www.cs.utoronto.ca/~kriz/"&gt;Alex Krizhevsky&lt;/a&gt; and collaborators (team SuperVision):
&lt;ul&gt;
&lt;li&gt;&lt;a href="http://www.image-net.org/challenges/LSVRC/2012/results.html"&gt;Results page&lt;/a&gt;
&lt;li&gt;&lt;a href="https://plus.google.com/u/0/104362980539466846301/posts/JBBFfv2XgWM"&gt;Google+ Discussion&lt;/a&gt;
&lt;/ul&gt;

More recently, &lt;a href="http://www.cs.toronto.edu/~gdahl/"&gt;George Dahl&lt;/a&gt;, a favored guest poster on this blog, led a team that won the Merck Molecular Activity Challenge over at Kaggle:
&lt;ul&gt;
&lt;li&gt;&lt;a href="http://blog.kaggle.com/2012/11/01/deep-learning-how-i-did-it-merck-1st-place-interview/"&gt;http://blog.kaggle.com/2012/11/01/deep-learning-how-i-did-it-merck-1st-place-interview/&lt;/a&gt;
&lt;/ul&gt;

It's impressive stuff all around.  Great job, guys.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/2wcmoxOVKeQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/4353210548704045705/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=4353210548704045705" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/4353210548704045705?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/4353210548704045705?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/2wcmoxOVKeQ/neural-network-successes.html" title="Neural network successes" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/11/neural-network-successes.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DE4CQn87eSp7ImA9WhBQEU0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7857345903016628650</id><published>2012-04-06T08:15:00.005-07:00</published><updated>2013-03-12T09:49:23.101-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2013-03-12T09:49:23.101-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Final 2012 Full-Bracket Results</title><content type="html">&lt;p&gt;Hopefully everyone had a chance to watch the exciting game between Kentucky and Kansas this past Monday. 
This post only covers the results of the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/9198"&gt;full tournament bracket&lt;/a&gt; and not the second chance Sweet Sixteen bracket.&lt;/p&gt;
&lt;p&gt;
Here are the full standings, including ESPN analysts (E) and my own picks.
&lt;table&gt;
&lt;tr&gt;&lt;td&gt;TheMatrixFactorizer&lt;/td&gt;&lt;td&gt;127&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Jay Bilas (E)&lt;/td&gt;&lt;td&gt;126&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Lee's picks&lt;/td&gt;&lt;td&gt;124&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;The Pain Machine&lt;/td&gt;&lt;td&gt;122&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Baseline&lt;/td&gt;&lt;td&gt;120&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Danny's Dangerous Picks&lt;/td&gt;&lt;td&gt;117&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;By The Numbers&lt;/td&gt;&lt;td&gt;104&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Dick Vitale (E)&lt;/td&gt;&lt;td&gt;102&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Obama&lt;/td&gt;&lt;td&gt;102&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Predict the Madness&lt;/td&gt;&lt;td&gt;99&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Ryan Boesch&lt;/td&gt;&lt;td&gt;98&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;TheSentinel&lt;/td&gt;&lt;td&gt;86&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;AJsMadness&lt;/td&gt;&lt;td&gt;73&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;machine_learning_first_try&lt;/td&gt;&lt;td&gt;45&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;
&lt;/p&gt;
&lt;p&gt;Great contest this year and congratulations to this year's winner, TheMatrixFactorizer! It not only won the full-bracket contest, it also squeezed past ESPN analyst Jay Bilas by a point. Once again, machines triumph over humans in our contest. I, for one, welcome our new March Madness predicting robot overlords.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/EAp88gWALVM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7857345903016628650/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7857345903016628650" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7857345903016628650?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7857345903016628650?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/EAp88gWALVM/final-2012-full-bracket-results.html" title="Final 2012 Full-Bracket Results" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/04/final-2012-full-bracket-results.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MHRHY7eyp7ImA9WhVRE0o.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-980048127593656605</id><published>2012-03-21T18:19:00.003-07:00</published><updated>2012-03-21T18:23:55.803-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-21T18:23:55.803-07:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="scott turner" /><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Round 2 Update + Upset Analysis</title><content type="html">&lt;i&gt;Here's another great guest post from &lt;a href="http://netprophetblog.blogspot.ca/"&gt;Scott Turner&lt;/a&gt;, our #1 Machine March Madness guest poster.  Great analysis -- thanks Scott!  If you want more where this came from, check out &lt;a href="http://netprophetblog.blogspot.ca/"&gt;his blog&lt;/a&gt;.&lt;/i&gt;
&lt;br/&gt;&lt;br/&gt;
On my blog &lt;a href="http://netprophetblog.blogspot.com/2012/03/upset-review.html"&gt;here&lt;/a&gt; I took a closer look at how the Pain Machine predicts upsets in the tournament and how effective it was this year.&amp;nbsp; I thought it might be interesting to look at how the top competitors in the &lt;a href="http://tournament.fantasysports.yahoo.com/t1/group/9198"&gt;Machine Madness&lt;/a&gt; contest predicted upsets.&amp;nbsp; I put together the following table with the competitors across the top and an X in every cell where they predicted an upset.&amp;nbsp; Boxes are green for correct predictions and red for incorrect predictions.&amp;nbsp; The final row(s) in the table shows the scores &amp;amp; possible scores for each competitor.
&lt;br/&gt;&lt;br/&gt;
&lt;table border="1"&gt;&lt;tbody&gt;
&lt;tr&gt;       &lt;th style="background-color: white;"&gt;Game&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Pain Machine&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Predict the Madness&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Sentinel&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Danny's
Conservative
Picks &lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;AJ's Madness&lt;/th&gt;       &lt;th style="background-color: #cfe2f3;"&gt;Matrix Factorizer&lt;/th&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Texas over Cincy&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Texas over FSU&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;WVU over Gonzaga&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Purdue over St. Mary's&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;NC State over SDSU&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;South Florida over Temple&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;New Mexico over Louisville&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Virginia over Florida&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Colorado State over Murray State&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Vandy over Wisconsin&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Wichita State over Indiana&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: #fff2cc;"&gt;Murray State over Marquette&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;       &lt;td style="background-color: #b6d7a8; text-align: center;"&gt;
&lt;/td&gt;       &lt;td style="background-color: #ea9999; text-align: center;"&gt;&lt;b&gt;X&lt;/b&gt;&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: white;"&gt;Upset Prediction Rate&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;43%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;25%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;33%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;0%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;25%&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;29%&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: white;"&gt;Current Score&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;42&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;43&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;42&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;41&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;41&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;39&lt;/td&gt;     &lt;/tr&gt;
&lt;tr&gt;       &lt;td style="background-color: white;"&gt;Possible Points&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;166&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;155&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;166&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;161&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;137&lt;/td&gt;       &lt;td style="background-color: white; text-align: center;"&gt;163&lt;/td&gt;     &lt;/tr&gt;
&lt;/tbody&gt; &lt;/table&gt;
&lt;br/&gt;&lt;br/&gt;
(I'm not counting #9 over #8 as an upset.  That's why Danny has only 41 points; he predicted a #9 over #8 upset that did not happen.)
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;So what do you think? &lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
One thing that jumps out immediately is that the competitors predicted many more upsets this year than in past years.&amp;nbsp; Historically we'd expect around 7-8 upsets in the first two rounds.&amp;nbsp; Last year the average number of upsets was about 2 (discounting the Pain Machine and LMRC).&amp;nbsp; The Pain Machine is forced to predict this many, but this year the Matrix Factorizer also predicts 7, and Predict the Madness and AJ's Madness predict 4.&amp;nbsp; From what I can glean from the model descriptions, none of these models (other than the Pain Machine) force a certain level of upsets.&amp;nbsp; 
&lt;br/&gt;&lt;br/&gt;
Monte's model ("Predict the Madness") seems to use only statistical inputs, and not any strength measures, or strength of competition measures.&amp;nbsp; This sort of model will value statistics over strength of schedule, and so you might see it making upset picks that would not agree with the team strengths (as proxied by seeds).
&lt;br/&gt;&lt;br/&gt;
The Sentinel uses a Monte Carlo type method to predict games, so rather than always produce the most likely result, it is only most likely to produce the most likely result.&amp;nbsp; (If that makes sense :-)&amp;nbsp; The model can be tweaked by choosing how long to run the Monte Carlo simulation.&amp;nbsp; With a setting of 50 it seems to produce about half the expected number of upsets.
&lt;br/&gt;&lt;br/&gt;
Danny's Dangerous Picks are anything but; it is by far the most conservative of the competitors.&amp;nbsp; The pick of Murray State over Marquette suggests that Danny's asymmetric loss function component might have led to his model undervaluing strength of schedule.
&lt;br/&gt;&lt;br/&gt;
AJ's Madness model seems to employ a number of hand-tuned weights for different components of the prediction formula.&amp;nbsp; That may account for the predicted upsets, including the somewhat surprising CSU over Murray State prediction.
&lt;br/&gt;&lt;br/&gt;
The Matrix Factorizer has two features that might lead to a high upset rate.&amp;nbsp; First, there's an asymmetric reward for getting a correct pick, which might skew towards upsets.&amp;nbsp; Secondly, Jasper optimized his model parameters based upon the results of previous tournaments, so that presumably built in a bias towards making some upset picks.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;What's interesting about the actual upsets?&lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
First, Texas over Cincy and Purdue over St. Mary's were consensus picks (excepting Danny's Conservative Picks). &amp;nbsp; This suggests that these teams really were mis-seeded.&amp;nbsp; Purdue vs. St. Mary's is the classic trap seeding problem for humans -- St. Mary's has a much better record, but faced much weaker competition.&amp;nbsp; Texas came very close to beating Cincinnati -- they shot 16% in the first half and still tied the game up late -- which would have made the predictors 2-0 on consensus picks.
&lt;br/&gt;&lt;br/&gt;
Second, the predictors agreed on few of the other picks.&amp;nbsp; Three predictors liked WVU over Gonzaga, and the Pain Machine and the Matrix Factorizer agreed on two other games.&amp;nbsp; Murray State over Marquette is an interesting pick -- another classic trap pick for a predictor that undervalues strength of schedule -- and both Danny's predictor and the Matrix Factorizer "fell" for this pick.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;So how did the predictors do?&lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
The Pain Machine was by far the best, getting 43% of its upset predictions correct.&amp;nbsp; Sentinel was next at 33%.&amp;nbsp; Perhaps not coincidentally, these two predictors have the most possible points remaining. 
&lt;br/&gt;&lt;br/&gt;
In terms of scoring, the Baseline is ahead of all the predictors, so none came out ahead (so far) due to their predictions.&amp;nbsp; The PM and Sentinel do have a slight edge in possible points remaining over the Baseline.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;So who will win?&lt;/b&gt;
&lt;br/&gt;&lt;br/&gt;
The contest winner will probably come down to predicting the final game correctly.&amp;nbsp; There's a more interesting spread of champion predictions than I expected -- particularly given the statistical dominance of Kentucky.&amp;nbsp;
&lt;br/&gt;&lt;br/&gt;
If Kentucky wins, the likely winner will be the Baseline or Danny.&amp;nbsp; If Kansas wins, the Pain Machine will likely win unless Wisconsin makes it to the Final Four, in which case AJ should win.&amp;nbsp; If Michigan State wins, then the Sentinel will likely win.&amp;nbsp; And finally, if Ohio State wins, then Predict the Madness should win.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/djPqu6OWlF8" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/980048127593656605/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=980048127593656605" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/980048127593656605?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/980048127593656605?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/djPqu6OWlF8/round-2-update-upset-analysis.html" title="Round 2 Update + Upset Analysis" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/round-2-update-upset-analysis.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A04NR3k6eCp7ImA9WhVREk0.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7335697462621838289</id><published>2012-03-19T19:03:00.006-07:00</published><updated>2012-03-19T19:19:56.710-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2012-03-19T19:19:56.710-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Second Chance Competition Announcement</title><content type="html">For all of you who didn't get your algorithms finished in time, and for all of the original competitors who'd like a fresh start, we're pleased to announce this year's "second chance" Sweet 16 contest.
&lt;br/&gt;&lt;br/&gt;
This one will be run a little bit differently.  For machines, &lt;a href="http://blog.smellthedata.com/2012/03/data-usage-clarification.html"&gt;the rules are all still the same&lt;/a&gt;.  The difference is that there will now be a pool of human competitors in the mix -- Facebook friends and fans of our sponsor, &lt;a href="http://tarlowknee.com/minimally-invasive-knee-replacement/"&gt;a knee doctor who likes robots&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
The prize pool for the second chance tournament will be $50 and $25 gift certificates for first and second place, respectively, and they will go to the top two entrants, whether they be human or computer.
&lt;br/&gt;&lt;br/&gt;
If you want to participate as a human, you need to add &lt;a href="https://www.facebook.com/pages/Advanced-Knee-Care/#!/stefan.tarlow"&gt;Doctor Tarlow&lt;/a&gt; on Facebook and look for his announcement there.  For those who wish to enter an algorithm, here are the instructions:
&lt;ul&gt;
&lt;li&gt;Send me email at dannytarlow+MarchMadness@gmail.com with your team name, along with a short description of your approach.  (If you entered the main competition and haven't significantly changed your algorithm, just send me a mail saying you're in for the Sweet 16.)  Also, consider joining &lt;a href="http://groups.google.com/group/machine-march-madness"&gt;the Google group&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;Follow this link, and enter your picks before 7PM EST on Thursday.  &lt;b&gt;When entering your bracket name, add "[C]" before your name, to indicate you are a computer entrant:&lt;/b&gt;&lt;br/&gt;
&lt;a href="http://tournament.fantasysports.yahoo.com/t2/register/joinprivategroup_assign_team?GID=9372&amp;P=robotsvshumans"&gt;http://tournament.fantasysports.yahoo.com/t2/register/joinprivategroup_assign_team?GID=9372&amp;P=robotsvshumans&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

That's it!  Good luck to all the algorithmic competitors out there.  I hope we can pull out a victory over those pesky humans.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/oHz8g_vUfuE" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7335697462621838289/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7335697462621838289" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7335697462621838289?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7335697462621838289?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/oHz8g_vUfuE/second-chance-competition-announcement.html" title="Second Chance Competition Announcement" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/second-chance-competition-announcement.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEcCQXgyfip7ImA9WhVREUU.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-8636834382672840040</id><published>2012-03-19T10:57:00.006-07:00</published><updated>2012-03-19T11:34:20.696-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-19T11:34:20.696-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="monte" /><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>"Predict 
the Madness" by Monte McNair</title><content type="html">&lt;i&gt;This is a guest post by Monte McNair, the man behind team "Predict the Madness," which is the leader of the machine competitors after the second round.&lt;/i&gt;&lt;br/&gt;&lt;br/&gt;
Developing a system to fill out the best NCAA Tournament bracket is composed of two parts: matchup prediction and bracket optimization.
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;MATCHUP PREDICTION&lt;/b&gt;&lt;br/&gt;
The first thing to do is come up with a method to predict the likelihood of one team beating another. Since we only care about advancement, I want a system that produces a percentage as opposed to a point spread or something else. Therefore, I use a logistic regression with the outcome of games being the dependent variable. For the variables, I use the location of the game, metrics for the team's offense and defense, and metrics of the team's opponents' averages for both offense and defense. The NCAA Tournament is played at all neutral sites, but since I'm training on all games, I want to know how important playing at home is so that I can strip this out for neutral site games. The reason to use components of a team's offense and defense as opposed to simply points is that the different components that contribute to points have varying levels of reliability. As KenPom figured out this year, for example, defensive 3P% is extremely unreliable. My model takes this into account and weights it less than it would be if we used its influence on points against. By breaking it down, we let the model determine which factors are most reliable in predicting future performance.
&lt;br/&gt;&lt;br/&gt;
The main thing we care about is that the model does a good job of predicting future games. Instead of waiting for future games, however, we can just use out of sample games. I took about 1/3 of our games and made them training games and left the other 2/3 as testing games. One thing I did that may be different from most is that I used all of a team's games for the season except for the game in question to create their profile. For example, say North Carolina played Duke on January 7th in one of my training games. For North Carolina's profile, I used stats from all of their games before AND after January 7th. I'm not sure what other systems do but I think they might use all games (without excluding the game in question) or perhaps just games PRIOR to the game in question. In any case, after training the model, I can test it against the out of sample games I set aside for testing. I divided up all the test games into 100 buckets ordered by their predicted win percentage and compared it to the actual win percentage in those games. As we can see, the buckets are closely aligned meaning the predictions are fairly accurate.
&lt;br/&gt;&lt;br/&gt;
&lt;a href="http://3.bp.blogspot.com/-ldrKvY6vLKk/T2d0h12ypzI/AAAAAAAABLs/QTUr6T_8BEI/s1600/Prediction%2BBuckets.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 291px;" src="http://3.bp.blogspot.com/-ldrKvY6vLKk/T2d0h12ypzI/AAAAAAAABLs/QTUr6T_8BEI/s400/Prediction%2BBuckets.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5721669976338900786" /&gt;&lt;/a&gt;
&lt;br/&gt;&lt;br/&gt;
&lt;b&gt;BRACKET OPTIMIZATION&lt;/b&gt;&lt;br/&gt;
The next thing to do is to take our matchup predictions and maximize our expected points based on the scoring system we are presented with. While this is most beneficial when scoring systems provide bonuses for picking upsets or some other unique scoring, it can still be helpful in basic scoring systems and is better than simply advancing winners round by round.
&lt;br/&gt;&lt;br/&gt;
As an example, take Louisville and New Mexico, the 4 and 5 seeds in the West region. My model predicts New Mexico as the favorite in a game against Louisville, projected to win 51.2% of the time. Both are favored in their 1st round matchups as well, so if we were to simply advance them both, we'd then choose New Mexico to advance over Louisville in the 2nd round. However, New Mexico has a tougher 1st round opponent in Long Beach State than Louisville does against Davidson. In the table below, we see that New Mexico wins just 65% against LBSU while Louisville wins 75% of the time against Davidson. This is enough to make it more likely that Louisville advances to the Sweet 16 than New Mexico, despite UNM being the better team.
&lt;br/&gt;&lt;br/&gt;
&lt;table&gt;
&lt;tr&gt;&lt;td/&gt; &lt;td&gt;1st&lt;/td&gt;  &lt;td&gt;2nd&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;New Mexico&lt;/td&gt; &lt;td&gt;64.9%&lt;/td&gt; &lt;td&gt;37.2%&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td&gt;Louisville&lt;/td&gt; &lt;td&gt;75.3%&lt;/td&gt; &lt;td&gt;40.7%&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;
&lt;br/&gt;
New Mexico over Louisville: 51.2%
&lt;br/&gt;&lt;br/&gt;
In a basic scoring system, this rarely comes into play and when it does, it provides little benefit. But it still is best to be accurate if you can.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/OLo8bXe-WvQ" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/8636834382672840040/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=8636834382672840040" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8636834382672840040?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8636834382672840040?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/OLo8bXe-WvQ/predict-madness-by-monte-mcnair.html" title="&quot;Predict the Madness&quot; by Monte McNair" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-ldrKvY6vLKk/T2d0h12ypzI/AAAAAAAABLs/QTUr6T_8BEI/s72-c/Prediction%2BBuckets.png" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/predict-madness-by-monte-mcnair.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8NQXg_cSp7ImA9WhVREEg.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-1196472112115966752</id><published>2012-03-17T13:20:00.013-07:00</published><updated>2012-03-17T23:58:10.649-07:00</updated><app:edited 
xmlns:app="http://www.w3.org/2007/app">2012-03-17T23:58:10.649-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Machine March Madness: Round 1 Update</title><content type="html">As usual, the first round was full of upsets, with two of the #2 ranked teams falling.  None of our competitors predicted either of those upsets, but they are still putting on a respectable performance.  Here are details of each competitor's entry, along with the current performance.&lt;br/&gt;&lt;br/&gt;
The favorites at this point look like "The Matrix Factorizer" and "The Pain Machine".  Both did quite well in the first round, and both have 7/8 elite eight teams still surviving, along with all 4/4 final four teams still alive.&lt;br/&gt;

&lt;hr&gt;
&lt;b&gt;The Matrix Factorizer&lt;/b&gt; &lt;br/&gt;&lt;br/&gt;
Jasper&lt;br/&gt;&lt;br/&gt;
I modified Danny's starter code in two ways: First, I added an asymmetric component to the
loss function, so the model is rewarded for getting the prediction correct 
even if the absolute predicted scores are wrong.  Second, I changed the regularization
so that latent vectors are penalized for deviating from the global average over latent
vectors, rather than being penalized for being far from 0.  This can be interpreted as
imposing a basic hierarchical prior.
&lt;br/&gt;&lt;br/&gt;
I then ran a search over model parameters (e.g., latent dimension, regularization strength, parameter that
trades off the two parts of the loss function) to find the setting that did best on number of correct
predictions made in the past 5 years' tournaments.
&lt;br/&gt;&lt;br/&gt;
24 of 33 Correct, 25 Pts, 171 Pts Possible&lt;br/&gt;

&lt;hr&gt;
&lt;b&gt;The Pain Machine&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;

Scott Turner&lt;br/&gt;&lt;br/&gt;

Methodology: Linear regression on a number of statistics, including strength ratings to predict MOV (Margin of Victory).  Some modifications for tournament use, particularly to force a likely number of upsets.
&lt;br/&gt;&lt;br/&gt;

23 of 33 Correct, 24 Pts, 170 Pts Possible&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;TheSentinel&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
 
Chuck Dickens&lt;br/&gt;&lt;br/&gt;
 
Methodology:  Using Ken Pomeroy's Pythag formula to rate teams, then calculated the actual game probabilities with the log5 formula. 
 
Used a random number generator to determine outcome of games.  This provided some randomness which created a few interesting upsets. 
Simulate the tournament 50 times and record each team's probability to reach subsequent rounds.
Step through each round of the bracket choosing winners based on the team that had a higher probability to win that round.
 &lt;br/&gt;&lt;br/&gt;
I found that running the simulation 50 times gave me the most variability in the final four; running the simulation more than 100 times gave me a bracket that had almost no upsets, and almost all of the higher-seeded teams progressed through the tournament.
&lt;br/&gt;&lt;br/&gt;
23 of 33 Correct, 24 Pts, 172 Pts Possible

&lt;hr&gt;

&lt;b&gt;Baseline&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Always pick the higher seed. &lt;br/&gt;&lt;br/&gt;

23 of 33 Correct, 24 Pts, 168 Pts Possible
&lt;hr&gt;

&lt;b&gt;Ryan's Picks&lt;/b&gt;  &lt;br/&gt;&lt;br/&gt;
Ryan&lt;br/&gt;&lt;br/&gt;

For each season (e.g. 2006-2007) I have enumerated the teams and 
compiled the scores of the games into a matrix S. For example, if team 
1 beat team 2 with a score of 82-72 then S12=82 and S21=72. Ideally, 
each team would play every other team at least once, but this is 
obviously not the case so the matrix S is sparse. Using the method 
proposed by George Dahl, I define vectors o and d which correspond to 
each team's offensive and defensive ability. The approximation to the 
matrix S is then just the outer product od' (for example 
(od')_12=o1d2=S12est). This is a simple rank one approximation for the 
matrix. If each team played each other at least once then the matrix S 
would be dense and the vectors o and d could be found by finding the 
SVD of S (see &lt;a href="http://www.stanford.edu/~boyd/ee263/notes/low_rank_approx.pdf"&gt;http://www.stanford.edu/~boyd/ee263/notes/low_rank_approx.pdf&lt;/a&gt;). 
Because this is not the case, we instead define a matrix P that 
represents which teams played that season. For example, P12=P21=1 if 
teams 1 and 2 played a game. Now the problem stated by George can be 
expressed compactly as, "minimize ||P.*(o*d')-S||_F". Here, '.*' 
represents the Hadamard product and ||.||_F is the Frobenius norm. In 
this form, it is easy to see that, for constant vector o and variable 
vector d, this is a convex problem. Also, for constant vector d and 
variable vector o this is a convex problem. Therefore, by solving a 
series of convex problems, alternating the vector variable between o 
and d, the problem converges rapidly in about 5 to 10 steps (see 
"Nonnegative Matrix Factorizations" code here &lt;a href="http://cvxr.com/cvx/examples/"&gt;http://cvxr.com/cvx/examples/&lt;/a&gt;). &lt;br/&gt;&lt;br/&gt;

See &lt;a href="http://groups.google.com/group/machine-march-madness/browse_thread/thread/f0efd87778bdbca7"&gt;this post&lt;/a&gt; for more details.&lt;br/&gt;&lt;br/&gt;

23 of 33 Correct, 24 Pts &lt;br/&gt;&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;Danny's Dangerous Picks&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;

I started with the basic matrix factorization approach from my starter code, then I added small neural networks that applied a transformation to the base latent vectors based on whether the team was playing at home, away, or in the tournament.  These transformation vectors were learned based on season and tournament performance of teams from other years.  I split the data into 5 cross-validation sets, and looked for hyperparameter settings that did best on tournament prediction in past years. Like Jon, I also added an asymmetric component to the loss function.
&lt;br/&gt;&lt;br/&gt;
Interestingly (disappointingly), after finding the setting of parameters that did best on past data, my method made some pretty conservative predictions for this year, predicting only 3 upsets.&lt;br/&gt;&lt;br/&gt;

22 of 33 Correct, 23 Pts, 165 Pts Possible

&lt;hr&gt;

&lt;b&gt;Predict the Madness&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Monte McNair &lt;br/&gt;&lt;br/&gt;
Methodology: To determine the probability of any matchup (Team 1 beating Team 2), I use a logistic regression using statistics for offense/defense of team and team's opponents plus location, dependent variable is outcome of the game. To select bracket, I use a program to calculate the best possible bracket by maximizing number of points based on scoring system, this correctly accounts for situations where simply advancing favored teams round by round would fail.&lt;br/&gt;&lt;br/&gt;

22 of 33 Correct, 23 Pts, 157 Pts Possible&lt;br/&gt;&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;AJ's Madness&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
AJ Diliberto&lt;br/&gt;&lt;br/&gt;
 
The methodology is that I selected various stats and gave weight to those that I feel are important, such as points for and against, offensive rebounds, and turnover margin. I also factored in whether they were from one of the big conferences, the level of experience and success the coach has had, and then overlaid the formula with a strength of schedule formula that would reduce certain teams scores based on how good or bad the competition was that they played to get those stats. 
&lt;br/&gt;&lt;br/&gt;
  22 of 33 Correct, 23 Pts, 139 Pts Possible&lt;br/&gt;

&lt;hr&gt;

&lt;b&gt;Machine Learning First Try&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
Joe Gilbert &lt;br/&gt;&lt;br/&gt;

My methodology is as follows:&lt;br/&gt;
1. Develop a matrix that contains only 2011 scores (done using your data)&lt;br/&gt;
2. Develop a matrix that contains all of your teams and generate columns for averages over all players in 2011:  minutes played, FT attempted/made, 3P attempted/made (done), rebounds, turnovers, fouls (again using your data)&lt;br/&gt;
3.  Use machine learning, specifically a traditional Forest algorithm to predict each team's score for each game based on the 2011 data only&lt;br/&gt;
4.  Select the winner for each round and repeat step 3 for the next round to determine the next winners&lt;br/&gt;
Currently, the algorithm predicted the first round modeling each team's score as an "Away" team since they are all technically on the road.  I think I may change it so that the scores are based on a mean value of the model for an Away team and Home team because currently it is predicting LIU Brooklyn over MSU in the 1st round...if it comes true then so be it.&lt;br/&gt;&lt;br/&gt;

20 of 33 Correct, 21 Pts, 91 Pts Possible
&lt;hr&gt;

&lt;b&gt;By The Numbers&lt;/b&gt;&lt;br/&gt;&lt;br/&gt;
  Tim Jacobs &lt;br/&gt;&lt;br/&gt;

Methodology:&lt;br/&gt;
I took the data so generously provided, trained a couple of neural networks on the past performance, then used average away performance for each team to predict performance in the tourney.  The networks are training as I type.&lt;br/&gt;&lt;br/&gt;

17 of 33 Correct, 18 Pts, 166 Pts Possible

&lt;hr&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/XfcxMuQhFBo" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/1196472112115966752/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=1196472112115966752" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1196472112115966752?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1196472112115966752?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/XfcxMuQhFBo/machine-march-madness-round-1-update.html" title="Machine March Madness: Round 1 Update" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/machine-march-madness-round-1-update.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQHQHc8fSp7ImA9WhVSF04.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6195570397128382413</id><published>2012-03-14T07:06:00.003-07:00</published><updated>2012-03-14T07:12:11.975-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-14T07:12:11.975-07:00</app:edited><title>Data Usage Clarification</title><content type="html">&lt;p&gt;I just realized that the &lt;a href="http://groups.google.com/group/machine-march-madness/browse_thread/thread/5ea550b0ceff52e6"&gt;data rules and usage discussion&lt;/a&gt; happened on the Google Group and 
not everyone may have read it. Similarly, a clarification on &lt;a href="http://groups.google.com/group/machine-march-madness/browse_thread/thread/68be7d1f0a6b240"&gt;hand-tweaking&lt;/a&gt;.&lt;/p&gt;
&lt;p&gt;Basically, no human judgment data should enter your model except for your decisions on how to build the model and hyper-parameters for that model. Also, if you do use data that we did not provide, please let us know and please make it available to all the other competitors so that they might have the opportunity to use it as well.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/HjDygs9a4aM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6195570397128382413/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6195570397128382413" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6195570397128382413?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6195570397128382413?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/HjDygs9a4aM/data-usage-clarification.html" title="Data Usage Clarification" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/data-usage-clarification.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkQFRHw-eyp7ImA9WhVSFko.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-2426953320198026060</id><published>2012-03-13T14:27:00.003-07:00</published><updated>2012-03-13T14:31:55.253-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-13T14:31:55.253-07:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Fast Company Article</title><content type="html">&lt;a href="http://www.fastcompany.com/user/david-holmes"&gt;David Holmes&lt;/a&gt; over at &lt;a href="http://www.fastcompany.com/"&gt;Fast Company&lt;/a&gt; wrote a nice article about our Machine March Madness contest:&lt;br/&gt;
&lt;a href="http://www.fastcompany.com/1824382/march-madness-ncaa-tournament-predictions-algorithms"&gt;http://www.fastcompany.com/1824382/march-madness-ncaa-tournament-predictions-algorithms&lt;/a&gt;

&lt;br/&gt;&lt;br/&gt;
Thanks David!
&lt;br/&gt;&lt;br/&gt;
To everybody else: I hope you're hard at work on your algorithm.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/ULVuydh1Bps" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/2426953320198026060/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=2426953320198026060" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2426953320198026060?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/2426953320198026060?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/ULVuydh1Bps/fast-company-article.html" title="Fast Company Article" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/fast-company-article.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEIEQHcycSp7ImA9WhVSFkU.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-8615127815074076292</id><published>2012-03-13T07:56:00.009-07:00</published><updated>2012-03-13T16:48:21.999-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-13T16:48:21.999-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Prizes and deadline reminder</title><content type="html">Now is the time to make a final push for getting your &lt;a 
href="http://blog.smellthedata.com/2012/02/machine-march-madness-2012.html"&gt;Machine March Madness&lt;/a&gt; algorithms tuned and running smoothly.  Remember, submissions are due before tip-off of the first game on Thursday, but you probably want to get them in a little early, just to be safe.
&lt;br/&gt;&lt;br/&gt;
I'm also pleased to announce the prizes: for the main competition, the winning algorithm's owner will get a $50 Amazon or Apple gift certificate, while second place will get a $25 one.
&lt;br/&gt;&lt;br/&gt;
Also, for the "second chance" Sweet 16 contest, we will be hosting a humans versus computers contest, with our field of computers competing against Facebook friends and fans of our sponsor, &lt;a href="http://tarlowknee.com/minimally-invasive-knee-replacement/"&gt;a knee doctor who is into robotic-assisted surgery&lt;/a&gt;.
The prize pool for the second chance tournament will also be $50/$25 gift certificates, but the prizes could go either to a human or computer.
&lt;br/&gt;&lt;br/&gt;
If you want to participate as a human, you need to add &lt;a href="https://www.facebook.com/pages/Advanced-Knee-Care/#!/stefan.tarlow"&gt;Doctor Tarlow&lt;/a&gt; on Facebook, but if you're reading this blog, hopefully you'll enter an algorithm and participate on our team instead.
&lt;br/&gt;&lt;br/&gt;
The human team has chosen the name, "Dr. T's Robot Powers".  We'll need to come up with something better for our computer team.  Ideas are welcome in the comments.&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/nttKpd_m2S4" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/8615127815074076292/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=8615127815074076292" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8615127815074076292?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/8615127815074076292?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/nttKpd_m2S4/prizes-and-deadline-reminder.html" title="Prizes and deadline reminder" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/prizes-and-deadline-reminder.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkUBQ3g5cSp7ImA9WhVSFUQ.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7112423992500248435</id><published>2012-03-12T14:45:00.008-07:00</published><updated>2012-03-12T15:10:52.629-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-12T15:10:52.629-07:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="scott turner" /><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" 
/><title>How to pick upsets?</title><content type="html">&lt;i&gt;&lt;a href="http://netprophetblog.blogspot.com/"&gt;Scott Turner&lt;/a&gt; writes...&lt;/i&gt;
&lt;blockquote&gt;Doing well in a tournament picking contest probably comes down to picking the right upsets.  Anyone can pick the higher seeds to win.
&lt;br/&gt;&lt;br/&gt;
Define an upset as a lower seed beating a higher seed, and ignore upsets where there's only 1 step differential (i.e., a #9 beating a #8).  If my math from last year is correct, the upset rate in the tournament is around 22%.  Half those upsets happen in the first round, about 7.
&lt;br/&gt;&lt;br/&gt;
Some recent thoughts about upsets:
&lt;br/&gt;&lt;br/&gt;
&lt;a href="http://harvardsportsanalysis.wordpress.com/2012/03/12/predicting-ncaa-tournament-upsets-the-importance-of-turnovers-and-rebounding/"&gt;http://harvardsportsanalysis.wordpress.com/2012/03/12/predicting-ncaa-tournament-upsets-the-importance-of-turnovers-and-rebounding/&lt;/a&gt; &lt;br/&gt;
&lt;a href="http://courtsideanalyst.wordpress.com/2012/03/12/two-potential-ncaa-upset-picks-with-supporting-math/"&gt;http://courtsideanalyst.wordpress.com/2012/03/12/two-potential-ncaa-upset-picks-with-supporting-math/&lt;/a&gt; &lt;br/&gt;
&lt;a href="http://www.teamrankings.com/blog/ncaa-basketball/why-you-should-ignore-the-seeds-when-filling-out-your-2012-ncaa-brackets"&gt;http://www.teamrankings.com/blog/ncaa-basketball/why-you-should-ignore-the-seeds-when-filling-out-your-2012-ncaa-brackets&lt;/a&gt;
&lt;br/&gt;&lt;br/&gt;
I leave it to Danny / Lee to turn this into a blog posting :-)
&lt;/blockquote&gt;
My response...
&lt;br/&gt;&lt;br/&gt;
From a machine learning perspective, I think Scott raises an interesting issue here.  Let me rephrase the problem a little more abstractly, to more clearly get at the crux of the issue.  Suppose that some oracle were to come down and tell us that exactly 15 of the games in this year's March Madness tournament will be upsets.  How should this affect our prediction strategy?
&lt;br/&gt;&lt;br/&gt;
There are probably two natural answers:
&lt;ul&gt;
&lt;li&gt;Don't change anything.  I have my prediction for each game, and I think it's going to lead to the greatest number of correct predictions.&lt;/li&gt;
&lt;li&gt;Make my base predictions, but go back and find the games that I'm most uncertain about, and flip predictions until I am predicting exactly 15 upsets.&lt;/li&gt;
&lt;/ul&gt;

Actually, these both are reasonable strategies, but they say something different about the objective function that we are optimizing with our picks.  If the goal is to just get as many games right as possible, and we believe our model captures all of the information we have about the outcome of the games (and we believe the game outcomes are statistically independent), then the first strategy will still maximize the expected number of games that we will get correct.  However, by making this choice, assuming our model isn't predicting 15 upsets already, then we've eliminated ourselves from contention for the $5 million prize that Yahoo offers to anybody who picks the perfect bracket.
&lt;br/&gt;&lt;br/&gt;
So if the goal is to win the $5 million prize and you believe the oracle, then the right strategy is to pick the 15 upsets that the model thinks are most likely.
&lt;br/&gt;&lt;br/&gt;
However, while both of these strategies make some sense, they both seem too extreme.  Perhaps the more natural objective should be to ensure that we win this year's &lt;a href="http://blog.smellthedata.com/2012/02/machine-march-madness-2012.html"&gt;Machine March Madness prediction contest&lt;/a&gt;.  If that's our goal, what's the best strategy?  What if we had the predictions from all of the competitors for past years, and I told you that this year's field was going to be drawn from a similar set of competitors?
&lt;br/&gt;&lt;br/&gt;
See Scott's &lt;a href="http://netprophetblog.blogspot.com/2012/03/its-upsetting.html"&gt;picks for most likely upsets over at his blog.&lt;/a&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/e0MPSEZ2FLI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7112423992500248435/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7112423992500248435" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7112423992500248435?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7112423992500248435?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/e0MPSEZ2FLI/how-to-pick-upsets.html" title="How to pick upsets?" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/how-to-pick-upsets.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D04FQH87fCp7ImA9WhVSFUw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-987183390113650731</id><published>2012-03-11T18:26:00.003-07:00</published><updated>2012-03-11T18:31:51.104-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-11T18:31:51.104-07:00</app:edited><title>2012 Contest Registration</title><content type="html">&lt;p&gt;In order to facilitate in the contest, we will be using Yahoo! again for you to enter your bracket entries. 
Please do the following to register your team and participate in the contest:
&lt;ol&gt;
&lt;li&gt;Send an e-mail to "leezen+marchmadness" at gmail to provide your: team name, team member names, and a brief description of your methodology.&lt;/li&gt;
&lt;li&gt;Enter your picks in the Yahoo! &lt;a href="http://tournament.fantasysports.yahoo.com/t1/register/joinprivategroup_assign_team?GID=9198&amp;P=robotsvshumans"&gt;tournament group&lt;/a&gt; with the entry name being your team name.&lt;/li&gt;
&lt;li&gt;Watch the tournament with your friends and have fun!&lt;/li&gt;
&lt;/ol&gt;
&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/LVjdMHkuevg" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/987183390113650731/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=987183390113650731" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/987183390113650731?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/987183390113650731?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/LVjdMHkuevg/yahoo-group.html" title="2012 Contest Registration" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/yahoo-group.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0IHRX4_fCp7ImA9WhVSFUw.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-7809167936687637131</id><published>2012-03-11T18:15:00.002-07:00</published><updated>2012-03-11T18:25:34.044-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-11T18:25:34.044-07:00</app:edited><title>Data for 2012</title><content type="html">&lt;p&gt;Selection Sunday! What a day! First we have a great post by Scott Turner on using RapidMiner. 
Then, the Selection Committee has &lt;a href="http://espn.go.com/mens-college-basketball/tournament/2012/story/_/id/7673645/ncaa-tournament-kentucky-wildcats-syracuse-orange-north-carolina-tar-heels-michigan-st-spartans-top-seeds"&gt;set the seeding&lt;/a&gt;. Now, it's &lt;b&gt;YOUR&lt;/b&gt; turn to predict who will win the 2012 NCAA Tournament.&lt;/p&gt;
&lt;p&gt;
There are two files you can download:
&lt;ul&gt;
&lt;li&gt;&lt;a href="https://docs.google.com/open?id=0BysperLdI86MbnFqZ3B2T0dUM2F2UTA1MzY2V0hfdw"&gt;Scores for all games 2006-2011 seasons&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://docs.google.com/open?id=0BysperLdI86MdTg2M1JyUkxRME9jMGZUT1Q5T2JKQQ"&gt;Player-level data for all games 2006-2011 seasons&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
The data includes everything from the beginning of the 2006 season up to and including the March 11, 2012 games. Please let us know if you find any issues with the data. One known issue is that some scores in the first file do not match the scores if you were to add up all the player scores from the player-level data. This is due to the fact that data we crawled is occasionally inconsistent in this regard and might be off by a few points.
&lt;/p&gt;
&lt;p&gt;The data format is as before for both files, except that the aggregate game data is now tab-separated. Please see &lt;a href="http://blog.smellthedata.com/2011/03/aggregate-game-results.html"&gt;aggregate game data schema&lt;/a&gt; and &lt;a href="http://blog.smellthedata.com/2011/03/selection-sunday-today.html"&gt;player-level data schema&lt;/a&gt; for details. Good luck!&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/H7HmRZ-ZgTM" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/7809167936687637131/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=7809167936687637131" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7809167936687637131?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/7809167936687637131?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/H7HmRZ-ZgTM/data-for-2012.html" title="Data for 2012" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>1</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/data-for-2012.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CU8AQnw7fip7ImA9WhVSFEQ.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-1803882667758473517</id><published>2012-03-11T12:15:00.004-07:00</published><updated>2012-03-11T12:24:03.206-07:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-03-11T12:24:03.206-07:00</app:edited><category 
scheme="http://www.blogger.com/atom/ns#" term="scott turner" /><category scheme="http://www.blogger.com/atom/ns#" term="rapidminer" /><category scheme="http://www.blogger.com/atom/ns#" term="march_madness" /><title>Using RapidMiner to Predict March Madness</title><content type="html">&lt;i&gt;This is a guest post by Dr. Scott Turner, who won the Machine March Madness prediction contest last year, and who was the co-winner of the Sweet 16 contest from two years ago.  If you like this post, check out his great blog all about algorithmic prediction of NCAA basketball: &lt;a href="http://netprophetblog.blogspot.com/"&gt;http://netprophetblog.blogspot.com/&lt;/a&gt;.
&lt;br/&gt;&lt;br/&gt;
Dr. Turner has a Ph.D. in Artificial Intelligence from UCLA. His dissertation subject was a program called MINSTREL that told stories about King Arthur and his knights, as a way to explore issues in creativity and storytelling. Since obtaining his Ph.D. in 1993, Dr. Turner has worked for the Aerospace Corporation, where he advises the nation's space programs on software and systems engineering issues. &lt;/i&gt;
&lt;br/&gt;&lt;br/&gt;
Danny &amp;amp; Lee asked me to contribute a guest post as part of the Machine Madness contest.  I started writing a posting about using RapidMiner as part of a prediction workflow, but unfortunately I became overwhelmed with other tasks and wasn't able to finish it.&amp;nbsp; I had given up on finishing it when I realized that anyone entering the Machine Madness contest at this late date might well appreciate a tool that could make creating the routine parts of building a predictive model very fast.&amp;nbsp; So I quickly finished it up and hope it will prove helpful to someone.&amp;nbsp; Readers who are expert data miners won't find much here, but I hope that it might be useful to the interested amateur who knows more about basketball (football, baseball, etc.) than about statistics and data mining and wants to put in a quick entry. 
&lt;br/&gt;&lt;br/&gt;
I will assume that you have some program or method for generating the statistics or ratings you want to use to predict games and that you've saved those results as an Excel file.&amp;nbsp; (These might just be season averages of the statistics Danny &amp;amp; Lee are providing.)&amp;nbsp; As a tool, RapidMiner is not well-suited for this part of the problem; its strengths are in pulling the predictive value out of those statistics rather than generating them.&amp;nbsp; (Or perhaps I should say that it's not well-suited as I understand it.&amp;nbsp; I wouldn't be surprised to learn that it has useful features in this area that I don't know about.)&amp;nbsp; The Excel file should have one line for each game, with columns for the team names, statistics, ratings, and scores.
&lt;br/&gt;&lt;br/&gt;
The next step is to download and install RapidMiner.&amp;nbsp; You can do that &lt;a href="http://rapid-i.com/content/view/26/84/"&gt;here&lt;/a&gt;.&amp;nbsp; The "community edition" of RapidMiner is completely free.&amp;nbsp; (I like free.)&amp;nbsp; There's a user forum &lt;a href="http://forum.rapid-i.com/"&gt;here&lt;/a&gt; where questions usually get a fairly quick response.
&lt;br/&gt;&lt;br/&gt;
Once you've installed it, start up RapidMiner.&amp;nbsp; You'll see this:&amp;nbsp; 

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-mlOccj6QQ8w/T1ku4V5lq_I/AAAAAAAAGTs/OzxI1mHmdZw/s1600/Image1.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-mlOccj6QQ8w/T1ku4V5lq_I/AAAAAAAAGTs/OzxI1mHmdZw/s1600/Image1.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
RapidMiner has three default perspectives: Design, Results, and Welcome.&amp;nbsp; It starts up in Welcome.&amp;nbsp; Switch to Design by clicking on the icon that looks like a pencil writing in a notebook, by selecting it from the View menu, or by hitting F8.&amp;nbsp; The Design view looks like this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-kUEI6RJsHWM/T1kvd8seywI/AAAAAAAAGT0/zqxF7hKxuNw/s1600/Image2.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-kUEI6RJsHWM/T1kvd8seywI/AAAAAAAAGT0/zqxF7hKxuNw/s1600/Image2.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
The blank central area is the canvas where you'll graphically build your RapidMiner process.&amp;nbsp; The left-side has a menu of Operators as well as Repositories (where processes are stored).&amp;nbsp; The right-side has details about the current operator (Just a blank "Process" in this case because we haven't added anything yet.)
&lt;br/&gt;&lt;br/&gt;
To start, let's read in our Excel file of game data.&amp;nbsp; In the list of Operators on the left-side of the RapidMiner window, you'll see a folder labeled "Import".&amp;nbsp; Clicking on that reveals sub-folders labeled "Data," "Models", and so on.&amp;nbsp; Click on the Data folder and you'll see a list of operators.&amp;nbsp; "Read Excel" should be near the top.&amp;nbsp; Click and drag that operator onto the blank area in the middle of the screen and release.&amp;nbsp; You'll see this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-OJdYE9b0CB4/T1kxKpP6V1I/AAAAAAAAGT8/LSEsWoTJ7h0/s1600/Image3.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-OJdYE9b0CB4/T1kxKpP6V1I/AAAAAAAAGT8/LSEsWoTJ7h0/s1600/Image3.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
There are a couple of things to note.&amp;nbsp; First, RapidMiner has automatically drawn a connection from the output of this process (the little semi-circle node on the right of the box) to the right edge of the workspace.&amp;nbsp; Anything going out to that edge will show up in the Results view when the process is executed.&amp;nbsp; Second, the message window at the bottom of the workspace shows an error.&amp;nbsp; It is complaining "The mandatory parameter "excel file" is undefined."
&lt;br/&gt;&lt;br/&gt;
To fix this, look to the right-side.&amp;nbsp; You'll see that it is now showing the details for the highlighted "Read Excel" operator.&amp;nbsp; Just below there you'll see a button for an "Import Configuration Wizard" and then some input boxes for the various parameters for this operator, including the "excel file" parameter being complained about.&amp;nbsp; There's also a description/help box for the operator below the parameters section.
&lt;br/&gt;&lt;br/&gt;
Use the "Import Configuration Wizard" to find your Excel file and prepare it to be read in.&amp;nbsp; The wizard does some basic data checking, so you may discover a problem in your file at this point.&amp;nbsp; Here's what the final step of the wizard looks like for my sample data:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-zW2Q5lnFIao/T1kzQcYSwfI/AAAAAAAAGUE/Ulq9PssJSlg/s1600/Image5.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-zW2Q5lnFIao/T1kzQcYSwfI/AAAAAAAAGUE/Ulq9PssJSlg/s1600/Image5.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
There are 8 columns to my data:&amp;nbsp; name, score, TrueSkill mean, and home winning percentage.&amp;nbsp; (The TrueSkill mean is a rating system.&amp;nbsp; You can read more about it &lt;a href="http://netprophetblog.blogspot.com/2011/04/trueskill.html"&gt;here&lt;/a&gt;.)&amp;nbsp; These will be the inputs to my prediction model.
&lt;br/&gt;&lt;br/&gt;
To run a process in RapidMiner, you click the right-facing blue triangle button near the top of the window.&amp;nbsp; Right now our process isn't very interesting -- it just reads in the Excel file and sends it to the Results -- but let's run it and see what happens.&amp;nbsp; You may be asked to save your model and whether you want to switch to the Results view.&amp;nbsp; For both questions you can save a default answer, which is handy.&amp;nbsp; When you switch to the Results view you'll see something like this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-KXVkVbgr8K4/T1k0yAseHkI/AAAAAAAAGUM/hitcdfAL4bc/s1600/Image6.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-KXVkVbgr8K4/T1k0yAseHkI/AAAAAAAAGUM/hitcdfAL4bc/s1600/Image6.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
The data you read in creates an "Example Set" and this window is showing you the Meta Data View for the data set.&amp;nbsp; In my case, the data set has 3699 examples (games), and for each attribute in the examples, the window shows the Role, Name, Type, Statistics, Range and Missings.&amp;nbsp; There's some interesting stuff here -- for example, home teams scored between 28 and 124 points in this season.&amp;nbsp; A home team scored only 28 points?!&amp;nbsp; That's pretty intriguing.
&lt;br/&gt;&lt;br/&gt;
Let's follow up.&amp;nbsp; Click on the "Data View" checkbutton and then on the Hscore column to look at the actual data sorted by home team's score:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/--IKQZAB7CW0/T1k1uyuDV6I/AAAAAAAAGUU/ulQdYBHS8u4/s1600/Image7.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/--IKQZAB7CW0/T1k1uyuDV6I/AAAAAAAAGUU/ulQdYBHS8u4/s1600/Image7.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
Apparently that 28-point performance was put in by &lt;a href="http://rivals.yahoo.com/ncaa/basketball/recap?gid=201202150515"&gt;SMU against UAB&lt;/a&gt;.&amp;nbsp; That had to be fun to watch! You can do some interesting data analysis with the Plot View and Advanced Chart options here, but let's continue on with building a process.
&lt;br/&gt;&lt;br/&gt;
Switch back to the Design view and let's work on conditioning the data.&amp;nbsp; In many cases, there are problems in the input data -- such as missing values -- that will corrupt your prediction models.&amp;nbsp; RapidMiner provides a number of operators for fixing these sorts of problems.&amp;nbsp; Let's work on fixing missing values.&amp;nbsp; In the Design View on the Operators tab on the left part of the screen you'll see a search box.&amp;nbsp; This is handy for finding operators by name.&amp;nbsp; Type "missing" into the Search box and you should see this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-6nriugTEFWc/T1yyCrTWHMI/AAAAAAAAGUc/oRIDmamkGtQ/s1600/Image8.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-6nriugTEFWc/T1yyCrTWHMI/AAAAAAAAGUc/oRIDmamkGtQ/s1600/Image8.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;

Click on the "Missing Values" operator, drag it onto the canvas in the middle of the screen and drop it.&amp;nbsp; You'll now have this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-ziPjTFRwfco/T1yyZ7P2YZI/AAAAAAAAGUk/p5mWS1PqMkM/s1600/Image9.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-ziPjTFRwfco/T1yyZ7P2YZI/AAAAAAAAGUk/p5mWS1PqMkM/s1600/Image9.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
You'll see that RapidMiner is complaining of an error in our process: we don't have an input to the Replace Missing Values operator.&amp;nbsp; We want to connect the output of our Excel file to the input of this operator.&amp;nbsp; To do this, we left click on the output of the Read Excel operator, and drag the resulting orange line to the input of the Replace Missing Values operator and release.&amp;nbsp; This causes a pop-up box asking if we really want to disconnect the current output connection or not.&amp;nbsp; Allow RapidMiner to disconnect the port and you should have this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-3tVYTcU92mY/T1yzQ_euqYI/AAAAAAAAGUs/H9dMZYb4YjU/s1600/Image10.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-3tVYTcU92mY/T1yzQ_euqYI/AAAAAAAAGUs/H9dMZYb4YjU/s1600/Image10.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
And that's all you need to do:&amp;nbsp; Add operators and hook them together into a process.&amp;nbsp; By default, the Replace Missing Values operator replaces all missing values with the average value for that attribute.&amp;nbsp; That's fine for now, so we'll leave it as is.
&lt;br/&gt;&lt;br/&gt;
One very important step we need to take is to create a "label".&amp;nbsp; The label is the attribute that we're trying to predict.&amp;nbsp; In our case, we'll be trying to predict the winner of the game: "Home" or "Away".&amp;nbsp; We don't actually have that in our input data, so we'll need to create a new attribute and set it to be our label.
&lt;br/&gt;&lt;br/&gt;
To do this, find the "Generate Attributes" operator and the "Set Role" operator and modify your process to look like this:
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-tPzY8xl-JEo/T1y0oUE9J2I/AAAAAAAAGU0/aPPEEox3m3k/s1600/Image11.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-tPzY8xl-JEo/T1y0oUE9J2I/AAAAAAAAGU0/aPPEEox3m3k/s1600/Image11.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Now click on the "Generate Attributes" operator.&amp;nbsp; On the right you'll see a button labeled "function descriptions" and "Edit List(0)".&amp;nbsp; Click on this to bring up a view that will let us define a new attribute in our data set.
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-PYRjJ37xmUE/T1y1Tkb-5MI/AAAAAAAAGU8/UPyVX4rW718/s1600/Image12.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-PYRjJ37xmUE/T1y1Tkb-5MI/AAAAAAAAGU8/UPyVX4rW718/s1600/Image12.png" /&gt;&lt;/a&gt;&lt;/div&gt;

This is fairly simple to use.&amp;nbsp; We type in a name for our new attribute in the left-hand column and then an expression for calculating it in the right-hand column.&amp;nbsp; We can use any existing attribute in our expression, and if you click on the calculator icon, it will bring up a tool to help create expressions.&amp;nbsp; In our case, we want to create a new attribute called "winner" that has the value "Home" if the home team scored more than the Away team, and "Away" otherwise.&amp;nbsp; The expression to do this is 'if(Hscore&amp;gt;Ascore,"Home","Away")':
&lt;br/&gt;&lt;br/&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-o_8sbIyws3U/T1y2QR--HfI/AAAAAAAAGVE/VTPfvA0Iiuk/s1600/Image13.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-o_8sbIyws3U/T1y2QR--HfI/AAAAAAAAGVE/VTPfvA0Iiuk/s1600/Image13.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;br/&gt;&lt;br/&gt;
And that's it for creating the new attribute.&amp;nbsp; Now we need to set the Role of this attribute to "label" so that our models will know what we're trying to predict.&amp;nbsp; To do this, click on the Set Role operator and in the right-side pane, select our new attribute from the drop-down box next to Name, and "label" from the drop-down box next to "target role":

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-N4RCbmdhIBE/T1y3QQC35sI/AAAAAAAAGVM/IL5qtNhrtQw/s1600/Image14.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-N4RCbmdhIBE/T1y3QQC35sI/AAAAAAAAGVM/IL5qtNhrtQw/s1600/Image14.png" /&gt;&lt;/a&gt;&lt;/div&gt;

We're almost ready to start modeling, but let's check to make sure we've added the "winner" attribute correctly.&amp;nbsp; Hit the run button to run the process and let's look at the output in the Results view:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-pNZoduA7kow/T1y4A8fAPqI/AAAAAAAAGVU/79g448DVlmk/s1600/Image15.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-pNZoduA7kow/T1y4A8fAPqI/AAAAAAAAGVU/79g448DVlmk/s1600/Image15.png" /&gt;&lt;/a&gt;&lt;/div&gt;

At the top of the results (colored light yellow because of its role as "label") we see the new attribute "winner".&amp;nbsp; In this data set, the Home team won almost twice as often as the Away team.&amp;nbsp; If you click on the Data View button, you can check a few games to make sure the calculation is correct:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-LTH_093-Gm4/T1y4f0v3sAI/AAAAAAAAGVc/qlJWyVW0liI/s1600/Image16.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-LTH_093-Gm4/T1y4f0v3sAI/AAAAAAAAGVc/qlJWyVW0liI/s1600/Image16.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Looks good, so let's go back to Design View and train a model.&amp;nbsp; Switch back to the Design View and find the k-NN model, drag it into the process and connect it up to look like this:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-m7ygxJEKPyo/T1y9y5_HvMI/AAAAAAAAGWM/qOBbd7upM84/s1600/Image22.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-m7ygxJEKPyo/T1y9y5_HvMI/AAAAAAAAGWM/qOBbd7upM84/s1600/Image22.png" /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-fyvhEtRUECU/T1y5IC71eoI/AAAAAAAAGVk/QblvG3Woass/s1600/Image17.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;/a&gt;&lt;/div&gt;
&amp;nbsp; 
Along the right-side you can see the parameters for the k-NN operator.&amp;nbsp; Change "k" to 3.&amp;nbsp; We're almost ready to create a model, but we need to add one last step.&amp;nbsp; Right now the input data to our model includes the scores of both teams.&amp;nbsp; It isn't very hard to predict who will win the game if we know who scored the most points :-) so we'll need to remove that information from our examples.&amp;nbsp; To do this, we need an operator called "Select Attributes".&amp;nbsp; Drop this into our process between "Set Role" and "k-NN".

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-rxzz_4ZteY0/T1y_hTVRykI/AAAAAAAAGWc/jVl5vgsQBcQ/s1600/Image24.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-rxzz_4ZteY0/T1y_hTVRykI/AAAAAAAAGWc/jVl5vgsQBcQ/s1600/Image24.png" /&gt;&lt;/a&gt;&lt;/div&gt;


Highlight the new operator, and on the right-side, set the "attribute filter type" to subset and then click on "Select Attributes".&amp;nbsp; That will bring up this dialog:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://3.bp.blogspot.com/-2epMjqz031E/T1y7Rms2h5I/AAAAAAAAGV8/eS95NlNA_Rk/s1600/Image20.png" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-2epMjqz031E/T1y7Rms2h5I/AAAAAAAAGV8/eS95NlNA_Rk/s1600/Image20.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Now we simply select attributes we want to include from the left side and use the green arrow to move them to the right side.&amp;nbsp; We want to leave out the Hscore, Ascore and Date attributes.

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-RKxv6h8Drmo/T1y75mVZMgI/AAAAAAAAGWE/RWGRI9_68m8/s1600/Image21.png" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-RKxv6h8Drmo/T1y75mVZMgI/AAAAAAAAGWE/RWGRI9_68m8/s1600/Image21.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Save this and we're now ready to run the process to create a model.&amp;nbsp;&amp;nbsp; Hit the Run button and you should see results that look like this:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://1.bp.blogspot.com/-j-j40vSEbIg/T1zAI6w-2xI/AAAAAAAAGWk/9Qp73apLM4c/s1600/Image25.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-j-j40vSEbIg/T1zAI6w-2xI/AAAAAAAAGWk/9Qp73apLM4c/s1600/Image25.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Great, we created a model!&amp;nbsp; But how good is it?&amp;nbsp; We don't have any idea.&amp;nbsp; To figure that out, we need to apply the model and then measure its performance.&amp;nbsp; Let's do that.
&lt;br/&gt;&lt;br/&gt;
Switch back to the Design View, and find the "Apply Model" and "Performance (Classification)" operators and add them to your process after the k-NN operator like so:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://2.bp.blogspot.com/-E2p8tYAFTGs/T1zBj-cqQ4I/AAAAAAAAGWs/c_bCzWczMC4/s1600/Image26.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/-E2p8tYAFTGs/T1zBj-cqQ4I/AAAAAAAAGWs/c_bCzWczMC4/s1600/Image26.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Note that the model output of the k-NN operator goes into the model input for the Apply Model operator, and the example set output goes into the unlabeled input.&amp;nbsp; The labeled output of Apply Model goes into the labeled input of the Performance operator, and the performance output of that operator goes out the right-hand side of our process.
&lt;br/&gt;&lt;br/&gt;
Run this, and you should get a Results View that looks something like this:

&lt;div class="separator" style="clear: both; text-align: center;"&gt;
&lt;a href="http://4.bp.blogspot.com/-WyoRhjThU9Y/T1zCNsuaq9I/AAAAAAAAGW0/Pas0ujmYS2w/s1600/Image27.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/-WyoRhjThU9Y/T1zCNsuaq9I/AAAAAAAAGW0/Pas0ujmYS2w/s1600/Image27.png" /&gt;&lt;/a&gt;&lt;/div&gt;

Wow, 83% accuracy predicting the winner of the game -- pretty good!&amp;nbsp; Good enough to win the Machine Madness contest?&amp;nbsp; Who can say? :-)
&lt;br/&gt;&lt;br/&gt;
This illustrates the basics of using RapidMiner for prediction.&amp;nbsp; RapidMiner has a wealth of features and options, and there are many improvements you can make to the simple process flow I've illustrated above.&amp;nbsp; But hopefully this has given you enough guidance to get started, and good luck!&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/VIH_T5m__lk" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/1803882667758473517/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=1803882667758473517" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1803882667758473517?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/1803882667758473517?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/VIH_T5m__lk/using-rapidminer-to-predict-march.html" title="Using RapidMiner to Predict March Madness" /><author><name>Danny Tarlow</name><uri>http://www.blogger.com/profile/14670021337844708633</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="25" src="http://1.bp.blogspot.com/_cFAlw8-Y0gE/TRrm8pdSK1I/AAAAAAAAA5o/S8w-VVzdc1A/S220/mehak.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-mlOccj6QQ8w/T1ku4V5lq_I/AAAAAAAAGTs/OzxI1mHmdZw/s72-c/Image1.jpg" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/03/using-rapidminer-to-predict-march.html</feedburner:origLink></entry><entry 
gd:etag="W/&quot;DUQAQno9eCp7ImA9WhVTFEg.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-6025240714062715649</id><published>2012-02-28T11:26:00.003-08:00</published><updated>2012-02-28T11:29:03.460-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-28T11:29:03.460-08:00</app:edited><title>Preliminary Aggregate Data</title><content type="html">&lt;p&gt;For those of you who want to play with just aggregate game result data, I've posted an &lt;a href="https://docs.google.com/open?id=0BysperLdI86MQmFjSTdUUk1TT2FLQlN3OEhWSVN4UQ"&gt;updated version&lt;/a&gt; that you can play with. The format is the same as described in &lt;a href="http://blog.smellthedata.com/2011/03/aggregate-game-results.html"&gt;a previous post&lt;/a&gt;: date, home team, away team, home score, away score, and whether or not the home team won.&lt;/p&gt;
&lt;p&gt;This data covers the 2006 season through 2/26/2012 and, as with the player-level data, will be updated on Selection Sunday to reflect the most up to date information.&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/oGD03eFOD84" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/6025240714062715649/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=6025240714062715649" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6025240714062715649?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/6025240714062715649?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/oGD03eFOD84/preliminary-aggregate-data.html" title="Preliminary Aggregate Data" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/preliminary-aggregate-data.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYDSX8-cCp7ImA9WhVTE0Q.&quot;"><id>tag:blogger.com,1999:blog-1107147718367558732.post-544540249661372932</id><published>2012-02-27T17:17:00.002-08:00</published><updated>2012-02-27T17:22:58.158-08:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-27T17:22:58.158-08:00</app:edited><title>Preliminary 2011 Season Data</title><content type="html">&lt;p&gt;In addition to data from the 2006-2010 seasons shared publicly via &lt;a 
href="https://docs.google.com/open?id=0BysperLdI86MNjA3ZWIzNDUtNTE1NC00MWMzLTlmZmMtOGRiMjQwYjhkM2Q0"&gt;Google Docs&lt;/a&gt;,
we've published some preliminary data for the 2011 season. This uses the same format as past seasons' data and spans the beginning of the 2011 season through 2/26.
&lt;/p&gt;
&lt;ul&gt;&lt;li&gt;&lt;a href="https://docs.google.com/open?id=0BysperLdI86MMUgyYXlNTHJUQmVWUGljbTlob1dOZw"&gt;Preliminary 2011 data&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt;
&lt;p&gt;
After Selection Sunday (March 11th), we will publish an updated set of data for the 2011 season. Please let us know if you find any problems with the preliminary data.
&lt;/p&gt;&lt;img src="http://feeds.feedburner.com/~r/ThisNumberCrunchingLife/~4/Vc_Re9FM2zI" height="1" width="1"/&gt;</content><link rel="replies" type="application/atom+xml" href="http://blog.smellthedata.com/feeds/544540249661372932/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=1107147718367558732&amp;postID=544540249661372932" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/544540249661372932?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/1107147718367558732/posts/default/544540249661372932?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/ThisNumberCrunchingLife/~3/Vc_Re9FM2zI/preliminary-2011-season-data.html" title="Preliminary 2011 Season Data" /><author><name>Lee</name><uri>http://www.blogger.com/profile/17617335710795529109</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif" /></author><thr:total>0</thr:total><feedburner:origLink>http://blog.smellthedata.com/2012/02/preliminary-2011-season-data.html</feedburner:origLink></entry></feed>
