{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import statsmodels.formula.api as sm"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# The CSV sits in a Dropbox folder whose absolute location differs per machine.\n",
"# Try each known location and load the first one that exists, so no line has to\n",
"# be commented in or out when switching computers.\n",
"csv_candidates = [\n",
"    r\"C:\\Users\\wliu\\Dropbox\\NYC-DAT-08\\Homework_5\\input\\2013_NCAA_Game.csv\",\n",
"    \"/Users/williamliu/Dropbox/NYC-DAT-08/Homework_5/input/2013_NCAA_Game.csv\",\n",
"]\n",
"# Fall back to the first candidate so a missing file still surfaces read_csv's own error.\n",
"csv_path = next((p for p in csv_candidates if os.path.exists(p)), csv_candidates[0])\n",
"df = pd.read_csv(csv_path, sep=',')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Preview the first rows; the parenthesised form runs on both Python 2 and Python 3.\n",
"print(df.head())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" Date Team Opponent Team Score Opponent Score \\\n",
"0 3/10/13 Albany NY Stony Brook 61 59 \n",
"1 3/10/13 Stony Brook Albany NY 59 61 \n",
"2 3/10/13 Indiana Michigan 72 71 \n",
"3 3/10/13 Michigan Indiana 71 72 \n",
"4 3/10/13 Michigan St Northwestern 71 61 \n",
"\n",
" Location Team Margin Team Result Team Location \\\n",
"0 Albany NY 2 Win Home \n",
"1 Albany NY -2 Loss Away \n",
"2 Michigan 1 Win Away \n",
"3 Michigan -1 Loss Home \n",
"4 Michigan St 10 Win Home \n",
"\n",
" Team Avg Scoring Margin Opponent Average Scoring Margin \n",
"0 4.12 9.90 \n",
"1 9.90 4.12 \n",
"2 18.55 11.93 \n",
"3 11.93 18.55 \n",
"4 8.67 -2.58 \n",
"\n",
"[5 rows x 11 columns]\n"
]
}
],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Positional relabel: the list order must match the column order of the loaded CSV.\n",
"df.columns = [\n",
"    'Date',\n",
"    'Team',\n",
"    'Opponent',\n",
"    'TeamScore',\n",
"    'OpponentScore',\n",
"    'Location',\n",
"    'TeamMargin',\n",
"    'TeamResult',\n",
"    'TeamLocation',\n",
"    'TeamAvgScoringMargin',\n",
"    'OpponentAverageScoringMargin',\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#print df.columns # Note from Haski: Use list comprehension, can reassign quicker"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Label-based alternative: map each spaced CSV header to a space-free name.\n",
"# NOTE(review): if df.columns was already overwritten in an earlier cell, none of\n",
"# these keys exist any more and this call leaves the frame unchanged - confirm which\n",
"# of the two renaming approaches is meant to be kept.\n",
"df = df.rename(\n",
"    columns={\n",
"        'Team Score':'TeamScore',\n",
"        'Opponent Score':'OpponentScore',\n",
"        'Team Margin':'TeamMargin',\n",
"        'Team Result':'TeamResult',\n",
"        'Team Location':'TeamLocation',\n",
"        'Team Avg Scoring Margin':'TeamAvgScoringMargin',\n",
"        'Opponent Average Scoring Margin':'OpponentAverageScoringMargin',\n",
"    }\n",
")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"mycolumns = df.columns"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 38
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Show what kind of object df.columns is; the parenthesised form runs on Python 2 and 3.\n",
"print(type(mycolumns))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 5 rows \u00d7 1012 columns\n",
" \n",
"
\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" Intercept \n",
" Team[T.Akron] \n",
" Team[T.Alabama] \n",
" Team[T.Alabama A&M] \n",
" Team[T.Alabama St] \n",
" Team[T.Albany NY] \n",
" Team[T.Alcorn St] \n",
" Team[T.American Univ] \n",
" Team[T.Appalachian St] \n",
" Team[T.Arizona] \n",
" Team[T.Arizona St] \n",
" Team[T.Ark Little Rock] \n",
" Team[T.Ark Pine Bluff] \n",
" Team[T.Arkansas] \n",
" Team[T.Arkansas St] \n",
" Team[T.Army] \n",
" Team[T.Auburn] \n",
" Team[T.Austin Peay] \n",
" Team[T.BYU] \n",
" Team[T.Ball St] \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
" \n",
" \n",
" \n",
"4 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" ... \n",
"