每支球队的最佳阵型
def best_lineup(nationality, lineup):
    """Select players from a national squad for the given formation.

    Args:
        nationality: Team name, passed unchanged to ``best_squad``.
        lineup: List of position codes; a code listed more than once means
            the formation uses that position several times.

    Returns:
        DataFrame with the columns ``short_name``, ``overall``,
        ``club_name`` and ``player_positions``: first the rows for
        positions listed once, then the rows for repeated positions.

    NOTE(review): for repeated positions every matching squad row is kept
    (there is no cap at the number of slots), so the size of the result
    depends on how many players per position ``best_squad`` returns --
    confirm against that function.
    """
    # Split the requested positions by how often the formation uses them.
    single_slots = [pos for pos in lineup if lineup.count(pos) <= 1]
    multi_slots = [pos for pos in lineup if lineup.count(pos) > 1]

    squad = best_squad(nationality)
    squad_positions = squad['player_positions']

    # One player per single-use position: the first row in squad order.
    single_picks = squad[squad_positions.isin(single_slots)].drop_duplicates(
        'player_positions', keep='first'
    )
    # Repeated positions keep every matching squad row.
    multi_picks = squad[squad_positions.isin(multi_slots)]

    selected = pd.concat([single_picks, multi_picks])
    return selected[['short_name', 'overall', 'club_name', 'player_positions']]
# Candidate formations: formation label -> list of 11 position codes.
dict_formation = {
    '4-3-3': [
        'GK', 'RB', 'CB', 'CB', 'LB', 'CDM', 'CM', 'CAM', 'RW', 'ST', 'LW',
    ],
    '4-4-2': [
        'GK', 'RB', 'CB', 'CB', 'LB', 'RM', 'CM', 'CM', 'LM', 'ST', 'ST',
    ],
    '4-2-3-1': [
        'GK', 'RB', 'CB', 'CB', 'LB', 'CDM', 'CDM', 'CAM', 'CAM', 'CAM', 'ST',
    ],
}

# For each of the first nine rows of df_average_overall, try every formation
# and print the one whose selected lineup has the highest mean 'overall'.
for _, team_row in df_average_overall[:9].iterrows():
    team = team_row['Teams']
    max_average = None
    for name, slots in dict_formation.items():
        mean_overall = best_lineup(team, slots)['overall'].mean()
        # Strict comparison: on a tie the formation seen first is kept.
        if max_average is not None and not mean_overall > max_average:
            continue
        max_average, formation = mean_overall, name
    print(team, formation, max_average)
· Spain 4-2-3-1 85.1
· Portugal 4-2-3-1 84.9
· England 4-4-2 84.45454545454545
· Brazil 4-3-3 84.81818181818181
· France 4-2-3-1 83.9
· Argentina 4-3-3 83.54545454545455
· Germany 4-2-3-1 84.1
· Belgium 4-3-3 82.54545454545455
· Netherlands 4-4-2 82.54545454545455
# Display the selected players for one team/formation pair.
# Alternative examples, left disabled:
# best_lineup('Spain', dict_formation['4-2-3-1'])
# best_lineup('Argentina', dict_formation['4-3-3'])
best_lineup('Brazil', dict_formation['4-3-3'])
由于在世界杯中,几乎所有的球队都在中立球场比赛,所以在这次分析中没有考虑主场/客场的因素。
一旦有了每个国家队的进/丢球数,就创建了一个函数,预测每支球队在小组赛中会得到多少分。
预测小组赛阶段
下面是我用来预测每个国家队在小组赛阶段会得到多少分的代码。
计算球队实力
# Load the group tables (used below as a dict of DataFrames keyed by group
# name, each with 'Team' and 'Pts' columns). The context manager closes the
# file handle, which the bare open() call used to leak.
# NOTE(review): unpickling can execute arbitrary code -- only load a
# 'dict_table' file that you produced yourself.
with open('dict_table', 'rb') as table_file:
    dict_table = pickle.load(table_file)

# Historical match results and the tournament fixture list.
df_historical_data = pd.read_csv('clean_fifa_worldcup_matches.csv')
df_fixture = pd.read_csv('clean_fifa_worldcup_fixture.csv')
# Re-express every historical match from each side's point of view so that
# both frames share the columns Team / GoalsScored / GoalsConceded.
df_home = (
    df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']]
    .rename(columns={
        'HomeTeam': 'Team',
        'HomeGoals': 'GoalsScored',
        'AwayGoals': 'GoalsConceded',
    })
)
df_away = (
    df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']]
    .rename(columns={
        'AwayTeam': 'Team',
        'HomeGoals': 'GoalsConceded',
        'AwayGoals': 'GoalsScored',
    })
)

# Team strength = mean goals scored and conceded per match, indexed by team.
df_team_strength = (
    pd.concat([df_home, df_away], ignore_index=True)
    .groupby(['Team'])
    .mean()
)
df_team_strength
from scipy.stats import poisson
def predict_points(home, away, max_goals=10):
    """Return the expected points of both teams for a single match.

    Each side's goal count is modelled as an independent Poisson variable.
    The home rate is the home team's mean goals scored multiplied by the
    away team's mean goals conceded (and vice versa for the away rate),
    both read from the module-level ``df_team_strength`` table.

    Args:
        home: Name of the home team (an index label of ``df_team_strength``).
        away: Name of the away team.
        max_goals: Highest goal count per team included in the score grid;
            the default of 10 evaluates every score from 0-0 to 10-10.

    Returns:
        Tuple ``(points_home, points_away)`` where each value is
        ``3 * P(win) + P(draw)``. ``(0, 0)`` is returned when either team
        is missing from ``df_team_strength``.
    """
    known_teams = df_team_strength.index
    if home not in known_teams or away not in known_teams:
        # No historical data for at least one side: credit nobody.
        return (0, 0)

    lamb_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away, 'GoalsConceded']
    lamb_away = df_team_strength.at[away, 'GoalsScored'] * df_team_strength.at[home, 'GoalsConceded']

    # Evaluate each team's goal distribution once instead of calling the
    # pmf again for every cell of the score grid.
    goal_counts = list(range(max_goals + 1))
    pmf_home = poisson.pmf(goal_counts, lamb_home)
    pmf_away = poisson.pmf(goal_counts, lamb_away)

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x, p_x in enumerate(pmf_home):  # goals scored by the home team
        for y, p_y in enumerate(pmf_away):  # goals scored by the away team
            p = p_x * p_y
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)
通俗地说,predict_points 计算的是主队和客队各自预期能得到多少积分。这里先为每支球队计算 lambda(预期进球数):主队的 lambda 等于主队的场均进球数乘以客队的场均失球数,即 average_goals_scored * average_goals_conceded;客队的 lambda 同理,等于客队的场均进球数乘以主队的场均失球数。
然后模拟了一场比赛从0-0到10-10的所有可能的比分(最后的那个比分只是我的进球范围的极限)。一旦有了lambda和x,就可以使用泊松分布的公式来计算p。
prob_home、prob_draw 和 prob_away 分别累加对应比分的 p 值:例如比赛以 1-0 结束时计入 prob_home(主队获胜),以 1-1 结束时计入 prob_draw(平局),以 0-1 结束时计入 prob_away(客队获胜)。最后,用下面的公式计算积分。
points_home = 3 * prob_home + prob_draw
points_away = 3 * prob_away + prob_draw
如果我们用predict_points来预测英格兰对美国的比赛,我们会得到这个结果。
>>> print(predict_points('England', 'United States'))
(2.2356147635326007, 0.5922397535606193)
这意味着英格兰预计将得到约2.24分,而美国预计将得到约0.59分。因为这里计算的是基于概率的期望积分,因此得到的是小数。
如果将这个predict_points函数应用于小组赛阶段的所有比赛,我们将得到每个小组的第1和第2名,从而得到以下淘汰赛的比赛。
# Split the fixture list by row position into tournament stages; each stage
# is copied so that later edits do not touch df_fixture itself.
df_fixture_group_48 = df_fixture.iloc[:48].copy()    # rows 0-47
df_fixture_knockout = df_fixture.iloc[48:56].copy()  # rows 48-55
df_fixture_quarter = df_fixture.iloc[56:60].copy()   # rows 56-59
df_fixture_semi = df_fixture.iloc[60:62].copy()      # rows 60-61
df_fixture_final = df_fixture.iloc[62:].copy()       # row 62 onwards
# Accumulate the predicted points of every group-stage match into each
# group's table, then rank the table by points.
for group, table in dict_table.items():
    teams_in_group = table['Team'].values
    # Matches of this group: fixtures whose home side belongs to the group.
    is_group_match = df_fixture_group_48['home'].isin(teams_in_group)
    df_fixture_group_6 = df_fixture_group_48[is_group_match]

    for home, away in zip(df_fixture_group_6['home'], df_fixture_group_6['away']):
        points_home, points_away = predict_points(home, away)
        table.loc[table['Team'] == home, 'Pts'] += points_home
        table.loc[table['Team'] == away, 'Pts'] += points_away

    # Sort by points (best first), renumber rows from 0, keep only the
    # team and points columns, and round the points to whole numbers.
    dict_table[group] = (
        table.sort_values('Pts', ascending=False)
        .reset_index()[['Team', 'Pts']]
        .round(0)
    )

dict_table['Group A']
预测淘汰赛
df_fixture_knockout

# Substitute the 'Winners <group>' / 'Runners-up <group>' placeholders in
# the knockout fixture with the teams in rows 0 and 1 of each group table.
for group, table in dict_table.items():
    ranked_teams = table['Team']
    group_winner = ranked_teams.loc[0]
    runners_up = ranked_teams.loc[1]
    placeholders = {
        f'Winners {group}': group_winner,
        f'Runners-up {group}': runners_up,
    }
    df_fixture_knockout.replace(placeholders, inplace=True)

# The match winners are not known yet.
df_fixture_knockout['winner'] = '?'
df_fixture_knockout
对于淘汰赛,我不需要预测积分,而是预测每场比赛的获胜者。这就是为什么我在之前的 predict_points 函数基础上创建了一个新的 get_winner 函数。
def get_winner(df_fixture_updated):
    """Fill the ``winner`` column of a knockout fixture table in place.

    For every row, the expected points of the ``home`` and ``away`` teams
    are computed with ``predict_points``; the home team is the winner only
    when its value is strictly greater, otherwise the away team is chosen
    (this includes ties).

    Args:
        df_fixture_updated: DataFrame with ``home`` and ``away`` columns.

    Returns:
        The same DataFrame object, with ``winner`` set for every row.
    """
    for match_label, fixture in df_fixture_updated.iterrows():
        home_team = fixture['home']
        away_team = fixture['away']
        home_pts, away_pts = predict_points(home_team, away_team)
        df_fixture_updated.loc[match_label, 'winner'] = (
            home_team if home_pts > away_pts else away_team
        )
    return df_fixture_updated
简单地说,如果主队的积分大于客队的积分,那么赢家就是主队,否则,赢家就是客队。
使用get_winner函数可以得到如下的结果。
预测四分之一决赛、半决赛和决赛的情况
def update_table(df_fixture_round_1, df_fixture_round_2):
    """Carry the winners of one knockout round into the next round's fixture.

    For every row of ``df_fixture_round_1`` the text ``'Winners <score>'``
    (built from that row's ``score`` value) is replaced throughout
    ``df_fixture_round_2`` by the row's ``winner`` value. The next round's
    own ``winner`` column is then reset to ``'?'``.

    Args:
        df_fixture_round_1: Finished round; needs ``winner`` and ``score``
            columns (``score`` presumably holds the match label, e.g.
            'Match 49' -- confirm against the fixture file).
        df_fixture_round_2: Next round's fixture; modified in place.

    Returns:
        ``df_fixture_round_2`` (the same object that was passed in).
    """
    for label in df_fixture_round_1.index:
        advancing_team = df_fixture_round_1.loc[label, 'winner']
        match_label = df_fixture_round_1.loc[label, 'score']
        placeholder = f'Winners {match_label}'
        df_fixture_round_2.replace({placeholder: advancing_team}, inplace=True)

    # Winners of the next round are still unknown.
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2
四分之一决赛
半决赛
决赛
如果我使用 get_winner,我可以预测世界杯的冠军。这是最后的结果!!
通过再一次运行该函数,我得到的赢家是...巴西!
本文内容不用于商业目的,如涉及知识产权问题,请权利人联系51Testing小编(021-64471599-8017),我们将立即处理