import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt
import plotly as pltly
import plotly.express as px
from sklearn.metrics import accuracy_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split


# Location of the FIFA 23 player export, resolved against the working directory.
db = "Fifa 23 Players Data.csv"
df = pd.read_csv(filepath_or_buffer=db)
# Preview the first five rows of the raw table.
df.head(n=5)


# Columns removed before the analysis. The "kept:" comments list the columns
# that stay in the frame, in the order they sit between the dropped ones.
columns_drop = [
    # kept: 'Known As', 'Full Name', 'Overall', 'Potential'
    'Value(in Euro)',
    'Positions Played',
    # kept: 'Best Position'
    'Nationality',
    'Image Link',
    # kept: 'Age', 'Height(in cm)', 'Weight(in kg)', 'TotalStats', 'BaseStats'
    'Club Name',
    'Wage(in Euro)',
    'Release Clause',
    'Club Position',
    'Contract Until',
    'Club Jersey Number',
    'Joined On',
    'On Loan',
    # kept: 'Preferred Foot', 'Weak Foot Rating', 'Skill Moves'
    'International Reputation',
    'National Team Name',
    'National Team Image Link',
    'National Team Position',
    'National Team Jersey Number',
    # kept: 'Attacking Work Rate', 'Defensive Work Rate'
    'Pace Total',
    'Shooting Total',
    'Passing Total',
    'Dribbling Total',
    'Defending Total',
    'Physicality Total',
    # kept: 'Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
    #       'Dribbling', 'Curve', 'Freekick Accuracy', 'LongPassing', 'BallControl',
    #       'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
    #       'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots',
    #       'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties',
    #       'Composure', 'Marking', 'Standing Tackle', 'Sliding Tackle',
    #       'Goalkeeper Diving', 'Goalkeeper Handling', ' GoalkeeperKicking',
    #       'Goalkeeper Positioning', 'Goalkeeper Reflexes'
    'ST Rating',
    'LW Rating',
    'LF Rating',
    'CF Rating',
    'RF Rating',
    'RW Rating',
    'CAM Rating',
    'LM Rating',
    'CM Rating',
    'RM Rating',
    'LWB Rating',
    'CDM Rating',
    'RWB Rating',
    'LB Rating',
    'CB Rating',
    'RB Rating',
    'GK Rating',
]

# Drop the listed columns from the loaded frame in place, then display it.
df.drop(labels=columns_drop, axis="columns", inplace=True)
df


# Rename the best-position column to 'player_position' (in place).
df.rename({'Best Position': 'player_position'}, axis="columns", inplace=True)


# Remove every goalkeeper row (best position "GK") from the frame, in place.
is_goalkeeper = df['player_position'] == "GK"
df.drop(index=df.index[is_goalkeeper], inplace=True)


# Number of players (non-null 'Known As' entries) per best position,
# largest group first.
position_count = (
    df.groupby('player_position')['Known As']
    .count()
    .rename_axis('Position')
    .rename('Count')
    .reset_index()
    .sort_values('Count', ascending=False)
)

# Pie chart with one slice per position.
position_pie = pltly.graph_objects.Pie(
    labels=position_count['Position'],
    values=position_count['Count'],
)
fig = pltly.graph_objects.Figure(data=[position_pie])
fig.update_layout(title='Number of Players at each Position')
fig.show()


def get_pos_groups_dfs(df):
    """Split a player frame into one sub-frame per position group.

    Args:
        df: DataFrame with a 'player_position' column holding position codes
            such as 'CB' or 'ST'.

    Returns:
        A dict mapping each group name ('Center Backs', 'Wing Backs',
        'Center Midfielders', 'Midfielders', 'Strikers', in that order) to the
        rows of ``df`` whose 'player_position' belongs to that group. Rows
        whose position is in none of the groups appear in no sub-frame.
    """
    group_positions = {
        'Center Backs': ['CB'],
        'Wing Backs': ['RB', 'RWB', 'LB', 'LWB'],
        'Center Midfielders': ['CDM', 'CM', 'CAM'],
        'Midfielders': ['LM', 'LW', 'RM', 'RW'],
        'Strikers': ['ST', 'CF', 'LF', 'RF'],
    }
    return {
        group: df.loc[df['player_position'].isin(codes)]
        for group, codes in group_positions.items()
    }


# Position groups: group name -> position codes that belong to it
positions_groups = {
    'Center Backs': ['CB'],
    'Wing Backs': ['RB', 'RWB', 'LB', 'LWB'],
    'Center Midfielders': ['CM', 'CAM', 'CDM'],
    'Midfielders': ['LM', 'LW', 'RM', 'RW'],
    'Strikers': ['ST', 'CF', 'LF', 'RF'],
}


def categorize_pos(pos):
    """Return the name of the position group that lists ``pos``.

    Groups are checked in the insertion order of ``positions_groups``.
    ``None`` is returned when no group contains the given position code.
    """
    matching_groups = (
        name for name, codes in positions_groups.items() if pos in codes
    )
    return next(matching_groups, None)


# Label every player with the group of their best position. Mapping the single
# column replaces the row-wise DataFrame.apply, which built a Series for every
# row just to read one value from it.
df['Position Group'] = df['player_position'].map(categorize_pos)

# Dictionary holding one DataFrame per position group
positions_groups_df = get_pos_groups_dfs(df)

# Player count per group, in the same order as the dictionary keys
counts = [len(p_df) for p_df in positions_groups_df.values()]

group_pie = pltly.graph_objects.Pie(
    labels=list(positions_groups_df.keys()), values=counts)
fig = pltly.graph_objects.Figure(data=[group_pie])
fig.update_layout(title='Total Number of Players in Each Position Group')
fig.show()


# Position codes that become the stacked segments of the bar chart, in
# column order.
position_codes = ['CAM', 'CB', 'CDM', 'CF', 'CM', 'LB', 'LM',
                  'LW', 'LWB', 'RB', 'RM', 'RW', 'RWB', 'ST']

# For every position code, the number of players with that code in each
# position group (one entry per group, in the order of positions_groups_df).
# The mask is built from the group's own frame rather than from the full
# `df`, so it no longer relies on implicit index alignment.
all_positions_count = {
    pos: [int((p_df['player_position'] == pos).sum())
          for p_df in positions_groups_df.values()]
    for pos in position_codes
}

# Rows are position groups, columns are individual positions.
bar_data_df = pd.DataFrame(
    all_positions_count,
    index=pd.Index(list(positions_groups_df.keys()), name='Position Group'),
)

bar_data_df.plot.bar(stacked=True, subplots=False, figsize=(15,10), title='Full Position Breakdown of FIFA Players')

<AxesSubplot: title={'center': 'Full Position Breakdown of FIFA Players'}, xlabel='Position Group'>


# Individual attributes that make up each of the six rating categories.
phy = ['Jumping', 'Stamina', 'Strength', 'Aggression']
pas = [
    'Vision', 'Crossing', 'Freekick Accuracy',
    'Short Passing', 'LongPassing', 'Curve',
]
pac = ['Acceleration', 'Sprint Speed']
sho = [
    'Positioning', 'Finishing', 'Shot Power',
    'Long Shots', 'Volleys', 'Penalties',
]
dri = [
    'Dribbling', 'BallControl', 'Agility',
    'Reactions', 'Balance', 'Composure',
]
defe = [
    'Heading Accuracy', 'Interceptions',
    'Standing Tackle', 'Sliding Tackle', 'Marking',
]


# Add one column per category holding the row-wise mean of its attributes.
# The columns are appended in this exact order (phy, pas, pac, sho, dri, defe);
# keep the order stable, since later cells may address them by position.
category_attributes = {
    'phy': phy,
    'pas': pas,
    'pac': pac,
    'sho': sho,
    'dri': dri,
    'defe': defe,
}
for category, attributes in category_attributes.items():
    df[category] = df[attributes].mean(axis=1)

# Compact view: player name, position, the six category means and Overall.
summary_columns = ['Full Name', 'player_position', *category_attributes, 'Overall']
category_ratings_df = df[summary_columns].rename(
    columns={'player_position': 'position'})
category_ratings_df.head(10)


# Rebuild the per-group frames from the current df so they contain the
# category-mean columns.
positions_groups_df = get_pos_groups_dfs(df)


# Correlation between the category means and Overall for the 'Midfielders' group.
midfielder_columns = ['phy', 'pas', 'pac', 'sho', 'dri', 'defe', 'Overall']
midfielder_corr = positions_groups_df["Midfielders"][midfielder_columns].corr()
plt.title("Correlation Heatmap of Overall and Attributes (Midfielders)", fontsize=12)
sns.heatmap(midfielder_corr)

<AxesSubplot: title={'center': 'Correlation Heatmap of Overall and Attributes (Midfielders)'}>


# Correlation between the category means and Overall for the 'Strikers' group
# (titled "Attackers" in the plot).
striker_columns = ['phy', 'pas', 'pac', 'sho', 'dri', 'defe', 'Overall']
striker_corr = positions_groups_df["Strikers"][striker_columns].corr()
plt.title("Correlation Heatmap of Overall and Attributes (Attackers)", fontsize=12)
sns.heatmap(striker_corr)

<AxesSubplot: title={'center': 'Correlation Heatmap of Overall and Attributes (Attackers)'}>


# Correlation between the category means and Overall for the 'Center Backs'
# group. "Defenders" in the title refers to this group only; 'Wing Backs' is a
# separate position group and is not included here.
center_back_columns = ['phy', 'pas', 'pac', 'sho', 'dri', 'defe', 'Overall']
center_back_corr = positions_groups_df["Center Backs"][center_back_columns].corr()
plt.title("Correlation Heatmap of Overall and Attributes (Defenders)", fontsize=12)
sns.heatmap(center_back_corr)

<AxesSubplot: title={'center': 'Correlation Heatmap of Overall and Attributes (Defenders)'}>


# Divide player dataframe into features and labels (features are the six
# category means and labels are the positional group). Columns are selected by
# name instead of by integer position, so the split keeps working if columns
# are added to, removed from, or reordered in df.
feature_columns = ['phy', 'pas', 'pac', 'sho', 'dri', 'defe']
Features = df[feature_columns].to_numpy()
labels = df['Position Group'].to_numpy()

# Hold out 30% of the players for testing. No random_state is set, so the
# split (and every metric derived from it) differs from run to run.
Features_train, Features_test, labels_train, labels_test = train_test_split(
    Features, labels, test_size=0.3)


# Fit a linear discriminant analysis classifier on the training split
lda = LinearDiscriminantAnalysis()
lda.fit(Features_train, labels_train)

# Predict the position group of the held-out players and report the accuracy
predictions = lda.predict(Features_test)
lda_accuracy = accuracy_score(labels_test, predictions)
print(f'Accuracy of our LDA model is {lda_accuracy}')

Accuracy of our LDA model is 0.751415857605178


# Build dictionaries containing the predicted and true frequencies of
# positional groups. Every group starts at zero so that a group which never
# occurs still gets a column.
position_group_names = ['Center Backs', 'Wing Backs', 'Center Midfielders',
                        'Midfielders', 'Strikers']
prediction_frequencies = dict.fromkeys(position_group_names, 0)
true_frequencies = dict.fromkeys(position_group_names, 0)

# For each instance of a positional group encountered in the predictions, increase the group's frequency by 1
for prediction in predictions:
    prediction_frequencies[prediction] += 1

# For each instance of a positional group encountered in the test labels, increase the group's frequency by 1
for label in labels_test:
    true_frequencies[label] += 1

# Make a dataframe to hold the frequencies for each position group:
# row 0 holds the predicted counts, row 1 the true counts.
# Built directly from the two dicts because DataFrame.append is deprecated
# and was removed in pandas 2.0.
frequency_df = pd.DataFrame([prediction_frequencies, true_frequencies])
frequency_df

/var/folders/0w/0myfk66n1bq57sx6ncpb6dzm0000gn/T/ipykernel_48294/317219970.py:16: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/var/folders/0w/0myfk66n1bq57sx6ncpb6dzm0000gn/T/ipykernel_48294/317219970.py:16: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


# Transpose so that each position group becomes a row and the two frequency
# series become columns, which is the layout DataFrame.plot.bar expects.
frequency_labels = {0: "Predicted Frequency", 1: "True Frequency"}
transposed = frequency_df.T.rename(columns=frequency_labels)

# Grouped bar chart comparing predicted and true frequencies per positional group
ax = transposed.plot.bar(
    color=["SkyBlue", "IndianRed"],
    title="Predicted Versus True Frequencies of Positional Groups",
    figsize=(10, 10),
)
ax.set_xlabel("Positional Group")
ax.set_ylabel("Frequency")
# Set after the chart is created, as in the original cell order.
matplotlib.rcParams['font.size'] = 12
plt.show()


# Evaluation metrics for the LDA predictions on the held-out split.
# Each result is bound to a name so it stays available for inspection;
# previously the first three values were computed and immediately discarded.
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)

# Confusion Matrix
lda_confusion_matrix = confusion_matrix(labels_test, predictions)
# Accuracy
lda_accuracy_score = accuracy_score(labels_test, predictions)
# Recall (average=None returns one score per class)
lda_recall_per_class = recall_score(labels_test, predictions, average=None)
# Precision (average=None returns one score per class)
lda_precision_per_class = precision_score(labels_test, predictions, average=None)

# Last expression of the cell: the per-class precision array is displayed.
lda_precision_per_class

array([0.82816652, 0.70118964, 0.6620603 , 0.90968586, 0.67312349])

	Known As	Full Name	Overall	Potential	Value(in Euro)	Positions Played	Best Position	Nationality	Image Link	Age	...	LM Rating	CM Rating	RM Rating	LWB Rating	CDM Rating	RWB Rating	LB Rating	CB Rating	RB Rating	GK Rating
0	L. Messi	Lionel Messi	91	91	54000000	RW	CAM	Argentina	https://cdn.sofifa.net/players/158/023/23_60.png	35	...	91	88	91	67	66	67	62	53	62	22
1	K. Benzema	Karim Benzema	91	91	64000000	CF,ST	CF	France	https://cdn.sofifa.net/players/165/153/23_60.png	34	...	89	84	89	67	67	67	63	58	63	21
2	R. Lewandowski	Robert Lewandowski	91	91	84000000	ST	ST	Poland	https://cdn.sofifa.net/players/188/545/23_60.png	33	...	86	83	86	67	69	67	64	63	64	22
3	K. De Bruyne	Kevin De Bruyne	91	91	107500000	CM,CAM	CM	Belgium	https://cdn.sofifa.net/players/192/985/23_60.png	31	...	91	91	91	82	82	82	78	72	78	24
4	K. Mbappé	Kylian Mbappé	91	95	190500000	ST,LW	ST	France	https://cdn.sofifa.net/players/231/747/23_60.png	23	...	92	84	92	70	66	70	66	57	66	21

	Known As	Full Name	Overall	Potential	Best Position	Age	Height(in cm)	Weight(in kg)	TotalStats	BaseStats	...	Penalties	Composure	Marking	Standing Tackle	Sliding Tackle	Goalkeeper Diving	Goalkeeper Handling	GoalkeeperKicking	Goalkeeper Positioning	Goalkeeper Reflexes
0	L. Messi	Lionel Messi	91	91	CAM	35	169	67	2190	452	...	75	96	20	35	24	6	11	15	14	8
1	K. Benzema	Karim Benzema	91	91	CF	34	185	81	2147	455	...	84	90	43	24	18	13	11	5	5	7
2	R. Lewandowski	Robert Lewandowski	91	91	ST	33	185	81	2205	458	...	90	88	35	42	19	15	6	12	8	10
3	K. De Bruyne	Kevin De Bruyne	91	91	CM	31	181	70	2303	483	...	83	89	68	65	53	15	13	5	10	13
4	K. Mbappé	Kylian Mbappé	91	95	ST	23	182	73	2177	470	...	80	88	26	34	32	13	5	7	11	6
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
18534	D. Collins	Darren Collins	47	56	CAM	21	174	68	1287	274	...	40	47	39	29	27	6	9	5	13	8
18535	Yang Dejiang	Dejiang Yang	47	57	CDM	17	175	60	1289	267	...	33	45	46	50	52	6	12	11	8	6
18536	L. Mullan	Liam Mullan	47	67	RM	18	170	65	1333	277	...	43	59	39	37	48	11	12	8	7	12
18537	D. McCallion	Daithí McCallion	47	61	CB	17	178	65	1113	226	...	37	41	50	54	54	8	14	13	7	8
18538	N. Rabha	Nabin Rabha	47	50	LB	25	176	66	1277	269	...	35	32	47	44	43	13	13	6	14	14

	Full Name	position	phy	pas	pac	sho	dri	defe	Overall
0	Lionel Messi	CAM	62.50	90.833333	81.5	87.166667	93.666667	37.8	91
1	Karim Benzema	CF	76.50	80.666667	79.5	87.166667	85.000000	42.8	91
2	Robert Lewandowski	ST	82.25	78.333333	75.5	90.333333	85.666667	47.2	91
3	Kevin De Bruyne	CM	75.00	91.000000	74.5	87.000000	85.333333	61.4	91
4	Kylian Mbappé	ST	76.00	77.666667	97.0	86.333333	89.833333	40.4	91
5	Mohamed Salah	RW	73.50	79.833333	90.0	87.166667	90.666667	47.2	90
8	C. Ronaldo dos Santos Aveiro	ST	77.75	78.500000	81.0	91.166667	84.333333	39.8	90
9	Virgil van Dijk	CB	85.00	68.833333	79.5	58.500000	73.166667	89.4	90
10	Harry Kane	ST	81.25	80.500000	68.0	90.666667	81.666667	50.6	89
11	Neymar da Silva Santos Jr.	LW	64.00	85.666667	87.0	84.333333	90.833333	39.2	89

	Center Backs	Wing Backs	Center Midfielders	Midfielders	Strikers
0	1129	826	1429	796	764
1	1092	796	1428	847	781

CMSC 320: Introduction to Data Science - Final Project - FIFA Positions Predictor

Dhairya Gandhi

1. Introduction

1.1. Background Information

1.1.1. A look into FIFA Ratings

1.2. Libraries Used

2. Data Collection

2.1. About the Dataset

2.2. Load and View Data

2.3. Data Cleaning

3. Data Processing

3.1. Positional Breakdown

3.2. Groups by Positions

4. Exploratory Data Analysis

4.1. Attribute Categories

4.2. Correlation of Attributes for different positions

4.2.1. Midfielders

4.2.2. Attackers

4.2.3. Defenders

5. Machine Learning & Visualization

5.1. Creation and Training Model

6. Conclusion

6.1. Recap

6.2. Extension

6.3. Final Thoughts