import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the raw FIFA 21 dataset. low_memory=False makes pandas infer each
# column's dtype from the whole file instead of chunk by chunk, which avoids
# the DtypeWarning about mixed types in column 76 ('Hits').
fifa21_rawdata = pd.read_csv('fifa21_raw_data_v2.csv', low_memory=False)
fifa21_rawdata
# fifa21_rawdata.shape
C:\Users\imgal\AppData\Local\Temp\ipykernel_11392\2118387280.py:1: DtypeWarning: Columns (76) have mixed types. Specify dtype option on import or set low_memory=False. fifa21_rawdata = pd.read_csv('fifa21_raw_data_v2.csv')
ID | Name | LongName | photoUrl | playerUrl | Nationality | Age | ↓OVA | POT | Club | ... | A/W | D/W | IR | PAC | SHO | PAS | DRI | DEF | PHY | Hits | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 158023 | L. Messi | Lionel Messi | https://cdn.sofifa.com/players/158/023/21_60.png | http://sofifa.com/player/158023/lionel-messi/2... | Argentina | 33 | 93 | 93 | \n\n\n\nFC Barcelona | ... | Medium | Low | 5 ★ | 85 | 92 | 91 | 95 | 38 | 65 | 771 |
1 | 20801 | Cristiano Ronaldo | C. Ronaldo dos Santos Aveiro | https://cdn.sofifa.com/players/020/801/21_60.png | http://sofifa.com/player/20801/c-ronaldo-dos-s... | Portugal | 35 | 92 | 92 | \n\n\n\nJuventus | ... | High | Low | 5 ★ | 89 | 93 | 81 | 89 | 35 | 77 | 562 |
2 | 200389 | J. Oblak | Jan Oblak | https://cdn.sofifa.com/players/200/389/21_60.png | http://sofifa.com/player/200389/jan-oblak/210006/ | Slovenia | 27 | 91 | 93 | \n\n\n\nAtlético Madrid | ... | Medium | Medium | 3 ★ | 87 | 92 | 78 | 90 | 52 | 90 | 150 |
3 | 192985 | K. De Bruyne | Kevin De Bruyne | https://cdn.sofifa.com/players/192/985/21_60.png | http://sofifa.com/player/192985/kevin-de-bruyn... | Belgium | 29 | 91 | 91 | \n\n\n\nManchester City | ... | High | High | 4 ★ | 76 | 86 | 93 | 88 | 64 | 78 | 207 |
4 | 190871 | Neymar Jr | Neymar da Silva Santos Jr. | https://cdn.sofifa.com/players/190/871/21_60.png | http://sofifa.com/player/190871/neymar-da-silv... | Brazil | 28 | 91 | 91 | \n\n\n\nParis Saint-Germain | ... | High | Medium | 5 ★ | 91 | 85 | 86 | 94 | 36 | 59 | 595 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
18974 | 247223 | Xia Ao | Ao Xia | https://cdn.sofifa.com/players/247/223/21_60.png | http://sofifa.com/player/247223/ao-xia/210006/ | China PR | 21 | 47 | 55 | \n\n\n\nWuhan Zall | ... | Medium | Medium | 1 ★ | 64 | 28 | 26 | 38 | 48 | 51 | NaN |
18975 | 258760 | B. Hough | Ben Hough | https://cdn.sofifa.com/players/258/760/21_60.png | http://sofifa.com/player/258760/ben-hough/210006/ | England | 17 | 47 | 67 | \n\n\n\nOldham Athletic | ... | Medium | Medium | 1 ★ | 64 | 40 | 48 | 49 | 35 | 45 | NaN |
18976 | 252757 | R. McKinley | Ronan McKinley | https://cdn.sofifa.com/players/252/757/21_60.png | http://sofifa.com/player/252757/ronan-mckinley... | England | 18 | 47 | 65 | \n\n\n\nDerry City | ... | Medium | Medium | 1 ★ | 63 | 39 | 44 | 46 | 40 | 53 | NaN |
18977 | 243790 | Wang Zhen'ao | Zhen'ao Wang | https://cdn.sofifa.com/players/243/790/21_60.png | http://sofifa.com/player/243790/zhenao-wang/21... | China PR | 20 | 47 | 57 | \n\n\n\nDalian YiFang FC | ... | Medium | Medium | 1 ★ | 58 | 49 | 41 | 49 | 30 | 44 | NaN |
18978 | 252520 | Zhou Xiao | Xiao Zhou | https://cdn.sofifa.com/players/252/520/21_60.png | http://sofifa.com/player/252520/xiao-zhou/210006/ | China PR | 21 | 47 | 57 | \n\n\n\nDalian YiFang FC | ... | Medium | Medium | 1 ★ | 62 | 22 | 39 | 42 | 45 | 55 | NaN |
18979 rows × 77 columns
# Print a summary of the raw data: column names, non-null counts and dtypes
fifa21_rawdata.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 18979 entries, 0 to 18978 Data columns (total 77 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 18979 non-null int64 1 Name 18979 non-null object 2 LongName 18979 non-null object 3 photoUrl 18979 non-null object 4 playerUrl 18979 non-null object 5 Nationality 18979 non-null object 6 Age 18979 non-null int64 7 ↓OVA 18979 non-null int64 8 POT 18979 non-null int64 9 Club 18979 non-null object 10 Contract 18979 non-null object 11 Positions 18979 non-null object 12 Height 18979 non-null object 13 Weight 18979 non-null object 14 Preferred Foot 18979 non-null object 15 BOV 18979 non-null int64 16 Best Position 18979 non-null object 17 Joined 18979 non-null object 18 Loan Date End 1013 non-null object 19 Value 18979 non-null object 20 Wage 18979 non-null object 21 Release Clause 18979 non-null object 22 Attacking 18979 non-null int64 23 Crossing 18979 non-null int64 24 Finishing 18979 non-null int64 25 Heading Accuracy 18979 non-null int64 26 Short Passing 18979 non-null int64 27 Volleys 18979 non-null int64 28 Skill 18979 non-null int64 29 Dribbling 18979 non-null int64 30 Curve 18979 non-null int64 31 FK Accuracy 18979 non-null int64 32 Long Passing 18979 non-null int64 33 Ball Control 18979 non-null int64 34 Movement 18979 non-null int64 35 Acceleration 18979 non-null int64 36 Sprint Speed 18979 non-null int64 37 Agility 18979 non-null int64 38 Reactions 18979 non-null int64 39 Balance 18979 non-null int64 40 Power 18979 non-null int64 41 Shot Power 18979 non-null int64 42 Jumping 18979 non-null int64 43 Stamina 18979 non-null int64 44 Strength 18979 non-null int64 45 Long Shots 18979 non-null int64 46 Mentality 18979 non-null int64 47 Aggression 18979 non-null int64 48 Interceptions 18979 non-null int64 49 Positioning 18979 non-null int64 50 Vision 18979 non-null int64 51 Penalties 18979 non-null int64 52 Composure 18979 non-null int64 53 Defending 18979 non-null int64 54 Marking 18979 
non-null int64 55 Standing Tackle 18979 non-null int64 56 Sliding Tackle 18979 non-null int64 57 Goalkeeping 18979 non-null int64 58 GK Diving 18979 non-null int64 59 GK Handling 18979 non-null int64 60 GK Kicking 18979 non-null int64 61 GK Positioning 18979 non-null int64 62 GK Reflexes 18979 non-null int64 63 Total Stats 18979 non-null int64 64 Base Stats 18979 non-null int64 65 W/F 18979 non-null object 66 SM 18979 non-null object 67 A/W 18979 non-null object 68 D/W 18979 non-null object 69 IR 18979 non-null object 70 PAC 18979 non-null int64 71 SHO 18979 non-null int64 72 PAS 18979 non-null int64 73 DRI 18979 non-null int64 74 DEF 18979 non-null int64 75 PHY 18979 non-null int64 76 Hits 16384 non-null object dtypes: int64(54), object(23) memory usage: 11.1+ MB
Make a copy of the dataset so that the original data remains available in case I need to go back to it.
# Work on an independent copy: a plain assignment would only create a second
# name for the same DataFrame, so any in-place change made through `fdc`
# would also alter fifa21_rawdata.
fdc = fifa21_rawdata.copy()
Let's replace the 'Name' and 'LongName' columns with two cleaner columns: 'Name' (the first name) and 'Surname' (the rest of the full name).
# Inspect the dtype of the full-name column before splitting it
fdc['LongName'].dtype
dtype('O')
# Rename 'LongName' to 'Surname'. At this point the column still holds the
# full name; split_names() below overwrites it with the surname part only.
fdc = fdc.rename(columns={'LongName':'Surname'})
def split_names(df_3):
    """Split players' names into a first-name column and a surname column.

    Parameters
    ----------
    df_3 : pandas.DataFrame
        Must contain a 'Name' column (short display name, e.g. "L. Messi")
        and a 'Surname' column that still holds the full name
        (e.g. "Lionel Messi").

    Returns
    -------
    pandas.DataFrame
        A new DataFrame in which 'Surname' is everything after the first
        word of the full name and 'Name' is a single first name. The input
        frame is left untouched.

    Notes
    -----
    'Name' starts as the first word of the short name. It is replaced by the
    first word of the full name when it is only an initial ("L." -> "Lionel")
    or when it equals the extracted surname, which happens when the short
    name is written surname-first ("Xia Ao" / "Ao Xia" -> "Ao").
    Full names made of a single word get a missing value as 'Surname'.
    """
    # Work on a copy so the caller's frame is neither modified nor left with
    # a temporary helper column.
    df_3 = df_3.copy()

    # Split the full name on the first run of whitespace. reindex guarantees
    # exactly two columns even when no row has a second word (or the frame is
    # empty), in which case expand=True alone would return fewer columns.
    parts = df_3['Surname'].str.split(n=1, expand=True).reindex(columns=[0, 1])
    first_names = parts[0]
    df_3['Surname'] = parts[1]

    # For the 'Name' column, keep only the first word of the short name
    short_names = df_3['Name'].str.split().str[0]
    df_3['Name'] = short_names

    # An initial is an uppercase letter followed by a dot. str.isupper is
    # Unicode-aware, so "Á." is recognised as well as "L."; .eq() turns
    # missing values into False so the mask is always boolean.
    is_initial = short_names.str[0].str.isupper().eq(True) & short_names.str[1:2].eq('.')
    is_surname_first = short_names == df_3['Surname']
    needs_first_name = is_initial | is_surname_first

    # Use loc to update only the rows selected by the mask (index-aligned)
    df_3.loc[needs_first_name, 'Name'] = first_names
    return df_3
# Apply the name split and preview the first ten rows of the two name columns
fdc = split_names(fdc)
fdc[['Name', 'Surname']].head(10)
Name | Surname | |
---|---|---|
0 | Lionel | Messi |
1 | Cristiano | Ronaldo dos Santos Aveiro |
2 | Jan | Oblak |
3 | Kevin | De Bruyne |
4 | Neymar | da Silva Santos Jr. |
5 | Robert | Lewandowski |
6 | Mohamed | Salah |
7 | Alisson | Ramses Becker |
8 | Kylian | Mbappé |
9 | Marc-André | ter Stegen |
The 'photoUrl' and 'playerUrl' columns are not going to be used in the analysis, so I decided to delete them.
# Remove the two URL columns; the result is a new frame bound back to fdc
fdc = fdc.drop(columns=['photoUrl', 'playerUrl'])
Let's check whether the values in the 'Nationality' column are OK.
# Inspect the dtype of the 'Nationality' column
fdc['Nationality'].dtype
dtype('O')
# List the distinct nationality values to check them for odd entries
fdc['Nationality'].unique()
array(['Argentina', 'Portugal', 'Slovenia', 'Belgium', 'Brazil', 'Poland', 'Egypt', 'France', 'Germany', 'Netherlands', 'Senegal', 'Spain', 'England', 'Scotland', 'Korea Republic', 'Costa Rica', 'Italy', 'Gabon', 'Croatia', 'Uruguay', 'Switzerland', 'Serbia', 'Slovakia', 'Morocco', 'Algeria', 'Denmark', 'Hungary', 'Bosnia Herzegovina', 'Nigeria', 'Cameroon', 'Norway', 'Ghana', 'Mexico', 'Austria', 'Albania', 'Colombia', 'Chile', 'Ivory Coast', 'Greece', 'Finland', 'Wales', 'Sweden', 'Togo', 'Czech Republic', 'Russia', 'Venezuela', 'Canada', 'United States', 'Guinea', 'Montenegro', 'Israel', 'Republic of Ireland', 'Ukraine', 'Turkey', 'Ecuador', 'Jamaica', 'DR Congo', 'Australia', 'China PR', 'Armenia', 'Northern Ireland', 'North Macedonia', 'Kosovo', 'Mali', 'Peru', 'Central African Republic', 'Iceland', 'Burkina Faso', 'Paraguay', 'Japan', 'Romania', 'New Zealand', 'Iran', 'Angola', 'Tunisia', 'Syria', 'Dominican Republic', 'Cape Verde', 'Equatorial Guinea', 'Kenya', 'Georgia', 'Panama', 'Zambia', 'Tanzania', 'Zimbabwe', 'Congo', 'South Africa', 'Moldova', 'Mozambique', 'Iraq', 'Guinea Bissau', 'Honduras', 'Cuba', 'Cyprus', 'Lithuania', 'Estonia', 'Madagascar', 'Benin', 'Curacao', 'Saudi Arabia', 'Gambia', 'Uzbekistan', 'Chad', 'United Arab Emirates', 'Saint Kitts and Nevis', 'Libya', 'Sierra Leone', 'Philippines', 'Liberia', 'Bulgaria', 'Comoros', 'Namibia', 'Luxembourg', 'Trinidad & Tobago', 'Bermuda', 'Thailand', 'Burundi', 'New Caledonia', 'Puerto Rico', 'Bolivia', 'Kazakhstan', 'Antigua & Barbuda', 'Latvia', 'Malawi', 'Montserrat', 'São Tomé & Príncipe', 'El Salvador', 'Mauritania', 'Jordan', 'Eritrea', 'Aruba', 'Uganda', 'Chinese Taipei', 'Azerbaijan', 'Afghanistan', 'Faroe Islands', 'Haiti', 'Sudan', 'Grenada', 'Lebanon', 'Guam', 'Palestine', 'Belarus', 'Guyana', 'Rwanda', 'Liechtenstein', 'Saint Lucia', 'Papua New Guinea', 'India', 'Ethiopia', 'Belize', 'Andorra', 'Guatemala', 'Malta', 'Niger', 'Korea DPR', 'Barbados', 'Macau', 'South Sudan', 'Singapore', 
'Hong Kong', 'Nicaragua', 'Malaysia', 'Indonesia'], dtype=object)
Verifying whether the values in the 'Age' column are OK.
# Inspect the dtype of the 'Age' column
fdc['Age'].dtype
dtype('int64')
# List the distinct ages to check them for implausible values
fdc['Age'].unique()
array([33, 35, 27, 29, 28, 31, 21, 34, 32, 25, 26, 30, 20, 24, 22, 23, 19, 38, 42, 36, 37, 18, 17, 39, 40, 41, 16, 43, 53], dtype=int64)
The columns OVA and POT refer to OVERALL and POTENTIAL. Let's rename the columns to make that clearer. It is also important that their data type is int64, in case we need to run numeric operations on them later.
# Inspect the dtype of the overall-rating column
fdc['↓OVA'].dtype
dtype('int64')
# List the distinct overall ratings to check their range
fdc['↓OVA'].unique()
array([93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47], dtype=int64)
# Inspect the dtype of the potential-rating column
fdc['POT'].dtype
dtype('int64')
# List the distinct potential ratings to check their range
fdc['POT'].unique()
array([93, 92, 91, 90, 95, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47], dtype=int64)
# Give the two rating columns self-explanatory names, then display the frame
fdc = fdc.rename(
    columns={
        '↓OVA': 'Overall',
        'POT': 'Potential',
    }
)
fdc
ID | Name | Surname | Nationality | Age | Overall | Potential | Club | Contract | Positions | ... | A/W | D/W | IR | PAC | SHO | PAS | DRI | DEF | PHY | Hits | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 158023 | Lionel | Messi | Argentina | 33 | 93 | 93 | \n\n\n\nFC Barcelona | 2004 ~ 2021 | RW, ST, CF | ... | Medium | Low | 5 ★ | 85 | 92 | 91 | 95 | 38 | 65 | 771 |
1 | 20801 | Cristiano | Ronaldo dos Santos Aveiro | Portugal | 35 | 92 | 92 | \n\n\n\nJuventus | 2018 ~ 2022 | ST, LW | ... | High | Low | 5 ★ | 89 | 93 | 81 | 89 | 35 | 77 | 562 |
2 | 200389 | Jan | Oblak | Slovenia | 27 | 91 | 93 | \n\n\n\nAtlético Madrid | 2014 ~ 2023 | GK | ... | Medium | Medium | 3 ★ | 87 | 92 | 78 | 90 | 52 | 90 | 150 |
3 | 192985 | Kevin | De Bruyne | Belgium | 29 | 91 | 91 | \n\n\n\nManchester City | 2015 ~ 2023 | CAM, CM | ... | High | High | 4 ★ | 76 | 86 | 93 | 88 | 64 | 78 | 207 |
4 | 190871 | Neymar | da Silva Santos Jr. | Brazil | 28 | 91 | 91 | \n\n\n\nParis Saint-Germain | 2017 ~ 2022 | LW, CAM | ... | High | Medium | 5 ★ | 91 | 85 | 86 | 94 | 36 | 59 | 595 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
18974 | 247223 | Ao | Xia | China PR | 21 | 47 | 55 | \n\n\n\nWuhan Zall | 2018 ~ 2022 | CB | ... | Medium | Medium | 1 ★ | 64 | 28 | 26 | 38 | 48 | 51 | NaN |
18975 | 258760 | Ben | Hough | England | 17 | 47 | 67 | \n\n\n\nOldham Athletic | 2020 ~ 2021 | CM | ... | Medium | Medium | 1 ★ | 64 | 40 | 48 | 49 | 35 | 45 | NaN |
18976 | 252757 | Ronan | McKinley | England | 18 | 47 | 65 | \n\n\n\nDerry City | 2019 ~ 2020 | CM | ... | Medium | Medium | 1 ★ | 63 | 39 | 44 | 46 | 40 | 53 | NaN |
18977 | 243790 | Zhen'ao | Wang | China PR | 20 | 47 | 57 | \n\n\n\nDalian YiFang FC | 2020 ~ 2022 | RW | ... | Medium | Medium | 1 ★ | 58 | 49 | 41 | 49 | 30 | 44 | NaN |
18978 | 252520 | Xiao | Zhou | China PR | 21 | 47 | 57 | \n\n\n\nDalian YiFang FC | 2019 ~ 2023 | CB, LB | ... | Medium | Medium | 1 ★ | 62 | 22 | 39 | 42 | 45 | 55 | NaN |
18979 rows × 75 columns
Checking the names of the clubs and cleaning them up: the values are already strings (object dtype), but most of them start with newline characters that need to be stripped.
# Inspect the dtype of the 'Club' column
fdc['Club'].dtype
dtype('O')
# List the distinct club names as stored in the raw data
fdc['Club'].unique()
array(['\n\n\n\nFC Barcelona', '\n\n\n\nJuventus', '\n\n\n\nAtlético Madrid', '\n\n\n\nManchester City', '\n\n\n\nParis Saint-Germain', '\n\n\n\nFC Bayern München', '\n\n\n\nLiverpool', '\n\n\n\nReal Madrid', '\n\n\n\nChelsea', '\n\n\n\nTottenham Hotspur', '\n\n\n\nInter', '\n\n\n\nNapoli', '\n\n\n\nBorussia Dortmund', '\n\n\n\nManchester United', '\n\n\n\nArsenal', '\n\n\n\nLazio', '\n\n\n\nLeicester City', '\n\n\n\nBorussia Mönchengladbach', '\n\n\n\nReal Sociedad', '\n\n\n\nAtalanta', '\n\n\n\nOlympique Lyonnais', '\n\n\n\nMilan', '\n\n\n\nVillarreal CF', '\n\n\n\nRB Leipzig', '\n\n\n\nCagliari', '\n\n\n\nAjax', '\n\n\n\nSL Benfica', '\n\n\n\nAS Monaco', '\n\n\n\nWolverhampton Wanderers', '\n\n\n\nEverton', '\n\n\n\nFiorentina', '\n\n\n\nFC Porto', '\n\n\n\nRC Celta', '\n\n\n\nTorino', '\n\n\n\nSevilla FC', '\n\n\n\nGrêmio', '\n\n\n\nReal Betis', '\n\n\n\nRoma', '\n\n\n\nNewcastle United', '\n\n\n\nEintracht Frankfurt', '\n\n\n\nValencia CF', '\n\n\n\nMedipol Başakşehir FK', '\n\n\n\nInter Miami', '\n\n\n\nBayer 04 Leverkusen', '\n\n\n\nLevante UD', '\n\n\n\nCrystal Palace', '\n\n\n\nAthletic Club de Bilbao', '\n\n\n\nShanghai SIPG FC', '\n\n\n\nVfL Wolfsburg', '\n\n\n\nGuangzhou Evergrande Taobao FC', '\n\n\n\nAl Shabab', '\n\n\n\nOlympique de Marseille', '\n\n\n\nLos Angeles FC', '\n\n\n\nBeijing Sinobo Guoan FC', '\n\n\n\nGetafe CF', '\n\n\n\nSV Werder Bremen', '\n\n\n\nTSG 1899 Hoffenheim', '\n\n\n\nLOSC Lille', '\n\n\n\nDeportivo Alavés', '\n\n\n\nBurnley', '\n\n\n\nLeeds United', '\n\n\n\nFulham', '\n\n\n\nPFC CSKA Moscow', '\n\n\n\nGenoa', '\n\n\n\nGalatasaray SK', '\n\n\n\nAtlanta United', '\n\n\n\nAtlético Mineiro', '\n\n\n\nFlamengo', '\n\n\n\nPalmeiras', 'No Club', '\n\n\n\nSão Paulo', '\n\n\n\nWest Ham United', '\n\n\n\nSD Eibar', '\n\n\n\nRiver Plate', '\n\n\n\nOGC Nice', '\n\n\n\nReal Valladolid CF', '\n\n\n\nSporting CP', '\n\n\n\nPSV', '\n\n\n\nFeyenoord', '\n\n\n\nClub Brugge KV', '\n\n\n\nShakhtar Donetsk', '\n\n\n\nVissel Kobe', '\n\n\n\nBoca 
Juniors', '\n\n\n\n1. FC Union Berlin', '\n\n\n\nDalian YiFang FC', '\n\n\n\nAS Saint-Étienne', '\n\n\n\nSassuolo', '\n\n\n\nTigres U.A.N.L.', '\n\n\n\nDinamo Zagreb', '\n\n\n\nDynamo Kyiv', '\n\n\n\nFluminense', '\n\n\n\nUdinese', '\n\n\n\nAston Villa', '\n\n\n\nStade Rennais FC', '\n\n\n\nBeşiktaş JK', '\n\n\n\nBrighton & Hove Albion', '\n\n\n\nHebei China Fortune FC', '\n\n\n\nSouthampton', '\n\n\n\nLokomotiv Moscow', '\n\n\n\nJiangsu Suning FC', '\n\n\n\nAl Hilal', '\n\n\n\nWest Bromwich Albion', '\n\n\n\nFenerbahçe SK', '\n\n\n\nFC Groningen', '\n\n\n\nGuangzhou R&F FC', '\n\n\n\nOrlando City SC', '\n\n\n\nSampdoria', '\n\n\n\nStade de Reims', '\n\n\n\nSheffield United', '\n\n\n\nSantos', '\n\n\n\nInternacional', '\n\n\n\nCoritiba', '\n\n\n\nCA Osasuna', '\n\n\n\nLA Galaxy', '\n\n\n\nAl Nassr', '\n\n\n\nSC Freiburg', '\n\n\n\nSC Braga', '\n\n\n\nOlympiacos CFP', '\n\n\n\nToronto FC', '\n\n\n\nGranada CF', '\n\n\n\nFC Schalke 04', '\n\n\n\nRC Strasbourg Alsace', '\n\n\n\nHertha BSC', '\n\n\n\nAZ Alkmaar', '\n\n\n\nSeattle Sounders FC', '\n\n\n\nMontpellier HSC', '\n\n\n\nShanghai Greenland Shenhua FC', '\n\n\n\nHamburger SV', '\n\n\n\nGirona FC', '\n\n\n\nRacing Club', '\n\n\n\nPortland Timbers', '\n\n\n\nFC Girondins de Bordeaux', '\n\n\n\nWatford', '\n\n\n\nEstudiantes de La Plata', '\n\n\n\nClub América', '\n\n\n\nVitória Guimarães', '\n\n\n\nSK Slavia Praha', '\n\n\n\nTrabzonspor', '\n\n\n\nRCD Espanyol', '\n\n\n\nFC Nantes', '\n\n\n\nAngers SCO', '\n\n\n\nCruz Azul', '\n\n\n\nBSC Young Boys', '\n\n\n\nClub León', '\n\n\n\nSpartak Moscow', '\n\n\n\nSivasspor', '\n\n\n\nElche CF', '\n\n\n\nFC Augsburg', '\n\n\n\nWuhan Zall', '\n\n\n\nParma', '\n\n\n\nRoyal Antwerp FC', '\n\n\n\nMonterrey', '\n\n\n\nSan Lorenzo de Almagro', '\n\n\n\nAC Monza', '\n\n\n\nClub Atlético Lanús', '\n\n\n\nSD Huesca', '\n\n\n\nDerby County', '\n\n\n\nKayserispor', '\n\n\n\nRSC Anderlecht', '\n\n\n\nCD Leganés', '\n\n\n\nVélez Sarsfield', '\n\n\n\nStade Brestois 29', '\n\n\n\nTianjin 
TEDA FC', '\n\n\n\nBirmingham City', '\n\n\n\nHellas Verona', '\n\n\n\n1. FC Köln', '\n\n\n\nAl Wehda', '\n\n\n\nBournemouth', '\n\n\n\nAlanyaspor', '\n\n\n\nBologna', '\n\n\n\nAmiens SC', '\n\n\n\nCeltic', '\n\n\n\nShijiazhuang Ever Bright F.C.', '\n\n\n\nClub Atlas', '\n\n\n\nAl Ahli', '\n\n\n\nNorwich City', '\n\n\n\nFatih Karagümrük S.K.', '\n\n\n\nClub Tijuana', '\n\n\n\nUniversidad Católica', '\n\n\n\nGoiás', '\n\n\n\nBenevento', '\n\n\n\nIndependiente', '\n\n\n\nQingdao Huanghai F.C.', '\n\n\n\nBrentford', '\n\n\n\nAEK Athens', '\n\n\n\nRacing Club de Lens', '\n\n\n\nOlimpia Asunción', '\n\n\n\nShenzhen FC', '\n\n\n\nÇaykur Rizespor', '\n\n\n\nKAA Gent', '\n\n\n\nNew England Revolution', '\n\n\n\nCádiz CF', '\n\n\n\n1. FSV Mainz 05', '\n\n\n\nVasco da Gama', '\n\n\n\nFC Midtjylland', '\n\n\n\nAl Ain FC', '\n\n\n\nSantos Laguna', '\n\n\n\nSPAL', '\n\n\n\nPerth Glory', '\n\n\n\nFC Utrecht', '\n\n\n\nMontreal Impact', '\n\n\n\nUD Almería', '\n\n\n\nFortaleza', '\n\n\n\nSpezia', '\n\n\n\nFC Basel 1893', '\n\n\n\nFC København', '\n\n\n\nGodoy Cruz', '\n\n\n\nDeportivo Toluca', '\n\n\n\nNacional de Montevideo', '\n\n\n\nAalborg BK', '\n\n\n\nRio Ave FC', '\n\n\n\nFC Metz', '\n\n\n\nChongqing Dangdai Lifan FC SWM Team', '\n\n\n\nRosenborg BK', '\n\n\n\nFC Red Bull Salzburg', '\n\n\n\nBotafogo', '\n\n\n\nDSC Arminia Bielefeld', '\n\n\n\nClub Athletico Paranaense', '\n\n\n\nRCD Mallorca', '\n\n\n\nPachuca', '\n\n\n\nSwansea City', '\n\n\n\nKRC Genk', '\n\n\n\nEn Avant de Guingamp', '\n\n\n\nLecce', '\n\n\n\nNîmes Olympique', '\n\n\n\nColo-Colo', '\n\n\n\nStade Malherbe Caen', '\n\n\n\nColumbus Crew SC', '\n\n\n\nJunior FC', '\n\n\n\nVfB Stuttgart', '\n\n\n\nMoreirense FC', '\n\n\n\nRangers FC', '\n\n\n\nMalmö FF', '\n\n\n\nShandong Luneng TaiShan FC', '\n\n\n\nSparta Praha', '\n\n\n\nHatayspor', '\n\n\n\nChicago Fire', '\n\n\n\nToulouse Football Club', '\n\n\n\nFC Seoul', '\n\n\n\nAntalyaspor', '\n\n\n\nDeportivo Cali', '\n\n\n\nKaizer Chiefs', '\n\n\n\nReal 
Zaragoza', '\n\n\n\nAtlético Clube Goianiense', '\n\n\n\nPuebla FC', '\n\n\n\nFortuna Düsseldorf', '\n\n\n\nWestern United FC', '\n\n\n\nAstra Giurgiu', '\n\n\n\nHenan Jianye FC', '\n\n\n\nBahia', '\n\n\n\nFamalicão', '\n\n\n\nAtiker Konyaspor', '\n\n\n\nPeñarol', '\n\n\n\nDC United', '\n\n\n\nNew York City FC', '\n\n\n\nArgentinos Juniors', '\n\n\n\nU.N.A.M.', '\n\n\n\nMinnesota United FC', '\n\n\n\nNottingham Forest', '\n\n\n\nIndependiente del Valle', '\n\n\n\nDefensa y Justicia', '\n\n\n\nPhiladelphia Union', "\n\n\n\nNewell's Old Boys", '\n\n\n\nRosario Central', '\n\n\n\nAtlético de San Luis', '\n\n\n\nFC Dallas', '\n\n\n\nDenizlispor', '\n\n\n\nYeni Malatyaspor', '\n\n\n\nClub Atlético Colón', '\n\n\n\nHouston Dynamo', '\n\n\n\nMillwall', '\n\n\n\nClube Sport Marítimo', '\n\n\n\nMKE Ankaragücü', '\n\n\n\nGuadalajara', '\n\n\n\nBoavista FC', '\n\n\n\nSporting Kansas City', '\n\n\n\nAtlético Nacional', '\n\n\n\nCD Lugo', '\n\n\n\nNew York Red Bulls', '\n\n\n\nPAOK', '\n\n\n\nSC Heerenveen', '\n\n\n\nDaegu FC', '\n\n\n\nPreston North End', '\n\n\n\nDamac FC', '\n\n\n\nBrescia', '\n\n\n\nVfL Bochum 1848', '\n\n\n\nVancouver Whitecaps FC', '\n\n\n\nSV Sandhausen', '\n\n\n\nRoyal Excel Mouscron', '\n\n\n\nDijon FCO', '\n\n\n\nMolde FK', '\n\n\n\nPanathinaikos FC', '\n\n\n\nLASK Linz', '\n\n\n\nUlsan Hyundai FC', '\n\n\n\nReal Salt Lake', '\n\n\n\nFC Luzern', '\n\n\n\nViktoria Plzeň', '\n\n\n\nOrlando Pirates', '\n\n\n\nAl Ittihad', '\n\n\n\n1. 
FC Nürnberg', '\n\n\n\nStandard de Liège', '\n\n\n\nRayo Vallecano', '\n\n\n\nClub Libertad', '\n\n\n\nJeonbuk Hyundai Motors', '\n\n\n\nWestern Sydney Wanderers', '\n\n\n\nUnión de Santa Fe', '\n\n\n\nFC Sion', '\n\n\n\nSydney FC', '\n\n\n\nHannover 96', '\n\n\n\nReal Sporting de Gijón', '\n\n\n\nFC Cincinnati', '\n\n\n\nESTAC Troyes', '\n\n\n\nSan Jose Earthquakes', '\n\n\n\nClub Guaraní', '\n\n\n\nFC Lorient', '\n\n\n\nGençlerbirliği SK', '\n\n\n\nSporting de Charleroi', '\n\n\n\nWisła Kraków', '\n\n\n\nAl Taawoun', '\n\n\n\nReading', '\n\n\n\nGazişehir Gaziantep F.K.', '\n\n\n\nBlackburn Rovers', '\n\n\n\nKashiwa Reysol', '\n\n\n\nFK Bodø/Glimt', '\n\n\n\nWillem II', '\n\n\n\nVitesse', '\n\n\n\nGil Vicente FC', '\n\n\n\nSønderjyskE', '\n\n\n\nYokohama F. Marinos', '\n\n\n\nFC St. Gallen', '\n\n\n\nClub Atlético Aldosivi', '\n\n\n\nGöztepe SK', '\n\n\n\nHolstein Kiel', '\n\n\n\nFC Juárez', '\n\n\n\nFCSB (Steaua)', '\n\n\n\nEttifaq FC', '\n\n\n\nKAS Eupen', '\n\n\n\nFC Tokyo', '\n\n\n\nMelbourne City FC', '\n\n\n\nUniversitatea Craiova', '\n\n\n\nCD Tondela', '\n\n\n\nWolfsberger AC', '\n\n\n\nCeará Sporting Club', '\n\n\n\nRiver Plate Asunción', '\n\n\n\nStoke City', '\n\n\n\nParis FC', '\n\n\n\nIFK Norrköping', '\n\n\n\nCrotone', '\n\n\n\nCardiff City', '\n\n\n\nAmérica de Cali', '\n\n\n\nAl Faisaly', '\n\n\n\nIndependiente Medellín', '\n\n\n\nPortimonense SC', '\n\n\n\nCD Tenerife', '\n\n\n\nHeracles Almelo', '\n\n\n\nNashville SC', '\n\n\n\nSK Rapid Wien', '\n\n\n\nKawasaki Frontale', '\n\n\n\nClub Necaxa', '\n\n\n\nChievo Verona', '\n\n\n\nLech Poznań', '\n\n\n\nAIK', '\n\n\n\nFK Austria Wien', '\n\n\n\nQuerétaro', '\n\n\n\n1. 
FC Heidenheim 1846', '\n\n\n\nLegia Warszawa', '\n\n\n\nClub Atlético Banfield', '\n\n\n\nOs Belenenses', '\n\n\n\nHuddersfield Town', '\n\n\n\nKSV Cercle Brugge', '\n\n\n\nLDU Quito', '\n\n\n\nGimnasia y Esgrima La Plata', '\n\n\n\nGamba Osaka', '\n\n\n\nMiddlesbrough', '\n\n\n\nServette FC', '\n\n\n\nEmpoli', '\n\n\n\nReal Oviedo', '\n\n\n\nKasimpaşa SK', '\n\n\n\nCFR Cluj', '\n\n\n\nMillonarios FC', '\n\n\n\nFC Paços de Ferreira', '\n\n\n\nSint-Truidense VV', '\n\n\n\nAD Alcorcón', '\n\n\n\nColorado Rapids', '\n\n\n\nDinamo Bucureşti', '\n\n\n\nCerezo Osaka', '\n\n\n\nFC Emmen', '\n\n\n\nSheffield Wednesday', '\n\n\n\nFC Erzgebirge Aue', '\n\n\n\nRaków Częstochowa', '\n\n\n\nAtlético Tucumán', '\n\n\n\nSV Darmstadt 98', '\n\n\n\nBristol City', '\n\n\n\nOud-Heverlee Leuven', '\n\n\n\nBarnsley', '\n\n\n\nQueens Park Rangers', '\n\n\n\nSangju Sangmu FC', '\n\n\n\nHelsingborgs IF', '\n\n\n\nAC Ajaccio', '\n\n\n\nOceânico FC', '\n\n\n\nClub Atlético Talleres', '\n\n\n\nAl Hazem', '\n\n\n\nSC Paderborn 07', '\n\n\n\nClub Atlético Huracán', '\n\n\n\nKV Kortrijk', '\n\n\n\nEl Nacional', '\n\n\n\nMazatlán FC', '\n\n\n\nAberdeen', '\n\n\n\nCD Mirandés', '\n\n\n\nSV Zulte-Waregem', '\n\n\n\nPiast Gliwice', '\n\n\n\nDjurgårdens IF', '\n\n\n\nCentral Córdoba', '\n\n\n\nWellington Phoenix', '\n\n\n\nDeportivo Binacional', '\n\n\n\nPatronato', '\n\n\n\nSpVgg Greuther Fürth', '\n\n\n\nIFK Göteborg', '\n\n\n\nFC Cartagena', '\n\n\n\nSanta Clara', '\n\n\n\nFC Twente', '\n\n\n\nFarense', '\n\n\n\nADO Den Haag', '\n\n\n\nAJ Auxerre', '\n\n\n\nAl Fateh', '\n\n\n\nValenciennes FC', '\n\n\n\nHokkaido Consadole Sapporo', '\n\n\n\nFC St. Pauli', '\n\n\n\nAlbacete BP', '\n\n\n\n1. 
FC Kaiserslautern', '\n\n\n\nAl Fayha', '\n\n\n\nIncheon United FC', '\n\n\n\nKV Mechelen', '\n\n\n\nPohang Steelers', '\n\n\n\nCD Nacional', '\n\n\n\nUnión La Calera', '\n\n\n\nBB Erzurumspor', '\n\n\n\nKashima Antlers', '\n\n\n\nArsenal de Sarandí', '\n\n\n\nHJK Helsinki', '\n\n\n\nKarlsruher SC', '\n\n\n\nAarhus GF', '\n\n\n\nFC Zürich', '\n\n\n\nBarcelona Sporting Club', '\n\n\n\nSG Dynamo Dresden', '\n\n\n\nSanfrecce Hiroshima', '\n\n\n\nAl Raed', '\n\n\n\nSD Ponferradina', '\n\n\n\nBrøndby IF', '\n\n\n\nLechia Gdańsk', '\n\n\n\nFC Viitorul', '\n\n\n\nSK Sturm Graz', '\n\n\n\nIF Elfsborg', '\n\n\n\nGangwon FC', '\n\n\n\nMelgar FBC', '\n\n\n\nClub Atlético Grau', '\n\n\n\nJagiellonia Białystok', '\n\n\n\nWaasland-Beveren', '\n\n\n\nVfL Osnabrück', '\n\n\n\nNagoya Grampus', '\n\n\n\nCF Fuenlabrada', '\n\n\n\nShamrock Rovers', '\n\n\n\nShimizu S-Pulse', '\n\n\n\nRKC Waalwijk', '\n\n\n\nVålerenga Fotball', '\n\n\n\nClermont Foot 63', '\n\n\n\nFC Ingolstadt 04', '\n\n\n\nFC Würzburger Kickers', '\n\n\n\nGórnik Zabrze', '\n\n\n\nCharlton Athletic', '\n\n\n\nBeerschot AC', '\n\n\n\nCoquimbo Unido', '\n\n\n\nViktoria Köln', '\n\n\n\nHull City', '\n\n\n\nMálaga CF', '\n\n\n\nFC Nordsjælland', '\n\n\n\nUD Las Palmas', '\n\n\n\nVVV-Venlo', '\n\n\n\nGwangJu FC', '\n\n\n\nChamois Niortais Football Club', '\n\n\n\nFC Lugano', '\n\n\n\nCusco FC', '\n\n\n\nSparta Rotterdam', '\n\n\n\nSol de América', '\n\n\n\nPEC Zwolle', '\n\n\n\nPogoń Szczecin', '\n\n\n\nDoncaster Rovers', '\n\n\n\nKristiansund BK', '\n\n\n\nLiverpool Fútbol Club', '\n\n\n\nClub Bolívar', '\n\n\n\nUrawa Red Diamonds', '\n\n\n\nSK Brann', '\n\n\n\nEintracht Braunschweig', '\n\n\n\nNacional Asunción', '\n\n\n\nMelbourne Victory', '\n\n\n\nKilmarnock', '\n\n\n\nDundalk', '\n\n\n\nSSV Jahn Regensburg', '\n\n\n\nAbha Club', '\n\n\n\nClub Atlético Tigre', '\n\n\n\nSuwon Samsung Bluewings', '\n\n\n\nShonan Bellmare', '\n\n\n\nSD Aucas', '\n\n\n\nEmelec', '\n\n\n\nFortuna Sittard', '\n\n\n\nLa Berrichonne de 
Châteauroux', '\n\n\n\nŚląsk Wrocław', '\n\n\n\nCoventry City', '\n\n\n\nSunderland', '\n\n\n\nAlianza Lima', '\n\n\n\nHammarby IF', '\n\n\n\nCracovia', '\n\n\n\nCE Sabadell FC', '\n\n\n\nOita Trinita', '\n\n\n\nBK Häcken', '\n\n\n\nLuton Town', '\n\n\n\nLe Havre AC', '\n\n\n\nIK Sirius', '\n\n\n\nZagłębie Lubin', '\n\n\n\nGrenoble Foot 38', '\n\n\n\nMacarthur FC', '\n\n\n\nRodez Aveyron Football', '\n\n\n\nVegalta Sendai', '\n\n\n\nMotherwell', '\n\n\n\nSV Wehen Wiesbaden', '\n\n\n\nAudax Italiano', '\n\n\n\nGaz Metan Mediaş', '\n\n\n\nPortsmouth', '\n\n\n\nHibernian', '\n\n\n\nAlways Ready', '\n\n\n\nUD Logroñés', '\n\n\n\nFC Voluntari', '\n\n\n\nFC Hermannstadt', '\n\n\n\nAS Nancy Lorraine', '\n\n\n\nRotherham United', '\n\n\n\nFleetwood Town', '\n\n\n\nJorge Wilstermann', '\n\n\n\nOdense Boldklub', '\n\n\n\nMSV Duisburg', '\n\n\n\nFC Sochaux-Montbéliard', '\n\n\n\nMilton Keynes Dons', '\n\n\n\nRiver Plate Montevideo', '\n\n\n\nDelfín SC', '\n\n\n\nUniversidad Católica del Ecuador', '\n\n\n\nDeportivo Pasto', '\n\n\n\nKV Oostende', '\n\n\n\nClub Plaza Colonia', '\n\n\n\nWigan Athletic', '\n\n\n\nFC Botoşani', '\n\n\n\nC.D. Castellón', '\n\n\n\nSCR Altach', '\n\n\n\nÖrebro SK', '\n\n\n\nTSV Hartberg', '\n\n\n\nStabæk Fotball', '\n\n\n\nWycombe Wanderers', '\n\n\n\nSt. 
Johnstone FC', '\n\n\n\nOxford United', '\n\n\n\nLincoln City', '\n\n\n\nRanders FC', '\n\n\n\nViking FK', '\n\n\n\nFC Chambly Oise', '\n\n\n\nDundee United', '\n\n\n\nOriente Petrolero', '\n\n\n\nOdds BK', '\n\n\n\nNacional Potosí', '\n\n\n\nSepsi OSK', '\n\n\n\nFC Hansa Rostock', '\n\n\n\nLyngby BK', '\n\n\n\nSpVgg Unterhaching', '\n\n\n\nBrisbane Roar', '\n\n\n\nSeongnam FC', '\n\n\n\nSV Ried', '\n\n\n\nCD Huachipato', '\n\n\n\nWSG Tirol', '\n\n\n\nPodbeskidzie Bielsko-Biała', '\n\n\n\nPeterborough United', '\n\n\n\nClub Blooming', '\n\n\n\nWisła Płock', '\n\n\n\nBusan IPark', '\n\n\n\nFSV Zwickau', '\n\n\n\nMjøndalen IF', '\n\n\n\nVejle Boldklub', '\n\n\n\nPau FC', '\n\n\n\nFC Lausanne-Sport', '\n\n\n\nEstudiantes de Mérida', '\n\n\n\nSagan Tosu', '\n\n\n\nSV Waldhof Mannheim', '\n\n\n\nSport Huancayo', '\n\n\n\nHallescher FC', '\n\n\n\nKalmar FF', '\n\n\n\nSt. Mirren', '\n\n\n\nSKN St. Pölten', '\n\n\n\nSarpsborg 08 FF', '\n\n\n\nShrewsbury', '\n\n\n\nTSV 1860 München', '\n\n\n\nSV Meppen', '\n\n\n\nIpswich Town', '\n\n\n\nIK Start', '\n\n\n\nAdelaide United', '\n\n\n\nStal Mielec', '\n\n\n\nCentro Atlético Fénix', '\n\n\n\nNewcastle Jets', '\n\n\n\nLivingston FC', '\n\n\n\nAC Mineros de Guayana', '\n\n\n\nAFC Wimbledon', '\n\n\n\nBurton Albion', '\n\n\n\nAl Adalah', '\n\n\n\nWarta Poznań', '\n\n\n\nUSL Dunkerque', '\n\n\n\nBristol Rovers', '\n\n\n\nUTA Arad', '\n\n\n\nÖstersunds FK', '\n\n\n\nSportivo Luqueño', '\n\n\n\nPolitehnica Iaşi', '\n\n\n\nSalford City', '\n\n\n\nCentral Coast Mariners', '\n\n\n\nCaracas FC', '\n\n\n\nAC Horsens', '\n\n\n\nPlymouth Argyle', '\n\n\n\nAalesunds FK', '\n\n\n\nCrewe Alexandra', '\n\n\n\nChindia Târgovişte', '\n\n\n\nOldham Athletic', '\n\n\n\nYokohama FC', '\n\n\n\nBlackpool', '\n\n\n\nNorthampton Town', '\n\n\n\nBayern München II', '\n\n\n\nFK Haugesund', '\n\n\n\nKFC Uerdingen 05', '\n\n\n\nForest Green Rovers', '\n\n\n\nCheltenham Town', '\n\n\n\nStrømsgodset IF', '\n\n\n\nMjällby AIF', '\n\n\n\nExeter City', 
'\n\n\n\nCambridge United', '\n\n\n\nGrimsby Town', '\n\n\n\nFC Vaduz', '\n\n\n\nTürkgücü München', '\n\n\n\nVfB Lübeck', '\n\n\n\nBolton Wanderers', '\n\n\n\nBradford City', '\n\n\n\n1. FC Magdeburg', '\n\n\n\nFC Admira Wacker Mödling', '\n\n\n\nHamilton Academical FC', '\n\n\n\nAragua FC', '\n\n\n\nAccrington Stanley', '\n\n\n\nSwindon Town', '\n\n\n\nCarlisle United', '\n\n\n\nFC Argeș', '\n\n\n\nPort Vale', '\n\n\n\nAcademica Clinceni', '\n\n\n\nRochdale', '\n\n\n\nGillingham', '\n\n\n\nTranmere Rovers', '\n\n\n\nSouthend United', '\n\n\n\n1. FC Saarbrücken', '\n\n\n\nColchester United', '\n\n\n\nMansfield Town', '\n\n\n\nFalkenbergs FF', '\n\n\n\nSC Verl', '\n\n\n\nVarbergs BoIS', '\n\n\n\nCrawley Town', '\n\n\n\nSandefjord Fotball', '\n\n\n\nNewport County', '\n\n\n\nBohemian FC', '\n\n\n\nRoss County FC', '\n\n\n\nScunthorpe United', '\n\n\n\nMorecambe', '\n\n\n\nWalsall', '\n\n\n\nLeyton Orient', '\n\n\n\nStevenage', '\n\n\n\nBarrow', '\n\n\n\nDerry City', '\n\n\n\nLlaneros de Guanare', '\n\n\n\nSligo Rovers', '\n\n\n\nZamora FC', "\n\n\n\nSt. Patrick's Athletic", '\n\n\n\nCork City', '\n\n\n\nShelbourne FC', '\n\n\n\nHarrogate Town', '\n\n\n\nWaterford FC', '\n\n\n\nFinn Harps'], dtype=object)
# Strip leading/trailing whitespace, which removes the '\n\n\n\n' prefix
# seen in the raw club names
fdc['Club'] = fdc['Club'].str.strip()
# List the distinct values again to confirm the cleanup
fdc['Club'].unique()
array(['FC Barcelona', 'Juventus', 'Atlético Madrid', 'Manchester City', 'Paris Saint-Germain', 'FC Bayern München', 'Liverpool', 'Real Madrid', 'Chelsea', 'Tottenham Hotspur', 'Inter', 'Napoli', 'Borussia Dortmund', 'Manchester United', 'Arsenal', 'Lazio', 'Leicester City', 'Borussia Mönchengladbach', 'Real Sociedad', 'Atalanta', 'Olympique Lyonnais', 'Milan', 'Villarreal CF', 'RB Leipzig', 'Cagliari', 'Ajax', 'SL Benfica', 'AS Monaco', 'Wolverhampton Wanderers', 'Everton', 'Fiorentina', 'FC Porto', 'RC Celta', 'Torino', 'Sevilla FC', 'Grêmio', 'Real Betis', 'Roma', 'Newcastle United', 'Eintracht Frankfurt', 'Valencia CF', 'Medipol Başakşehir FK', 'Inter Miami', 'Bayer 04 Leverkusen', 'Levante UD', 'Crystal Palace', 'Athletic Club de Bilbao', 'Shanghai SIPG FC', 'VfL Wolfsburg', 'Guangzhou Evergrande Taobao FC', 'Al Shabab', 'Olympique de Marseille', 'Los Angeles FC', 'Beijing Sinobo Guoan FC', 'Getafe CF', 'SV Werder Bremen', 'TSG 1899 Hoffenheim', 'LOSC Lille', 'Deportivo Alavés', 'Burnley', 'Leeds United', 'Fulham', 'PFC CSKA Moscow', 'Genoa', 'Galatasaray SK', 'Atlanta United', 'Atlético Mineiro', 'Flamengo', 'Palmeiras', 'No Club', 'São Paulo', 'West Ham United', 'SD Eibar', 'River Plate', 'OGC Nice', 'Real Valladolid CF', 'Sporting CP', 'PSV', 'Feyenoord', 'Club Brugge KV', 'Shakhtar Donetsk', 'Vissel Kobe', 'Boca Juniors', '1. 
FC Union Berlin', 'Dalian YiFang FC', 'AS Saint-Étienne', 'Sassuolo', 'Tigres U.A.N.L.', 'Dinamo Zagreb', 'Dynamo Kyiv', 'Fluminense', 'Udinese', 'Aston Villa', 'Stade Rennais FC', 'Beşiktaş JK', 'Brighton & Hove Albion', 'Hebei China Fortune FC', 'Southampton', 'Lokomotiv Moscow', 'Jiangsu Suning FC', 'Al Hilal', 'West Bromwich Albion', 'Fenerbahçe SK', 'FC Groningen', 'Guangzhou R&F FC', 'Orlando City SC', 'Sampdoria', 'Stade de Reims', 'Sheffield United', 'Santos', 'Internacional', 'Coritiba', 'CA Osasuna', 'LA Galaxy', 'Al Nassr', 'SC Freiburg', 'SC Braga', 'Olympiacos CFP', 'Toronto FC', 'Granada CF', 'FC Schalke 04', 'RC Strasbourg Alsace', 'Hertha BSC', 'AZ Alkmaar', 'Seattle Sounders FC', 'Montpellier HSC', 'Shanghai Greenland Shenhua FC', 'Hamburger SV', 'Girona FC', 'Racing Club', 'Portland Timbers', 'FC Girondins de Bordeaux', 'Watford', 'Estudiantes de La Plata', 'Club América', 'Vitória Guimarães', 'SK Slavia Praha', 'Trabzonspor', 'RCD Espanyol', 'FC Nantes', 'Angers SCO', 'Cruz Azul', 'BSC Young Boys', 'Club León', 'Spartak Moscow', 'Sivasspor', 'Elche CF', 'FC Augsburg', 'Wuhan Zall', 'Parma', 'Royal Antwerp FC', 'Monterrey', 'San Lorenzo de Almagro', 'AC Monza', 'Club Atlético Lanús', 'SD Huesca', 'Derby County', 'Kayserispor', 'RSC Anderlecht', 'CD Leganés', 'Vélez Sarsfield', 'Stade Brestois 29', 'Tianjin TEDA FC', 'Birmingham City', 'Hellas Verona', '1. FC Köln', 'Al Wehda', 'Bournemouth', 'Alanyaspor', 'Bologna', 'Amiens SC', 'Celtic', 'Shijiazhuang Ever Bright F.C.', 'Club Atlas', 'Al Ahli', 'Norwich City', 'Fatih Karagümrük S.K.', 'Club Tijuana', 'Universidad Católica', 'Goiás', 'Benevento', 'Independiente', 'Qingdao Huanghai F.C.', 'Brentford', 'AEK Athens', 'Racing Club de Lens', 'Olimpia Asunción', 'Shenzhen FC', 'Çaykur Rizespor', 'KAA Gent', 'New England Revolution', 'Cádiz CF', '1. 
FSV Mainz 05', 'Vasco da Gama', 'FC Midtjylland', 'Al Ain FC', 'Santos Laguna', 'SPAL', 'Perth Glory', 'FC Utrecht', 'Montreal Impact', 'UD Almería', 'Fortaleza', 'Spezia', 'FC Basel 1893', 'FC København', 'Godoy Cruz', 'Deportivo Toluca', 'Nacional de Montevideo', 'Aalborg BK', 'Rio Ave FC', 'FC Metz', 'Chongqing Dangdai Lifan FC SWM Team', 'Rosenborg BK', 'FC Red Bull Salzburg', 'Botafogo', 'DSC Arminia Bielefeld', 'Club Athletico Paranaense', 'RCD Mallorca', 'Pachuca', 'Swansea City', 'KRC Genk', 'En Avant de Guingamp', 'Lecce', 'Nîmes Olympique', 'Colo-Colo', 'Stade Malherbe Caen', 'Columbus Crew SC', 'Junior FC', 'VfB Stuttgart', 'Moreirense FC', 'Rangers FC', 'Malmö FF', 'Shandong Luneng TaiShan FC', 'Sparta Praha', 'Hatayspor', 'Chicago Fire', 'Toulouse Football Club', 'FC Seoul', 'Antalyaspor', 'Deportivo Cali', 'Kaizer Chiefs', 'Real Zaragoza', 'Atlético Clube Goianiense', 'Puebla FC', 'Fortuna Düsseldorf', 'Western United FC', 'Astra Giurgiu', 'Henan Jianye FC', 'Bahia', 'Famalicão', 'Atiker Konyaspor', 'Peñarol', 'DC United', 'New York City FC', 'Argentinos Juniors', 'U.N.A.M.', 'Minnesota United FC', 'Nottingham Forest', 'Independiente del Valle', 'Defensa y Justicia', 'Philadelphia Union', "Newell's Old Boys", 'Rosario Central', 'Atlético de San Luis', 'FC Dallas', 'Denizlispor', 'Yeni Malatyaspor', 'Club Atlético Colón', 'Houston Dynamo', 'Millwall', 'Clube Sport Marítimo', 'MKE Ankaragücü', 'Guadalajara', 'Boavista FC', 'Sporting Kansas City', 'Atlético Nacional', 'CD Lugo', 'New York Red Bulls', 'PAOK', 'SC Heerenveen', 'Daegu FC', 'Preston North End', 'Damac FC', 'Brescia', 'VfL Bochum 1848', 'Vancouver Whitecaps FC', 'SV Sandhausen', 'Royal Excel Mouscron', 'Dijon FCO', 'Molde FK', 'Panathinaikos FC', 'LASK Linz', 'Ulsan Hyundai FC', 'Real Salt Lake', 'FC Luzern', 'Viktoria Plzeň', 'Orlando Pirates', 'Al Ittihad', '1. 
FC Nürnberg', 'Standard de Liège', 'Rayo Vallecano', 'Club Libertad', 'Jeonbuk Hyundai Motors', 'Western Sydney Wanderers', 'Unión de Santa Fe', 'FC Sion', 'Sydney FC', 'Hannover 96', 'Real Sporting de Gijón', 'FC Cincinnati', 'ESTAC Troyes', 'San Jose Earthquakes', 'Club Guaraní', 'FC Lorient', 'Gençlerbirliği SK', 'Sporting de Charleroi', 'Wisła Kraków', 'Al Taawoun', 'Reading', 'Gazişehir Gaziantep F.K.', 'Blackburn Rovers', 'Kashiwa Reysol', 'FK Bodø/Glimt', 'Willem II', 'Vitesse', 'Gil Vicente FC', 'SønderjyskE', 'Yokohama F. Marinos', 'FC St. Gallen', 'Club Atlético Aldosivi', 'Göztepe SK', 'Holstein Kiel', 'FC Juárez', 'FCSB (Steaua)', 'Ettifaq FC', 'KAS Eupen', 'FC Tokyo', 'Melbourne City FC', 'Universitatea Craiova', 'CD Tondela', 'Wolfsberger AC', 'Ceará Sporting Club', 'River Plate Asunción', 'Stoke City', 'Paris FC', 'IFK Norrköping', 'Crotone', 'Cardiff City', 'América de Cali', 'Al Faisaly', 'Independiente Medellín', 'Portimonense SC', 'CD Tenerife', 'Heracles Almelo', 'Nashville SC', 'SK Rapid Wien', 'Kawasaki Frontale', 'Club Necaxa', 'Chievo Verona', 'Lech Poznań', 'AIK', 'FK Austria Wien', 'Querétaro', '1. 
FC Heidenheim 1846', 'Legia Warszawa', 'Club Atlético Banfield', 'Os Belenenses', 'Huddersfield Town', 'KSV Cercle Brugge', 'LDU Quito', 'Gimnasia y Esgrima La Plata', 'Gamba Osaka', 'Middlesbrough', 'Servette FC', 'Empoli', 'Real Oviedo', 'Kasimpaşa SK', 'CFR Cluj', 'Millonarios FC', 'FC Paços de Ferreira', 'Sint-Truidense VV', 'AD Alcorcón', 'Colorado Rapids', 'Dinamo Bucureşti', 'Cerezo Osaka', 'FC Emmen', 'Sheffield Wednesday', 'FC Erzgebirge Aue', 'Raków Częstochowa', 'Atlético Tucumán', 'SV Darmstadt 98', 'Bristol City', 'Oud-Heverlee Leuven', 'Barnsley', 'Queens Park Rangers', 'Sangju Sangmu FC', 'Helsingborgs IF', 'AC Ajaccio', 'Oceânico FC', 'Club Atlético Talleres', 'Al Hazem', 'SC Paderborn 07', 'Club Atlético Huracán', 'KV Kortrijk', 'El Nacional', 'Mazatlán FC', 'Aberdeen', 'CD Mirandés', 'SV Zulte-Waregem', 'Piast Gliwice', 'Djurgårdens IF', 'Central Córdoba', 'Wellington Phoenix', 'Deportivo Binacional', 'Patronato', 'SpVgg Greuther Fürth', 'IFK Göteborg', 'FC Cartagena', 'Santa Clara', 'FC Twente', 'Farense', 'ADO Den Haag', 'AJ Auxerre', 'Al Fateh', 'Valenciennes FC', 'Hokkaido Consadole Sapporo', 'FC St. Pauli', 'Albacete BP', '1. 
FC Kaiserslautern', 'Al Fayha', 'Incheon United FC', 'KV Mechelen', 'Pohang Steelers', 'CD Nacional', 'Unión La Calera', 'BB Erzurumspor', 'Kashima Antlers', 'Arsenal de Sarandí', 'HJK Helsinki', 'Karlsruher SC', 'Aarhus GF', 'FC Zürich', 'Barcelona Sporting Club', 'SG Dynamo Dresden', 'Sanfrecce Hiroshima', 'Al Raed', 'SD Ponferradina', 'Brøndby IF', 'Lechia Gdańsk', 'FC Viitorul', 'SK Sturm Graz', 'IF Elfsborg', 'Gangwon FC', 'Melgar FBC', 'Club Atlético Grau', 'Jagiellonia Białystok', 'Waasland-Beveren', 'VfL Osnabrück', 'Nagoya Grampus', 'CF Fuenlabrada', 'Shamrock Rovers', 'Shimizu S-Pulse', 'RKC Waalwijk', 'Vålerenga Fotball', 'Clermont Foot 63', 'FC Ingolstadt 04', 'FC Würzburger Kickers', 'Górnik Zabrze', 'Charlton Athletic', 'Beerschot AC', 'Coquimbo Unido', 'Viktoria Köln', 'Hull City', 'Málaga CF', 'FC Nordsjælland', 'UD Las Palmas', 'VVV-Venlo', 'GwangJu FC', 'Chamois Niortais Football Club', 'FC Lugano', 'Cusco FC', 'Sparta Rotterdam', 'Sol de América', 'PEC Zwolle', 'Pogoń Szczecin', 'Doncaster Rovers', 'Kristiansund BK', 'Liverpool Fútbol Club', 'Club Bolívar', 'Urawa Red Diamonds', 'SK Brann', 'Eintracht Braunschweig', 'Nacional Asunción', 'Melbourne Victory', 'Kilmarnock', 'Dundalk', 'SSV Jahn Regensburg', 'Abha Club', 'Club Atlético Tigre', 'Suwon Samsung Bluewings', 'Shonan Bellmare', 'SD Aucas', 'Emelec', 'Fortuna Sittard', 'La Berrichonne de Châteauroux', 'Śląsk Wrocław', 'Coventry City', 'Sunderland', 'Alianza Lima', 'Hammarby IF', 'Cracovia', 'CE Sabadell FC', 'Oita Trinita', 'BK Häcken', 'Luton Town', 'Le Havre AC', 'IK Sirius', 'Zagłębie Lubin', 'Grenoble Foot 38', 'Macarthur FC', 'Rodez Aveyron Football', 'Vegalta Sendai', 'Motherwell', 'SV Wehen Wiesbaden', 'Audax Italiano', 'Gaz Metan Mediaş', 'Portsmouth', 'Hibernian', 'Always Ready', 'UD Logroñés', 'FC Voluntari', 'FC Hermannstadt', 'AS Nancy Lorraine', 'Rotherham United', 'Fleetwood Town', 'Jorge Wilstermann', 'Odense Boldklub', 'MSV Duisburg', 'FC Sochaux-Montbéliard', 'Milton Keynes 
Dons', 'River Plate Montevideo', 'Delfín SC', 'Universidad Católica del Ecuador', 'Deportivo Pasto', 'KV Oostende', 'Club Plaza Colonia', 'Wigan Athletic', 'FC Botoşani', 'C.D. Castellón', 'SCR Altach', 'Örebro SK', 'TSV Hartberg', 'Stabæk Fotball', 'Wycombe Wanderers', 'St. Johnstone FC', 'Oxford United', 'Lincoln City', 'Randers FC', 'Viking FK', 'FC Chambly Oise', 'Dundee United', 'Oriente Petrolero', 'Odds BK', 'Nacional Potosí', 'Sepsi OSK', 'FC Hansa Rostock', 'Lyngby BK', 'SpVgg Unterhaching', 'Brisbane Roar', 'Seongnam FC', 'SV Ried', 'CD Huachipato', 'WSG Tirol', 'Podbeskidzie Bielsko-Biała', 'Peterborough United', 'Club Blooming', 'Wisła Płock', 'Busan IPark', 'FSV Zwickau', 'Mjøndalen IF', 'Vejle Boldklub', 'Pau FC', 'FC Lausanne-Sport', 'Estudiantes de Mérida', 'Sagan Tosu', 'SV Waldhof Mannheim', 'Sport Huancayo', 'Hallescher FC', 'Kalmar FF', 'St. Mirren', 'SKN St. Pölten', 'Sarpsborg 08 FF', 'Shrewsbury', 'TSV 1860 München', 'SV Meppen', 'Ipswich Town', 'IK Start', 'Adelaide United', 'Stal Mielec', 'Centro Atlético Fénix', 'Newcastle Jets', 'Livingston FC', 'AC Mineros de Guayana', 'AFC Wimbledon', 'Burton Albion', 'Al Adalah', 'Warta Poznań', 'USL Dunkerque', 'Bristol Rovers', 'UTA Arad', 'Östersunds FK', 'Sportivo Luqueño', 'Politehnica Iaşi', 'Salford City', 'Central Coast Mariners', 'Caracas FC', 'AC Horsens', 'Plymouth Argyle', 'Aalesunds FK', 'Crewe Alexandra', 'Chindia Târgovişte', 'Oldham Athletic', 'Yokohama FC', 'Blackpool', 'Northampton Town', 'Bayern München II', 'FK Haugesund', 'KFC Uerdingen 05', 'Forest Green Rovers', 'Cheltenham Town', 'Strømsgodset IF', 'Mjällby AIF', 'Exeter City', 'Cambridge United', 'Grimsby Town', 'FC Vaduz', 'Türkgücü München', 'VfB Lübeck', 'Bolton Wanderers', 'Bradford City', '1. 
FC Magdeburg', 'FC Admira Wacker Mödling', 'Hamilton Academical FC', 'Aragua FC', 'Accrington Stanley', 'Swindon Town', 'Carlisle United', 'FC Argeș', 'Port Vale', 'Academica Clinceni', 'Rochdale', 'Gillingham', 'Tranmere Rovers', 'Southend United', '1. FC Saarbrücken', 'Colchester United', 'Mansfield Town', 'Falkenbergs FF', 'SC Verl', 'Varbergs BoIS', 'Crawley Town', 'Sandefjord Fotball', 'Newport County', 'Bohemian FC', 'Ross County FC', 'Scunthorpe United', 'Morecambe', 'Walsall', 'Leyton Orient', 'Stevenage', 'Barrow', 'Derry City', 'Llaneros de Guanare', 'Sligo Rovers', 'Zamora FC', "St. Patrick's Athletic", 'Cork City', 'Shelbourne FC', 'Harrogate Town', 'Waterford FC', 'Finn Harps'], dtype=object)
Next we use the 'Contract' column to split the players into three groups: under contract, on loan, or free.
# Check how pandas typed the 'Contract' column
fdc['Contract'].dtype
dtype('O')
# List the distinct raw 'Contract' values to see which text formats occur
fdc['Contract'].unique()
array(['2004 ~ 2021', '2018 ~ 2022', '2014 ~ 2023', '2015 ~ 2023', '2017 ~ 2022', '2017 ~ 2023', '2018 ~ 2024', '2014 ~ 2022', '2018 ~ 2023', '2016 ~ 2023', '2013 ~ 2023', '2011 ~ 2023', '2009 ~ 2022', '2005 ~ 2021', '2011 ~ 2021', '2015 ~ 2022', '2017 ~ 2024', '2010 ~ 2024', '2012 ~ 2021', '2019 ~ 2024', '2015 ~ 2024', '2017 ~ 2025', '2020 ~ 2025', '2019 ~ 2023', '2008 ~ 2023', '2015 ~ 2021', '2020 ~ 2022', '2012 ~ 2022', '2016 ~ 2025', '2013 ~ 2022', '2011 ~ 2022', '2012 ~ 2024', '2016 ~ 2021', '2012 ~ 2023', '2008 ~ 2022', '2019 ~ 2022', '2017 ~ 2021', '2013 ~ 2024', '2020 ~ 2024', '2010 ~ 2022', '2020 ~ 2021', '2011 ~ 2024', '2020 ~ 2023', '2014 ~ 2024', '2013 ~ 2026', '2016 ~ 2022', '2010 ~ 2021', '2013 ~ 2021', '2019 ~ 2025', '2018 ~ 2025', '2016 ~ 2024', '2018 ~ 2021', '2009 ~ 2024', '2007 ~ 2022', 'Jun 30, 2021 On Loan', '2009 ~ 2021', '2019 ~ 2021', '2019 ~ 2026', 'Free', '2012 ~ 2028', '2010 ~ 2023', '2014 ~ 2021', '2015 ~ 2025', '2014 ~ 2026', '2012 ~ 2025', '2017 ~ 2020', '2002 ~ 2022', '2020 ~ 2027', '2013 ~ 2025', 'Dec 31, 2020 On Loan', '2019 ~ 2020', '2011 ~ 2025', '2016 ~ 2020', '2007 ~ 2021', '2020 ~ 2026', '2010 ~ 2025', '2009 ~ 2023', '2008 ~ 2021', '2020 ~ 2020', '2016 ~ 2026', 'Jan 30, 2021 On Loan', '2012 ~ 2020', '2014 ~ 2025', 'Jun 30, 2022 On Loan', '2015 ~ 2020', 'May 31, 2021 On Loan', '2018 ~ 2020', '2014 ~ 2020', '2013 ~ 2020', '2006 ~ 2024', 'Jul 5, 2021 On Loan', 'Dec 31, 2021 On Loan', '2004 ~ 2025', '2011 ~ 2020', 'Jul 1, 2021 On Loan', 'Jan 1, 2021 On Loan', '2006 ~ 2023', 'Aug 31, 2021 On Loan', '2006 ~ 2021', '2005 ~ 2023', '2003 ~ 2020', '2009 ~ 2020', '2002 ~ 2020', '2005 ~ 2020', '2005 ~ 2022', 'Jan 31, 2021 On Loan', '2010 ~ 2020', 'Dec 30, 2021 On Loan', '2008 ~ 2020', '2007 ~ 2020', '2003 ~ 2021', 'Jun 23, 2021 On Loan', 'Jan 3, 2021 On Loan', 'Nov 27, 2021 On Loan', '2002 ~ 2021', 'Jan 17, 2021 On Loan', 'Jun 30, 2023 On Loan', '1998 ~ 2021', '2003 ~ 2022', '2007 ~ 2023', 'Jul 31, 2021 On Loan', 'Nov 22, 2020 On Loan', 
'May 31, 2022 On Loan', '2006 ~ 2020', 'Dec 30, 2020 On Loan', '2007 ~ 2025', 'Jan 4, 2021 On Loan', 'Nov 30, 2020 On Loan', '2004 ~ 2020', '2009 ~ 2025', 'Aug 1, 2021 On Loan'], dtype=object)
# Let's look at the 'Contract' values that mark a player as 'On Loan' or 'Free'.
# A vectorized substring match replaces the row-by-row iterrows() scan, which
# built a full row object per player just to read one column.
# na=False makes a missing cell count as "no match" instead of raising.
loan_or_free = fdc['Contract'].str.contains('On Loan|Free', na=False)
for contract in fdc.loc[loan_or_free, 'Contract']:
    print(contract)
Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Free Free Free Free Free Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Free Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Free Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jan 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Free Free Free Jun 30, 2021 On Loan Jun 30, 2022 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 
30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jul 5, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2021 On Loan Free Dec 31, 2020 On Loan Dec 31, 2020 On Loan May 31, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Free Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan May 31, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan May 31, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jul 1, 2021 On Loan Free Jun 30, 2021 On Loan Jun 
30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2021 On Loan Free May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Aug 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free May 31, 2021 On Loan Jun 30, 2021 On Loan Free Free Dec 31, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan 
Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Free Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Jun 30, 2021 On Loan Free Dec 31, 2021 On Loan Free Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Free Free Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2022 On Loan Dec 31, 2021 On Loan Free Jun 30, 2021 
On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Dec 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Dec 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 23, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan May 31, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Free Jun 30, 2021 On Loan 
Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Dec 31, 2021 On Loan Dec 31, 2020 On Loan Free Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Free Jan 31, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 3, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Nov 27, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan 
Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Free Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 27, 2021 On Loan Free Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 23, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 17, 2021 On Loan Free Jun 30, 2021 On Loan Jan 31, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2023 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On 
Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan May 31, 2021 On Loan Dec 31, 2021 On Loan Free Jun 30, 2021 On Loan May 31, 2021 On Loan Free Jun 23, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 23, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 23, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 3, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jul 31, 2021 On Loan Dec 31, 2020 On Loan Nov 27, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Nov 22, 2020 On Loan May 31, 2021 On Loan Jun 30, 2021 On Loan May 31, 2022 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Free Jun 30, 
2021 On Loan Jun 30, 2021 On Loan Nov 27, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 22, 2020 On Loan Jun 30, 2021 On Loan Free Nov 22, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 3, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 22, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 23, 2021 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Free Jun 30, 2021 On Loan Dec 30, 2020 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Dec 31, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 27, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 
2021 On Loan Jan 4, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Free Free Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 30, 2020 On Loan May 31, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Free May 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Dec 31, 2021 On Loan Jan 31, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2020 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 22, 2020 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2022 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 
30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Dec 31, 2020 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 23, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Jan 31, 2021 On Loan Jun 30, 2021 On Loan Nov 27, 2021 On Loan Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Free Dec 31, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Nov 30, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 30, 2020 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 30, 2020 On Loan Jun 30, 2021 On Loan Aug 1, 2021 On Loan Jun 30, 2021 On Loan Free Jun 30, 2021 On Loan Nov 30, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Free Dec 31, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jan 1, 2021 On Loan Jun 30, 2021 On Loan Free Free Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Nov 30, 2020 
On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jan 4, 2021 On Loan Jun 30, 2021 On Loan Nov 30, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2021 On Loan Free Jun 30, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jan 1, 2021 On Loan Dec 31, 2020 On Loan Jun 30, 2021 On Loan Jun 30, 2021 On Loan Aug 31, 2021 On Loan Jun 30, 2021 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan Dec 31, 2020 On Loan
Let's extract the values into separate columns: the year the contract starts, the year it ends and, from those two, the contract length in years. For 'Free' or 'On Loan' contracts the start and end are set to NaN and the length to 0.
# Let´s use a function to extract the values into new columns
def extract_contract_info(contract):
    """Break a raw 'Contract' value into start, end and length in years.

    Args:
        contract: Raw text of the contract, e.g. '2004 ~ 2021', 'Free' or
            'Jun 30, 2021 On Loan'.

    Returns:
        A tuple ``(start, end, length)``. For 'Free' and for any text that
        contains 'On Loan' the tuple is ``(nan, nan, 0)``. Otherwise
        ``start`` and ``end`` are the two text pieces found around ' ~ ' and
        ``length`` is the difference between their first four characters
        read as integers.

    Raises:
        ValueError: If the text does not split into exactly two pieces
            around ' ~ ', or a piece does not start with a four-digit year.
    """
    # Free agents and loaned players carry no "start ~ end" range
    if contract == 'Free' or 'On Loan' in contract:
        return np.nan, np.nan, 0

    first_part, last_part = contract.split(' ~ ')
    year_from = int(first_part[:4])
    year_to = int(last_part[:4])
    return first_part, last_part, year_to - year_from
# Apply the function to every value of the 'Contract' column. The result has one
# positional column (0, 1, 2) per element of the tuple returned by the function
new_cols = ['Contract Start', 'Contract End', 'Contract Length(years)']
new_data = fdc['Contract'].apply(lambda x: pd.Series(extract_contract_info(x)))
# Insert the three new columns right after 'Contract', in the order of new_cols.
# Inserting to the right of 'Contract' never moves it, so its position is
# looked up only once.
contract_loc = fdc.columns.get_loc('Contract')
for offset, col_name in enumerate(new_cols):
    fdc.insert(loc=contract_loc + 1 + offset, column=col_name, value=new_data[offset])
# Run to see the changes
fdc_new_col = fdc[['Contract', 'Contract Start', 'Contract End', 'Contract Length(years)']]
fdc_new_col.head()
Contract | Contract Start | Contract End | Contract Length(years) | |
---|---|---|---|---|
0 | 2004 ~ 2021 | 2004 | 2021 | 17.0 |
1 | 2018 ~ 2022 | 2018 | 2022 | 4.0 |
2 | 2014 ~ 2023 | 2014 | 2023 | 9.0 |
3 | 2015 ~ 2023 | 2015 | 2023 | 8.0 |
4 | 2017 ~ 2022 | 2017 | 2022 | 5.0 |
It's time to determine each player's contract status with a function and store the result in a new column.
# Let´s define the contract categories
def categorize_contract_status(contract):
    """Label a raw 'Contract' value with its contract status.

    Args:
        contract: Raw text of the contract.

    Returns:
        'Free' when the text is exactly 'Free', 'On Loan' when the text
        contains 'On Loan', and 'Contract' for anything else.
    """
    if contract == 'Free':
        return 'Free'
    return 'On Loan' if 'On Loan' in contract else 'Contract'
# Place 'Contract Status' immediately to the right of 'Contract Length(years)'
status_loc = fdc.columns.get_loc('Contract Length(years)') + 1
status_values = fdc['Contract'].apply(categorize_contract_status)
fdc.insert(loc=status_loc, column='Contract Status', value=status_values)
# Preview all contract-related columns on a random sample of rows
contract_columns = [
    'Contract',
    'Contract Start',
    'Contract End',
    'Contract Length(years)',
    'Contract Status',
]
fdc_new_col = fdc[contract_columns]
fdc_new_col.sample(5)
Contract | Contract Start | Contract End | Contract Length(years) | Contract Status | |
---|---|---|---|---|---|
8515 | Jun 30, 2021 On Loan | NaN | NaN | 0.0 | On Loan |
7857 | 2020 ~ 2024 | 2020 | 2024 | 4.0 | Contract |
18063 | 2019 ~ 2021 | 2019 | 2021 | 2.0 | Contract |
17428 | 2018 ~ 2020 | 2018 | 2020 | 2.0 | Contract |
2662 | 2020 ~ 2021 | 2020 | 2021 | 1.0 | Contract |
Let´s check the values from the column Positions
fdc['Positions'].dtype
dtype('O')
fdc['Positions'].unique()
array(['RW, ST, CF', 'ST, LW', 'GK', 'CAM, CM', 'LW, CAM', 'ST', 'RW', 'ST, LW, RW', 'CB', 'LW', 'CDM', 'CF, ST', 'LW, RW', 'CDM, CM', 'CDM, RB', 'CF, CAM', 'LW, ST', 'CM', 'ST, CF, LW', 'RM, LM, CAM', 'RB', 'RW, CAM, CM', 'LB', 'LM, CF', 'CF', 'RW, LW', 'CAM, RM, RW', 'CM, CDM', 'CAM, CF, ST', 'CM, CDM, CAM', 'CF, LW, CAM', 'CAM, RM, CF', 'LM, ST', 'RM, LM, RW', 'LM', 'CAM, RW', 'CB, CDM', 'RW, RM', 'LW, CF', 'CM, RM, LM', 'LB, LM', 'CAM, CM, RM', 'CAM, CM, CF', 'CAM, CF', 'LM, RM, LW', 'LM, LB, CM', 'CM, LM, LB', 'RM, RW', 'RM, CM', 'CAM, CM, LW', 'CB, LB', 'RM, RB', 'ST, RW', 'LM, RW, LW', 'RB, LB', 'RB, RM', 'RM', 'LM, RM, CF', 'CAM, RM', 'RB, RWB', 'CDM, CB, CM', 'CAM, RM, ST', 'LM, LW, RM', 'CM, CAM', 'ST, RM, CF', 'LM, RM', 'RM, CF', 'LM, LWB', 'RW, RM, CF', 'RB, CM', 'LW, CAM, RW', 'CAM, LW, CM', 'CM, CAM, CDM', 'RW, LW, CAM', 'CM, CAM, LM', 'CM, RM, ST', 'CDM, CM, RB', 'ST, CAM', 'CAM, LW, ST', 'LB, CB, LWB', 'RM, ST', 'CB, CDM, LB', 'RWB, RM', 'CM, LM, RM', 'RB, CDM, CM', 'RW, LW, RM', 'LM, LW', 'CM, LM', 'LM, LB', 'RM, LM, CF', 'LB, LM, RM', 'CDM, CM, CAM', 'ST, LW, RM', 'CAM, CM, ST', 'ST, CF', 'LWB, LB', 'LW, RW, LM', 'RM, RW, ST', 'LWB', 'CF, ST, CAM', 'LM, CAM, RM', 'RB, CB', 'ST, LM', 'RW, CAM', 'LM, CAM', 'RWB, RB', 'ST, RW, LW', 'CAM', 'RB, RM, RW', 'LB, LWB', 'RM, CAM', 'CAM, ST', 'CDM, CB', 'CF, LM, LW', 'CAM, LM, LW', 'LW, RW, CAM', 'CB, RB', 'RB, CB, RWB', 'LM, LW, ST', 'LW, ST, CM', 'RM, CAM, CM', 'CB, RB, RWB', 'RW, ST, LW', 'LW, CAM, CM', 'CM, LM, CDM', 'LB, LM, LWB', 'CB, LB, RB', 'CAM, LW', 'RWB, RB, RM', 'CF, CAM, ST', 'RM, CAM, RW', 'RW, ST', 'LW, LM', 'RB, RM, CM', 'ST, CAM, RW', 'CM, RB, LB', 'CAM, LM, RM', 'RB, RW', 'LM, CM', 'RM, LM, CM', 'CDM, CM, CB', 'CM, CF', 'CF, LW, RW', 'ST, RM', 'CAM, CM, CDM', 'LB, CB', 'RW, RWB', 'ST, LM, RM', 'RM, RWB, RB', 'LM, ST, CAM', 'CAM, ST, CF', 'LW, CM', 'RB, RWB, LB', 'RM, LM', 'CM, CAM, RM', 'ST, LW, CAM', 'RM, RW, CAM', 'CM, CDM, RM', 'RB, RM, RWB', 'CDM, CB, LB', 'CAM, ST, LM', 'LM, CM, RM', 
'CF, RW', 'CAM, RM, LW', 'CM, RM, CDM', 'LB, LWB, LM', 'LW, RW, CF', 'LW, LM, CAM', 'LWB, LW, LB', 'CDM, CM, LM', 'CB, CM', 'RWB, RW, RB', 'ST, RW, RM', 'LW, RW, CM', 'LM, RM, CAM', 'LB, RB', 'RWB, RM, RB', 'LM, RB, LB', 'LW, LM, ST', 'ST, RM, LM', 'CAM, CM, LM', 'CF, CM', 'LW, ST, RW', 'ST, CAM, CF', 'LB, CB, LM', 'CM, CDM, LM', 'RM, RWB', 'LB, LM, RB', 'RB, LB, CB', 'CM, CDM, RB', 'LB, CM', 'CF, CAM, LW', 'RM, LWB', 'CF, ST, LM', 'LB, RB, CB', 'LW, CF, ST', 'RM, ST, CAM', 'LW, RW, ST', 'ST, LW, LM', 'CM, LW', 'CDM, CM, LB', 'CM, RM, CAM', 'ST, CF, CAM', 'CM, LM, CAM', 'RWB', 'LM, ST, RM', 'CAM, RM, CM', 'CAM, LM, CM', 'RW, LW, ST', 'CAM, RM, LM', 'CF, ST, LW', 'LWB, LB, LM', 'RM, CM, CAM', 'LB, LM, CAM', 'CAM, CDM', 'RW, RB, RM', 'RM, ST, RW', 'CM, CAM, LW', 'CF, ST, RW', 'LM, RM, RB', 'LM, RM, CM', 'CB, RB, RM', 'RB, CDM', 'RM, ST, LM', 'LB, LWB, CB', 'CDM, LB', 'LM, RM, ST', 'RWB, RB, LWB', 'ST, LM, CAM', 'ST, RM, CAM', 'RB, RWB, RM', 'CF, LM, CAM', 'CAM, CF, RW', 'RB, RM, LB', 'CDM, RWB', 'CM, RW, CAM', 'ST, CF, LM', 'RM, LM, RWB', 'RB, LB, RM', 'LM, LW, LB', 'RM, LM, ST', 'CAM, LM', 'ST, LM, LW', 'LW, RW, RM', 'CF, LW', 'LB, CM, RB', 'RB, CB, CDM', 'CB, RB, LB', 'CAM, RW, RM', 'LWB, LM', 'LW, CF, LM', 'CAM, CF, CM', 'LWB, CB', 'CM, CDM, LW', 'LM, CM, LB', 'CM, RM', 'RW, LW, CF', 'CM, CDM, CB', 'LM, RM, RW', 'RM, RW, LM', 'RM, CAM, LM', 'LWB, RM, LB', 'RM, LB, RB', 'CM, CF, RM', 'RM, RB, LB', 'ST, LM, RW', 'CAM, ST, LW', 'CF, LW, ST', 'LM, LW, CAM', 'RM, RW, CF', 'LW, LM, CF', 'CM, RW, LW', 'LM, CM, CAM', 'CAM, RW, ST', 'CM, CAM, CF', 'RW, LWB, LW', 'CB, RB, CDM', 'RW, RM, LW', 'LW, ST, LM', 'RWB, RW', 'ST, CAM, LM', 'CM, CB', 'RM, RW, CM', 'LWB, CM', 'RM, LM, LB', 'RM, CAM, ST', 'CDM, RM, RB', 'LM, LB, RB', 'LB, RB, RWB', 'RM, RWB, CAM', 'CAM, LM, CF', 'LM, CAM, CM', 'CF, CAM, RW', 'CDM, CM, RM', 'CF, CAM, CM', 'ST, RM, LW', 'CB, CDM, CM', 'RB, RW, LB', 'ST, RW, CAM', 'CM, LB', 'LW, RM, RW', 'CM, RM, RW', 'RM, CF, LM', 'CF, LM, RM', 'CAM, ST, RM', 'RW, CF', 
'CM, CAM, ST', 'CAM, CDM, CM', 'RM, RW, LW', 'CAM, LM, LB', 'CAM, RW, LW', 'CDM, CAM', 'LWB, LM, LB', 'RW, LB', 'LW, CAM, CF', 'RB, RWB, CB', 'LM, CF, RM', 'RB, LB, RWB', 'RM, LW', 'CAM, LM, ST', 'LW, LM, LB', 'LB, RB, RM', 'CM, LWB', 'CDM, RB, RM', 'RM, CM, RB', 'ST, RM, RW', 'LM, RM, LWB', 'RW, LW, LM', 'LW, CF, RW', 'CM, CDM, LB', 'ST, LM, RB', 'LB, CDM', 'CM, CB, CDM', 'CB, LB, CDM', 'RW, ST, RM', 'RM, ST, CF', 'CAM, LW, LM', 'LM, ST, LW', 'CAM, RM, RB', 'RB, CM, RM', 'CM, LW, LM', 'ST, CAM, RM', 'RW, ST, CAM', 'CM, LB, LM', 'RB, CDM, RM', 'LM, RWB, RM', 'LM, CAM, LB', 'CAM, LW, RW', 'CM, RM, RWB', 'RW, CAM, LW', 'RB, CB, LB', 'LB, LM, LW', 'RW, RB', 'LM, LWB, CM', 'RM, CAM, RB', 'RM, CM, ST', 'RB, RM, LM', 'LM, LB, LW', 'LM, ST, CF', 'RW, LWB, RM', 'LM, CAM, LW', 'CB, RWB', 'RM, LW, CAM', 'LB, LW', 'CDM, CM, ST', 'LB, CM, CDM', 'CF, CAM, LM', 'RW, CAM, RM', 'RW, LM, LW', 'RW, CM, CAM', 'LM, LW, RW', 'RM, LW, RW', 'LW, LWB, RWB', 'RM, LM, RB', 'CB, CDM, RB', 'CAM, RW, CM', 'ST, CAM, CM', 'LWB, LM, CM', 'RW, LW, RWB', 'CB, LB, LWB', 'CM, LM, ST', 'CM, LWB, LM', 'LB, LW, LM', 'RB, CM, CAM', 'RM, CAM, CF', 'CM, RW', 'RW, RM, CAM', 'CB, LWB', 'CAM, RB', 'LM, CM, CDM', 'RM, LB, LM', 'CB, RB, CM', 'RB, CM, RWB', 'LB, RB, LM', 'CAM, ST, RW', 'LB, LW, RW', 'CDM, CAM, CM', 'CAM, ST, CM', 'LM, CAM, ST', 'LM, ST, CM', 'CAM, CF, RM', 'CF, ST, RM', 'LM, ST, LB', 'ST, CAM, LW', 'LWB, CM, LB', 'RWB, RM, LWB', 'LWB, LM, RB', 'CM, LW, ST', 'LB, LM, CB', 'CM, RM, RB', 'RWB, CB', 'RM, RB, RWB', 'LB, CM, LM', 'ST, LW, CF', 'RM, RWB, ST', 'CF, ST, CM', 'LM, LB, LWB', 'LM, RW', 'LM, LWB, LB', 'LB, LWB, RB', 'RM, RB, LM', 'RM, CM, LM', 'LW, ST, CAM', 'RB, RM, CB', 'LWB, RWB, LB', 'CF, RM, RW', 'RM, CF, CAM', 'LM, LW, CF', 'RB, LM, LB', 'CAM, CM, RW', 'LB, CM, LW', 'CM, CF, CAM', 'RW, RM, ST', 'CDM, RB, CM', 'CM, RB', 'RB, RWB, RW', 'LM, RW, RM', 'CM, RB, CDM', 'LM, RM, LB', 'CDM, RB, CB', 'RB, CB, RM', 'RWB, LW, LM', 'ST, CF, RW', 'RM, CM, CDM', 'LM, CF, ST', 'LW, CAM, LM', 'CDM, LM, 
CM', 'ST, CM, CAM', 'LM, RM, RWB', 'CM, CAM, RW', 'CAM, LM, RW', 'LW, LM, RM', 'CAM, RW, CF', 'LM, ST, RW', 'CB, LWB, LB', 'RW, CF, LW', 'CB, CM, CDM', 'CB, CAM', 'LW, LB', 'CDM, RB, LB', 'ST, CM, RB', 'RWB, LWB, RB', 'CM, CB, RB', 'LB, LW, RB', 'CF, RM, RWB', 'LB, RM', 'RM, RW, RB', 'LB, RM, RB', 'CDM, RM, CM', 'CDM, LB, CM', 'LM, CDM, CM', 'RW, LM, CAM', 'LM, LB, RM', 'RM, CF, ST', 'RW, CM', 'RM, RWB, LM', 'CF, CM, ST', 'RW, RM, LM', 'LM, CM, RW', 'RWB, CM', 'RB, RM, CDM', 'CM, CB, CAM', 'CF, CM, LM', 'RWB, LWB, RM', 'CF, RM', 'CM, CF, ST', 'RWB, RB, CB', 'LWB, RWB', 'RM, LM, LW', 'LW, RM, CM', 'LW, CAM, ST', 'ST, RM, RWB', 'ST, CM', 'CDM, CM, RWB', 'LB, CB, RB', 'CB, ST, CAM', 'CF, RM, CM', 'CDM, CB, RB', 'CM, LM, RB', 'RB, CB, LWB', 'LM, LB, CDM', 'LW, LWB', 'RW, CF, CAM', 'RM, RB, CAM', 'LB, RB, CM', 'RM, CDM, LM', 'LWB, LB, CB', 'LB, RB, CDM', 'RB, CM, LB', 'CDM, RM', 'LM, RB, CB', 'RW, RWB, RM', 'RM, LW, ST', 'LB, RB, LWB', 'RM, LM, CDM', 'RB, RM, CAM', 'RM, ST, LW', 'CF, LM', 'RB, LB, CDM', 'RW, LW, RB', 'RB, RWB, CDM', 'CM, CAM, LB', 'LW, RW, LWB', 'CB, LWB, RWB', 'LB, CB, CDM', 'ST, RB, RM', 'CM, RB, RM', 'RW, LM', 'RM, CF, RB', 'CB, RM, RB', 'RWB, LWB', 'CM, ST, CDM', 'LWB, LW', 'CB, RB, ST', 'LB, LWB, CDM', 'CB, RWB, RM', 'CB, ST', 'CM, ST', 'LM, LWB, CB', 'ST, LW, LWB', 'LM, CDM, RM', 'LWB, LB, RW', 'RB, CM, CB', 'LM, RW, ST', 'CM, RM, LW', 'ST, RW, LM', 'RWB, CM, RM', 'CAM, RWB, CM', 'CM, RB, LM', 'RM, RB, RW', 'LW, LM, CM', 'RM, LM, LWB', 'ST, LM, CF', 'LW, RM, CAM', 'RB, RWB, LWB', 'LB, CM, ST', 'ST, LWB', 'RB, ST, LM', 'CDM, CAM, LM', 'LM, ST, LWB', 'LB, RB, RW', 'RM, CDM', 'RB, LB, LWB', 'RW, CAM, LM', 'LB, CDM, LM', 'RW, RB, LM', 'CB, LM', 'RW, LM, RM', 'RM, LB, CM', 'LW, CAM, RM', 'RW, RB, LB', 'LW, LB, LM', 'CDM, RB, RWB', 'CB, RW', 'CM, LM, LW', 'LM, RM, CB', 'LM, LWB, ST', 'ST, CB, CDM', 'LB, CDM, CB', 'CDM, CB, RM', 'RB, RWB, CM', 'ST, RW, CF', 'ST, CB', 'ST, LW, CM', 'LM, LB, ST', 'ST, CB, RB', 'RB, LM', 'GK, RB', 'LM, LW, CM', 'LM, CM, 
LW', 'CB, CAM, CM', 'LM, RB', 'RWB, CAM', 'RB, CAM', 'CB, CDM, RM', 'ST, RB', 'ST, RWB', 'CAM, CDM, LM', 'CB, RWB, RB', 'LM, LWB, RM', 'LB, LM, CM', 'CM, CF, LM', 'CM, RWB', 'CDM, LM', 'ST, CF, RM', 'CAM, LB', 'RB, RM, ST', 'LM, CDM', 'CDM, RW, RB', 'LM, CF, CAM', 'LWB, LM, RWB', 'CF, RM, ST', 'CAM, CF, LM', 'RB, ST, CB', 'RW, RB, LW', 'LB, LM, CDM', 'RB, CB, ST', 'RWB, CB, RB', 'CDM, ST', 'LW, RM, LM', 'RB, LWB', 'CDM, LB, CB', 'LM, LB, CF', 'RB, CDM, LB', 'LB, LWB, CM', 'RM, LM, CB', 'CAM, LW, CF', 'LB, LW, CM', 'RB, CM, CDM', 'LWB, LM, ST', 'CM, RW, RM', 'CB, RM', 'CM, LM, CB', 'LM, LB, CB', 'ST, RW, CM', 'RM, ST, CM', 'RWB, RM, LM', 'CM, CAM, RB', 'CM, RWB, CDM', 'LB, LWB, LW'], dtype=object)
missing_values = fdc['Positions'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
The player's height should be in cm. We transform all the values to the 'int' data type and convert the values expressed in feet and inches to centimetres, so that the whole column uses the same unit.
# Let´s check the data type of the values
fdc['Height'].dtype
dtype('O')
fdc['Height'].unique()
array(['170cm', '187cm', '188cm', '181cm', '175cm', '184cm', '191cm', '178cm', '193cm', '185cm', '199cm', '173cm', '168cm', '176cm', '177cm', '183cm', '180cm', '189cm', '179cm', '195cm', '172cm', '182cm', '186cm', '192cm', '165cm', '194cm', '167cm', '196cm', '163cm', '190cm', '174cm', '169cm', '171cm', '197cm', '200cm', '166cm', '6\'2"', '164cm', '198cm', '6\'3"', '6\'5"', '5\'11"', '6\'4"', '6\'1"', '6\'0"', '5\'10"', '5\'9"', '5\'6"', '5\'7"', '5\'4"', '201cm', '158cm', '162cm', '161cm', '160cm', '203cm', '157cm', '156cm', '202cm', '159cm', '206cm', '155cm'], dtype=object)
# Let's create a function that strips the 'cm' unit from the numbers and converts the feet/inches values to cm
def convert_height(height):
    """Return a height given as text as a whole number of centimetres.

    Args:
        height: Either a centimetre value such as '170cm' or a feet/inches
            value such as 6'2" (feet, apostrophe, inches, double quote).

    Returns:
        The height in centimetres as an int. Feet/inches values are turned
        into inches, multiplied by 2.54 and rounded.
    """
    if 'cm' not in height:
        feet_text, inches_text = height.split("'")
        inches_total = 12 * int(feet_text) + int(inches_text.strip('"'))
        return round(2.54 * inches_total)
    # Already metric: only the unit has to go
    return int(height.strip('cm'))
# Run every 'Height' value through convert_height and list the distinct results
height_in_cm = fdc['Height'].map(convert_height)
fdc['Height'] = height_in_cm
fdc['Height'].unique()
array([170, 187, 188, 181, 175, 184, 191, 178, 193, 185, 199, 173, 168, 176, 177, 183, 180, 189, 179, 195, 172, 182, 186, 192, 165, 194, 167, 196, 163, 190, 174, 169, 171, 197, 200, 166, 164, 198, 201, 158, 162, 161, 160, 203, 157, 156, 202, 159, 206, 155], dtype=int64)
Let's rename the column to something more descriptive
# Make the unit explicit in the column label: 'Height' becomes 'Height(cm)'
fdc = fdc.rename({'Height': 'Height(cm)'}, axis='columns')
fdc['Height(cm)'].sample(3)
14801 174 9579 180 17348 180 Name: Height(cm), dtype: int64
The player's weight should be in kilograms. We do the same as we did with the height: remove the 'kg' and 'lbs' units and convert the values given in 'lbs' to kg.
# Let´s check the data type
fdc['Weight'].dtype
dtype('O')
# How the data is composed
fdc['Weight'].unique()
array(['72kg', '83kg', '87kg', '70kg', '68kg', '80kg', '71kg', '91kg', '73kg', '85kg', '92kg', '69kg', '84kg', '96kg', '81kg', '82kg', '75kg', '86kg', '89kg', '74kg', '76kg', '64kg', '78kg', '90kg', '66kg', '60kg', '94kg', '79kg', '67kg', '65kg', '59kg', '61kg', '93kg', '88kg', '97kg', '77kg', '62kg', '63kg', '95kg', '100kg', '58kg', '183lbs', '179lbs', '172lbs', '196lbs', '176lbs', '185lbs', '170lbs', '203lbs', '168lbs', '161lbs', '146lbs', '130lbs', '190lbs', '174lbs', '148lbs', '165lbs', '159lbs', '192lbs', '181lbs', '139lbs', '154lbs', '157lbs', '163lbs', '98kg', '103kg', '99kg', '102kg', '56kg', '101kg', '57kg', '55kg', '104kg', '107kg', '110kg', '53kg', '50kg', '54kg', '52kg'], dtype=object)
# Let´s create a function to create our data in kg and int
def convert_weight(weight):
    """Return a weight given as text as a whole number of kilograms.

    Args:
        weight: Either a kilogram value such as '72kg' or a pound value
            such as '183lbs'.

    Returns:
        The weight in kilograms as an int. Pound values are divided by
        2.205 and rounded.
    """
    if "kg" not in weight:
        return round(int(weight.strip('lbs')) / 2.205)
    # Already metric: only the unit has to go
    return int(weight.strip('kg'))
# Run every 'Weight' value through convert_weight and list the distinct results
weight_in_kg = fdc['Weight'].map(convert_weight)
fdc['Weight'] = weight_in_kg
fdc['Weight'].unique()
array([ 72, 83, 87, 70, 68, 80, 71, 91, 73, 85, 92, 69, 84, 96, 81, 82, 75, 86, 89, 74, 76, 64, 78, 90, 66, 60, 94, 79, 67, 65, 59, 61, 93, 88, 97, 77, 62, 63, 95, 100, 58, 98, 103, 99, 102, 56, 101, 57, 55, 104, 107, 110, 53, 50, 54, 52], dtype=int64)
Let's rename the column to something more descriptive
# Make the unit explicit in the column label: 'Weight' becomes 'Weight(kg)'
fdc = fdc.rename({'Weight': 'Weight(kg)'}, axis='columns')
fdc['Weight(kg)'].sample(3)
5504 68 18272 70 7078 81 Name: Weight(kg), dtype: int64
Checking the data from the 'Preferred Foot' column.
fdc['Preferred Foot'].dtype
dtype('O')
fdc['Preferred Foot'].unique()
array(['Left', 'Right'], dtype=object)
fdc[['Preferred Foot']].head(10)
Preferred Foot | |
---|---|
0 | Left |
1 | Right |
2 | Right |
3 | Right |
4 | Right |
5 | Right |
6 | Left |
7 | Right |
8 | Right |
9 | Right |
fdc['BOV'].dtype
dtype('int64')
fdc['BOV'].unique()
array([93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48], dtype=int64)
missing_values = fdc['BOV'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
Check the values from the 'Best Position' column
fdc['Best Position'].unique()
array(['RW', 'ST', 'GK', 'CAM', 'LW', 'CB', 'CDM', 'CF', 'CM', 'RB', 'LB', 'LM', 'RM', 'LWB', 'RWB'], dtype=object)
missing_values = fdc['Best Position'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
It's time to verify and clean the values from the 'Joined' column
fdc['Joined'].dtype
dtype('O')
fdc['Joined'].unique()
array(['01-Jul-04', '10-Jul-18', '16-Jul-14', ..., '22-Sep-18', '28-Feb-15', '06-Mar-18'], dtype=object)
missing_values = fdc['Joined'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
Now that we know we don´t have any NULL values, let´s change the data type to datetime
# Parse texts such as '01-Jul-04' with an explicit day-month-year format
# NOTE(review): '%y' is a two-digit year, so the century is chosen by the parser — confirm that old join dates land in the intended century
fdc['Joined'] = pd.to_datetime(fdc['Joined'], dayfirst=True, format='%d-%b-%y')
# Spot-check the parsed dates on a random sample of rows
fdc['Joined'].sample(15)
7652 2019-07-01 5197 2019-07-07 6777 2020-09-14 11794 2017-12-08 6256 2020-08-25 16396 2017-09-01 12845 2018-01-07 15884 2019-07-03 2631 2017-07-01 6790 2019-01-12 12664 2016-07-01 9871 2016-08-21 11394 2020-09-04 4683 2017-07-04 3250 2020-08-11 Name: Joined, dtype: datetime64[ns]
print(fdc['Joined'].dt.strftime('%d-%b-%y'))
0 01-Jul-04 1 10-Jul-18 2 16-Jul-14 3 30-Aug-15 4 03-Aug-17 ... 18974 13-Jul-18 18975 01-Aug-20 18976 08-Mar-19 18977 22-Sep-20 18978 29-Jul-19 Name: Joined, Length: 18979, dtype: object
Date when the Loan ends (if the player is on loan)
# Check the data type
fdc['Loan Date End'].dtype
dtype('O')
fdc['Loan Date End'].unique()
array([nan, '30-Jun-21', '31-Dec-20', '30-Jan-21', '30-Jun-22', '31-May-21', '05-Jul-21', '31-Dec-21', '01-Jul-21', '01-Jan-21', '31-Aug-21', '31-Jan-21', '30-Dec-21', '23-Jun-21', '03-Jan-21', '27-Nov-21', '17-Jan-21', '30-Jun-23', '31-Jul-21', '22-Nov-20', '31-May-22', '30-Dec-20', '04-Jan-21', '30-Nov-20', '01-Aug-21'], dtype=object)
# Let´s compare the data with the one stored in column 'Contract Status'
on_loan = fdc[fdc['Contract Status'] == 'On Loan']
on_loan[['Contract', 'Contract Status', 'Loan Date End']]
Contract | Contract Status | Loan Date End | |
---|---|---|---|
205 | Jun 30, 2021 On Loan | On Loan | 30-Jun-21 |
248 | Jun 30, 2021 On Loan | On Loan | 30-Jun-21 |
254 | Jun 30, 2021 On Loan | On Loan | 30-Jun-21 |
302 | Jun 30, 2021 On Loan | On Loan | 30-Jun-21 |
306 | Jun 30, 2021 On Loan | On Loan | 30-Jun-21 |
... | ... | ... | ... |
18472 | Aug 31, 2021 On Loan | On Loan | 31-Aug-21 |
18571 | Jun 30, 2021 On Loan | On Loan | 30-Jun-21 |
18600 | Dec 31, 2020 On Loan | On Loan | 31-Dec-20 |
18622 | Dec 31, 2020 On Loan | On Loan | 31-Dec-20 |
18680 | Dec 31, 2020 On Loan | On Loan | 31-Dec-20 |
1013 rows × 3 columns
Now it is time to verify whether the values in the 'Value' column are correct
fdc['Value'].dtype
dtype('O')
fdc['Value'].unique()
array(['€103.5M', '€63M', '€120M', '€129M', '€132M', '€111M', '€120.5M', '€102M', '€185.5M', '€110M', '€113M', '€90.5M', '€82M', '€17.5M', '€83.5M', '€33.5M', '€114.5M', '€78M', '€103M', '€109M', '€92M', '€10M', '€76.5M', '€89.5M', '€87.5M', '€79.5M', '€124M', '€114M', '€95M', '€92.5M', '€105.5M', '€88.5M', '€85M', '€81.5M', '€26M', '€21M', '€56M', '€67.5M', '€53M', '€36.5M', '€51M', '€65.5M', '€46.5M', '€61.5M', '€72.5M', '€77.5M', '€43.5M', '€32.5M', '€36M', '€32M', '€54M', '€49.5M', '€57M', '€66.5M', '€74.5M', '€71.5M', '€121M', '€99M', '€67M', '€86.5M', '€93.5M', '€70M', '€62M', '€66M', '€58M', '€44M', '€81M', '€37M', '€14.5M', '€46M', '€47.5M', '€52.5M', '€54.5M', '€34.5M', '€57.5M', '€51.5M', '€44.5M', '€55M', '€48M', '€60.5M', '€63.5M', '€61M', '€29M', '€58.5M', '€55.5M', '€42M', '€40.5M', '€43M', '€45.5M', '€34M', '€26.5M', '€42.5M', '€35.5M', '€45M', '€41.5M', '€40M', '€11M', '€13.5M', '€29.5M', '€27M', '€15.5M', '€38.5M', '€52M', '€33M', '€19M', '€73.5M', '€38M', '€35M', '€47M', '€24M', '€30.5M', '€18M', '€28M', '€25.5M', '€25M', '€31M', '€23.5M', '€30M', '€31.5M', '€22.5M', '€28.5M', '€4M', '€12.5M', '€37.5M', '€27.5M', '€16M', '€15M', '€20.5M', '€22M', '€3.4M', '€5M', '€56.5M', '€62.5M', '€0', '€39M', '€24.5M', '€21.5M', '€13M', '€8M', '€20M', '€8.5M', '€2.9M', '€9M', '€4.6M', '€50M', '€23M', '€18.5M', '€7M', '€19.5M', '€5.5M', '€7.5M', '€3.8M', '€14M', '€10.5M', '€16.5M', '€3.6M', '€9.5M', '€39.5M', '€17M', '€12M', '€11.5M', '€4.9M', '€3M', '€1.9M', '€6.5M', '€1.7M', '€2.4M', '€3.1M', '€6M', '€3.7M', '€4.7M', '€4.3M', '€2.1M', '€1.2M', '€1.8M', '€4.8M', '€3.2M', '€1.3M', '€825K', '€2.3M', '€1.5M', '€3.9M', '€2.6M', '€3.5M', '€2.8M', '€2.7M', '€4.4M', '€4.1M', '€950K', '€1.6M', '€625K', '€1.1M', '€4.5M', '€4.2M', '€2.2M', '€3.3M', '€1.4M', '€2M', '€475K', '€925K', '€750K', '€725K', '€2.5M', '€1M', '€350K', '€525K', '€600K', '€850K', '€800K', '€550K', '€250K', '€400K', '€425K', '€575K', '€210K', '€325K', '€900K', '€875K', '€650K', '€700K', '€500K', 
'€975K', '€375K', '€775K', '€275K', '€180K', '€450K', '€675K', '€150K', '€240K', '€300K', '€130K', '€220K', '€200K', '€110K', '€170K', '€230K', '€90K', '€120K', '€80K', '€190K', '€140K', '€160K', '€100K', '€60K', '€50K', '€70K', '€45K', '€35K', '€40K', '€25K', '€20K', '€15K', '€30K', '€9K'], dtype=object)
For the values, I am going to transform them into numbers; some are expressed in millions (M) and others in thousands (K). First, it is necessary to remove the € sign; then the 'M' or 'K' suffix is stripped and each number is scaled accordingly, so that every value is expressed in plain euros. Numeric values are needed for future operations.
# Convert the values to strings
# NOTE(review): astype(str) also turns a missing value into the text 'nan', so an isnull()/isna() check run after this cast cannot report it — confirm the column has no gaps before casting
fdc['Value'] = fdc['Value'].astype(str)
# Drop the euro sign so that only the number and its optional M/K suffix remain
fdc['Value'] = fdc['Value'].str.replace('€','')
fdc['Value'].dtype
dtype('O')
fdc['Value'].unique()
array(['103.5M', '63M', '120M', '129M', '132M', '111M', '120.5M', '102M', '185.5M', '110M', '113M', '90.5M', '82M', '17.5M', '83.5M', '33.5M', '114.5M', '78M', '103M', '109M', '92M', '10M', '76.5M', '89.5M', '87.5M', '79.5M', '124M', '114M', '95M', '92.5M', '105.5M', '88.5M', '85M', '81.5M', '26M', '21M', '56M', '67.5M', '53M', '36.5M', '51M', '65.5M', '46.5M', '61.5M', '72.5M', '77.5M', '43.5M', '32.5M', '36M', '32M', '54M', '49.5M', '57M', '66.5M', '74.5M', '71.5M', '121M', '99M', '67M', '86.5M', '93.5M', '70M', '62M', '66M', '58M', '44M', '81M', '37M', '14.5M', '46M', '47.5M', '52.5M', '54.5M', '34.5M', '57.5M', '51.5M', '44.5M', '55M', '48M', '60.5M', '63.5M', '61M', '29M', '58.5M', '55.5M', '42M', '40.5M', '43M', '45.5M', '34M', '26.5M', '42.5M', '35.5M', '45M', '41.5M', '40M', '11M', '13.5M', '29.5M', '27M', '15.5M', '38.5M', '52M', '33M', '19M', '73.5M', '38M', '35M', '47M', '24M', '30.5M', '18M', '28M', '25.5M', '25M', '31M', '23.5M', '30M', '31.5M', '22.5M', '28.5M', '4M', '12.5M', '37.5M', '27.5M', '16M', '15M', '20.5M', '22M', '3.4M', '5M', '56.5M', '62.5M', '0', '39M', '24.5M', '21.5M', '13M', '8M', '20M', '8.5M', '2.9M', '9M', '4.6M', '50M', '23M', '18.5M', '7M', '19.5M', '5.5M', '7.5M', '3.8M', '14M', '10.5M', '16.5M', '3.6M', '9.5M', '39.5M', '17M', '12M', '11.5M', '4.9M', '3M', '1.9M', '6.5M', '1.7M', '2.4M', '3.1M', '6M', '3.7M', '4.7M', '4.3M', '2.1M', '1.2M', '1.8M', '4.8M', '3.2M', '1.3M', '825K', '2.3M', '1.5M', '3.9M', '2.6M', '3.5M', '2.8M', '2.7M', '4.4M', '4.1M', '950K', '1.6M', '625K', '1.1M', '4.5M', '4.2M', '2.2M', '3.3M', '1.4M', '2M', '475K', '925K', '750K', '725K', '2.5M', '1M', '350K', '525K', '600K', '850K', '800K', '550K', '250K', '400K', '425K', '575K', '210K', '325K', '900K', '875K', '650K', '700K', '500K', '975K', '375K', '775K', '275K', '180K', '450K', '675K', '150K', '240K', '300K', '130K', '220K', '200K', '110K', '170K', '230K', '90K', '120K', '80K', '190K', '140K', '160K', '100K', '60K', '50K', '70K', '45K', '35K', '40K', 
'25K', '20K', '15K', '30K', '9K'], dtype=object)
Let's remember that some values in the 'Contract' column were defined as 'Free' or 'On Loan'; similar non-numeric or missing entries in 'Value' would give us an error when converting the values into the 'int' data type. Let's try to identify them.
# Check for missing values or null
fdc['Value'].isna()
0 False 1 False 2 False 3 False 4 False ... 18974 False 18975 False 18976 False 18977 False 18978 False Name: Value, Length: 18979, dtype: bool
missing_values = fdc['Value'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
print(fdc[['Value','Surname']].head(15))
Value Surname 0 103.5M Messi 1 63M Ronaldo dos Santos Aveiro 2 120M Oblak 3 129M De Bruyne 4 132M da Silva Santos Jr. 5 111M Lewandowski 6 120.5M Salah 7 102M Ramses Becker 8 185.5M Mbappé 9 110M ter Stegen 10 113M van Dijk 11 120.5M Mané 12 90.5M Henrique Venancio Casimiro 13 82M Courtois 14 17.5M Neuer
Now is the time to remove the 'M' and 'K' from the values and convert them into 'int' data type.
# Turn texts such as '103.5M', '825K' or '0' into whole euros
def _euro_amount_to_int(amount):
    """Return the euro amount written in ``amount`` as an integer.

    Args:
        amount: Text without the currency sign: a number optionally
            followed by 'M' (millions) or 'K' (thousands).

    Returns:
        The amount in euros as an int; an empty text counts as 0.

    Raises:
        ValueError: If the numeric part cannot be read as a number.
    """
    # Check for the empty text first: indexing its last character would fail
    if not amount:
        return 0
    scale = {'M': 10**6, 'K': 10**3}.get(amount[-1])
    if scale is None:
        return round(float(amount))
    # round() rather than truncation, so that floating-point noise in the
    # product cannot shave one euro off the result
    return round(float(amount[:-1]) * scale)


fdc['Value'] = fdc['Value'].apply(_euro_amount_to_int)
# Keep the explicit cast so the column ends up with a plain integer dtype
fdc['Value'] = fdc['Value'].astype(int)
fdc['Value'].head(10)
0 103500000 1 63000000 2 120000000 3 129000000 4 132000000 5 111000000 6 120500000 7 102000000 8 185500000 9 110000000 Name: Value, dtype: int32
It's time to rename the column to something more descriptive
# Label the column with what it holds and its currency: 'Value' becomes 'Market Price(€)'
fdc = fdc.rename({'Value': 'Market Price(€)'}, axis='columns')
fdc['Market Price(€)'].sample(3)
1302 11500000 255 35500000 17039 375000 Name: Market Price(€), dtype: int32
Let's verify the player's salaries
fdc['Wage'].dtype
dtype('O')
fdc['Wage'].unique()
array(['€560K', '€220K', '€125K', '€370K', '€270K', '€240K', '€250K', '€160K', '€260K', '€210K', '€310K', '€130K', '€350K', '€300K', '€190K', '€145K', '€195K', '€100K', '€140K', '€290K', '€82K', '€110K', '€230K', '€155K', '€200K', '€165K', '€95K', '€170K', '€105K', '€115K', '€150K', '€135K', '€55K', '€58K', '€81K', '€34K', '€120K', '€59K', '€90K', '€65K', '€56K', '€71K', '€18K', '€75K', '€47K', '€20K', '€84K', '€86K', '€74K', '€78K', '€27K', '€68K', '€85K', '€25K', '€46K', '€83K', '€54K', '€79K', '€175K', '€43K', '€49K', '€45K', '€38K', '€41K', '€39K', '€23K', '€51K', '€50K', '€87K', '€30K', '€14K', '€69K', '€31K', '€64K', '€53K', '€35K', '€21K', '€28K', '€17K', '€33K', '€70K', '€32K', '€89K', '€26K', '€40K', '€76K', '€72K', '€48K', '€36K', '€29K', '€60K', '€16K', '€37K', '€24K', '€52K', '€0', '€62K', '€73K', '€63K', '€19K', '€1K', '€66K', '€80K', '€12K', '€2K', '€42K', '€13K', '€900', '€57K', '€77K', '€61K', '€22K', '€67K', '€44K', '€15K', '€11K', '€8K', '€850', '€10K', '€88K', '€500', '€7K', '€6K', '€9K', '€5K', '€700', '€950', '€750', '€3K', '€650', '€600', '€4K', '€800', '€550'], dtype=object)
missing_values = fdc['Wage'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
Time to correct the salaries values by removing the '€' from the values
# Strip the '€' sign so that only the amount and its optional 'K' suffix remain
fdc['Wage'] = fdc['Wage'].str.replace('€', '')
print(fdc['Wage'])
0 560K 1 220K 2 125K 3 370K 4 270K ... 18974 1K 18975 500 18976 500 18977 2K 18978 1K Name: Wage, Length: 18979, dtype: object
# Convert the values to whole euros.
# Amounts ending in 'K' are thousands of euros; the others are already euros.
# The numbers are parsed directly instead of being rewritten into expressions
# for pd.eval, which would evaluate file content as code once per row.
wage_text = fdc['Wage']
is_thousands = wage_text.str.endswith('K')
wage_number = wage_text.str.rstrip('K').astype(float)
fdc['Wage'] = wage_number.mask(is_thousands, wage_number * 1000).round().astype(int)
print(fdc['Wage'])
0 560000 1 220000 2 125000 3 370000 4 270000 ... 18974 1000 18975 500 18976 500 18977 2000 18978 1000 Name: Wage, Length: 18979, dtype: int32
Now it´s time to analyze the values from the column 'Release Clause'
fdc['Release Clause'].dtype
dtype('O')
missing_values = fdc['Release Clause'].isnull().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
fdc['Release Clause'].unique()
array(['€138.4M', '€75.9M', '€159.4M', ..., '€59K', '€35K', '€64K'], dtype=object)
We are going to replace the '€' and transform the values with the 'M' or 'K'
# Convert the values to strings
fdc['Release Clause'] = fdc['Release Clause'].astype(str)
# Drop the euro sign (matched literally) so that only the number and its
# optional M/K suffix remain
fdc['Release Clause'] = fdc['Release Clause'].str.replace('€', '', regex=False)


# Turn texts such as '138.4M', '59K' or '0' into whole euros
def _euro_amount_to_int(amount):
    """Return the euro amount written in ``amount`` as an integer.

    Args:
        amount: Text without the currency sign: a number optionally
            followed by 'M' (millions) or 'K' (thousands).

    Returns:
        The amount in euros as an int; an empty text counts as 0.

    Raises:
        ValueError: If the numeric part cannot be read as a number.
    """
    # Check for the empty text first: indexing its last character would fail
    if not amount:
        return 0
    scale = {'M': 10**6, 'K': 10**3}.get(amount[-1])
    if scale is None:
        return round(float(amount))
    # round() rather than truncation, so that floating-point noise in the
    # product cannot shave one euro off the result
    return round(float(amount[:-1]) * scale)


fdc['Release Clause'] = fdc['Release Clause'].apply(_euro_amount_to_int)
# Keep the explicit cast so the column ends up with a plain integer dtype
fdc['Release Clause'] = fdc['Release Clause'].astype(int)
fdc['Release Clause'].head(10)
0 138400000 1 75900000 2 159400000 3 161000000 4 166500000 5 132000000 6 144300000 7 120300000 8 203100000 9 147700000 Name: Release Clause, dtype: int32
# Make the currency explicit in the column label: 'Release Clause' becomes 'Release Clause(€)'
fdc = fdc.rename({'Release Clause': 'Release Clause(€)'}, axis='columns')
fdc['Release Clause(€)'].head()
0 138400000 1 75900000 2 159400000 3 161000000 4 166500000 Name: Release Clause(€), dtype: int32
The following values in the columns all belong to stats; let's check their data types and make sure there are no missing values. The columns we are working with are:
'Attacking','Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys','Skill', 'Dribbling', 'Curve', 'FK Accuracy', 'Long Passing', 'Ball Control', 'Movement', 'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance', 'Power', 'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots', 'Mentality', 'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure', 'Defending', 'Marking', 'Standing Tackle', 'Sliding Tackle', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning','GK Reflexes', 'Total Stats', 'Base Stats'
fdc['Attacking'].dtype
dtype('int64')
missing_values = fdc['Attacking'].isna().sum()
print("Number of missing values: ", missing_values)
Number of missing values: 0
fdc['Attacking'].unique()
array([429, 437, 95, 407, 408, 423, 392, 114, 118, 316, 410, 349, 86, 119, 426, 374, 411, 360, 328, 383, 405, 123, 420, 224, 388, 397, 425, 373, 365, 371, 311, 396, 345, 399, 400, 78, 280, 330, 403, 379, 380, 94, 394, 419, 339, 293, 344, 390, 84, 359, 372, 377, 346, 389, 386, 308, 277, 382, 368, 402, 292, 298, 366, 352, 363, 322, 361, 91, 364, 341, 385, 355, 305, 321, 262, 93, 375, 387, 356, 253, 285, 391, 353, 367, 90, 295, 378, 256, 338, 331, 69, 105, 85, 358, 343, 319, 271, 113, 350, 406, 340, 393, 247, 334, 351, 342, 302, 329, 354, 98, 301, 115, 384, 208, 72, 376, 92, 258, 362, 74, 417, 99, 263, 88, 279, 101, 395, 100, 81, 87, 55, 310, 82, 117, 409, 318, 323, 248, 315, 381, 348, 327, 309, 130, 283, 336, 369, 106, 252, 320, 290, 370, 126, 251, 108, 335, 297, 284, 80, 75, 357, 270, 97, 306, 337, 73, 286, 325, 326, 324, 333, 103, 259, 273, 313, 296, 61, 312, 347, 401, 304, 278, 83, 43, 314, 291, 264, 272, 317, 231, 250, 268, 54, 261, 255, 70, 281, 265, 299, 287, 68, 294, 77, 219, 300, 269, 332, 289, 288, 107, 282, 122, 244, 89, 112, 274, 276, 307, 229, 96, 109, 76, 125, 102, 239, 227, 241, 257, 254, 228, 233, 124, 215, 246, 110, 245, 214, 242, 266, 104, 66, 303, 260, 63, 230, 275, 50, 238, 249, 111, 67, 240, 221, 237, 56, 235, 234, 243, 267, 232, 203, 223, 64, 213, 222, 226, 225, 211, 207, 52, 173, 57, 217, 236, 71, 204, 216, 199, 59, 189, 60, 194, 116, 205, 201, 193, 65, 192, 209, 218, 128, 210, 79, 45, 206, 162, 220, 49, 197, 202, 212, 58, 190, 181, 51, 62, 200, 198, 195, 191, 131, 185, 42, 180, 182, 196, 188, 169, 187, 178, 53, 183, 184, 186, 165, 172, 47, 171, 176, 159, 46, 179, 175, 167, 174, 161, 170, 177, 164, 134, 168, 163, 166, 158, 150, 143, 48, 152, 160, 148, 151, 157, 154, 141, 146, 147, 149, 156, 153, 138, 145, 142, 139, 155, 144, 136, 137], dtype=int64)
fdc['Crossing'].dtype
dtype('int64')
missing_values = fdc['Crossing'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Crossing'].unique()
array([85, 84, 13, 94, 71, 79, 17, 78, 18, 53, 76, 58, 14, 15, 75, 66, 70, 68, 91, 82, 20, 12, 30, 77, 88, 83, 93, 90, 87, 81, 73, 11, 54, 62, 86, 80, 55, 42, 57, 65, 63, 64, 52, 40, 69, 47, 60, 9, 16, 44, 72, 50, 56, 46, 89, 34, 45, 74, 49, 67, 24, 35, 36, 61, 19, 27, 25, 10, 51, 38, 43, 59, 39, 48, 23, 8, 28, 92, 41, 29, 32, 22, 26, 37, 33, 31, 21, 7, 6], dtype=int64)
fdc['Finishing'].dtype
dtype('int64')
missing_values = fdc['Finishing'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Finishing'].unique()
array([95, 11, 82, 87, 94, 91, 13, 14, 52, 90, 64, 88, 65, 85, 66, 84, 10, 22, 76, 81, 56, 79, 57, 45, 77, 63, 86, 80, 15, 33, 67, 12, 72, 92, 93, 51, 46, 60, 75, 55, 73, 83, 50, 42, 39, 40, 9, 68, 48, 37, 70, 78, 69, 8, 53, 89, 25, 62, 71, 74, 44, 26, 19, 32, 18, 61, 58, 30, 54, 36, 29, 16, 38, 59, 27, 34, 47, 20, 31, 49, 43, 41, 28, 5, 7, 6, 21, 17, 35, 23, 24, 4, 3], dtype=int64)
fdc['Heading Accuracy'].dtype
dtype('int64')
missing_values = fdc['Heading Accuracy'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Heading Accuracy'].unique()
array([70, 90, 15, 55, 62, 85, 59, 19, 73, 11, 87, 84, 80, 13, 25, 91, 92, 78, 46, 54, 72, 64, 14, 10, 61, 58, 83, 38, 69, 51, 67, 86, 75, 68, 16, 81, 21, 79, 53, 65, 82, 12, 42, 48, 88, 66, 76, 74, 52, 23, 40, 49, 60, 44, 20, 37, 71, 17, 45, 77, 50, 63, 43, 39, 57, 56, 47, 24, 18, 31, 28, 35, 34, 41, 36, 93, 7, 30, 89, 8, 26, 33, 27, 32, 22, 29, 9, 5, 6], dtype=int64)
fdc['Short Passing'].dtype
dtype('int64')
missing_values = fdc['Short Passing'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Short Passing'].unique()
array([91, 82, 43, 94, 87, 84, 45, 83, 61, 79, 85, 33, 55, 86, 57, 81, 42, 74, 93, 88, 30, 65, 89, 77, 32, 50, 80, 78, 90, 69, 40, 92, 75, 73, 34, 76, 35, 70, 37, 23, 44, 38, 48, 26, 60, 25, 46, 28, 24, 36, 51, 17, 18, 39, 71, 67, 27, 72, 66, 20, 31, 68, 29, 11, 64, 62, 41, 63, 19, 54, 16, 22, 49, 59, 56, 14, 58, 15, 21, 52, 53, 12, 47, 13, 8, 7], dtype=int64)
fdc['Volleys'].dtype
dtype('int64')
missing_values = fdc['Volleys'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Volleys'].unique()
array([88, 86, 13, 82, 87, 89, 79, 20, 83, 14, 45, 75, 63, 12, 11, 69, 67, 56, 18, 85, 62, 70, 32, 40, 47, 81, 44, 84, 78, 76, 90, 49, 42, 64, 57, 60, 8, 72, 71, 59, 74, 80, 73, 37, 31, 38, 61, 10, 77, 68, 58, 66, 30, 33, 65, 27, 51, 15, 16, 50, 43, 35, 24, 17, 34, 28, 9, 39, 52, 46, 22, 19, 53, 55, 48, 54, 23, 5, 41, 25, 21, 36, 26, 29, 6, 7, 4, 3], dtype=int64)
fdc['Skill'].dtype
dtype('int64')
missing_values = fdc['Skill'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Skill'].unique()
array([470, 414, 109, 441, 448, 407, 406, 138, 394, 144, 363, 391, 369, 110, 160, 404, 381, 397, 387, 336, 400, 436, 157, 395, 100, 262, 427, 432, 429, 380, 426, 411, 358, 351, 433, 365, 403, 98, 276, 386, 383, 99, 413, 115, 341, 375, 143, 359, 309, 435, 330, 325, 355, 96, 420, 412, 388, 319, 269, 399, 106, 402, 425, 297, 312, 418, 372, 352, 439, 409, 349, 116, 371, 428, 104, 345, 430, 295, 405, 440, 422, 252, 401, 417, 396, 233, 377, 251, 382, 368, 84, 356, 342, 410, 271, 350, 83, 126, 103, 370, 362, 343, 328, 344, 415, 378, 275, 416, 119, 127, 373, 384, 77, 393, 348, 317, 408, 376, 300, 220, 89, 107, 334, 72, 390, 419, 305, 289, 398, 281, 354, 102, 339, 385, 139, 292, 97, 421, 91, 105, 73, 335, 101, 340, 337, 306, 113, 122, 123, 302, 364, 250, 347, 333, 323, 389, 361, 322, 86, 367, 258, 392, 92, 90, 310, 331, 338, 121, 260, 82, 245, 324, 346, 379, 299, 284, 320, 283, 108, 278, 286, 296, 315, 274, 88, 114, 264, 288, 94, 326, 366, 117, 360, 424, 93, 318, 124, 125, 327, 249, 75, 332, 303, 374, 239, 272, 357, 353, 266, 321, 277, 268, 314, 294, 240, 95, 227, 112, 118, 263, 280, 140, 282, 81, 329, 201, 87, 221, 257, 285, 316, 287, 307, 270, 256, 313, 311, 228, 247, 254, 130, 80, 85, 232, 293, 298, 301, 213, 168, 291, 216, 290, 308, 261, 171, 267, 242, 219, 248, 237, 243, 279, 246, 273, 78, 255, 253, 230, 74, 210, 235, 231, 208, 259, 304, 241, 199, 224, 206, 61, 129, 222, 223, 141, 149, 131, 225, 71, 189, 265, 226, 70, 179, 192, 134, 209, 173, 234, 76, 236, 212, 69, 218, 120, 177, 238, 204, 229, 215, 165, 211, 195, 64, 202, 194, 190, 193, 203, 67, 214, 79, 205, 244, 196, 111, 187, 65, 200, 63, 198, 217, 135, 68, 184, 167, 148, 207, 142, 185, 133, 191, 181, 197, 43, 66, 175, 182, 51, 180, 169, 186, 137, 188, 176, 132, 60, 178, 147, 163, 183, 162, 152, 170, 172, 174, 159, 161, 154, 153, 128, 62, 166, 53, 155, 56, 151, 164, 158, 46, 150, 59, 55, 58, 156, 146, 52, 136, 54, 47, 48, 145, 40, 57], dtype=int64)
fdc['Dribbling'].dtype
dtype('int64')
missing_values = fdc['Dribbling'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Dribbling'].unique()
array([96, 88, 12, 95, 85, 90, 27, 92, 21, 70, 91, 69, 13, 30, 87, 65, 79, 83, 23, 80, 18, 93, 77, 63, 76, 16, 59, 81, 11, 84, 10, 75, 78, 55, 15, 86, 66, 67, 28, 57, 64, 82, 62, 19, 53, 72, 50, 26, 43, 89, 73, 20, 14, 68, 71, 74, 22, 54, 56, 61, 9, 24, 60, 25, 8, 17, 47, 58, 46, 42, 51, 52, 49, 44, 35, 48, 39, 29, 40, 45, 34, 31, 33, 38, 41, 32, 7, 37, 36, 5, 6], dtype=int64)
fdc['Curve'].dtype
dtype('int64')
missing_values = fdc['Curve'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Curve'].unique()
array([93, 81, 13, 85, 88, 79, 83, 19, 18, 60, 76, 63, 14, 74, 77, 49, 15, 80, 12, 28, 86, 84, 82, 61, 71, 11, 66, 16, 89, 70, 21, 46, 78, 67, 58, 65, 48, 34, 90, 59, 55, 87, 62, 9, 56, 36, 30, 32, 73, 69, 68, 75, 45, 10, 72, 64, 41, 23, 47, 20, 51, 25, 44, 17, 54, 57, 53, 33, 40, 50, 39, 35, 52, 42, 37, 43, 26, 31, 92, 91, 29, 94, 27, 38, 22, 24, 8, 6, 7, 5, 4], dtype=int64)
fdc['FK Accuracy'].dtype
dtype('int64')
missing_values = fdc['FK Accuracy'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['FK Accuracy'].unique()
array([94, 76, 14, 83, 89, 85, 69, 18, 63, 12, 70, 64, 74, 20, 11, 73, 49, 61, 88, 68, 28, 79, 84, 48, 67, 38, 87, 53, 65, 15, 31, 78, 82, 10, 51, 59, 19, 47, 52, 57, 43, 13, 77, 54, 75, 86, 55, 30, 62, 32, 58, 93, 8, 66, 71, 81, 92, 44, 17, 60, 40, 16, 72, 46, 35, 45, 29, 21, 56, 80, 24, 22, 39, 42, 26, 41, 9, 37, 27, 50, 33, 25, 36, 91, 34, 23, 7, 6, 90, 5], dtype=int64)
fdc['Long Passing'].dtype
dtype('int64')
missing_values = fdc['Long Passing'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Long Passing'].unique()
array([91, 77, 40, 93, 81, 70, 75, 44, 63, 86, 71, 84, 35, 59, 73, 83, 64, 69, 79, 82, 68, 89, 76, 80, 87, 37, 65, 36, 50, 53, 78, 47, 74, 48, 31, 85, 24, 55, 90, 54, 62, 32, 49, 66, 67, 51, 28, 46, 52, 72, 56, 41, 45, 22, 88, 61, 33, 12, 60, 17, 27, 29, 23, 38, 16, 58, 34, 25, 39, 21, 30, 42, 43, 57, 20, 26, 18, 19, 13, 15, 11, 14, 9, 10, 5, 8], dtype=int64)
fdc['Ball Control'].dtype
dtype('int64')
missing_values = fdc['Ball Control'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Ball Control'].unique()
array([96, 92, 30, 95, 88, 89, 90, 77, 79, 23, 46, 83, 80, 85, 94, 40, 84, 16, 74, 91, 87, 82, 78, 19, 61, 22, 34, 38, 81, 25, 86, 76, 69, 28, 93, 75, 35, 60, 63, 73, 18, 71, 15, 21, 72, 14, 65, 20, 24, 27, 70, 33, 17, 62, 64, 9, 68, 67, 32, 26, 66, 52, 11, 57, 58, 29, 12, 37, 10, 36, 13, 31, 55, 59, 39, 54, 56, 48, 44, 51, 50, 47, 49, 53, 5, 42, 8, 45, 43, 41, 7], dtype=int64)
fdc['Movement'].dtype
dtype('int64')
missing_values = fdc['Movement'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Movement'].unique()
array([451, 431, 307, 398, 453, 407, 460, 268, 458, 254, 354, 343, 284, 286, 388, 378, 424, 464, 420, 399, 437, 322, 367, 272, 328, 448, 332, 425, 435, 391, 434, 400, 331, 349, 429, 416, 312, 326, 418, 419, 417, 386, 321, 409, 374, 304, 403, 351, 401, 365, 414, 292, 323, 299, 433, 350, 348, 413, 320, 281, 427, 353, 364, 410, 428, 316, 381, 442, 375, 288, 395, 385, 251, 319, 444, 383, 298, 411, 412, 415, 393, 397, 443, 423, 387, 422, 327, 390, 362, 352, 406, 277, 361, 421, 396, 384, 450, 338, 363, 359, 287, 297, 430, 382, 377, 380, 438, 449, 257, 371, 339, 341, 404, 345, 394, 295, 246, 265, 258, 366, 294, 314, 266, 405, 218, 337, 267, 220, 376, 309, 283, 426, 347, 244, 240, 291, 340, 250, 305, 290, 317, 334, 355, 333, 389, 330, 318, 441, 402, 344, 335, 219, 264, 408, 274, 373, 379, 256, 229, 392, 372, 360, 262, 346, 278, 248, 368, 279, 269, 336, 342, 236, 370, 243, 315, 249, 227, 329, 239, 369, 223, 282, 358, 271, 313, 270, 356, 263, 184, 311, 436, 432, 221, 301, 190, 259, 308, 235, 260, 217, 275, 285, 210, 234, 276, 310, 447, 180, 446, 300, 303, 209, 247, 252, 231, 357, 226, 238, 280, 440, 237, 245, 296, 325, 273, 306, 196, 242, 199, 178, 222, 445, 324, 293, 302, 289, 214, 192, 206, 225, 197, 241, 230, 188, 202, 208, 203, 216, 213, 224, 439, 212, 232, 253, 228, 189, 204, 205, 207, 198, 168, 255, 215, 194, 191, 185, 145, 261, 156, 201, 193, 181, 233, 195, 183, 152, 211, 160, 173, 170, 176, 147, 143, 159, 187, 169, 200, 165, 163, 177, 179, 167, 139, 162, 175, 155, 166, 172, 174, 154, 164, 182, 150, 186, 146, 138, 157, 137, 135, 171, 158, 161, 149, 124, 144, 151, 148, 141, 134, 153, 126, 142, 125, 132, 127, 140, 133, 130, 131, 136, 122], dtype=int64)
fdc['Acceleration'].dtype
dtype('int64')
missing_values = fdc['Acceleration'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Acceleration'].unique()
array([91, 87, 43, 77, 94, 56, 96, 38, 72, 95, 60, 42, 54, 79, 89, 64, 66, 51, 73, 57, 80, 86, 85, 78, 40, 82, 76, 65, 68, 90, 48, 46, 88, 70, 83, 84, 93, 52, 74, 92, 55, 58, 59, 67, 81, 62, 44, 71, 69, 50, 53, 45, 49, 75, 41, 61, 63, 35, 47, 34, 36, 37, 39, 30, 97, 31, 33, 32, 27, 28, 26, 29, 25, 17, 19, 24, 15, 23, 21, 20, 22, 16, 18, 13, 14], dtype=int64)
fdc['Sprint Speed'].dtype
dtype('int64')
missing_values = fdc['Sprint Speed'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Sprint Speed'].unique()
array([80, 91, 60, 76, 89, 78, 92, 47, 96, 50, 79, 93, 69, 52, 72, 70, 90, 66, 82, 63, 55, 77, 86, 81, 83, 85, 65, 68, 53, 43, 94, 62, 58, 61, 87, 64, 67, 54, 88, 75, 95, 73, 49, 84, 56, 44, 74, 51, 57, 46, 59, 71, 37, 34, 33, 42, 30, 35, 48, 39, 45, 40, 18, 38, 41, 27, 32, 29, 28, 36, 26, 31, 22, 25, 23, 15, 20, 17, 16, 24, 19, 21, 12, 14], dtype=int64)
fdc['Agility'].dtype
dtype('int64')
missing_values = fdc['Agility'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Agility'].unique()
array([91, 87, 67, 78, 96, 77, 40, 92, 37, 61, 93, 51, 79, 84, 94, 82, 60, 69, 47, 52, 63, 74, 59, 66, 86, 85, 57, 55, 76, 75, 73, 62, 72, 90, 68, 64, 80, 56, 48, 83, 41, 81, 54, 88, 33, 65, 49, 71, 89, 45, 70, 43, 50, 32, 42, 39, 58, 36, 34, 53, 46, 95, 44, 38, 21, 29, 35, 31, 19, 26, 30, 22, 28, 24, 25, 23, 27, 14, 18, 15, 20], dtype=int64)
fdc['Reactions'].dtype
dtype('int64')
missing_values = fdc['Reactions'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Reactions'].unique()
array([94, 95, 88, 91, 93, 92, 86, 89, 87, 84, 90, 83, 85, 82, 81, 79, 80, 74, 75, 78, 77, 73, 76, 71, 70, 68, 72, 66, 69, 65, 67, 64, 59, 60, 62, 63, 61, 58, 57, 56, 50, 54, 53, 55, 52, 32, 49, 48, 45, 51, 46, 47, 37, 34, 44, 40, 38, 43, 41, 35, 42, 33, 39, 31, 36, 30, 24, 29, 28], dtype=int64)
fdc['Balance'].dtype
dtype('int64')
missing_values = fdc['Balance'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Balance'].unique()
array([95, 71, 49, 76, 83, 82, 91, 37, 43, 53, 86, 66, 45, 35, 69, 94, 92, 84, 90, 48, 73, 36, 41, 93, 74, 60, 79, 65, 78, 61, 57, 50, 68, 51, 54, 77, 81, 39, 75, 58, 87, 85, 63, 38, 88, 67, 72, 62, 80, 44, 46, 42, 55, 40, 70, 32, 89, 52, 59, 47, 64, 27, 56, 30, 31, 25, 34, 29, 24, 96, 33, 28, 20, 23, 22, 26, 21, 17, 97, 19, 12, 18], dtype=int64)
fdc['Power'].dtype
dtype('int64')
missing_values = fdc['Power'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Power'].unique()
array([389, 444, 268, 408, 357, 420, 393, 240, 404, 402, 406, 437, 249, 284, 400, 403, 358, 381, 382, 273, 424, 264, 316, 361, 355, 328, 370, 350, 365, 348, 411, 395, 385, 257, 337, 250, 379, 371, 409, 223, 398, 388, 241, 347, 308, 426, 378, 343, 341, 262, 325, 345, 359, 399, 421, 396, 315, 253, 368, 336, 340, 366, 387, 369, 375, 260, 326, 346, 373, 412, 364, 279, 376, 372, 415, 356, 333, 338, 342, 410, 407, 430, 394, 354, 331, 239, 234, 392, 270, 422, 374, 360, 391, 300, 335, 242, 327, 215, 397, 321, 390, 339, 383, 265, 288, 224, 351, 252, 429, 416, 380, 413, 377, 405, 349, 232, 386, 362, 192, 320, 251, 329, 271, 237, 427, 259, 255, 266, 227, 353, 258, 243, 263, 291, 302, 306, 332, 363, 256, 247, 301, 287, 322, 419, 312, 245, 297, 401, 344, 235, 289, 233, 317, 334, 216, 367, 352, 318, 226, 324, 219, 319, 292, 244, 423, 323, 304, 208, 314, 313, 193, 299, 303, 311, 229, 211, 225, 309, 330, 238, 305, 220, 296, 212, 231, 283, 207, 198, 281, 384, 307, 272, 298, 248, 310, 267, 214, 282, 274, 280, 230, 228, 221, 277, 276, 285, 290, 269, 246, 294, 293, 195, 236, 295, 217, 189, 275, 201, 278, 194, 206, 218, 176, 205, 185, 196, 222, 204, 188, 197, 209, 286, 168, 254, 200, 183, 179, 159, 180, 187, 164, 178, 190, 213, 202, 186, 191, 261, 210, 203, 173, 199, 169, 152, 181, 175, 184, 182, 170, 160, 162, 167, 177, 139, 161, 172, 165, 171, 128, 174, 158, 153, 166, 155, 163, 151, 122, 142, 143, 156, 149, 144, 157, 147, 154, 150, 134, 140], dtype=int64)
fdc['Shot Power'].dtype
dtype('int64')
missing_values = fdc['Shot Power'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Shot Power'].unique()
array([86, 94, 59, 91, 80, 89, 64, 66, 81, 84, 88, 56, 68, 79, 78, 71, 82, 70, 55, 76, 61, 83, 51, 52, 90, 87, 62, 72, 77, 74, 50, 57, 58, 85, 60, 75, 67, 65, 93, 46, 54, 69, 41, 73, 40, 53, 95, 43, 63, 42, 48, 31, 44, 37, 49, 39, 45, 38, 47, 30, 33, 25, 34, 36, 28, 27, 32, 26, 35, 23, 22, 29, 20, 24, 21, 18], dtype=int64)
fdc['Jumping'].dtype
dtype('int64')
missing_values = fdc['Jumping'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Jumping'].unique()
array([68, 95, 78, 63, 62, 84, 69, 52, 77, 79, 90, 86, 87, 93, 57, 75, 66, 82, 56, 32, 51, 76, 72, 81, 74, 71, 67, 65, 73, 64, 70, 80, 85, 37, 89, 60, 49, 50, 83, 58, 53, 59, 88, 38, 92, 34, 61, 46, 43, 36, 91, 39, 45, 42, 40, 54, 33, 55, 31, 44, 35, 47, 48, 30, 41, 94, 28, 29, 27, 24, 19, 26, 17, 15, 22], dtype=int64)
fdc['Stamina'].dtype
dtype('int64')
missing_values = fdc['Stamina'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Stamina'].unique()
array([72, 84, 41, 89, 81, 76, 85, 32, 86, 35, 75, 88, 90, 38, 43, 78, 79, 96, 95, 70, 82, 77, 93, 94, 87, 39, 54, 80, 45, 83, 69, 65, 73, 91, 34, 66, 71, 92, 62, 67, 64, 63, 68, 36, 61, 74, 42, 40, 23, 44, 31, 57, 20, 37, 29, 30, 56, 60, 52, 48, 58, 25, 51, 26, 27, 59, 28, 53, 33, 49, 97, 55, 50, 46, 24, 21, 22, 15, 47, 17, 19, 16, 18, 14, 12], dtype=int64)
fdc['Strength'].dtype
dtype('int64')
missing_values = fdc['Strength'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Strength'].unique()
array([69, 78, 74, 50, 86, 75, 76, 92, 70, 91, 80, 85, 65, 72, 67, 60, 84, 71, 94, 63, 73, 62, 54, 81, 64, 87, 58, 43, 77, 66, 53, 89, 68, 46, 44, 61, 79, 88, 59, 83, 55, 34, 82, 95, 56, 37, 90, 57, 93, 49, 39, 51, 52, 40, 48, 41, 47, 35, 42, 33, 45, 32, 38, 30, 31, 36, 29, 27, 24, 28, 16, 97, 96, 20, 25, 26, 23], dtype=int64)
fdc['Long Shots'].dtype
dtype('int64')
missing_values = fdc['Long Shots'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Long Shots'].unique()
array([94, 93, 12, 91, 84, 85, 14, 79, 10, 64, 78, 81, 17, 16, 65, 87, 18, 86, 19, 15, 82, 63, 74, 76, 47, 89, 70, 90, 77, 13, 49, 54, 88, 80, 53, 58, 51, 73, 66, 75, 83, 30, 46, 35, 71, 61, 72, 69, 43, 48, 62, 41, 60, 11, 26, 57, 59, 68, 67, 7, 27, 56, 20, 52, 92, 50, 22, 40, 39, 44, 31, 42, 9, 6, 55, 28, 23, 38, 24, 25, 34, 36, 29, 4, 8, 45, 33, 37, 21, 32, 5], dtype=int64)
fdc['Mentality'].dtype
dtype('int64')
missing_values = fdc['Mentality'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Mentality'].unique()
array([347, 353, 140, 408, 356, 391, 376, 341, 171, 358, 396, 122, 188, 363, 414, 332, 386, 379, 348, 172, 382, 123, 294, 378, 313, 371, 331, 412, 345, 377, 161, 306, 387, 339, 135, 360, 138, 369, 359, 170, 361, 321, 397, 394, 385, 366, 162, 337, 362, 344, 319, 315, 144, 336, 340, 373, 398, 324, 300, 338, 384, 139, 364, 372, 134, 354, 342, 308, 322, 383, 263, 149, 304, 367, 357, 390, 291, 279, 310, 388, 375, 349, 351, 365, 133, 334, 303, 380, 153, 392, 169, 318, 350, 352, 401, 302, 325, 346, 132, 399, 281, 335, 403, 307, 368, 141, 126, 328, 245, 131, 320, 127, 421, 400, 137, 374, 305, 92, 316, 311, 120, 389, 145, 355, 148, 343, 142, 130, 121, 157, 329, 323, 115, 150, 298, 154, 317, 295, 100, 301, 326, 327, 197, 273, 287, 370, 290, 103, 393, 312, 297, 89, 271, 299, 124, 333, 258, 309, 158, 272, 118, 314, 330, 292, 404, 101, 280, 277, 296, 248, 285, 278, 109, 93, 146, 286, 284, 288, 105, 152, 111, 160, 119, 156, 95, 99, 238, 104, 266, 276, 275, 265, 106, 254, 293, 282, 168, 260, 136, 102, 267, 113, 289, 96, 270, 176, 164, 128, 268, 283, 244, 182, 243, 240, 116, 264, 112, 274, 261, 114, 269, 110, 257, 179, 155, 252, 262, 151, 247, 108, 256, 117, 249, 253, 231, 159, 163, 84, 251, 97, 91, 75, 147, 129, 230, 242, 250, 259, 125, 381, 77, 175, 82, 88, 90, 165, 83, 195, 87, 246, 255, 85, 94, 226, 216, 236, 220, 107, 241, 228, 198, 239, 225, 181, 233, 219, 166, 183, 98, 237, 235, 86, 229, 217, 143, 232, 209, 234, 224, 206, 227, 222, 80, 78, 186, 221, 173, 214, 187, 79, 68, 167, 81, 218, 212, 199, 210, 74, 223, 208, 213, 201, 72, 215, 202, 205, 203, 204, 190, 76, 211, 207, 192, 70, 194, 196, 189, 66, 193, 200, 67, 191, 184, 71, 64, 65, 69, 177, 63, 73, 51, 58, 180, 185, 174, 60, 55, 178, 62, 50, 59], dtype=int64)
fdc['Aggression'].dtype
dtype('int64')
missing_values = fdc['Aggression'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Aggression'].unique()
array([44, 63, 34, 76, 51, 81, 27, 62, 43, 83, 75, 91, 23, 29, 90, 65, 59, 89, 48, 38, 25, 87, 54, 60, 73, 74, 69, 85, 70, 86, 32, 40, 31, 77, 84, 80, 78, 79, 71, 56, 42, 30, 61, 58, 28, 82, 46, 52, 36, 92, 55, 35, 67, 37, 72, 57, 50, 64, 39, 47, 20, 68, 15, 66, 33, 93, 88, 22, 24, 45, 17, 18, 26, 21, 11, 41, 53, 19, 12, 49, 94, 16, 95, 13, 14, 96, 10, 9], dtype=int64)
fdc['Interceptions'].dtype
dtype('int64')
missing_values = fdc['Interceptions'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Interceptions'].unique()
array([40, 29, 19, 66, 36, 49, 55, 11, 38, 22, 90, 35, 87, 15, 30, 39, 88, 24, 91, 82, 42, 27, 41, 79, 74, 58, 20, 85, 48, 83, 64, 21, 50, 81, 78, 28, 86, 26, 34, 52, 37, 80, 25, 56, 23, 47, 45, 77, 84, 44, 53, 18, 46, 72, 61, 89, 54, 63, 65, 73, 16, 32, 76, 59, 13, 70, 31, 69, 33, 17, 75, 68, 60, 51, 71, 12, 57, 10, 43, 67, 14, 9, 62, 8, 7, 6, 4, 5, 3], dtype=int64)
fdc['Positioning'].dtype
dtype('int64')
missing_values = fdc['Positioning'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Positioning'].unique()
array([93, 95, 11, 88, 87, 94, 91, 13, 47, 92, 72, 12, 90, 73, 80, 85, 20, 35, 76, 89, 83, 77, 54, 70, 86, 16, 28, 14, 84, 78, 10, 75, 52, 71, 81, 64, 56, 15, 82, 79, 44, 30, 59, 7, 68, 38, 48, 67, 24, 26, 34, 69, 74, 32, 66, 62, 65, 51, 18, 31, 9, 25, 49, 55, 63, 27, 61, 17, 39, 58, 29, 50, 40, 19, 8, 42, 60, 57, 37, 45, 43, 53, 5, 4, 36, 6, 46, 41, 23, 22, 33, 21, 3, 2], dtype=int64)
fdc['Vision'].dtype
dtype('int64')
missing_values = fdc['Vision'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Vision'].unique()
array([95, 82, 65, 94, 90, 79, 84, 66, 80, 70, 85, 44, 87, 71, 83, 41, 52, 86, 68, 50, 77, 48, 88, 30, 61, 74, 59, 73, 72, 64, 91, 78, 63, 57, 89, 62, 56, 69, 42, 67, 27, 76, 81, 55, 75, 60, 49, 45, 58, 22, 53, 46, 25, 43, 51, 40, 93, 33, 31, 34, 35, 39, 47, 21, 32, 28, 37, 36, 38, 54, 24, 23, 14, 11, 15, 26, 19, 18, 12, 20, 17, 10, 29, 13, 16, 9], dtype=int64)
fdc['Penalties'].dtype
dtype('int64')
missing_values = fdc['Penalties'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Penalties'].unique()
array([75, 84, 11, 92, 88, 83, 23, 70, 25, 62, 71, 66, 27, 47, 69, 54, 44, 86, 17, 90, 33, 87, 73, 60, 55, 68, 91, 72, 50, 78, 18, 82, 40, 29, 45, 43, 64, 24, 59, 46, 56, 81, 67, 49, 61, 74, 58, 63, 79, 38, 80, 32, 20, 76, 77, 41, 19, 26, 85, 21, 52, 34, 53, 65, 57, 16, 42, 89, 15, 13, 14, 22, 51, 37, 9, 48, 12, 31, 36, 39, 10, 30, 35, 28, 8, 7, 6], dtype=int64)
fdc['Composure'].dtype
dtype('int64')
missing_values = fdc['Composure'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Composure'].unique()
array([96, 95, 68, 91, 93, 88, 90, 65, 84, 70, 66, 80, 85, 69, 82, 89, 81, 87, 83, 86, 67, 92, 94, 57, 78, 79, 75, 45, 61, 76, 58, 62, 77, 74, 59, 55, 48, 40, 64, 73, 39, 71, 72, 63, 60, 52, 53, 56, 44, 54, 41, 32, 49, 46, 31, 51, 50, 25, 18, 38, 30, 24, 21, 36, 33, 26, 23, 47, 22, 28, 34, 35, 37, 43, 27, 12, 42, 17, 29, 13, 19, 14, 16, 20, 15], dtype=int64)
fdc['Defending'].dtype
dtype('int64')
missing_values = fdc['Defending'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Defending'].unique()
array([ 91, 84, 57, 186, 94, 96, 122, 50, 100, 48, 272, 259, 54, 38, 89, 263, 83, 147, 264, 245, 120, 52, 130, 267, 205, 162, 105, 241, 148, 248, 266, 194, 258, 117, 166, 56, 249, 92, 45, 214, 140, 99, 150, 59, 251, 262, 243, 195, 160, 40, 114, 236, 244, 231, 80, 123, 253, 132, 103, 257, 261, 98, 78, 209, 229, 230, 60, 101, 206, 242, 138, 61, 256, 171, 260, 226, 224, 44, 131, 113, 240, 77, 232, 225, 109, 228, 247, 93, 121, 238, 111, 128, 188, 173, 250, 255, 41, 144, 239, 217, 106, 165, 246, 235, 126, 118, 203, 234, 135, 215, 175, 192, 108, 39, 33, 151, 156, 174, 47, 216, 237, 102, 227, 161, 233, 67, 213, 75, 212, 36, 254, 196, 88, 81, 134, 53, 155, 223, 43, 125, 46, 51, 137, 71, 95, 35, 208, 110, 170, 87, 107, 55, 204, 177, 69, 152, 163, 37, 181, 252, 159, 133, 124, 207, 82, 97, 65, 42, 79, 104, 211, 129, 49, 157, 153, 185, 189, 146, 86, 112, 73, 127, 31, 220, 164, 191, 219, 139, 64, 183, 66, 197, 90, 218, 34, 72, 221, 222, 142, 63, 136, 179, 85, 169, 180, 74, 210, 62, 187, 145, 198, 184, 199, 32, 30, 58, 172, 178, 116, 176, 70, 202, 141, 115, 193, 149, 29, 201, 167, 168, 182, 119, 190, 200, 76, 143, 158, 154, 68, 28, 27, 25, 24, 26, 23, 20, 21], dtype=int64)
fdc['Marking'].dtype
dtype('int64')
missing_values = fdc['Marking'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Marking'].unique()
array([32, 28, 27, 68, 35, 38, 15, 34, 25, 93, 42, 84, 20, 17, 47, 85, 30, 89, 82, 29, 56, 91, 72, 59, 79, 49, 83, 86, 50, 60, 94, 41, 57, 78, 63, 88, 90, 9, 58, 74, 39, 92, 45, 36, 44, 87, 70, 76, 53, 80, 67, 77, 12, 48, 55, 75, 81, 11, 64, 69, 14, 24, 52, 65, 19, 31, 13, 10, 66, 71, 54, 46, 22, 40, 18, 51, 37, 43, 61, 26, 73, 21, 7, 33, 62, 16, 23, 8, 6, 5, 4, 3], dtype=int64)
fdc['Standing Tackle'].dtype
dtype('int64')
missing_values = fdc['Standing Tackle'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Standing Tackle'].unique()
array([35, 32, 12, 65, 30, 42, 43, 19, 34, 13, 93, 88, 18, 10, 24, 29, 53, 90, 84, 48, 15, 36, 89, 27, 73, 54, 41, 83, 59, 67, 87, 64, 14, 55, 75, 45, 33, 57, 21, 82, 50, 86, 80, 79, 31, 46, 85, 40, 44, 56, 20, 70, 76, 81, 71, 16, 68, 37, 38, 78, 39, 77, 11, 74, 28, 49, 47, 72, 61, 51, 22, 17, 52, 63, 23, 60, 25, 26, 9, 62, 58, 66, 69, 7, 8, 6, 5], dtype=int64)
fdc['Sliding Tackle'].dtype
dtype('int64')
missing_values = fdc['Sliding Tackle'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Sliding Tackle'].unique()
array([24, 18, 53, 29, 19, 41, 16, 32, 10, 86, 38, 87, 11, 90, 47, 85, 79, 40, 8, 13, 22, 60, 49, 81, 88, 55, 33, 42, 14, 80, 36, 12, 52, 71, 46, 83, 65, 84, 34, 82, 77, 78, 74, 20, 43, 35, 69, 70, 30, 68, 45, 57, 44, 21, 75, 26, 51, 76, 39, 48, 28, 63, 59, 66, 72, 17, 67, 64, 31, 25, 15, 54, 58, 62, 56, 23, 37, 73, 50, 27, 9, 61, 7, 6, 4], dtype=int64)
fdc['Goalkeeping'].dtype
dtype('int64')
missing_values = fdc['Goalkeeping'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Goalkeeping'].unique()
array([ 54, 58, 437, 56, 59, 51, 62, 439, 42, 67, 420, 440, 41, 46, 63, 60, 26, 435, 424, 43, 45, 52, 50, 47, 53, 44, 418, 15, 48, 416, 153, 413, 65, 64, 20, 423, 49, 55, 66, 40, 57, 421, 13, 39, 61, 419, 21, 409, 37, 406, 410, 36, 408, 34, 29, 405, 403, 402, 407, 16, 69, 391, 401, 398, 400, 22, 68, 396, 38, 78, 73, 399, 390, 393, 395, 397, 80, 70, 389, 394, 388, 27, 30, 75, 71, 386, 74, 378, 385, 384, 380, 392, 381, 10, 387, 383, 375, 382, 19, 379, 24, 369, 356, 368, 373, 370, 372, 72, 374, 376, 364, 25, 367, 17, 377, 371, 365, 352, 362, 359, 363, 366, 82, 35, 361, 358, 76, 294, 83, 357, 360, 355, 354, 77, 229, 350, 353, 347, 351, 32, 349, 169, 346, 348, 343, 345, 339, 342, 33, 341, 28, 119, 337, 338, 340, 344, 335, 98, 324, 248, 334, 298, 336, 328, 331, 321, 332, 81, 79, 333, 278, 329, 261, 325, 31, 327, 330, 322, 305, 326, 283, 320, 323, 318, 18, 319, 316, 317, 272, 315, 88, 311, 310, 314, 313, 307, 312, 309, 308, 301, 304, 292, 303, 306, 296, 289, 300, 302, 297, 290, 299, 293, 295, 291, 288, 93, 284, 287, 286, 285, 273, 282, 279, 281, 280, 277, 275, 276, 274, 270, 271, 268, 269, 267, 260, 265, 262, 266, 263, 264, 251, 259, 254, 257, 252, 255, 256, 258, 247, 250, 243, 253, 249, 245, 236, 246, 234, 241, 231], dtype=int64)
fdc['GK Diving'].dtype
dtype('int64')
missing_values = fdc['GK Diving'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['GK Diving'].unique()
array([ 6, 7, 87, 15, 9, 14, 86, 13, 88, 10, 84, 11, 8, 5, 12, 90, 3, 27, 89, 80, 16, 85, 2, 82, 79, 83, 4, 81, 77, 18, 78, 17, 75, 74, 76, 73, 71, 72, 52, 68, 70, 54, 69, 32, 66, 65, 67, 61, 22, 64, 23, 40, 63, 55, 19, 50, 62, 58, 60, 59, 56, 57, 53, 51, 49, 46, 48, 47, 45], dtype=int64)
fdc['GK Handling'].dtype
dtype('int64')
missing_values = fdc['GK Handling'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['GK Handling'].unique()
array([11, 92, 13, 9, 6, 14, 88, 5, 85, 10, 89, 87, 8, 15, 12, 4, 82, 81, 3, 7, 25, 86, 83, 2, 80, 16, 77, 79, 78, 76, 84, 75, 72, 74, 71, 69, 73, 70, 67, 68, 65, 61, 62, 64, 41, 63, 66, 33, 22, 17, 57, 18, 54, 55, 59, 49, 19, 40, 60, 58, 43, 45, 53, 47, 56, 51, 52, 50, 48, 46], dtype=int64)
fdc['GK Kicking'].dtype
dtype('int64')
missing_values = fdc['GK Kicking'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['GK Kicking'].unique()
array([15, 78, 5, 12, 9, 85, 7, 88, 13, 16, 74, 91, 6, 10, 4, 93, 11, 73, 14, 75, 2, 31, 68, 76, 8, 80, 82, 3, 87, 72, 83, 77, 79, 81, 69, 71, 20, 67, 70, 64, 65, 63, 44, 60, 84, 54, 48, 61, 18, 66, 17, 59, 62, 90, 43, 38, 58, 57, 28, 40, 53, 23, 47, 46, 19, 51, 55, 52, 56, 22, 30, 25, 42, 35, 21, 49, 50, 36, 45], dtype=int64)
fdc['GK Positioning'].dtype
dtype('int64')
missing_values = fdc['GK Positioning'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['GK Positioning'].unique()
array([14, 90, 10, 15, 8, 11, 91, 88, 7, 12, 85, 86, 5, 89, 13, 6, 82, 4, 9, 87, 33, 84, 16, 83, 2, 3, 79, 81, 80, 76, 78, 19, 77, 17, 75, 74, 73, 71, 18, 72, 70, 69, 66, 68, 40, 64, 20, 32, 67, 62, 65, 63, 24, 23, 50, 55, 58, 51, 59, 56, 61, 57, 60, 46, 54, 53, 52, 47, 49, 48, 43, 45, 42, 38, 44, 41], dtype=int64)
fdc['GK Reflexes'].dtype
dtype('int64')
missing_values = fdc['GK Reflexes'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['GK Reflexes'].unique()
array([ 8, 11, 90, 13, 10, 14, 89, 6, 12, 88, 7, 9, 15, 5, 3, 37, 85, 86, 4, 16, 82, 83, 84, 87, 78, 80, 20, 18, 79, 81, 19, 77, 17, 2, 74, 71, 76, 73, 75, 72, 69, 46, 66, 51, 70, 34, 67, 23, 68, 45, 65, 21, 59, 54, 47, 61, 64, 63, 62, 60, 58, 56, 57, 55, 53, 50, 52, 49, 48, 44], dtype=int64)
fdc['Total Stats'].dtype
dtype('int64')
missing_values = fdc['Total Stats'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Total Stats'].unique()
array([2231, 2221, 1413, ..., 757, 747, 956], dtype=int64)
fdc['Base Stats'].dtype
dtype('int64')
missing_values = fdc['Base Stats'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['Base Stats'].unique()
array([466, 464, 489, 485, 451, 457, 470, 490, 484, 455, 469, 463, 468, 497, 442, 439, 473, 452, 498, 449, 477, 401, 446, 447, 465, 430, 461, 422, 476, 460, 453, 467, 471, 399, 424, 441, 459, 438, 437, 454, 428, 445, 431, 474, 421, 435, 448, 475, 403, 444, 443, 419, 405, 420, 423, 396, 388, 482, 478, 385, 394, 480, 433, 450, 462, 456, 436, 434, 429, 400, 440, 425, 410, 458, 398, 413, 373, 406, 408, 472, 426, 407, 432, 427, 415, 481, 417, 372, 380, 418, 383, 414, 409, 412, 411, 386, 362, 402, 390, 404, 391, 416, 375, 389, 361, 397, 366, 392, 393, 382, 368, 387, 352, 376, 384, 378, 379, 341, 354, 369, 395, 357, 381, 377, 344, 360, 370, 338, 333, 367, 363, 349, 355, 345, 358, 348, 374, 351, 343, 342, 353, 321, 350, 365, 364, 371, 327, 331, 359, 347, 356, 339, 319, 317, 335, 346, 329, 315, 324, 322, 325, 332, 336, 337, 330, 316, 313, 306, 307, 328, 310, 340, 308, 318, 334, 301, 289, 302, 320, 323, 326, 311, 297, 314, 304, 292, 305, 312, 294, 287, 300, 299, 285, 303, 288, 278, 296, 277, 309, 291, 283, 286, 293, 295, 298, 276, 282, 272, 284, 290, 271, 275, 279, 281, 262, 263, 280, 268, 270, 269, 264, 273, 265, 252, 267, 257, 274, 266, 259, 247, 261, 251, 233, 239, 253, 258, 254, 260, 244, 240, 255, 256, 250, 238, 243, 249, 248, 245, 241, 232], dtype=int64)
Player's weak-foot rating (out of 5). The rating is expressed in stars; for future operations we remove the stars and keep only the numbers.
fdc['W/F'].dtype
dtype('O')
fdc['W/F'].unique()
array(['4 ★', '3 ★', '5 ★', '2 ★', '1 ★'], dtype=object)
# If we want to do some calculations we need to remove the stars
fdc['W/F'] = fdc['W/F'].str.replace('★', '')
fdc['W/F'].unique()
array(['4 ', '3 ', '5 ', '2 ', '1 '], dtype=object)
# Remove the whitespace
fdc['W/F'] = fdc['W/F'].str.strip()
fdc['W/F'].unique()
array(['4', '3', '5', '2', '1'], dtype=object)
SM refers to 'Skill Move'. It's a rating based on stars: the more stars, the better the player's skill moves.
fdc['SM'].dtype
dtype('O')
fdc['SM'].unique()
array(['4★', '5★', '1★', '2★', '3★'], dtype=object)
For this column, I am going to replace each numeric rating with the corresponding number of star characters.
# Replace the stars
fdc['SM'] = fdc['SM'].str.replace('★','').astype(int)
fdc['SM'].dtype
dtype('int32')
# Using a function to iterate the rows and replace the numbers with stars
def replace_num(value):
    """Return the star string that represents a numeric rating.

    Ratings 2, 3, 4 and 5 are rendered as that many '★' characters.
    Any other value (including 1) is rendered as a single '★', which is
    the same catch-all behaviour as the original if/elif/else chain.

    Parameters
    ----------
    value : number
        The rating to convert; compared by equality against 2-5.

    Returns
    -------
    str
        A string made only of '★' characters, with no whitespace.
    """
    if value in (2, 3, 4, 5):
        # int() makes the repetition count a plain integer even when the
        # rating arrives as a float or a NumPy scalar equal to 2-5.
        return '★' * int(value)
    # Fallback: everything outside 2-5 is shown as one star.
    return '★'
# Convert every numeric skill-move rating to its star string, then strip
# surrounding whitespace from the result in the same expression.
fdc['SM'] = fdc['SM'].apply(replace_num).str.strip()
fdc['SM'].head(10)
0 ★★★★ 1 ★★★★★ 2 ★ 3 ★★★★ 4 ★★★★★ 5 ★★★★ 6 ★★★★ 7 ★ 8 ★★★★★ 9 ★ Name: SM, dtype: object
Refers to the player's attacking work rate.
fdc['A/W'].dtype
dtype('O')
fdc['A/W'].unique()
array(['Medium', 'High', 'Low'], dtype=object)
fdc['A/W'] = fdc['A/W'].str.strip()
missing_values = fdc['A/W'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
Refers to the player's defensive work rate.
fdc['D/W'].dtype
dtype('O')
fdc['D/W'].unique()
array(['Low', 'Medium', 'High'], dtype=object)
fdc['D/W'] = fdc['D/W'].str.strip()
fdc['D/W'].head(10)
0 Low 1 Low 2 Medium 3 High 4 Medium 5 Medium 6 Medium 7 Medium 8 Low 9 Medium Name: D/W, dtype: object
missing_values = fdc['D/W'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
This value refers to the player's international reputation rating
fdc['IR'].dtype
dtype('O')
fdc['IR'].unique()
array(['5 ★', '3 ★', '4 ★', '2 ★', '1 ★'], dtype=object)
For this column, I am going to keep only the numeric part of each rating.
# Drop the star symbol and then the whitespace left around the digit,
# so each international-reputation value becomes a bare digit string.
fdc['IR'] = fdc['IR'].str.replace('★', '').str.strip()
fdc['IR'].unique()
array(['5', '3', '4', '2', '1'], dtype=object)
All the values expressed in these columns are integers. They are ratings on a scale from 0 to 100. I am going to check for null values and inspect the unique values to confirm the type.
missing_values = fdc['PAC'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['PAC'].unique()
array([85, 89, 87, 76, 91, 78, 93, 86, 96, 88, 94, 65, 84, 74, 71, 77, 68, 75, 54, 79, 83, 80, 81, 82, 63, 67, 90, 66, 42, 73, 70, 64, 57, 58, 69, 72, 50, 59, 92, 60, 62, 55, 52, 56, 61, 53, 45, 37, 95, 43, 44, 46, 48, 49, 47, 34, 39, 40, 51, 41, 36, 32, 33, 30, 31, 38, 35, 28, 29, 25], dtype=int64)
missing_values = fdc['SHO'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['SHO'].unique()
array([92, 93, 86, 85, 91, 88, 60, 73, 89, 87, 70, 90, 81, 66, 72, 82, 28, 74, 77, 62, 50, 83, 69, 80, 46, 76, 54, 49, 61, 58, 79, 68, 59, 41, 45, 64, 78, 55, 75, 65, 63, 48, 42, 56, 51, 30, 47, 84, 40, 57, 25, 71, 37, 43, 53, 67, 38, 52, 39, 35, 36, 44, 32, 34, 33, 31, 27, 22, 29, 26, 23, 18, 24, 20, 16, 21, 19, 17], dtype=int64)
fdc['PAS'].unique()
array([91, 81, 78, 93, 86, 85, 88, 71, 80, 76, 74, 77, 79, 84, 73, 55, 83, 87, 72, 75, 58, 89, 82, 68, 67, 64, 66, 59, 69, 90, 65, 53, 63, 62, 70, 56, 42, 54, 61, 57, 60, 48, 52, 47, 46, 44, 45, 50, 51, 49, 43, 36, 38, 40, 41, 35, 39, 34, 33, 37, 30, 32, 29, 31, 26, 28, 25, 27], dtype=int64)
missing_values = fdc['PAS'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['DRI'].unique()
array([95, 89, 90, 88, 94, 85, 91, 71, 72, 86, 73, 81, 84, 92, 80, 68, 77, 87, 60, 83, 78, 64, 67, 79, 69, 66, 65, 70, 82, 75, 61, 74, 54, 76, 49, 63, 59, 62, 56, 55, 50, 57, 58, 52, 53, 51, 48, 47, 46, 39, 44, 43, 36, 40, 45, 41, 37, 34, 35, 42, 32, 38, 31, 33, 30, 29, 28, 25, 27], dtype=int64)
missing_values = fdc['DRI'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['DEF'].unique()
array([38, 35, 52, 64, 36, 43, 45, 51, 39, 91, 44, 86, 48, 57, 40, 88, 33, 81, 63, 47, 53, 89, 71, 37, 80, 68, 85, 61, 90, 83, 49, 56, 58, 82, 87, 79, 66, 55, 78, 32, 50, 76, 77, 70, 75, 41, 29, 73, 65, 59, 84, 54, 72, 46, 42, 69, 34, 31, 30, 74, 24, 62, 25, 20, 26, 60, 27, 23, 28, 67, 22, 19, 18, 21, 17, 15, 16, 12], dtype=int64)
missing_values = fdc['DEF'].isnull().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
fdc['PHY'].unique()
array([65, 77, 90, 78, 59, 82, 75, 91, 76, 88, 86, 85, 73, 67, 79, 63, 83, 89, 66, 69, 72, 64, 71, 81, 87, 68, 84, 80, 55, 70, 44, 62, 51, 57, 60, 58, 56, 74, 52, 61, 53, 45, 50, 54, 47, 48, 49, 42, 37, 40, 39, 43, 38, 46, 41, 34, 35, 36, 31, 32, 33, 29, 28], dtype=int64)
missing_values = fdc['PHY'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 0
Let's experiment and do the same for the 5 columns using a function
cols_to_check = ['SHO', 'PAS', 'DRI', 'DEF', 'PHY']
unique_values = fdc[cols_to_check].astype(str).apply(lambda x: ' '.join(x), axis=1).unique()
unique_values
array(['92 91 95 38 65', '93 81 89 35 77', '92 78 90 52 90', ..., '39 44 46 40 53', '49 41 49 30 44', '22 39 42 45 55'], dtype=object)
cols_to_check = ['SHO', 'PAS', 'DRI', 'DEF', 'PHY']
# Count nulls on the raw columns. Casting to str first would turn NaN into
# the text 'nan', so a null check made after the cast could never find
# anything. The result is the total number of missing cells across the
# five columns.
null_values = fdc[cols_to_check].isnull().sum().sum()
print('Number of null values are: ', null_values)
Number of null values are: 0
This column expresses the number of times a player was searched in FIFA's database. For future operations, we are going to remove the 'K' and correct the values.
fdc['Hits'].dtype
dtype('O')
fdc['Hits'].unique()
array(['771', '562', '150', '207', '595', '248', '246', '120', '1.6K', '130', '321', '189', '175', '96', '118', '216', '212', '154', '205', '202', '339', '408', '103', '332', '86', '173', '161', '396', '1.1K', '433', '242', '206', '177', '1.5K', '198', '459', '117', '119', '209', '84', '187', '165', '203', '65', '336', '126', '313', '124', '145', '538', '182', '101', '45', '377', '99', '194', '403', '414', '593', '374', '245', '3.2K', '266', '299', '309', '215', '265', '211', '112', '337', '70', '159', '688', '116', '63', '144', '123', '71', '224', '113', '168', '61', '89', '137', '278', '75', '148', '176', '197', '264', '214', '247', '402', '440', '1.7K', '2.3K', '171', '320', '657', '87', '259', '200', '255', '253', '196', '60', '97', '85', '169', '256', '132', '239', '166', '121', '109', '32', '46', '122', '48', '527', '199', '282', '51', '1.9K', '642', '155', '323', '288', '497', '509', '79', '49', '270', '511', '80', '128', '115', '156', '204', '143', '140', '152', '220', '134', '225', '94', '74', '135', '142', '50', '77', '40', '107', '193', '179', '34', '64', '453', '57', '81', '28', '78', '133', '43', '425', '88', '42', '36', '233', '376', '210', '444', '100', '263', '98', '29', '160', '39', '257', '6', '310', '138', '62', '293', '285', '362', '66', '69', '58', '21', '20', '131', '38', '406', '68', '108', '110', '93', '512', '443', '306', '352', '422', '585', '346', '178', '841', '76', '394', '72', '172', '44', '407', '230', '367', '295', '157', '243', '56', '111', '326', '679', '18', '92', '59', '25', '184', '53', '12', '90', '55', '73', '11', '566', '180', '83', '262', '17', '26', '31', '280', '359', '213', '297', '387', '480', '381', '677', '486', '8', '244', '129', '388', '275', '319', '2K', '52', '91', '421', '153', '27', '41', '222', '35', '102', '23', '30', '33', '146', '13', '19', '14', '106', '276', '568', '353', '47', '478', '249', '254', '369', '219', '565', '237', '227', '434', '375', '162', '605', '654', '3', '7', '9', '104', '114', '186', 
'446', '756', '22', '139', '500', '67', '147', '149', '16', '82', '54', '37', '15', '1.3K', '3K', '952', '5', '749', '541', '330', '393', '517', '770', '409', '170', '125', '283', '342', '363', '580', '105', '217', '24', '141', '10', '427', '158', '426', '4', '666', '181', '324', '979', '1.4K', '302', '751', '298', '411', '944', '2', '947', '292', '349', '621', '1', '2.8K', '338', '287', '261', '218', '1.8K', '240', '279', '229', '188', '315', '664', '613', '190', '706', '127', '462', '386', '695', '491', '167', '281', '250', '307', '95', '231', '174', '680', '633', '221', '348', '602', '183', '653', '195', '164', '151', '258', '8.4K', '343', '419', '655', '136', '399', '531', '357', '228', '385', '312', '340', '238', '487', '355', '499', '4.3K', '296', '515', '943', '1.2K', '903', '335', '191', '594', '267', '617', '516', '504', '331', '652', '410', '550', '473', '442', '344', '208', '1K', '2.5K', '273', '485', '826', '192', '405', '941', '477', '644', '303', '417', '6K', nan, 11.0, 2.0, 1.0, 31.0, 3.0, 10.0, 9.0, 17.0, 7.0, 4.0, 6.0], dtype=object)
missing_values = fdc['Hits'].isna().sum()
print('Number of missing values: ', missing_values)
Number of missing values: 2595
# Assign the filled column back instead of calling fillna(inplace=True) on
# the selected column: the in-place form operates on an intermediate object
# and, under pandas copy-on-write, leaves fdc itself unchanged.
fdc['Hits'] = fdc['Hits'].fillna(0)
fdc['Hits'].sample(5)
11750 2 14087 2 17601 0 16785 0 13875 1 Name: Hits, dtype: object
fdc['Hits'].unique()
array(['771', '562', '150', '207', '595', '248', '246', '120', '1.6K', '130', '321', '189', '175', '96', '118', '216', '212', '154', '205', '202', '339', '408', '103', '332', '86', '173', '161', '396', '1.1K', '433', '242', '206', '177', '1.5K', '198', '459', '117', '119', '209', '84', '187', '165', '203', '65', '336', '126', '313', '124', '145', '538', '182', '101', '45', '377', '99', '194', '403', '414', '593', '374', '245', '3.2K', '266', '299', '309', '215', '265', '211', '112', '337', '70', '159', '688', '116', '63', '144', '123', '71', '224', '113', '168', '61', '89', '137', '278', '75', '148', '176', '197', '264', '214', '247', '402', '440', '1.7K', '2.3K', '171', '320', '657', '87', '259', '200', '255', '253', '196', '60', '97', '85', '169', '256', '132', '239', '166', '121', '109', '32', '46', '122', '48', '527', '199', '282', '51', '1.9K', '642', '155', '323', '288', '497', '509', '79', '49', '270', '511', '80', '128', '115', '156', '204', '143', '140', '152', '220', '134', '225', '94', '74', '135', '142', '50', '77', '40', '107', '193', '179', '34', '64', '453', '57', '81', '28', '78', '133', '43', '425', '88', '42', '36', '233', '376', '210', '444', '100', '263', '98', '29', '160', '39', '257', '6', '310', '138', '62', '293', '285', '362', '66', '69', '58', '21', '20', '131', '38', '406', '68', '108', '110', '93', '512', '443', '306', '352', '422', '585', '346', '178', '841', '76', '394', '72', '172', '44', '407', '230', '367', '295', '157', '243', '56', '111', '326', '679', '18', '92', '59', '25', '184', '53', '12', '90', '55', '73', '11', '566', '180', '83', '262', '17', '26', '31', '280', '359', '213', '297', '387', '480', '381', '677', '486', '8', '244', '129', '388', '275', '319', '2K', '52', '91', '421', '153', '27', '41', '222', '35', '102', '23', '30', '33', '146', '13', '19', '14', '106', '276', '568', '353', '47', '478', '249', '254', '369', '219', '565', '237', '227', '434', '375', '162', '605', '654', '3', '7', '9', '104', '114', '186', 
'446', '756', '22', '139', '500', '67', '147', '149', '16', '82', '54', '37', '15', '1.3K', '3K', '952', '5', '749', '541', '330', '393', '517', '770', '409', '170', '125', '283', '342', '363', '580', '105', '217', '24', '141', '10', '427', '158', '426', '4', '666', '181', '324', '979', '1.4K', '302', '751', '298', '411', '944', '2', '947', '292', '349', '621', '1', '2.8K', '338', '287', '261', '218', '1.8K', '240', '279', '229', '188', '315', '664', '613', '190', '706', '127', '462', '386', '695', '491', '167', '281', '250', '307', '95', '231', '174', '680', '633', '221', '348', '602', '183', '653', '195', '164', '151', '258', '8.4K', '343', '419', '655', '136', '399', '531', '357', '228', '385', '312', '340', '238', '487', '355', '499', '4.3K', '296', '515', '943', '1.2K', '903', '335', '191', '594', '267', '617', '516', '504', '331', '652', '410', '550', '473', '442', '344', '208', '1K', '2.5K', '273', '485', '826', '192', '405', '941', '477', '644', '303', '417', '6K', 0, 11.0, 2.0, 1.0, 31.0, 3.0, 10.0, 9.0, 17.0, 7.0, 4.0, 6.0], dtype=object)
For this column, I am going to replace NaN values with 0 and express the K values as their full number
# Convert the values
# The column mixes strings ('771', '1.6K'), floats and the filled-in 0, so
# normalise everything to text before parsing. This avoids evaluating each
# cell as an expression with pd.eval.
hits_text = fdc['Hits'].astype(str).str.strip()
in_thousands = hits_text.str.endswith('K')
hits_value = pd.to_numeric(hits_text.str.rstrip('K'))
# Scale only the 'K' entries, then round before the integer cast so that
# floating-point error in the multiplication cannot truncate a value to
# one hit too few.
fdc['Hits'] = hits_value.where(~in_thousands, hits_value * 1000).round().astype(int)
fdc['Hits'].head(10)
0 771 1 562 2 150 3 207 4 595 5 248 6 246 7 120 8 1600 9 130 Name: Hits, dtype: int32
It's time to check how our dataset looks after the data-cleaning process
fdc.columns
Index(['ID', 'Name', 'Surname', 'Nationality', 'Age', 'Overall', 'Potential', 'Club', 'Contract', 'Contract Start', 'Contract End', 'Contract Length(years)', 'Contract Status', 'Positions', 'Height(cm)', 'Weight(kg)', 'Preferred Foot', 'BOV', 'Best Position', 'Joined', 'Loan Date End', 'Market Price(€)', 'Wage', 'Release Clause(€)', 'Attacking', 'Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys', 'Skill', 'Dribbling', 'Curve', 'FK Accuracy', 'Long Passing', 'Ball Control', 'Movement', 'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance', 'Power', 'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots', 'Mentality', 'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure', 'Defending', 'Marking', 'Standing Tackle', 'Sliding Tackle', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats', 'Base Stats', 'W/F', 'SM', 'A/W', 'D/W', 'IR', 'PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY', 'Hits'], dtype='object')
Now that the data is cleaned, it's time to export it for future analysis
# Export the data to an Excel file
fdc.to_excel('fifa21_clean_data.xlsx', sheet_name="fifa21_data_analysis", index=False)
# Export the data to a .csv file
fdc.to_csv('fifa21_clean_data.csv', index=False)
Now that the data is cleaned, it´s time to work with it. I am going to use Numpy for the calculations needed and Matplotlib to visualize the data.
I am not going to use all the columns, so I'm going to delete some of them. First let's make a copy of our data.
f21 = fdc.copy()
f21.sample(5)
ID | Name | Surname | Nationality | Age | Overall | Potential | Club | Contract | Contract Start | ... | A/W | D/W | IR | PAC | SHO | PAS | DRI | DEF | PHY | Hits | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4487 | 245992 | Billy | Gilmour | Scotland | 19 | 71 | 86 | Chelsea | 2018 ~ 2023 | 2018 | ... | Medium | Medium | 1 | 68 | 54 | 70 | 73 | 64 | 50 | 419 |
18752 | 258681 | Ryan | Hillier | Wales | 17 | 50 | 65 | Newport County | 2020 ~ 2021 | 2020 | ... | Medium | Medium | 1 | 68 | 49 | 40 | 49 | 16 | 45 | 0 |
1560 | 216335 | Yuriy | Gazinskiy | Russia | 30 | 75 | 75 | No Club | Free | NaN | ... | Medium | Medium | 1 | 66 | 55 | 66 | 68 | 72 | 72 | 9 |
15929 | 256145 | Felipe | Zenobio | Argentina | 20 | 59 | 72 | Club Atlético Tigre | 2019 ~ 2024 | 2019 | ... | Medium | Medium | 1 | 57 | 59 | 57 | 60 | 31 | 56 | 1 |
276 | 239231 | Cucurella | Cucurella Saseta | Spain | 21 | 81 | 89 | Getafe CF | 2020 ~ 2023 | 2020 | ... | High | Medium | 1 | 81 | 67 | 78 | 80 | 73 | 74 | 443 |
5 rows × 79 columns
# Deleting the columns
f21 = f21.drop(['Potential','Contract','BOV','Best Position'], axis=1)
f21.columns
Index(['ID', 'Name', 'Surname', 'Nationality', 'Age', 'Overall', 'Club', 'Contract Start', 'Contract End', 'Contract Length(years)', 'Contract Status', 'Positions', 'Height(cm)', 'Weight(kg)', 'Preferred Foot', 'Joined', 'Loan Date End', 'Market Price(€)', 'Wage', 'Release Clause(€)', 'Attacking', 'Crossing', 'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys', 'Skill', 'Dribbling', 'Curve', 'FK Accuracy', 'Long Passing', 'Ball Control', 'Movement', 'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance', 'Power', 'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots', 'Mentality', 'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure', 'Defending', 'Marking', 'Standing Tackle', 'Sliding Tackle', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats', 'Base Stats', 'W/F', 'SM', 'A/W', 'D/W', 'IR', 'PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY', 'Hits'], dtype='object')
Let's see some descriptive statistics from our data
# We are going to use the .describe() method
f21.describe()
ID | Age | Overall | Contract Length(years) | Height(cm) | Weight(kg) | Market Price(€) | Wage | Release Clause(€) | Attacking | ... | GK Reflexes | Total Stats | Base Stats | PAC | SHO | PAS | DRI | DEF | PHY | Hits | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 1.897900e+04 | 18979.000000 | 1.897900e+04 | 18979.000000 | ... | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 | 18979.000000 |
mean | 226403.384794 | 25.194109 | 65.718636 | 3.491965 | 181.200221 | 75.019021 | 2.865063e+06 | 9092.062279 | 3.962951e+06 | 248.938142 | ... | 16.519627 | 1595.286949 | 355.702197 | 67.453975 | 53.457031 | 57.681016 | 62.875020 | 49.866221 | 64.368934 | 22.936720 |
std | 27141.054157 | 4.710520 | 6.968999 | 2.401495 | 6.840054 | 7.073542 | 7.685154e+06 | 19707.021089 | 9.772762e+06 | 74.299428 | ... | 17.854079 | 269.874789 | 40.761117 | 10.677859 | 13.827425 | 10.081857 | 9.927415 | 16.443213 | 9.601883 | 119.861937 |
min | 41.000000 | 16.000000 | 47.000000 | 0.000000 | 155.000000 | 50.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | 42.000000 | ... | 2.000000 | 747.000000 | 232.000000 | 25.000000 | 16.000000 | 25.000000 | 25.000000 | 12.000000 | 28.000000 | 0.000000 |
25% | 210135.000000 | 21.000000 | 61.000000 | 2.000000 | 176.000000 | 70.000000 | 4.750000e+05 | 1000.000000 | 4.235000e+05 | 222.000000 | ... | 8.000000 | 1452.000000 | 327.000000 | 61.000000 | 44.000000 | 51.000000 | 57.000000 | 35.000000 | 58.000000 | 1.000000 |
50% | 232418.000000 | 25.000000 | 66.000000 | 3.000000 | 181.000000 | 75.000000 | 9.500000e+05 | 3000.000000 | 1.000000e+06 | 263.000000 | ... | 11.000000 | 1627.000000 | 356.000000 | 68.000000 | 56.000000 | 58.000000 | 64.000000 | 53.000000 | 65.000000 | 4.000000 |
75% | 246922.500000 | 29.000000 | 70.000000 | 5.000000 | 186.000000 | 80.000000 | 2.000000e+06 | 8000.000000 | 2.800000e+06 | 297.000000 | ... | 14.000000 | 1781.000000 | 384.000000 | 75.000000 | 64.000000 | 64.000000 | 69.000000 | 63.000000 | 71.000000 | 12.000000 |
max | 259216.000000 | 53.000000 | 93.000000 | 23.000000 | 206.000000 | 110.000000 | 1.855000e+08 | 560000.000000 | 2.031000e+08 | 437.000000 | ... | 90.000000 | 2316.000000 | 498.000000 | 96.000000 | 93.000000 | 93.000000 | 95.000000 | 91.000000 | 91.000000 | 8400.000000 |
8 rows × 59 columns
Now that our data is cleaned, we can start to use the data to answer the questions we need
Let's analyze the data from the 'Preferred Foot' column to answer this question and discover the player's preferred foot. To visualize the data, I am going to use a pie chart.
# Count the values from the column
f21_pf = f21['Preferred Foot'].value_counts()
print(f21_pf)
# Creating the pie chart with the data
# The first slice stays in place and every other slice is pulled out
# slightly. Sizing the list from the counts keeps it valid for any number
# of categories, not just two.
explode = [0.0 if position == 0 else 0.1 for position in range(len(f21_pf))]
# Reuse the counts computed above rather than counting a second time.
f21_pf.plot(
    kind='pie',
    explode=explode,
    title="Player's preferred foot",
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
    ylabel="")
Right 14445 Left 4534 Name: Preferred Foot, dtype: int64
<Axes: title={'center': "Player's preferred foot"}>
Let's analyze the number of players under contract, on loan or free
# Count the values of the column
f21_cs = f21['Contract Status'].value_counts()
print(f21_cs)
# Creating the bar plot
bars = plt.bar(f21_cs.index, f21_cs.values, color=['blue', 'orange', 'green'])
# Adding total values at the top of each bar
# bar_label places each count just above its bar using a padding in points,
# so the labels no longer depend on a fixed offset in data units.
plt.bar_label(bars, fmt='%d', padding=3, fontsize=10)
# Customize labels and title
plt.xlabel("Contract categories")
plt.ylabel("Player's count")
plt.title("Counts of Contracts Status")
plt.xticks(rotation=45)
plt.show()
Contract 17729 On Loan 1013 Free 237 Name: Contract Status, dtype: int64
Now we can also analyze which clubs pay the most in salaries to their players
# The data is grouped by club and the players' wages are summed
f21_csum = f21.groupby('Club')['Wage'].sum().sort_values()
print(f21_csum)
# Because there are a lot of clubs, I decided to apply a filter
# Keep the cut-off in one place so the filter and the chart title cannot
# drift apart.
wage_threshold = 1250000
ylab = f21_csum[f21_csum >= wage_threshold]
# Now let's create a barh chart to visualize our data
plt.barh(ylab.index, ylab.values, color="red")
plt.xlabel('Total Salary')
plt.ylabel('Clubs')
plt.title(f"Total Salaries for Clubs (>= {wage_threshold})")
plt.show()
Club No Club 0 Llaneros de Guanare 10000 Central Coast Mariners 10350 Aragua FC 10500 Waterford FC 10650 ... Manchester United 2986000 Liverpool 3028500 Manchester City 3639000 FC Barcelona 4083000 Real Madrid 4687000 Name: Wage, Length: 682, dtype: int32
Let's say we want to find the players whose 'Overall' is equal to or greater than 85
f21_ov = f21[(f21['Overall'] >= 85)]
list_ov = f21_ov[['Name', 'Surname', 'Overall']].sort_values(by='Overall', ascending=False)
print(list_ov)
Name Surname Overall 0 Lionel Messi 93 1 Cristiano Ronaldo dos Santos Aveiro 92 2 Jan Oblak 91 3 Kevin De Bruyne 91 4 Neymar da Silva Santos Jr. 91 .. ... ... ... 73 Clément Lenglet 85 74 Marquinhos Aoás Corrêa 85 75 Riyad Mahrez 85 76 Ricardo Barbosa Pereira 85 98 Marco Reus 85 [99 rows x 3 columns]
Now it's time to look at the players by their nationality.
f21_count = f21['Nationality'].value_counts()
f21_count
England 1705 Germany 1195 Spain 1065 France 1003 Argentina 943 ... Malawi 1 Rwanda 1 São Tomé & Príncipe 1 Aruba 1 Indonesia 1 Name: Nationality, Length: 164, dtype: int64
Because there are too many countries with only one player, let's group them; the condition will be countries with 100 players or fewer.
# Let's see the list of countries with 100 players or fewer
f21_less_than_100 = f21_count[f21_count <= 100].index.tolist()
print(f21_less_than_100)
['Greece', 'Northern Ireland', 'Cameroon', 'Morocco', 'Russia', 'Canada', 'South Africa', 'Bosnia Herzegovina', 'Ukraine', 'Slovakia', 'DR Congo', 'Finland', 'Mali', 'Iceland', 'Slovenia', 'Algeria', 'Albania', 'Kosovo', 'New Zealand', 'Hungary', 'Bulgaria', 'Tunisia', 'Egypt', 'India', 'Costa Rica', 'Montenegro', 'Guinea', 'Cape Verde', 'United Arab Emirates', 'Jamaica', 'North Macedonia', 'Gambia', 'Georgia', 'Burkina Faso', 'Israel', 'Iran', 'Guinea Bissau', 'Angola', 'Gabon', 'Honduras', 'Congo', 'Togo', 'Zimbabwe', 'Comoros', 'Panama', 'Moldova', 'Luxembourg', 'Benin', 'Haiti', 'Curacao', 'Zambia', 'Kenya', 'Lithuania', 'Sierra Leone', 'Madagascar', 'Uganda', 'Cyprus', 'Guyana', 'Uzbekistan', 'Mauritania', 'Latvia', 'Burundi', 'Kazakhstan', 'Azerbaijan', 'Equatorial Guinea', 'Dominican Republic', 'Trinidad & Tobago', 'Faroe Islands', 'Cuba', 'Estonia', 'Mozambique', 'Liechtenstein', 'Libya', 'Iraq', 'El Salvador', 'Niger', 'Antigua & Barbuda', 'Syria', 'Grenada', 'Liberia', 'Armenia', 'Thailand', 'Sudan', 'Montserrat', 'Jordan', 'Belarus', 'Lebanon', 'Philippines', 'Central African Republic', 'Namibia', 'Belize', 'Ethiopia', 'South Sudan', 'Palestine', 'Hong Kong', 'Eritrea', 'Afghanistan', 'Chinese Taipei', 'Saint Kitts and Nevis', 'Guatemala', 'Malaysia', 'Nicaragua', 'Chad', 'Singapore', 'Tanzania', 'Macau', 'Barbados', 'Korea DPR', 'Malta', 'Andorra', 'Guam', 'Bermuda', 'New Caledonia', 'Puerto Rico', 'Papua New Guinea', 'Saint Lucia', 'Malawi', 'Rwanda', 'São Tomé & Príncipe', 'Aruba', 'Indonesia']
# Make a copy of the column
new_f21_count = f21['Nationality'].copy()
print(new_f21_count)
0 Argentina 1 Portugal 2 Slovenia 3 Belgium 4 Brazil ... 18974 China PR 18975 England 18976 England 18977 China PR 18978 China PR Name: Nationality, Length: 18979, dtype: object
# Replace the countries with 100 or fewer players
new_f21_count.loc[new_f21_count.isin(f21_less_than_100)] = 'Others'
print(new_f21_count)
0 Argentina 1 Portugal 2 Others 3 Belgium 4 Brazil ... 18974 China PR 18975 England 18976 England 18977 China PR 18978 China PR Name: Nationality, Length: 18979, dtype: object
# Count the new values
new_count = new_f21_count.value_counts(ascending=True)
new_count
Czech Republic 107 Ivory Coast 107 Ghana 115 Wales 127 Nigeria 129 Serbia 129 Senegal 129 Croatia 134 Bolivia 148 Peru 164 Chile 189 Venezuela 199 Switzerland 211 Australia 238 Paraguay 239 Ecuador 249 Scotland 285 Belgium 295 Denmark 300 Turkey 317 Austria 319 Romania 321 Saudi Arabia 330 Colombia 339 Republic of Ireland 340 Korea Republic 340 Uruguay 348 Poland 354 Portugal 361 Norway 362 Mexico 362 China PR 364 United States 376 Sweden 380 Italy 387 Netherlands 438 Japan 485 Brazil 887 Argentina 943 France 1003 Spain 1065 Germany 1195 England 1705 Others 2164 Name: Nationality, dtype: int64
# Creating our chart barh
colors = ['red' if index != 'Others' else 'blue' for index in new_count.index]
ax = new_count.plot(kind='barh', figsize=(10,20), color=colors)
plt.xlabel('Count')
plt.ylabel('Nationality')
plt.title('Player Count by Nationality')
for i, v in enumerate(new_count):
ax.text(v +10, i, str(v), color='black', va='center')