import re
from io import StringIO

import matplotlib.pyplot as plot
import numpy
import pandas
import requests
import seaborn as sns
from bs4 import BeautifulSoup


# Uses code from Project 1 - Solar Flare to extract data, as the data being
# collected is published as an HTML table.
# Browser-like User-Agent header that is sent with every request in this script.
head = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
r = requests.get("https://en.hispanosnba.com/players/hall-of-fame/index", headers=head, timeout=30)
# Stop on an HTTP error instead of trying to parse an error page.
r.raise_for_status()
text = BeautifulSoup(r.content, 'html')
# Recent pandas versions deprecate passing literal HTML to read_html, so the
# markup of the first <table> is wrapped in a file-like StringIO object.
text = pandas.read_html(StringIO(text.find('table').prettify()))
# read_html returns a list of DataFrames; the only entry is the Hall of Fame table.
hof_data = text[0]


# One list of player names per position code.
hof_PG, hof_SG, hof_SF, hof_PF, hof_C = [], [], [], [], []

# Position code -> list that collects the players listed at that position.
# A player whose position text contains several codes lands in several lists.
_position_lists = (
    ("PG", hof_PG),
    ("SG", hof_SG),
    ("SF", hof_SF),
    ("PF", hof_PF),
    ("C", hof_C),
)

# Walks rows 0-134 of the table and keeps the players whose "Years" text
# contains "198".
for x in range(135):
    if "198" not in hof_data["In the NBA"]["Years"][x]:
        continue
    for _code, _players in _position_lists:
        if _code in hof_data["Pos."]["Pos."][x]:
            _players.append(hof_data["Name"]["Name"][x])

# Edge case relating to the getUrl method: the website the names were collected
# from uses a different name for Tiny Archibald, so the first point guard entry
# is overwritten with the name that getUrl needs.
hof_PG[0] = "Tiny Archibald"


#Creates URLs using the easily recognizable pattern from basketball-reference.com
def getUrl(playerType):
    """Build the basketball-reference.com player-page URL for each name.

    The page slug is the first five letters of the second word of the name,
    followed by the first two letters of the first word and the suffix "01";
    the directory is the first letter of the second word. Players whose page
    does not follow that rule are taken from a fixed override table.

    Accents and punctuation are removed before the letters are sliced, so
    names such as "Shaquille O'Neal" or "D.J. Augustin" yield a slug made of
    plain ASCII letters only.

    Args:
        playerType: iterable of player names in "First Last" form.

    Returns:
        A list of URL strings, in the same order as the input names.

    Raises:
        IndexError: if a name outside the override table has fewer than two
            words, or if one of its first two words contains no letters.
    """
    import unicodedata

    base = "https://www.basketball-reference.com/players/"
    # Names whose slug cannot be derived from the pattern (multi-part first
    # names, or a numeric suffix other than "01").
    overrides = {
        "K.C. Jones": base + "j/joneskc01.html",
        "Jo Jo White": base + "w/whitejo01.html",
        "Jaxson Hayes": base + "h/hayesja02.html",
        "Cameron Johnson": base + "j/johnsca02.html",
    }

    def _letters(word):
        # Fold accented characters to ASCII, then keep letters only.
        folded = unicodedata.normalize("NFKD", word)
        folded = folded.encode("ascii", "ignore").decode("ascii")
        return "".join(ch for ch in folded if ch.isalpha()).lower()

    urls = []
    for player in playerType:
        if player in overrides:
            urls.append(overrides[player])
            continue
        # split() without an argument tolerates repeated or surrounding spaces.
        parts = player.split()
        first = _letters(parts[0])
        last = _letters(parts[1])
        urls.append(base + last[0] + "/" + last[:5] + first[:2] + "01.html")
    return urls


#Gets data depending on what link is sent in. This is useful for getting the stats of all the players, as the links are all different.
def getData(link):
    """Download *link* and return the first HTML table on the page.

    Args:
        link: URL of the page to fetch. The request is sent with the
            module-level ``head`` headers.

    Returns:
        The first ``<table>`` element of the page, parsed by pandas into a
        DataFrame.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page contains no ``<table>`` element (also raised
            by pandas when the table cannot be parsed).
    """
    response = requests.get(link, headers=head, timeout=30)
    # Fail with the HTTP status instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html')
    table = soup.find('table')
    if table is None:
        raise ValueError("no <table> element found at {0}".format(link))
    # Recent pandas versions deprecate passing literal HTML to read_html, so
    # the markup is wrapped in a file-like StringIO object.
    return pandas.read_html(StringIO(table.prettify()))[0]


# Column labels of the per-season statistics used in this analysis ("TOV" is
# listed but is not read anywhere in this file).
# NOTE: `from scipy import stats` further down rebinds the name `stats` to the
# SciPy module; once that import has run, this list is no longer reachable
# under this name.
stats=["PTS","FG%","TRB","AST","TOV"]

#Reads up to 15 seasons of points, shooting percent, rebounds, and assists for each player.
#Only the points value of each readable season is placed into the returned array.
def getStats(playerType, seasons=15):
    """Collect the per-season points of every player in *playerType*.

    For each player the stats page is downloaded and rows 0 .. seasons-1 of
    its first table are read. A row counts only when points, shooting
    percentage, rebounds and assists can all be converted to ``float``; rows
    where any of them is missing or non-numeric are skipped.

    Args:
        playerType: list of player names, as accepted by ``getUrl``.
        seasons: number of table rows to inspect per player (default 15).

    Returns:
        A list with one entry per player, in input order. Each entry is the
        list of points values (floats) of that player's usable rows, so it
        may hold fewer than *seasons* values.
    """
    # The column labels are kept local: the module-level name `stats` is
    # rebound to the SciPy module later in this file, and reading the labels
    # from it after that point would make every row look unusable.
    columns = ("PTS", "FG%", "TRB", "AST")

    all_players = []
    # Build the URL list once instead of once per player.
    for url in getUrl(playerType):
        player_data = getData(url)
        player = []
        for row in range(seasons):
            try:
                values = [float(player_data[column][row]) for column in columns]
            except (KeyError, IndexError, ValueError, TypeError):
                # Missing column/row or a non-numeric cell: skip this season.
                continue
            # NOTE(review): only the points value is stored, although the
            # rookies are later compared using the sum of all four numbers -
            # confirm which of the two is intended.
            player.append(values[0])
        all_players.append(player)
    return all_players

# Scrapes every Hall of Fame position group once and keeps the resulting
# per-player season lists under a short name that the plotting code reads.
PGstats, SGstats, SFstats, PFstats, Cstats = [
    getStats(_group) for _group in (hof_PG, hof_SG, hof_SF, hof_PF, hof_C)
]


import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

#The graph method creates a violin plot for the inputted player position
def graph(playerType):
    """Draw a violin plot of the first five seasons of one position group.

    The x axis is the season index (0-4) and the y axis is the value stored
    for that season in the matching ``*stats`` list. A player with fewer than
    five stored seasons contributes only the seasons that exist.

    Note that a second function named ``graph`` is defined later in this
    file and replaces this one from that point on.

    Args:
        playerType: one of the module-level name lists ``hof_PG``, ``hof_SG``,
            ``hof_SF``, ``hof_PF`` or ``hof_C``. Any other list is treated
            like ``hof_C``.
    """
    groups = (
        (hof_PG, PGstats, "Years vs Hall of Fame Point Guard Statistics"),
        (hof_SG, SGstats, "Years vs Hall of Fame Shooting Guard Statistics"),
        (hof_SF, SFstats, "Years vs Hall of Fame Small Forward Statistics"),
        (hof_PF, PFstats, "Years vs Hall of Fame Power Forward Statistics"),
    )
    # Centers are the fallback for anything that matches none of the above.
    season_lists = Cstats
    title = "Years vs Hall of Fame Center Statistics"
    for names, group_stats, group_title in groups:
        if playerType == names:
            season_lists, title = group_stats, group_title
            break

    # Initialize the figure and its labels once.
    plt.figure(figsize=(10, 7))
    plt.title(title)
    plt.xlabel("Years Passed in Career")
    plt.ylabel("Statistic")

    # Seaborn takes one-dimensional data, so the points are collected flat,
    # season by season. Only five seasons are used.
    years = []
    values = []
    for year in range(5):
        for j in range(len(playerType)):
            player_seasons = season_lists[j]
            if year >= len(player_seasons):
                # This player has no usable data for that season.
                continue
            years.append(year)
            values.append(player_seasons[year])

    sns.violinplot(x=np.asarray(years), y=np.asarray(values))




# Draws one violin-plot figure per Hall of Fame position group.
for _group in (hof_PG, hof_SG, hof_SF, hof_PF, hof_C):
    graph(_group)


#Gets data for a list of the 2019 NBA Draft class.
# getData downloads the page with the shared headers and returns its first table.
rookie_data = getData("https://www.basketball-reference.com/draft/NBA_2019.html")
# Player names from rows 0-29 of the "Round 1" section of the table.
_round_one_players = rookie_data["Round 1"]["Player"]
rookies = [_round_one_players[_pick] for _pick in range(30)]

url_rookies = getUrl(rookies)
#Two edge cases where another player shares the same last name and first-name
#letters, so the page ends in "02" instead of "01".
url_rookies[7] = "https://www.basketball-reference.com/players/h/hayesja02.html"
url_rookies[10] = "https://www.basketball-reference.com/players/j/johnsca02.html"

# Rookie names and their combined statistic, one pair of lists per position.
PG_Name = []
PG_Stat = []
SG_Name = []
SG_Stat = []
SF_Name = []
SF_Stat = []
PF_Name = []
PF_Stat = []
C_Name = []
C_Stat = []

# Position code -> (names list, statistics list) for that position.
_rookie_groups = {
    "PG": (PG_Name, PG_Stat),
    "SG": (SG_Name, SG_Stat),
    "SF": (SF_Name, SF_Stat),
    "PF": (PF_Name, PF_Stat),
    "C": (C_Name, C_Stat),
}

#Adds the name and (statistic=pts+shooting%+rebounds+assists) to an array to make graphing the linear model for
#rookies easier
# NOTE(review): only the first 15 of the 30 collected rookies are fetched -
# confirm that this limit is intended.
for x in range(15):
    r = requests.get(url_rookies[x], headers=head, timeout=30)
    # Stop on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    text = BeautifulSoup(r.content, 'html')
    # The second <table> of the player page is used. Recent pandas versions
    # deprecate passing literal HTML to read_html, hence the StringIO wrapper.
    text = pandas.read_html(StringIO(text.findAll("table")[1].prettify()))
    rookie_data = text[0]
    # Row 0 of the table - presumably the rookie season; verify against the page.
    statistic = rookie_data["PTS"][0] + rookie_data["FG%"][0] + rookie_data["TRB"][0] + rookie_data["AST"][0]
    # Rookies whose position is not exactly one of the five codes are left out.
    _group = _rookie_groups.get(rookie_data["Pos"][0])
    if _group is not None:
        _group[0].append(rookies[x])
        _group[1].append(statistic)


# Turns each rookie's combined statistic into the text of a line equation of
# the form "<slope>*x+<statistic>". The slope is fixed per position and comes
# from the linear models found for the Hall of Fame players.
for _prefix, _names, _formulas in (
    ('1.3*x+', PG_Name, PG_Stat),
    ('1.7*x+', SG_Name, SG_Stat),
    ('1.5*x+', SF_Name, SF_Stat),
    ('0.7*x+', PF_Name, PF_Stat),
    ('0.3*x+', C_Name, C_Stat),
):
    # The statistics lists are rewritten in place, one entry per rookie name.
    for _idx in range(len(_names)):
        _formulas[_idx] = _prefix + str(_formulas[_idx])


import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from scipy import stats

#Method used to graph rookies on same graph as the linear model for hall of famers
def graphnew(formula, x_range, name):
    """Plot the line described by *formula* on the current figure.

    Args:
        formula: text of a Python expression in the variable ``x``, for
            example ``"1.3*x+20.5"``. It may also refer to ``np``.
        x_range: sequence of x values at which the expression is evaluated.
        name: label shown for this line in the legend.
    """
    x = np.array(x_range)
    # SECURITY: eval() executes whatever expression the string contains, and
    # the formulas in this file are built from values scraped from a web page.
    # The expression is therefore evaluated without builtins and can only see
    # `x` and `np`. This narrows what a formula can reach but is not a
    # sandbox; a real parser would be needed for untrusted input.
    y = eval(formula, {"__builtins__": {}, "np": np}, {"x": x})
    plt.plot(x, y, label=name)
    # Refresh the legend so that the newly added line is listed.
    plt.legend()


#Graph method is used to graph linear model alongside the rookies from this past NBA season
def graph(playerType):
    """Plot a linear regression of one Hall of Fame group plus rookie lines.

    The first five stored seasons of every player in the group are plotted
    against the season index (0-4) together with a fitted regression line,
    whose equation is shown in the legend. The equations of the rookies at
    the same position are then drawn on the same figure and the figure is
    shown. A player with fewer than five stored seasons contributes only the
    seasons that exist.

    Args:
        playerType: one of the module-level name lists ``hof_PG``, ``hof_SG``,
            ``hof_SF``, ``hof_PF`` or ``hof_C``. Any other list is plotted
            with the center statistics and title, without rookie lines.

    Raises:
        ValueError: raised by ``linregress`` when there is no data to fit.
    """
    groups = (
        (hof_PG, PGstats, "Years vs Hall of Fame Point Guard Statistics", PG_Stat, PG_Name),
        (hof_SG, SGstats, "Years vs Hall of Fame Shooting Guard Statistics", SG_Stat, SG_Name),
        (hof_SF, SFstats, "Years vs Hall of Fame Small Forward Statistics", SF_Stat, SF_Name),
        (hof_PF, PFstats, "Years vs Hall of Fame Power Forward Statistics", PF_Stat, PF_Name),
        (hof_C, Cstats, "Years vs Hall of Fame Center Statistics", C_Stat, C_Name),
    )
    # Fallback for a list that matches none of the groups: center data, no rookies.
    season_lists = Cstats
    title = "Years vs Hall of Fame Center Statistics"
    rookie_formulas, rookie_names = [], []
    for names, group_stats, group_title, formulas, labels in groups:
        if playerType == names:
            season_lists, title = group_stats, group_title
            rookie_formulas, rookie_names = formulas, labels
            break

    # Initialize the figure and its labels once.
    plt.figure(figsize=(10, 7))
    plt.title(title)
    plt.xlabel("Years Passed in Career")
    plt.ylabel("Statistic")

    # Collect the points as flat, one-dimensional data, season by season.
    years = []
    values = []
    for year in range(5):
        for j in range(len(playerType)):
            player_seasons = season_lists[j]
            if year >= len(player_seasons):
                # This player has no usable data for that season.
                continue
            years.append(year)
            values.append(player_seasons[year])

    #This is where the linear regression takes place, along with finding what the equation representative will be
    final_xlist = np.asarray(years)
    final_ylist = np.asarray(values)
    slope, intercept, _r_value, _p_value, _std_err = stats.linregress(final_xlist, final_ylist)

    # use line_kws to set line label for legend
    ax = sns.regplot(
        x=final_xlist,
        y=final_ylist,
        color='black',
        line_kws={'label': "y={0:.1f}x+{1:.1f}".format(slope, intercept)},
    )
    # plot legend
    ax.legend()

    # Draw the equation of every rookie at this position over seasons 0-4.
    for formula, label in zip(rookie_formulas, rookie_names):
        graphnew(formula, range(0, 5), label)

    plt.show()



# Draws one regression figure, with the matching rookies, per position group.
for _group in (hof_PG, hof_SG, hof_SF, hof_PF, hof_C):
    graph(_group)

# How much better can the 2019 NBA Draft class get?