library(XML)
library(MASS)
library(RPostgreSQL)
library(rvest)
full_games = data.frame(games=(character()))
days = seq(as.Date('2016-10-25'),as.Date(Sys.Date()),by='day')
for(i in seq_along(days)){
date=format(days[i],format="%Y%m%d")
games = read_html(paste0("http://www.espn.com/nba/schedule/_/date/",date))
games = games %>% html_nodes(xpath="//td/a") %>% html_attr("href")
games = as.data.frame(games)
games[,1] = as.character(games[,1])
games = games[grep("gameId",games[,1]),]
games = gsub("\\/nba\\/game\\?gameId=","",games)
games = as.data.frame(games)
games[,1] = as.character(games[,1])
full_games = rbind(games,full_games)
}
full_games_clean = full_games[!duplicated(full_games[,1]),]
full_games_clean = as.data.frame(full_games_clean)
full_games_clean = full_games_clean[-grep("http",full_games_clean[,1]),]
full_games_clean = as.data.frame(full_games_clean)
names(full_games_clean) = "game_id"
full_games_clean$game_id = as.character(full_games_clean$game_id)
upload_game_data_to_db = function(games){
game_id = games$game_id
for(i in 1:length(game_id)){
play_by_play = readHTMLTable(paste0("http://www.espn.com/nba/playbyplay?gameId=",game_id[i]))
if(length(play_by_play)<5){
print("Game Hasnt Happened")
}
else{
print("GAME HAS HAPPEN")
play_by_play[[2]]$quarter = 1
play_by_play[[3]]$quarter = 2
play_by_play[[4]]$quarter = 3
play_by_play[[5]]$quarter = 4
play_by_play = rbind(play_by_play[[2]],play_by_play[[3]],play_by_play[[4]],play_by_play[[5]])
play_by_play = play_by_play[-grep("time",play_by_play[,1]),]
play_by_play = cbind(play_by_play,game_id[i])
play_by_play = as.data.frame(play_by_play)
names(play_by_play) = c("time_game","team","play_detail","score","nada","quarter","game_id")
play_by_play$quarter = as.numeric(play_by_play$quarter)
play_by_play$game_id = as.numeric(play_by_play$game_id)
shot_chart = read_html(paste0("http://www.espn.com/nba/playbyplay?gameId=",game_id[i]))
made_shots_text = shot_chart %>%
html_nodes(xpath ='//*[contains(concat( " ", @class, " " ), concat( " ", "made", " " ))]') %>%
html_attr("data-text")
made_shots_loc = shot_chart %>%
html_nodes(xpath ='//*[contains(concat( " ", @class, " " ), concat( " ", "made", " " ))]') %>%
html_attr("style")
player_id = shot_chart %>%
html_nodes(xpath ='//*[contains(concat( " ", @class, " " ), concat( " ", "made", " " ))]') %>%
html_attr("data-shooter")
quarter_shot = shot_chart %>%
html_nodes(xpath ='//*[contains(concat( " ", @class, " " ), concat( " ", "made", " " ))]') %>%
html_attr("data-period")
made_shots_loc_top = sub(".*top:", "", made_shots_loc)
made_shots_loc_left = sub(".*left:", "", made_shots_loc)
made_shots_loc_top = gsub("%;","",made_shots_loc_top)
made_shots_loc_left = gsub("%.*","",made_shots_loc_left)
location_makes=cbind(made_shots_text,made_shots_loc_top,
made_shots_loc_left,player_id,quarter_shot,game_id[i])
location_makes = as.data.frame(location_makes)
names(location_makes)[6]="game_id"
location_makes$made_shots_loc_top =as.numeric(location_makes$made_shots_loc_top)
location_makes$made_shots_loc_left =as.numeric(location_makes$made_shots_loc_left)
location_makes$player_id =as.numeric(location_makes$player_id)
location_makes$quarter_shot =as.numeric(location_makes$quarter_shot)
location_makes$game_id =as.numeric(location_makes$game_id)
if(i == 1){
drv = dbDriver("PostgreSQL")
connection = dbConnect(drv,
port = "port",
host = "host",
user = "user",
dbname = "nba_games",
password = "password")
dbWriteTable(connection, "location", value=location_makes,row.names=FALSE)
dbWriteTable(connection, "play_by_play", value=play_by_play,row.names=FALSE)
dbDisconnect(connection)
}
else{
drv = dbDriver("PostgreSQL")
connection = dbConnect(drv,
port = "port",
host = "host",
user = "user",
dbname = "nba_games",
password = "password")
dbWriteTable(connection, "location", value=location_makes,append=TRUE,row.names=FALSE)
dbWriteTable(connection, "play_by_play", value=play_by_play,append=TRUE,row.names=FALSE)
dbDisconnect(connection)
}
}
}
}
Next we mess with a couple queries to see what we can do with the data.
Now lets say we want Kawhi Leonard’s made shots during the 4th quarter, we can query the data and get this data. Now we can filter by certain times, or location of shots.For simple reasons let’s upload a png file of a basketball court and use ggplot to graph everything.
*Note that left side of the court is playing away, and right side of court is playing at home.
drv = dbDriver("PostgreSQL")
connection = dbConnect(drv,
port = "port",
host = "host",
user = "user",
dbname = "nba_games",
password = "password")
kawhi_4th_made=dbGetQuery(connection,"SELECT * FROM location WHERE quarter_shot = 4 AND player_id = 6450")
all_4th_quarter_threes = dbGetQuery(connection,"SELECT * FROM location WHERE quarter_shot = 4 AND made_shots_text LIKE '%three pointer%'")
isiah_first =dbGetQuery(connection,"SELECT * FROM location WHERE quarter_shot = 1 AND player_id = 6472")
isiah_second =dbGetQuery(connection,"SELECT * FROM location WHERE quarter_shot = 2 AND player_id = 6472")
isiah_third = dbGetQuery(connection,"SELECT * FROM location WHERE quarter_shot = 3 AND player_id = 6472")
isiah_fourth =dbGetQuery(connection,"SELECT * FROM location WHERE quarter_shot = 4 AND player_id = 6472")
isiah_first$made_shots_loc_left=isiah_first$made_shots_loc_left*5.94
isiah_first$made_shots_loc_top=315-isiah_first$made_shots_loc_top*3.15
isiah_second$made_shots_loc_left=isiah_second$made_shots_loc_left*5.94
isiah_second$made_shots_loc_top=315-isiah_second$made_shots_loc_top*3.15
isiah_third$made_shots_loc_left=isiah_third$made_shots_loc_left*5.94
isiah_third$made_shots_loc_top=315-isiah_third$made_shots_loc_top*3.15
isiah_fourth$made_shots_loc_left=isiah_fourth$made_shots_loc_left*5.94
isiah_fourth$made_shots_loc_top=315-isiah_fourth$made_shots_loc_top*3.15
kawhi_4th_made$made_shots_loc_left=kawhi_4th_made$made_shots_loc_left*5.94
kawhi_4th_made$made_shots_loc_top=315-kawhi_4th_made$made_shots_loc_top*3.15
all_4th_quarter_threes$made_shots_loc_left=all_4th_quarter_threes$made_shots_loc_left*5.94
all_4th_quarter_threes$made_shots_loc_top=315-all_4th_quarter_threes$made_shots_loc_top*3.15
head(kawhi_4th_made)
## made_shots_text made_shots_loc_top
## 1 Kawhi Leonard makes two point shot 151.2
## 2 Kawhi Leonard makes 17-foot two point shot 214.2
## 3 Kawhi Leonard makes two point shot 176.4
## 4 Kawhi Leonard makes 11-foot two point shot 170.1
## 5 Kawhi Leonard makes 29-foot three pointer 126.0
## 6 Kawhi Leonard makes driving dunk 157.5
## made_shots_loc_left player_id quarter_shot game_id
## 1 546.48 6450 4 400899470
## 2 463.32 6450 4 400900148
## 3 558.36 6450 4 400900148
## 4 481.14 6450 4 400900148
## 5 368.28 6450 4 400900148
## 6 558.36 6450 4 400900148
dbDisconnect(connection)
## [1] TRUE
library(grid)
library(jpeg)
library(ggplot2)
library(ggthemes)
courtImg.URL <- "~/Desktop/nba_shinyapp/play_by_play_data/bg-court-logo.jpeg"
court <- rasterGrob(readJPEG((courtImg.URL)))
qplot(kawhi_4th_made$made_shots_loc_left,kawhi_4th_made$made_shots_loc_top,geom="blank")+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) +geom_point(colour="red")+ coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
text=element_text(family="Avenir"))+ggtitle("Kawhi Leonard 4th Quarter Shots")
qplot(all_4th_quarter_threes$made_shots_loc_left,all_4th_quarter_threes$made_shots_loc_top,geom="blank")+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) +geom_point(colour="red")+ coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
text=element_text(family="Avenir"))+ggtitle("All NBA Three Point Shots 4th Quarter")
qplot(isiah_first$made_shots_loc_left,isiah_first$made_shots_loc_top,geom="blank")+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) +geom_point(colour="red")+ coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
text=element_text(family="Avenir"))+ggtitle("Isaiah Thomas Made 1st Quarter Shots")
qplot(isiah_second$made_shots_loc_left,isiah_second$made_shots_loc_top,geom="blank")+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) +geom_point(colour="red")+ coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
text=element_text(family="Avenir"))+ggtitle("Isaiah Thomas Made 2nd Quarter Shots")
qplot(isiah_third$made_shots_loc_left,isiah_third$made_shots_loc_top,geom="blank")+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) +geom_point(colour="red")+ coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
text=element_text(family="Avenir"))+ggtitle("Isaiah Thomas Made 3rd Quarter Shots")
qplot(isiah_fourth$made_shots_loc_left,isiah_fourth$made_shots_loc_top,geom="blank")+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) +geom_point(colour="red")+ coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
text=element_text(family="Avenir"))+ggtitle("Isaiah Thomas Made 4th Quarter Shots")
ggplot(kawhi_4th_made ,aes(made_shots_loc_left,made_shots_loc_top))+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) + coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
legend.title =element_blank() )+geom_hex(binwidth=c(13, 13),alpha=.7) +
scale_fill_gradientn(colours = c("yellow","orange","red"))
ggplot(all_4th_quarter_threes ,aes(made_shots_loc_left,made_shots_loc_top))+
annotation_custom(court,xmin=0, xmax=594, ymin=0, ymax=315) + coord_fixed() + xlim(0,594) +
ylim(0, 315)+ theme_pander()+theme(line = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
legend.title = element_blank())+geom_hex(binwidth=c(13, 13),alpha=.7) +
scale_fill_gradientn(colours = c("yellow","orange","red"))
I just got this data, so be sure to check my blog & website for more future visuals with this data.
Wanna thank Ed Maia for making this tutorial, which help me tons to visualize this data: https://thedatagame.com.au/2015/09/27/how-to-create-nba-shot-charts-in-r/