1
+
2
#' Scrape NBA player data by season
#'
#' This function retrieves NBA player data for a specific season, specified by
#' year. There will be one row per player X team combination. That is, if a
#' player played for multiple teams in one season, they will show up on multiple
#' rows (one row for each team).
#'
#' The data are downloaded from the season "totals" page on
#' basketball-reference.com, so a working internet connection is required.
#'
#' @param year The season to collect data for, given as a single number (or a
#'   string that can be read as one). If you want the 2016-2017 data, use the
#'   latter year (2017). Values below 1947 or more than one year past the
#'   current calendar year are rejected with an error.
#' @return A data.frame with a row for each player X team combo in that season.
#'
#' The columns include:
#'
#' \describe{
#'   \item{player}{Player name}
#'   \item{pos}{Position}
#'   \item{age}{Age in years}
#'   \item{tm}{Team}
#'   \item{g}{Games played}
#'   \item{gs}{Games started}
#'   \item{mp}{Minutes played}
#'   \item{fg}{Field goals made}
#'   \item{fga}{Field goals attempted}
#'   \item{fg_pct}{Field goal shooting percentage}
#'   \item{three_p}{Three point shots made}
#'   \item{three_pa}{Three point shots attempted}
#'   \item{three_p_pct}{Three point shooting percentage}
#'   \item{two_p}{Two point shots made}
#'   \item{two_pa}{Two point shots attempted}
#'   \item{two_p_pct}{Two point shooting percentage}
#'   \item{efg_pct}{Effective field goal percentage (adjusts for fact that 3
#'     pointers are worth one more point than two pointers)}
#'   \item{ft}{Free throws made}
#'   \item{fta}{Free throw attempts}
#'   \item{ft_pct}{Free throw percentage}
#'   \item{orb}{Offensive rebounds}
#'   \item{drb}{Defensive rebounds}
#'   \item{trb}{Total rebounds}
#'   \item{ast}{Assists}
#'   \item{stl}{Steals}
#'   \item{blk}{Blocks}
#'   \item{tov}{Turnovers}
#'   \item{pf}{Personal fouls}
#'   \item{pts}{Points scored}
#' }
#'
#' @examples
#' \dontrun{
#' d <- get_season(2010)
#' }
#' @export
get_season <- function(year) {
  # Reject vectors, NULL and unparseable values up front so the range check
  # below always sees exactly one comparable number.
  if (length(year) != 1L) {
    stop("`year` must be a single value.", call. = FALSE)
  }
  year_num <- suppressWarnings(as.numeric(year))
  if (is.na(year_num)) {
    stop("`year` must be a non-missing number.", call. = FALSE)
  }

  # A season is labelled by the year in which it ends, so the season that is
  # currently under way may carry next calendar year's label.
  newest_year <- 1 + as.numeric(format(Sys.Date(), "%Y"))
  if (year_num < 1947 || year_num > newest_year) {
    stop("Data are only available after 1947 and up to the present.")
  }

  url <- paste0(
    "http://www.basketball-reference.com/leagues/NBA_",
    year,
    "_totals.html"
  )

  # html_node() returns the first <table> on the page; this code treats that
  # first table as the season totals table.
  html <- xml2::read_html(url)
  node <- rvest::html_node(html, "table")
  table <- rvest::html_table(node, header = TRUE)
  parse_season_table(table)
}
68
+
69
# Clean a raw season totals table into a tidy data.frame.
#
# Steps, in order:
#   1. Drop rows that merely repeat the column header (their `Rk` cell holds
#      the literal text "Rk").
#   2. Convert columns that hold numbers stored as text to numeric.
#   3. Drop the `Rk` (rank) column.
#   4. Normalise column names to lower-case identifiers such as `fg_pct`,
#      `two_pa` and `three_p_pct`.
#
# `table` is the data.frame (or tibble) produced by `rvest::html_table()`.
# Returns a base data.frame.
parse_season_table <- function(table) {
  # Only filter when an `Rk` column exists: comparing a missing column gives a
  # zero-length mask, which would silently drop every row. `%in%` also keeps
  # rows whose `Rk` is NA instead of turning them into all-NA rows.
  if ("Rk" %in% names(table)) {
    duplicated_header_rows <- table[["Rk"]] %in% "Rk"
    table <- table[!duplicated_header_rows, , drop = FALSE]
  }

  converted <- lapply(table, maybe_as_numeric)

  # as.data.frame() on a list makes the names syntactic (check.names), e.g.
  # "FG%" -> "FG.", "3P" -> "X3P". The renaming below relies on that.
  df <- as.data.frame(converted, stringsAsFactors = FALSE)

  # drop = FALSE keeps a data.frame even if a single column remains.
  df <- df[, names(df) != "Rk", drop = FALSE] # remove "Rank" column

  names(df) <- gsub("\\.", "_pct", names(df))
  names(df) <- gsub("^X2", "two_", names(df))
  names(df) <- gsub("^X3", "three_", names(df))
  names(df) <- tolower(names(df))
  df
}
81
+
82
# Convert a vector to numeric when it looks numeric, otherwise return it
# unchanged.
#
# `x` is typically a character column read from an HTML table. The vector is
# converted as soon as at least one element parses as a number; elements that
# do not parse (for example empty strings) become NA. If nothing parses, `x`
# is returned exactly as it was passed in.
maybe_as_numeric <- function(x) {
  # as.numeric() on a factor yields its internal integer codes rather than the
  # values shown in its labels, so go through the labels instead.
  candidate <- if (is.factor(x)) as.character(x) else x

  # Coercion warnings are expected for text columns and carry no information
  # here; the all-NA check below decides the outcome.
  numeric_x <- suppressWarnings(as.numeric(candidate))

  if (all(is.na(numeric_x))) {
    return(x)
  }
  numeric_x
}
0 commit comments