Skip to content

Commit da1bcfe

Browse files
committed
first upload
0 parents  commit da1bcfe

12 files changed

+301
-0
lines changed

.Rbuildignore

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
^.*\.Rproj$
2+
^\.Rproj\.user$
3+
^\.travis\.yml$
4+
^README\.Rmd$
5+
^README-.*\.png$
6+
^codecov\.yml$

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata
5+
*.Rproj
6+

.travis.yml

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2+
3+
language: R
4+
sudo: false
5+
cache: packages
6+
7+
after_success:
8+
- Rscript -e 'covr::codecov()'

DESCRIPTION

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Package: bbr
2+
Type: Package
3+
Title: Scrapes data from basketball-reference.com
4+
Version: 0.1.0
5+
Author: Max Joseph
6+
Maintainer: Max Joseph <[email protected]>
7+
Description: Fetches data from basketball-reference.com.
8+
This package is actively under development, and the interface is likely to
9+
change as new features are added!
10+
License: GPL-3
11+
Encoding: UTF-8
12+
LazyData: true
13+
Imports:
14+
rvest,
15+
xml2
16+
RoxygenNote: 5.0.1
17+
Suggests: testthat,
18+
covr

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(get_season)

R/get_season.R

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
2+
#' Scrape NBA player data by season
3+
#'
4+
#' This function retrieves NBA player data for a specific season, specified by
5+
#' year. There will be one row per player X team combination. That is, if a
6+
#' player played for multiple teams in one season, they will show up on multiple
7+
#' rows (one row for each team).
8+
#'
9+
#'
10+
#' @param year The season to collect data for. If you want the 2016-2017 data,
11+
#' use the latter year (2017).
12+
#' @return A data.frame with a row for each player X team combo in that season.
13+
#'
14+
#' The columns include:
15+
#'
16+
#' \describe{
17+
#' \item{player}{Player name}
18+
#' \item{pos}{Position}
19+
#' \item{age}{Age in years}
20+
#' \item{tm}{Team}
21+
#' \item{g}{Games played}
22+
#' \item{gs}{Games started}
23+
#' \item{mp}{Minutes played}
24+
#' \item{fg}{Field goals made}
25+
#' \item{fga}{Field goals attempted}
26+
#' \item{fg_pct}{Field goal shooting percentage}
27+
#' \item{three_p}{Three point shots made}
28+
#' \item{three_pa}{Three point shots attempted}
29+
#' \item{three_p_pct}{Three point shooting percentage}
30+
#' \item{two_p}{Two point shots made}
31+
#' \item{two_pa}{Two point shots attempted}
32+
#' \item{two_p_pct}{Two point shooting percentage}
33+
#' \item{efg_pct}{Effective field goal percentage (adjusts for the fact that 3
34+
#' pointers are worth one more point than two pointers)}
35+
#' \item{ft}{Free throws made}
36+
#' \item{fta}{Free throw attempts}
37+
#' \item{ft_pct}{Free throw percentage}
38+
#' \item{orb}{Offensive rebounds}
39+
#' \item{drb}{Defensive rebounds}
40+
#' \item{trb}{Total rebounds}
41+
#' \item{ast}{Assists}
42+
#' \item{stl}{Steals}
43+
#' \item{blk}{Blocks}
44+
#' \item{tov}{Turnovers}
45+
#' \item{pf}{Personal fouls}
46+
#' \item{pts}{Points scored}
47+
#' }
48+
#'
49+
#'
50+
#' @examples
51+
#' d <- get_season(2010)
52+
#' @export
53+
#'
54+
get_season <- function(year) {
  # Validate up front so that bad input (NULL, NA, a vector of years, text that
  # is not a number) fails with a clear message instead of an obscure error
  # raised by `if`. Numeric-like strings such as "2010" are still accepted.
  season <- suppressWarnings(as.numeric(year))
  if (length(season) != 1L || is.na(season)) {
    stop("`year` must be a single, non-missing year (e.g. 2017).",
         call. = FALSE)
  }

  # A season is referred to by its latter year (2016-2017 -> 2017), so the
  # newest season that can be requested is labelled with next calendar year.
  newest_year <- 1 + as.numeric(format(Sys.Date(), "%Y"))
  if (season < 1947 || season > newest_year) {
    stop("Data are only available after 1947 and up to the present.",
         call. = FALSE)
  }

  url <- paste0("http://www.basketball-reference.com/leagues/NBA_",
                season,
                "_totals.html")

  # Download the page, take the first <table> element on it, and convert it to
  # a data frame using the first row as the header.
  html <- xml2::read_html(url)
  node <- rvest::html_node(html, "table")
  table <- rvest::html_table(node, header = TRUE)

  # Tidy the raw table (drop repeated headers, fix types and column names).
  parse_season_table(table)
}
68+
69+
parse_season_table <- function(table) {
  # Cleans a raw season totals table.
  #
  # table: the data frame produced by rvest::html_table() in get_season().
  # Returns a data.frame without the "Rk" column, with numeric-looking columns
  # converted to numeric and lower-case, syntactic column names.

  # Header rows repeated inside the table body carry the literal string "Rk"
  # in the Rk column. `%in%` never returns NA (unlike `==`), so a row with a
  # missing rank is kept as-is instead of becoming an all-NA row. The guard
  # avoids dropping every row when there is no Rk column at all.
  if ("Rk" %in% names(table)) {
    duplicated_header_rows <- table[["Rk"]] %in% "Rk"
    table <- table[!duplicated_header_rows, , drop = FALSE]
  }

  converted <- lapply(table, maybe_as_numeric)

  # as.data.frame() makes the names syntactic: "FG%" becomes "FG." and "3P"
  # becomes "X3P". The substitutions below rely on that.
  df <- as.data.frame(converted, stringsAsFactors = FALSE)

  # Remove the "Rank" column; drop = FALSE keeps a data.frame even if only one
  # column is left.
  df <- df[, names(df) != "Rk", drop = FALSE]

  names(df) <- gsub("\\.", "_pct", names(df))
  names(df) <- gsub("X2", "two_", names(df))
  names(df) <- gsub("X3", "three_", names(df))
  names(df) <- tolower(names(df))
  df
}
81+
82+
maybe_as_numeric <- function(x) {
  # Converts a vector to numeric when its contents are numeric, otherwise
  # returns it unchanged.
  #
  # x: a vector, typically a character column of a scraped table.
  # Returns as.numeric(x) if at least one element parses and, for character
  # input, every non-blank, non-missing element parses; otherwise x itself.
  # Blank strings and NA become NA in the numeric result.
  numeric_x <- suppressWarnings(as.numeric(x))
  parsed <- !is.na(numeric_x)

  # Nothing parsed (or x is empty): not a numeric column.
  if (!any(parsed)) {
    return(x)
  }

  # A character column holding real text next to numbers (e.g. "3", "foo") is
  # left alone; coercing it would silently replace the text with NA.
  if (is.character(x)) {
    has_value <- !is.na(x) & nzchar(trimws(x))
    if (any(has_value & !parsed)) {
      return(x)
    }
  }

  numeric_x
}

README.Rmd

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
---
2+
output: github_document
3+
---
4+
5+
# bbr: basketball-reference data in R
6+
7+
[![Travis-CI Build Status](https://travis-ci.org/mbjoseph/bbr.svg?branch=master)](https://travis-ci.org/mbjoseph/bbr)
8+
[![Coverage Status](https://img.shields.io/codecov/c/github/mbjoseph/bbr/master.svg)](https://codecov.io/github/mbjoseph/bbr?branch=master)
9+
10+
<!-- README.md is generated from README.Rmd. Please edit that file -->
11+
12+
The bbr package is designed to quickly fetch tidy data from www.basketball-reference.com.
13+
This package is actively under development and the interface will change as new features are added.
14+
15+
## Installation
16+
17+
```{r, eval=FALSE}
18+
devtools::install_github("mbjoseph/bbr")
19+
```
20+
21+
## Example usage
22+
23+
```{r, echo = TRUE}
24+
library(bbr)
25+
26+
ninetyone <- get_season(1991)
27+
str(ninetyone)
28+
```

README.md

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
2+
bbr: an R package for fetching data from basketball-reference
3+
=============================================================
4+
5+
<!-- README.md is generated from README.Rmd. Please edit that file -->
6+
The bbr package is designed to quickly fetch tidy data from www.basketball-reference.com.
7+
8+
Installation
9+
------------
10+
11+
``` r
12+
devtools::install_github("mbjoseph/bbr")
13+
```
14+
15+
Example usage
16+
-------------
17+
18+
``` r
19+
library(bbr)
20+
21+
ninetyone <- get_season(1991)
22+
str(ninetyone)
23+
```
24+
25+
## 'data.frame': 441 obs. of 29 variables:
26+
## $ player : chr "Alaa Abdelnaby" "Mahmoud Abdul-Rauf" "Mark Acres" "Michael Adams" ...
27+
## $ pos : chr "PF" "PG" "C" "PG" ...
28+
## $ age : num 22 21 28 28 31 31 27 26 26 26 ...
29+
## $ tm : chr "POR" "DEN" "ORL" "DEN" ...
30+
## $ g : num 43 67 68 66 78 80 42 34 68 26 ...
31+
## $ gs : num 0 19 0 66 13 0 1 0 2 0 ...
32+
## $ mp : num 290 1505 1313 2346 2006 ...
33+
## $ fg : num 55 417 109 560 420 337 99 59 116 27 ...
34+
## $ fga : num 116 1009 214 1421 909 ...
35+
## $ fg_pct : num 0.474 0.413 0.509 0.394 0.462 0.472 0.44 0.504 0.43 0.37 ...
36+
## $ three_p : num 0 24 1 167 24 102 5 7 0 0 ...
37+
## $ three_pa : num 0 100 3 564 78 251 21 23 1 1 ...
38+
## $ three_p_pct: num NA 0.24 0.333 0.296 0.308 0.406 0.238 0.304 0 0 ...
39+
## $ two_p : num 55 393 108 393 396 235 94 52 116 27 ...
40+
## $ two_pa : num 116 909 211 857 831 463 204 94 269 72 ...
41+
## $ two_p_pct : num 0.474 0.432 0.512 0.459 0.477 0.508 0.461 0.553 0.431 0.375 ...
42+
## $ efg_pct : num 0.474 0.425 0.512 0.453 0.475 0.543 0.451 0.534 0.43 0.37 ...
43+
## $ ft : num 25 84 66 465 240 114 41 26 60 16 ...
44+
## $ fta : num 44 98 101 529 317 138 48 31 115 28 ...
45+
## $ ft_pct : num 0.568 0.857 0.653 0.879 0.757 0.826 0.854 0.839 0.522 0.571 ...
46+
## $ orb : num 27 34 140 58 134 45 41 10 97 26 ...
47+
## $ drb : num 62 87 219 198 240 160 76 14 221 49 ...
48+
## $ trb : num 89 121 359 256 374 205 117 24 318 75 ...
49+
## $ ast : num 12 206 25 693 139 285 45 22 16 3 ...
50+
## $ stl : num 4 55 25 147 47 63 15 8 35 8 ...
51+
## $ blk : num 12 4 25 6 20 13 8 1 45 9 ...
52+
## $ tov : num 22 110 42 240 128 100 40 16 84 22 ...
53+
## $ pf : num 39 149 218 162 209 195 88 11 140 29 ...
54+
## $ pts : num 135 942 285 1752 1104 ...

codecov.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
comment: false

man/get_season.Rd

+63
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat.R

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
library(testthat)
2+
library(bbr)
3+
4+
test_check("bbr")

tests/testthat/test-get_season.R

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
context("get_season")

# Input validation: these fail before any network request is made.
test_that("Years before 1947 raise errors", {
  expect_error(get_season(year = 1946))
})

test_that("Years in the future raise errors", {
  current_year <- as.numeric(format(Sys.Date(), "%Y"))
  expect_error(get_season(year = current_year + 3))
})

test_that("get_season returns the expected number of rows & cols for 2010", {
  # This test downloads a page from basketball-reference.com, so it is skipped
  # on CRAN where network access is not guaranteed.
  skip_on_cran()
  twentyten_data <- get_season(2010)
  expect_equal(nrow(twentyten_data), 578)
  expect_equal(ncol(twentyten_data), 29)
})

test_that("maybe_as_numeric converts chars to numbers only when appropriate", {
  should_convert <- c("3", "2")
  should_not_convert <- c("foo", "bar")
  # expect_true(is.*()) works in every testthat edition; expect_is() is
  # deprecated in the 3rd edition.
  expect_true(is.numeric(maybe_as_numeric(should_convert)))
  expect_true(is.character(maybe_as_numeric(should_not_convert)))
})

0 commit comments

Comments
 (0)