From 3482a6c0c14912edac7f9596a5c42057df77366e Mon Sep 17 00:00:00 2001 From: gbganalyst Date: Mon, 26 Feb 2024 09:17:20 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20gbganaly?= =?UTF-8?q?st/bulkreadr@3421e4f6e4060a6f47b4840f94d3b3bb12c8cfbd=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- articles/bulkreadr.html | 399 +--------------------------------- articles/index.html | 4 + articles/labelled-data.html | 318 +++++++++++++++++++++++++++ articles/other-functions.html | 340 +++++++++++++++++++++++++++++ index.html | 5 +- pkgdown.yml | 4 +- search.json | 2 +- sitemap.xml | 6 + 8 files changed, 680 insertions(+), 398 deletions(-) create mode 100644 articles/labelled-data.html create mode 100644 articles/other-functions.html diff --git a/articles/bulkreadr.html b/articles/bulkreadr.html index 8145489..0340ee8 100644 --- a/articles/bulkreadr.html +++ b/articles/bulkreadr.html @@ -149,43 +149,26 @@

How to load the packageNow that you have installed bulkreadr package, you can simply load it by using:

+library(bulkreadr)

Functions in bulkreadr package

This section provides a concise overview of the different functions -available in the bulkreadr package. These functions serve -various purposes and are designed to handle importing of data in -bulk.

- +available in the bulkreadr package for importing bulk data +in R.

-

Other functions in bulkreadr package: +

Note

- -

Note:

+

For the majority of functions within this package, we will utilize data stored in the system file by the bulkreadr, which can be accessed using the system.file() function. If you wish to utilize your own data stored in your local directory, please ensure that you have set the appropriate file path prior to using any functions provided by the bulkreadr package.

+
- -
-

read_spss_data() -

-

read_spss_data() is designed to seamlessly import data -from an SPSS data (.sav or .zsav) files. It -converts labelled variables into factors, a crucial step that enhances -the ease of data manipulation and analysis within the R programming -environment.

-

Read the SPSS data file without converting variable labels as -column names

-
-
-file_path <- system.file("extdata", "Wages.sav", package = "bulkreadr")
-
-data <- read_spss_data(file = file_path)
-
-data
-#> # A tibble: 400 × 9
-#>      id  educ south                  sex   exper  wage occup marr        ed     
-#>   <dbl> <dbl> <fct>                  <fct> <dbl> <dbl> <fct> <fct>       <fct>  
-#> 1     3    12 does not live in South Male     17  7.5  Other Married     High s…
-#> 2     4    13 does not live in South Male      9 13.1  Other Not married Some c…
-#> 3     5    10 lives in South         Male     27  4.45 Other Not married Less t…
-#> 4    12     9 lives in South         Male     30  6.25 Other Not married Less t…
-#> 5    13     9 lives in South         Male     29 20.0  Other Married     Less t…
-#> # ℹ 395 more rows
-

Read the SPSS data file and convert variable labels as column -names

-
-
-data <- read_spss_data(file = file_path, label = TRUE)
-
-data
-#> # A tibble: 400 × 9
-#>   `Worker ID` `Number of years of education` `Live in south`        Gender
-#>         <dbl>                          <dbl> <fct>                  <fct> 
-#> 1           3                             12 does not live in South Male  
-#> 2           4                             13 does not live in South Male  
-#> 3           5                             10 lives in South         Male  
-#> 4          12                              9 lives in South         Male  
-#> 5          13                              9 lives in South         Male  
-#> # ℹ 395 more rows
-#> # ℹ 5 more variables: `Number of years of work experience` <dbl>,
-#> #   `Wage (dollars per hour)` <dbl>, Occupation <fct>, `Marital status` <fct>,
-#> #   `Highest education level` <fct>
-
-
-

read_stata_data() -

-

read_stata_data() reads Stata data file -(.dta) into an R data frame, converting labeled variables -into factors.

-

Read the Stata data file without converting variable labels -as column names

-
-
-file_path <- system.file("extdata", "Wages.dta", package = "bulkreadr")
-
-data <- read_stata_data(file = file_path)
-
-data
-#> # A tibble: 400 × 9
-#>      id  educ south                  sex   exper  wage occup marr        ed     
-#>   <dbl> <dbl> <fct>                  <fct> <dbl> <dbl> <fct> <fct>       <fct>  
-#> 1     3    12 does not live in South Male     17  7.5  Other Married     High s…
-#> 2     4    13 does not live in South Male      9 13.1  Other Not married Some c…
-#> 3     5    10 lives in South         Male     27  4.45 Other Not married Less t…
-#> 4    12     9 lives in South         Male     30  6.25 Other Not married Less t…
-#> 5    13     9 lives in South         Male     29 20.0  Other Married     Less t…
-#> # ℹ 395 more rows
-

Read the Stata data file and convert variable labels as -column names

-
-
-data <- read_stata_data(file = file_path, label = TRUE)
-
-data
-#> # A tibble: 400 × 9
-#>   `Worker ID` `Number of years of education` `Live in south`        Gender
-#>         <dbl>                          <dbl> <fct>                  <fct> 
-#> 1           3                             12 does not live in South Male  
-#> 2           4                             13 does not live in South Male  
-#> 3           5                             10 lives in South         Male  
-#> 4          12                              9 lives in South         Male  
-#> 5          13                              9 lives in South         Male  
-#> # ℹ 395 more rows
-#> # ℹ 5 more variables: `Number of years of work experience` <dbl>,
-#> #   `Wage (dollars per hour)` <dbl>, Occupation <fct>, `Marital status` <fct>,
-#> #   `Highest education level` <fct>
-
-
-

generate_dictionary() -

-

generate_dictionary() creates a data dictionary from a -specified data frame. This function is particularly useful for -understanding and documenting the structure of your dataset, similar to -data dictionaries in Stata or SPSS.

-
-
-# Creating a data dictionary from an SPSS file
-
-file_path <- system.file("extdata", "Wages.sav", package = "bulkreadr")
-
-wage_data <- read_spss_data(file = file_path)
-
-generate_dictionary(wage_data)
-#> # A tibble: 9 × 6
-#>   position variable description                     `column type` missing levels
-#>      <int> <chr>    <chr>                           <chr>           <int> <name>
-#> 1        1 id       Worker ID                       dbl                 0 <NULL>
-#> 2        2 educ     Number of years of education    dbl                 0 <NULL>
-#> 3        3 south    Live in south                   fct                 0 <chr> 
-#> 4        4 sex      Gender                          fct                 0 <chr> 
-#> 5        5 exper    Number of years of work experi… dbl                 0 <NULL>
-#> # ℹ 4 more rows
-
-
-

look_for() -

-

The look_for() function is designed to emulate the -functionality of the Stata lookfor command in R. It -provides a powerful tool for searching through large datasets, -specifically targeting variable names, variable label descriptions, -factor levels, and value labels. This function is handy for users -working with extensive and complex datasets, enabling them to quickly -and efficiently locate the variables of interest.

-
-
-# Look for a single keyword.
-
-look_for(wage_data, "south")
-#>  pos variable label         col_type missing values                
-#>  3   south    Live in south fct      0       does not live in South
-#>                                              lives in South
-
-look_for(wage_data, "s")
-#>  pos variable label                              col_type missing
-#>  2   educ     Number of years of education       dbl      0      
-#>  3   south    Live in south                      fct      0      
-#>                                                                  
-#>  4   sex      Gender                             fct      0      
-#>                                                                  
-#>  5   exper    Number of years of work experience dbl      0      
-#>  6   wage     Wage (dollars per hour)            dbl      0      
-#>  7   occup    Occupation                         fct      0      
-#>                                                                  
-#>                                                                  
-#>                                                                  
-#>                                                                  
-#>                                                                  
-#>  8   marr     Marital status                     fct      0      
-#>                                                                  
-#>  9   ed       Highest education level            fct      0      
-#>                                                                  
-#>                                                                  
-#>                                                                  
-#>                                                                  
-#>  values                
-#>                        
-#>  does not live in South
-#>  lives in South        
-#>  Male                  
-#>  Female                
-#>                        
-#>                        
-#>  Management            
-#>  Sales                 
-#>  Clerical              
-#>  Service               
-#>  Professional          
-#>  Other                 
-#>  Not married           
-#>  Married               
-#>  Less than h.s. degree 
-#>  High school degree    
-#>  Some college          
-#>  College degree        
-#>  Graduate school
-
-
-

pull_out() -

-

pull_out() is similar to [. It acts on vectors, -matrices, arrays and lists to extract or replace parts. It is pleasant -to use with the magrittr (⁠%>%⁠) and -base(|>) operators.

-
-
-top_10_richest_nig <- c("Aliko Dangote", "Mike Adenuga", "Femi Otedola", "Arthur Eze", "Abdulsamad Rabiu", "Cletus Ibeto", "Orji Uzor Kalu", "ABC Orjiakor", "Jimoh Ibrahim", "Tony Elumelu")
-
-top_10_richest_nig %>% 
-  pull_out(c(1, 5, 2))
-#> [1] "Aliko Dangote"    "Abdulsamad Rabiu" "Mike Adenuga"
-
-top_10_richest_nig %>% 
-  pull_out(-c(1, 5, 2))
-#> [1] "Femi Otedola"   "Arthur Eze"     "Cletus Ibeto"   "Orji Uzor Kalu"
-#> [5] "ABC Orjiakor"   "Jimoh Ibrahim"  "Tony Elumelu"
-
-
-

convert_to_date() -

-

convert_to_date() parses an input vector into POSIXct -date-time object. It is also powerful to convert from excel date number -like 42370 into date value like -2016-01-01.

-
-
-## ** heterogeneous dates **
-
-dates <- c(
-  44869, "22.09.2022", NA, "02/27/92", "01-19-2022",
-  "13-01-  2022", "2023", "2023-2", 41750.2, 41751.99,
-  "11 07 2023", "2023-4"
-  )
-
-# Convert to POSIXct or Date object
-
-convert_to_date(dates)
-#>  [1] "2022-11-04" "2022-09-22" NA           "1992-02-27" "2022-01-19"
-#>  [6] "2022-01-13" "2023-01-01" "2023-02-01" "2014-04-21" "2014-04-22"
-#> [11] "2023-07-11" "2023-04-01"
-
-# It can also convert date time object to date object 
-
-convert_to_date(lubridate::now())
-#> [1] "2024-02-22"
-
-
-

inspect_na() -

-

inspect_na() summarizes the rate of missingness in each -column of a data frame. For a grouped data frame, the rate of -missingness is summarized separately for each group.

-
-
-# dataframe summary
-
-inspect_na(airquality)
-#> # A tibble: 6 × 3
-#>   col_name   cnt  pcnt
-#>   <chr>    <int> <dbl>
-#> 1 Ozone       37 24.2 
-#> 2 Solar.R      7  4.58
-#> 3 Wind         0  0   
-#> 4 Temp         0  0   
-#> 5 Month        0  0   
-#> # ℹ 1 more row
-

Grouped dataframe summary

-
-airquality %>% 
-  group_by(Month) %>% 
-  inspect_na()
-#> # A tibble: 25 × 4
-#> # Groups:   Month [5]
-#>   Month col_name   cnt  pcnt
-#>   <int> <chr>    <int> <dbl>
-#> 1     5 Ozone        5  16.1
-#> 2     5 Solar.R      4  12.9
-#> 3     5 Wind         0   0  
-#> 4     5 Temp         0   0  
-#> 5     5 Day          0   0  
-#> # ℹ 20 more rows
-
-
-

fill_missing_values() -

-

fill_missing_values() in an efficient function that -addresses missing values in a dataframe. It uses imputation by function, -meaning it replaces missing data in numeric variables with either the -mean or the median, and in non-numeric variables with the mode. The -function takes a column-based imputation approach, ensuring that -replacement values are derived from the respective columns, resulting in -accurate and consistent data. This method enhances the integrity of the -dataset and promotes sound decision-making and analysis in data -processing workflows.

-
-
-df <- tibble::tibble(
-  Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5),
-  Sepal.Width = c(4.1, 3.6, 3, 3, 2.9, 2.5, 2.4),
-  Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7),
-  Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA),
-  Species = c("setosa", NA, "versicolor", "setosa",
-    NA, "virginica", "setosa"
-  )
-)
-
-df
-#> # A tibble: 7 × 5
-#>   Sepal_Length Sepal.Width Petal_Length Petal_Width Species   
-#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     
-#> 1          5.2         4.1          1.5        NA   setosa    
-#> 2          5           3.6          1.4         0.2 NA        
-#> 3          5.7         3            4.2         1.2 versicolor
-#> 4         NA           3            1.4         0.2 setosa    
-#> 5          6.2         2.9         NA           1.3 NA        
-#> # ℹ 2 more rows
-

Using mean to fill missing values for numeric -variables

-
-
-result_df_mean <- fill_missing_values(df, use_mean = TRUE)
-
-result_df_mean
-#> # A tibble: 7 × 5
-#>   Sepal_Length Sepal.Width Petal_Length Petal_Width Species   
-#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     
-#> 1         5.2          4.1          1.5        0.94 setosa    
-#> 2         5            3.6          1.4        0.2  setosa    
-#> 3         5.7          3            4.2        1.2  versicolor
-#> 4         5.72         3            1.4        0.2  setosa    
-#> 5         6.2          2.9          3          1.3  setosa    
-#> # ℹ 2 more rows
-

Using median to fill missing values for numeric -variables

-
-result_df_median <- fill_missing_values(df, use_mean = FALSE)
-
-result_df_median
-#> # A tibble: 7 × 5
-#>   Sepal_Length Sepal.Width Petal_Length Petal_Width Species   
-#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     
-#> 1          5.2         4.1          1.5         1.2 setosa    
-#> 2          5           3.6          1.4         0.2 setosa    
-#> 3          5.7         3            4.2         1.2 versicolor
-#> 4          5.6         3            1.4         0.2 setosa    
-#> 5          6.2         2.9          2.6         1.3 setosa    
-#> # ℹ 2 more rows
-
-

Impute missing values (NAs) in a grouped data frame -

-

You can use the fill_missing_values() in a grouped data -frame by using other grouping and map functions. Here is an example of -how to do this:

-
-sample_iris <- tibble::tibble(
-Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5),
-Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7),
-Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA),
-Species = c("setosa", "setosa", "versicolor", "setosa",
-          "virginica", "virginica", "setosa")
-)
-
-sample_iris
-#> # A tibble: 7 × 4
-#>   Sepal_Length Petal_Length Petal_Width Species   
-#>          <dbl>        <dbl>       <dbl> <chr>     
-#> 1          5.2          1.5         0.3 setosa    
-#> 2          5            1.4         0.2 setosa    
-#> 3          5.7          4.2         1.2 versicolor
-#> 4         NA            1.4         0.2 setosa    
-#> 5          6.2         NA           1.3 virginica 
-#> # ℹ 2 more rows
-
-sample_iris %>%
-  group_by(Species) %>%
-  group_split() %>%
-  map_df(fill_missing_values)
-#> # A tibble: 7 × 4
-#>   Sepal_Length Petal_Length Petal_Width Species   
-#>          <dbl>        <dbl>       <dbl> <chr>     
-#> 1         5.2           1.5       0.3   setosa    
-#> 2         5             1.4       0.2   setosa    
-#> 3         5.23          1.4       0.2   setosa    
-#> 4         5.5           3.7       0.233 setosa    
-#> 5         5.7           4.2       1.2   versicolor
-#> # ℹ 2 more rows
-
diff --git a/articles/index.html b/articles/index.html index 462f212..c701567 100644 --- a/articles/index.html +++ b/articles/index.html @@ -78,6 +78,10 @@

Get started

Introduction to bulkreadr

Start here if this is your first time using bulkreadr. You’ll learn how to use functions like read_excel_workbook() and read_excel_files_from_dir() for importing data from Excel and read_gsheets() for Google Sheets, allowing for data importation from multiple sheets. For handling CSV files, read_csv_files_from_dir() reads all CSV files from a specified directory.

+
Introduction to labelled data
+

The R ecosystem, through packages like foreign and haven, facilitates the importation of labelled data from software like SPSS and Stata, ensuring a smooth transition into R. This vignette introduces you to other functions in bulkreadr, such as read_spss_data(), which extends this functionality by leveraging haven to streamline the process further.

+
Other functions in bulkreadr
+

The bulkreadr package includes specialized functions beyond bulk data reading, aimed at enhancing data analysis efficiency. These functions are designed to operate on individual vectors, except for inspect_na() and fill_missing_values(), which work on data frames.

diff --git a/articles/labelled-data.html b/articles/labelled-data.html new file mode 100644 index 0000000..4254c93 --- /dev/null +++ b/articles/labelled-data.html @@ -0,0 +1,318 @@ + + + + + + + + +Introduction to labelled data • bulkreadr + + + + + + + + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

What is labelled data in R? +

+

Labelled data in SPSS and Stata refers to datasets where each +variable (or column) and its values are assigned meaningful labels. +These labels provide context, such as descriptions or categories, making +the data easier to understand and analyze. For instance, a variable +representing gender might have numerical codes (1, 2) with labels +(“Male”, “Female”). This feature enhances data analysis by allowing +researchers to work with descriptive labels instead of deciphering codes +or numeric values, facilitating clearer interpretation and communication +of statistical results.

+

The R ecosystem, through packages like foreign and +haven, facilitates the importation of labelled data from +software like SPSS and Stata, ensuring a smooth transition into R. The +bulkreadr package extends this functionality by leveraging +haven to further streamline the process. It automatically +converts labelled data into R’s factor data type, eliminating the need +for manual recoding. This enhancement significantly improves the +efficiency of the data analysis workflow within the R environment.

+
+

Note +

+
+

For the majority of functions within this package, we will utilize +data stored in the system files of the bulkreadr package, which can +be accessed using the system.file() function. If you wish +to utilize your own data stored in your local directory, please ensure +that you have set the appropriate file path prior to using any functions +provided by the bulkreadr package.

+
+
+
+

read_spss_data() +

+

read_spss_data() is designed to seamlessly import data +from SPSS data files (.sav or .zsav). It +converts labelled variables into factors, a crucial step that enhances +the ease of data manipulation and analysis within the R programming +environment.

+

Read the SPSS data file without converting variable labels as +column names

+
+
+library(bulkreadr)
+
+file_path <- system.file("extdata", "Wages.sav", package = "bulkreadr")
+
+data <- read_spss_data(file = file_path)
+
+data
+#> # A tibble: 400 × 9
+#>      id  educ south                  sex   exper  wage occup marr        ed     
+#>   <dbl> <dbl> <fct>                  <fct> <dbl> <dbl> <fct> <fct>       <fct>  
+#> 1     3    12 does not live in South Male     17  7.5  Other Married     High s…
+#> 2     4    13 does not live in South Male      9 13.1  Other Not married Some c…
+#> 3     5    10 lives in South         Male     27  4.45 Other Not married Less t…
+#> 4    12     9 lives in South         Male     30  6.25 Other Not married Less t…
+#> 5    13     9 lives in South         Male     29 20.0  Other Married     Less t…
+#> # ℹ 395 more rows
+

Read the SPSS data file and convert variable labels as column +names

+
+
+data <- read_spss_data(file = file_path, label = TRUE)
+
+data
+#> # A tibble: 400 × 9
+#>   `Worker ID` `Number of years of education` `Live in south`        Gender
+#>         <dbl>                          <dbl> <fct>                  <fct> 
+#> 1           3                             12 does not live in South Male  
+#> 2           4                             13 does not live in South Male  
+#> 3           5                             10 lives in South         Male  
+#> 4          12                              9 lives in South         Male  
+#> 5          13                              9 lives in South         Male  
+#> # ℹ 395 more rows
+#> # ℹ 5 more variables: `Number of years of work experience` <dbl>,
+#> #   `Wage (dollars per hour)` <dbl>, Occupation <fct>, `Marital status` <fct>,
+#> #   `Highest education level` <fct>
+
+
+

read_stata_data() +

+

read_stata_data() reads a Stata data file +(.dta) into an R data frame, converting labelled variables +into factors.

+

Read the Stata data file without converting variable labels +as column names

+
+
+file_path <- system.file("extdata", "Wages.dta", package = "bulkreadr")
+
+data <- read_stata_data(file = file_path)
+
+data
+#> # A tibble: 400 × 9
+#>      id  educ south                  sex   exper  wage occup marr        ed     
+#>   <dbl> <dbl> <fct>                  <fct> <dbl> <dbl> <fct> <fct>       <fct>  
+#> 1     3    12 does not live in South Male     17  7.5  Other Married     High s…
+#> 2     4    13 does not live in South Male      9 13.1  Other Not married Some c…
+#> 3     5    10 lives in South         Male     27  4.45 Other Not married Less t…
+#> 4    12     9 lives in South         Male     30  6.25 Other Not married Less t…
+#> 5    13     9 lives in South         Male     29 20.0  Other Married     Less t…
+#> # ℹ 395 more rows
+

Read the Stata data file and convert variable labels as +column names

+
+
+data <- read_stata_data(file = file_path, label = TRUE)
+
+data
+#> # A tibble: 400 × 9
+#>   `Worker ID` `Number of years of education` `Live in south`        Gender
+#>         <dbl>                          <dbl> <fct>                  <fct> 
+#> 1           3                             12 does not live in South Male  
+#> 2           4                             13 does not live in South Male  
+#> 3           5                             10 lives in South         Male  
+#> 4          12                              9 lives in South         Male  
+#> 5          13                              9 lives in South         Male  
+#> # ℹ 395 more rows
+#> # ℹ 5 more variables: `Number of years of work experience` <dbl>,
+#> #   `Wage (dollars per hour)` <dbl>, Occupation <fct>, `Marital status` <fct>,
+#> #   `Highest education level` <fct>
+
+
+

generate_dictionary() +

+

generate_dictionary() creates a data dictionary from a +specified data frame. This function is particularly useful for +understanding and documenting the structure of your dataset, similar to +data dictionaries in Stata or SPSS.

+
+
+# Creating a data dictionary from an SPSS file
+
+file_path <- system.file("extdata", "Wages.sav", package = "bulkreadr")
+
+wage_data <- read_spss_data(file = file_path)
+
+generate_dictionary(wage_data)
+#> # A tibble: 9 × 6
+#>   position variable description                     `column type` missing levels
+#>      <int> <chr>    <chr>                           <chr>           <int> <name>
+#> 1        1 id       Worker ID                       dbl                 0 <NULL>
+#> 2        2 educ     Number of years of education    dbl                 0 <NULL>
+#> 3        3 south    Live in south                   fct                 0 <chr> 
+#> 4        4 sex      Gender                          fct                 0 <chr> 
+#> 5        5 exper    Number of years of work experi… dbl                 0 <NULL>
+#> # ℹ 4 more rows
+
+
+

look_for() +

+

The look_for() function is designed to emulate the +functionality of the Stata lookfor command in R. It +provides a powerful tool for searching through large datasets, +specifically targeting variable names, variable label descriptions, +factor levels, and value labels. This function is handy for users +working with extensive and complex datasets, enabling them to quickly +and efficiently locate the variables of interest.

+
+
+# Look for a single keyword.
+
+look_for(wage_data, "south")
+#>  pos variable label         col_type missing values                
+#>  3   south    Live in south fct      0       does not live in South
+#>                                              lives in South
+
+look_for(wage_data, "^s")
+#>  pos variable label         col_type missing values                
+#>  3   south    Live in south fct      0       does not live in South
+#>                                              lives in South        
+#>  4   sex      Gender        fct      0       Male                  
+#>                                              Female
+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/articles/other-functions.html b/articles/other-functions.html new file mode 100644 index 0000000..cd60a1a --- /dev/null +++ b/articles/other-functions.html @@ -0,0 +1,340 @@ + + + + + + + + +Other functions in bulkreadr • bulkreadr + + + + + + + + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +

The bulkreadr package in R includes specialized +functions beyond bulk data reading, aimed at enhancing data analysis +efficiency. These functions are designed to operate on individual +vectors, except for inspect_na() and +fill_missing_values(), which work on data frames.

+
+

pull_out() +

+

pull_out() is similar to [. It acts on vectors, +matrices, arrays and lists to extract or replace parts. It is pleasant +to use with the magrittr (⁠%>%⁠) and +base (|>) operators.

+
+
+library(bulkreadr)
+library(dplyr)
+
+top_10_richest_nig <- c("Aliko Dangote", "Mike Adenuga", "Femi Otedola", "Arthur Eze", "Abdulsamad Rabiu", "Cletus Ibeto", "Orji Uzor Kalu", "ABC Orjiakor", "Jimoh Ibrahim", "Tony Elumelu")
+
+top_10_richest_nig %>% 
+  pull_out(c(1, 5, 2))
+#> [1] "Aliko Dangote"    "Abdulsamad Rabiu" "Mike Adenuga"
+
+top_10_richest_nig %>% 
+  pull_out(-c(1, 5, 2))
+#> [1] "Femi Otedola"   "Arthur Eze"     "Cletus Ibeto"   "Orji Uzor Kalu"
+#> [5] "ABC Orjiakor"   "Jimoh Ibrahim"  "Tony Elumelu"
+
+
+

convert_to_date() +

+

convert_to_date() parses an input vector into a POSIXct +date-time object. It can also convert an Excel date number +like 42370 into a date value like +2016-01-01.

+
+
+## ** heterogeneous dates **
+
+dates <- c(
+  44869, "22.09.2022", NA, "02/27/92", "01-19-2022",
+  "13-01-  2022", "2023", "2023-2", 41750.2, 41751.99,
+  "11 07 2023", "2023-4"
+  )
+
+# Convert to POSIXct or Date object
+
+convert_to_date(dates)
+#>  [1] "2022-11-04" "2022-09-22" NA           "1992-02-27" "2022-01-19"
+#>  [6] "2022-01-13" "2023-01-01" "2023-02-01" "2014-04-21" "2014-04-22"
+#> [11] "2023-07-11" "2023-04-01"
+
+# It can also convert date time object to date object 
+
+convert_to_date(lubridate::now())
+#> [1] "2024-02-26"
+
+
+

inspect_na() +

+

inspect_na() summarizes the rate of missingness in each +column of a data frame. For a grouped data frame, the rate of +missingness is summarized separately for each group.

+
+
+# dataframe summary
+
+inspect_na(airquality)
+#> # A tibble: 6 × 3
+#>   col_name   cnt  pcnt
+#>   <chr>    <int> <dbl>
+#> 1 Ozone       37 24.2 
+#> 2 Solar.R      7  4.58
+#> 3 Wind         0  0   
+#> 4 Temp         0  0   
+#> 5 Month        0  0   
+#> # ℹ 1 more row
+

Grouped dataframe summary

+
+airquality %>% 
+  group_by(Month) %>% 
+  inspect_na()
+#> # A tibble: 25 × 4
+#> # Groups:   Month [5]
+#>   Month col_name   cnt  pcnt
+#>   <int> <chr>    <int> <dbl>
+#> 1     5 Ozone        5  16.1
+#> 2     5 Solar.R      4  12.9
+#> 3     5 Wind         0   0  
+#> 4     5 Temp         0   0  
+#> 5     5 Day          0   0  
+#> # ℹ 20 more rows
+
+
+

fill_missing_values() +

+

fill_missing_values() is an efficient function that +addresses missing values in a dataframe. It uses imputation by function, +meaning it replaces missing data in numeric variables with either the +mean or the median, and in non-numeric variables with the mode. The +function takes a column-based imputation approach, ensuring that +replacement values are derived from the respective columns, resulting in +accurate and consistent data. This method enhances the integrity of the +dataset and promotes sound decision-making and analysis in data +processing workflows.

+
+
+df <- tibble::tibble(
+  Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5),
+  Sepal.Width = c(4.1, 3.6, 3, 3, 2.9, 2.5, 2.4),
+  Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7),
+  Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA),
+  Species = c("setosa", NA, "versicolor", "setosa",
+    NA, "virginica", "setosa"
+  )
+)
+
+df
+#> # A tibble: 7 × 5
+#>   Sepal_Length Sepal.Width Petal_Length Petal_Width Species   
+#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     
+#> 1          5.2         4.1          1.5        NA   setosa    
+#> 2          5           3.6          1.4         0.2 NA        
+#> 3          5.7         3            4.2         1.2 versicolor
+#> 4         NA           3            1.4         0.2 setosa    
+#> 5          6.2         2.9         NA           1.3 NA        
+#> # ℹ 2 more rows
+

Using mean to fill missing values for numeric +variables

+
+
+result_df_mean <- fill_missing_values(df, use_mean = TRUE)
+
+result_df_mean
+#> # A tibble: 7 × 5
+#>   Sepal_Length Sepal.Width Petal_Length Petal_Width Species   
+#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     
+#> 1         5.2          4.1          1.5        0.94 setosa    
+#> 2         5            3.6          1.4        0.2  setosa    
+#> 3         5.7          3            4.2        1.2  versicolor
+#> 4         5.72         3            1.4        0.2  setosa    
+#> 5         6.2          2.9          3          1.3  setosa    
+#> # ℹ 2 more rows
+

Using median to fill missing values for numeric +variables

+
+result_df_median <- fill_missing_values(df, use_mean = FALSE)
+
+result_df_median
+#> # A tibble: 7 × 5
+#>   Sepal_Length Sepal.Width Petal_Length Petal_Width Species   
+#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     
+#> 1          5.2         4.1          1.5         1.2 setosa    
+#> 2          5           3.6          1.4         0.2 setosa    
+#> 3          5.7         3            4.2         1.2 versicolor
+#> 4          5.6         3            1.4         0.2 setosa    
+#> 5          6.2         2.9          2.6         1.3 setosa    
+#> # ℹ 2 more rows
+
+

Impute missing values (NAs) in a grouped data frame +

+

You can use fill_missing_values() on a grouped data +frame by combining it with grouping and map functions. Here is an example of +how to do this:

+
+sample_iris <- tibble::tibble(
+Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5),
+Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7),
+Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA),
+Species = c("setosa", "setosa", "versicolor", "setosa",
+          "virginica", "virginica", "setosa")
+)
+
+sample_iris
+#> # A tibble: 7 × 4
+#>   Sepal_Length Petal_Length Petal_Width Species   
+#>          <dbl>        <dbl>       <dbl> <chr>     
+#> 1          5.2          1.5         0.3 setosa    
+#> 2          5            1.4         0.2 setosa    
+#> 3          5.7          4.2         1.2 versicolor
+#> 4         NA            1.4         0.2 setosa    
+#> 5          6.2         NA           1.3 virginica 
+#> # ℹ 2 more rows
+
+sample_iris %>%
+  group_by(Species) %>%
+  group_split() %>%
+  map_df(fill_missing_values)
+#> # A tibble: 7 × 4
+#>   Sepal_Length Petal_Length Petal_Width Species   
+#>          <dbl>        <dbl>       <dbl> <chr>     
+#> 1         5.2           1.5       0.3   setosa    
+#> 2         5             1.4       0.2   setosa    
+#> 3         5.23          1.4       0.2   setosa    
+#> 4         5.5           3.7       0.233 setosa    
+#> 5         5.7           4.2       1.2   versicolor
+#> # ℹ 2 more rows
+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/index.html b/index.html index c1cda8d..e43ff86 100644 --- a/index.html +++ b/index.html @@ -158,13 +158,12 @@

How to load the package

Now that you have installed the bulkreadr package, you can simply load it by using:

+library(bulkreadr)

Context

-

bulkreadr draws on and complements / emulates other packages such as readxl, readr, and googlesheets4 to read bulk data in R.

+

bulkreadr is designed to integrate with and augment the capabilities of established packages such as readxl, readr, and googlesheets4, offering enhanced functionality for reading bulk data within the R programming environment.

  • readxl is the tidyverse package for reading Excel files (xls or xlsx) into an R data frame.

  • readr is the tidyverse package for reading delimited files (e.g., csv or tsv) into an R data frame.

  • diff --git a/pkgdown.yml b/pkgdown.yml index e9012d2..60c1705 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -3,7 +3,9 @@ pkgdown: 2.0.7 pkgdown_sha: ~ articles: bulkreadr: bulkreadr.html -last_built: 2024-02-22T18:01Z + labelled-data: labelled-data.html + other-functions: other-functions.html +last_built: 2024-02-26T09:16Z urls: reference: https://gbganalyst.github.io/bulkreadr/reference article: https://gbganalyst.github.io/bulkreadr/articles diff --git a/search.json b/search.json index 9ad7cd1..15ace42 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"https://gbganalyst.github.io/bulkreadr/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 bulkreadr authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"about-the-package","dir":"Articles","previous_headings":"","what":"About the package","title":"Introduction to bulkreadr","text":"bulkreadr R package designed simplify streamline process reading processing large volumes data. collection functions tailored bulk data operations, package allows users efficiently read multiple sheets Microsoft Excel/Google Sheets workbooks multiple CSV files directory. 
returns data organized data frames, making convenient analysis manipulation. Whether dealing extensive data sets batch processing tasks, “bulkreadr” empowers users effortlessly handle data bulk, saving time effort data preparation workflows.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"Introduction to bulkreadr","text":"can install bulkreadr package CRAN : development version GitHub ","code":"install.packages(\"bulkreadr\") if(!require(\"devtools\")){ install.packages(\"devtools\") } devtools::install_github(\"gbganalyst/bulkreadr\")"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"how-to-load-the-package","dir":"Articles","previous_headings":"","what":"How to load the package","title":"Introduction to bulkreadr","text":"Now installed bulkreadr package, can simply load using:","code":"library(bulkreadr) library(dplyr)"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"functions-in-bulkreadr-package","dir":"Articles","previous_headings":"","what":"Functions in bulkreadr package","title":"Introduction to bulkreadr","text":"section provides concise overview different functions available bulkreadr package. functions serve various purposes designed handle importing data bulk. read_excel_workbook() read_excel_files_from_dir() read_csv_files_from_dir() read_gsheets() read_spss_data() read_stata_data()","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"other-functions-in-bulkreadr-package","dir":"Articles","previous_headings":"","what":"Other functions in bulkreadr package:","title":"Introduction to bulkreadr","text":"generate_dictionary() look_for() pull_out() convert_to_date() inspect_na() fill_missing_values() Note: majority functions within package, utilize data stored system file bulkreadr, can accessed using system.file() function. 
wish utilize data stored local directory, please ensure set appropriate file path prior using functions provided bulkreadr package.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_excel_workbook","dir":"Articles","previous_headings":"","what":"read_excel_workbook()","title":"Introduction to bulkreadr","text":"read_excel_workbook() reads data sheets Excel workbook return appended dataframe.","code":"# path to the xls/xlsx file. path <- system.file(\"extdata\", \"Diamonds.xlsx\", package = \"bulkreadr\") # read the sheets read_excel_workbook(path = path) #> # A tibble: 260 × 9 #> carat color clarity depth table price x y z #> #> 1 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_excel_files_from_dir","dir":"Articles","previous_headings":"","what":"read_excel_files_from_dir()","title":"Introduction to bulkreadr","text":"read_excel_files_from_dir() reads Excel workbooks \"~/data\" directory returns appended dataframe.","code":"# path to the directory containing the xls/xlsx files. 
directory <- system.file(\"xlsxfolder\", package = \"bulkreadr\") # import the workbooks read_excel_files_from_dir(dir_path = directory) #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 Fair 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 Fair 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 Fair 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 Fair 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 Fair 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_csv_files_from_dir","dir":"Articles","previous_headings":"","what":"read_csv_files_from_dir()","title":"Introduction to bulkreadr","text":"read_csv_files_from_dir() reads csv files \"~/data\" directory returns appended dataframe. resulting dataframe order CSV files directory.","code":"# path to the directory containing the CSV files. directory <- system.file(\"csvfolder\", package = \"bulkreadr\") # import the csv files read_csv_files_from_dir(dir_path = directory) #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 Fair 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 Fair 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 Fair 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 Fair 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 Fair 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_gsheets","dir":"Articles","previous_headings":"","what":"read_gsheets()","title":"Introduction to bulkreadr","text":"read_gsheets() function imports data multiple sheets Google Sheets spreadsheet appends resulting dataframes sheet together create single dataframe. 
function powerful tool data analysis, allows easily combine data multiple sheets single dataset.","code":"# Google Sheet ID or the link to the sheet sheet_id <- \"1izO0mHu3L9AMySQUXGDn9GPs1n-VwGFSEoAKGhqVQh0\" # read all the sheets read_gsheets(ss = sheet_id) #> # A tibble: 260 × 9 #> carat color clarity depth table price x y z #> #> 1 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_spss_data","dir":"Articles","previous_headings":"","what":"read_spss_data()","title":"Introduction to bulkreadr","text":"read_spss_data() designed seamlessly import data SPSS data (.sav .zsav) files. converts labelled variables factors, crucial step enhances ease data manipulation analysis within R programming environment. Read SPSS data file without converting variable labels column names Read SPSS data file convert variable labels column names","code":"file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") data <- read_spss_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High s… #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some c… #> 3 5 10 lives in South Male 27 4.45 Other Not married Less t… #> 4 12 9 lives in South Male 30 6.25 Other Not married Less t… #> 5 13 9 lives in South Male 29 20.0 Other Married Less t… #> # ℹ 395 more rows data <- read_spss_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> # ℹ 395 more rows 
#> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_stata_data","dir":"Articles","previous_headings":"","what":"read_stata_data()","title":"Introduction to bulkreadr","text":"read_stata_data() reads Stata data file (.dta) R data frame, converting labeled variables factors. Read Stata data file without converting variable labels column names Read Stata data file convert variable labels column names","code":"file_path <- system.file(\"extdata\", \"Wages.dta\", package = \"bulkreadr\") data <- read_stata_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High s… #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some c… #> 3 5 10 lives in South Male 27 4.45 Other Not married Less t… #> 4 12 9 lives in South Male 30 6.25 Other Not married Less t… #> 5 13 9 lives in South Male 29 20.0 Other Married Less t… #> # ℹ 395 more rows data <- read_stata_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> # ℹ 395 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"generate_dictionary","dir":"Articles","previous_headings":"","what":"generate_dictionary()","title":"Introduction to bulkreadr","text":"generate_dictionary() creates data dictionary specified data frame. 
function particularly useful understanding documenting structure dataset, similar data dictionaries Stata SPSS.","code":"# Creating a data dictionary from an SPSS file file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") wage_data <- read_spss_data(file = file_path) generate_dictionary(wage_data) #> # A tibble: 9 × 6 #> position variable description `column type` missing levels #> #> 1 1 id Worker ID dbl 0 #> 2 2 educ Number of years of education dbl 0 #> 3 3 south Live in south fct 0 #> 4 4 sex Gender fct 0 #> 5 5 exper Number of years of work experi… dbl 0 #> # ℹ 4 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"look_for","dir":"Articles","previous_headings":"","what":"look_for()","title":"Introduction to bulkreadr","text":"look_for() function designed emulate functionality Stata lookfor command R. provides powerful tool searching large datasets, specifically targeting variable names, variable label descriptions, factor levels, value labels. function handy users working extensive complex datasets, enabling quickly efficiently locate variables interest.","code":"# Look for a single keyword. look_for(wage_data, \"south\") #> pos variable label col_type missing values #> 3 south Live in south fct 0 does not live in South #> lives in South look_for(wage_data, \"s\") #> pos variable label col_type missing #> 2 educ Number of years of education dbl 0 #> 3 south Live in south fct 0 #> #> 4 sex Gender fct 0 #> #> 5 exper Number of years of work experience dbl 0 #> 6 wage Wage (dollars per hour) dbl 0 #> 7 occup Occupation fct 0 #> #> #> #> #> #> 8 marr Marital status fct 0 #> #> 9 ed Highest education level fct 0 #> #> #> #> #> values #> #> does not live in South #> lives in South #> Male #> Female #> #> #> Management #> Sales #> Clerical #> Service #> Professional #> Other #> Not married #> Married #> Less than h.s. 
degree #> High school degree #> Some college #> College degree #> Graduate school"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"pull_out","dir":"Articles","previous_headings":"","what":"pull_out()","title":"Introduction to bulkreadr","text":"pull_out() similar [. acts vectors, matrices, arrays lists extract replace parts. pleasant use magrittr (⁠%>%⁠) base(|>) operators.","code":"top_10_richest_nig <- c(\"Aliko Dangote\", \"Mike Adenuga\", \"Femi Otedola\", \"Arthur Eze\", \"Abdulsamad Rabiu\", \"Cletus Ibeto\", \"Orji Uzor Kalu\", \"ABC Orjiakor\", \"Jimoh Ibrahim\", \"Tony Elumelu\") top_10_richest_nig %>% pull_out(c(1, 5, 2)) #> [1] \"Aliko Dangote\" \"Abdulsamad Rabiu\" \"Mike Adenuga\" top_10_richest_nig %>% pull_out(-c(1, 5, 2)) #> [1] \"Femi Otedola\" \"Arthur Eze\" \"Cletus Ibeto\" \"Orji Uzor Kalu\" #> [5] \"ABC Orjiakor\" \"Jimoh Ibrahim\" \"Tony Elumelu\""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"convert_to_date","dir":"Articles","previous_headings":"","what":"convert_to_date()","title":"Introduction to bulkreadr","text":"convert_to_date() parses input vector POSIXct date-time object. 
also powerful convert excel date number like 42370 date value like 2016-01-01.","code":"## ** heterogeneous dates ** dates <- c( 44869, \"22.09.2022\", NA, \"02/27/92\", \"01-19-2022\", \"13-01- 2022\", \"2023\", \"2023-2\", 41750.2, 41751.99, \"11 07 2023\", \"2023-4\" ) # Convert to POSIXct or Date object convert_to_date(dates) #> [1] \"2022-11-04\" \"2022-09-22\" NA \"1992-02-27\" \"2022-01-19\" #> [6] \"2022-01-13\" \"2023-01-01\" \"2023-02-01\" \"2014-04-21\" \"2014-04-22\" #> [11] \"2023-07-11\" \"2023-04-01\" # It can also convert date time object to date object convert_to_date(lubridate::now()) #> [1] \"2024-02-22\""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"inspect_na","dir":"Articles","previous_headings":"","what":"inspect_na()","title":"Introduction to bulkreadr","text":"inspect_na() summarizes rate missingness column data frame. grouped data frame, rate missingness summarized separately group. Grouped dataframe summary","code":"# dataframe summary inspect_na(airquality) #> # A tibble: 6 × 3 #> col_name cnt pcnt #> #> 1 Ozone 37 24.2 #> 2 Solar.R 7 4.58 #> 3 Wind 0 0 #> 4 Temp 0 0 #> 5 Month 0 0 #> # ℹ 1 more row airquality %>% group_by(Month) %>% inspect_na() #> # A tibble: 25 × 4 #> # Groups: Month [5] #> Month col_name cnt pcnt #> #> 1 5 Ozone 5 16.1 #> 2 5 Solar.R 4 12.9 #> 3 5 Wind 0 0 #> 4 5 Temp 0 0 #> 5 5 Day 0 0 #> # ℹ 20 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"fill_missing_values","dir":"Articles","previous_headings":"","what":"fill_missing_values()","title":"Introduction to bulkreadr","text":"fill_missing_values() efficient function addresses missing values dataframe. uses imputation function, meaning replaces missing data numeric variables either mean median, non-numeric variables mode. function takes column-based imputation approach, ensuring replacement values derived respective columns, resulting accurate consistent data. 
method enhances integrity dataset promotes sound decision-making analysis data processing workflows. Using mean fill missing values numeric variables Using median fill missing values numeric variables","code":"df <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Sepal.Width = c(4.1, 3.6, 3, 3, 2.9, 2.5, 2.4), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", NA, \"versicolor\", \"setosa\", NA, \"virginica\", \"setosa\" ) ) df #> # A tibble: 7 × 5 #> Sepal_Length Sepal.Width Petal_Length Petal_Width Species #> #> 1 5.2 4.1 1.5 NA setosa #> 2 5 3.6 1.4 0.2 NA #> 3 5.7 3 4.2 1.2 versicolor #> 4 NA 3 1.4 0.2 setosa #> 5 6.2 2.9 NA 1.3 NA #> # ℹ 2 more rows result_df_mean <- fill_missing_values(df, use_mean = TRUE) result_df_mean #> # A tibble: 7 × 5 #> Sepal_Length Sepal.Width Petal_Length Petal_Width Species #> #> 1 5.2 4.1 1.5 0.94 setosa #> 2 5 3.6 1.4 0.2 setosa #> 3 5.7 3 4.2 1.2 versicolor #> 4 5.72 3 1.4 0.2 setosa #> 5 6.2 2.9 3 1.3 setosa #> # ℹ 2 more rows result_df_median <- fill_missing_values(df, use_mean = FALSE) result_df_median #> # A tibble: 7 × 5 #> Sepal_Length Sepal.Width Petal_Length Petal_Width Species #> #> 1 5.2 4.1 1.5 1.2 setosa #> 2 5 3.6 1.4 0.2 setosa #> 3 5.7 3 4.2 1.2 versicolor #> 4 5.6 3 1.4 0.2 setosa #> 5 6.2 2.9 2.6 1.3 setosa #> # ℹ 2 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"impute-missing-values-nas-in-a-grouped-data-frame","dir":"Articles","previous_headings":"fill_missing_values()","what":"Impute missing values (NAs) in a grouped data frame","title":"Introduction to bulkreadr","text":"can use fill_missing_values() grouped data frame using grouping map functions. 
example :","code":"sample_iris <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", \"setosa\", \"versicolor\", \"setosa\", \"virginica\", \"virginica\", \"setosa\") ) sample_iris #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.3 setosa #> 2 5 1.4 0.2 setosa #> 3 5.7 4.2 1.2 versicolor #> 4 NA 1.4 0.2 setosa #> 5 6.2 NA 1.3 virginica #> # ℹ 2 more rows sample_iris %>% group_by(Species) %>% group_split() %>% map_df(fill_missing_values) #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.3 setosa #> 2 5 1.4 0.2 setosa #> 3 5.23 1.4 0.2 setosa #> 4 5.5 3.7 0.233 setosa #> 5 5.7 4.2 1.2 versicolor #> # ℹ 2 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Ezekiel Ogundepo. Author, maintainer. Ernest Fokoue. Contributor. Golibe Ezeechesi. Contributor. Fatimo Adebanjo. Contributor. Isaac Ajao. Contributor.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Ogundepo E (2024). bulkreadr: Ultimate Tool Reading Data Bulk. 
R package version 1.1.0, https://gbganalyst.github.io/bulkreadr/, https://github.com/gbganalyst/bulkreadr.","code":"@Manual{, title = {bulkreadr: The Ultimate Tool for Reading Data in Bulk}, author = {Ezekiel Ogundepo}, year = {2024}, note = {R package version 1.1.0, https://gbganalyst.github.io/bulkreadr/}, url = {https://github.com/gbganalyst/bulkreadr}, }"},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"bulkreadr-","dir":"","previous_headings":"","what":"The Ultimate Tool for Reading Data in Bulk","title":"The Ultimate Tool for Reading Data in Bulk","text":"Ultimate Tool Reading Data Bulk","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"about-the-package","dir":"","previous_headings":"","what":"About the package","title":"The Ultimate Tool for Reading Data in Bulk","text":"bulkreadr R package designed simplify streamline process reading processing large volumes data. collection functions tailored bulk data operations, package allows users efficiently read multiple sheets Microsoft Excel/Google Sheets workbooks multiple CSV files directory. returns data organized data frames, making convenient analysis manipulation. Whether dealing extensive data sets batch processing tasks, “bulkreadr” empowers users effortlessly handle data bulk, saving time effort data preparation workflows. Additionally, package seamlessly works labelled data SPSS Stata. quick video tutorial, gave talk International Association Statistical Computing webinar. 
recorded session available webinar resources .","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"The Ultimate Tool for Reading Data in Bulk","text":"can install bulkreadr package CRAN : development version GitHub ","code":"install.packages(\"bulkreadr\") if(!require(\"devtools\")){ install.packages(\"devtools\") } devtools::install_github(\"gbganalyst/bulkreadr\")"},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"how-to-load-the-package","dir":"","previous_headings":"","what":"How to load the package","title":"The Ultimate Tool for Reading Data in Bulk","text":"Now installed bulkreadr package, can simply load using:","code":"library(bulkreadr) library(dplyr)"},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"context","dir":"","previous_headings":"","what":"Context","title":"The Ultimate Tool for Reading Data in Bulk","text":"bulkreadr draws complements / emulates packages readxl, readr, googlesheets4 read bulk data R. readxl tidyverse package reading Excel files (xls xlsx) R data frame. readr tidyverse package reading delimited files (e.g., csv tsv) R data frame. googlesheets4 package interact Google Sheets Sheets API v4 https://developers.google.com/sheets/api.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/aliases.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract or replace parts of an object — pull_out","title":"Extract or replace parts of an object — pull_out","text":"pull_out() similar [. acts vectors, matrices, arrays lists extract replace parts. 
pleasant use magrittr (%>%) base (|>) operators.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/aliases.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract or replace parts of an object — pull_out","text":"pull_out() return object class input object.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/aliases.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract or replace parts of an object — pull_out","text":"","code":"good_choice <- letters %>% pull_out(c(5, 2, 1, 4)) good_choice #> [1] \"e\" \"b\" \"a\" \"d\" iris %>% pull_out(, 1:4) %>% head() #> Sepal.Length Sepal.Width Petal.Length Petal.Width #> 1 5.1 3.5 1.4 0.2 #> 2 4.9 3.0 1.4 0.2 #> 3 4.7 3.2 1.3 0.2 #> 4 4.6 3.1 1.5 0.2 #> 5 5.0 3.6 1.4 0.2 #> 6 5.4 3.9 1.7 0.4"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/bulkreadr-package.html","id":null,"dir":"Reference","previous_headings":"","what":"bulkreadr: The Ultimate Tool for Reading Data in Bulk — bulkreadr-package","title":"bulkreadr: The Ultimate Tool for Reading Data in Bulk — bulkreadr-package","text":"Designed simplify streamline process reading processing large volumes data R, package offers collection functions tailored bulk data operations. enables users efficiently read multiple sheets Microsoft Excel Google Sheets workbooks, well various CSV files directory. data returned organized data frames, facilitating analysis manipulation. Ideal handling extensive data sets batch processing tasks, bulkreadr empowers users manage data bulk effortlessly, saving time effort data preparation workflows. 
Additionally, package seamlessly works labelled data SPSS Stata.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/bulkreadr-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"bulkreadr: The Ultimate Tool for Reading Data in Bulk — bulkreadr-package","text":"Maintainer: Ezekiel Ogundepo gbganalyst@gmail.com (ORCID) contributors: Ernest Fokoue epfeqa@rit.edu (ORCID) [contributor] Golibe Ezeechesi golibe.ezeechesi@gmail.com [contributor] Fatimo Adebanjo adebanjofatimo2000@gmail.com [contributor] Isaac Ajao isaacoluwaseyiajao@gmail.com [contributor]","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":null,"dir":"Reference","previous_headings":"","what":"User friendly date parsing function — convert_to_date","title":"User friendly date parsing function — convert_to_date","text":"convert_to_date() parses input vector POSIXct date object. also powerful convert excel date number like 42370 date value like 2016-01-01.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"User friendly date parsing function — convert_to_date","text":"","code":"convert_to_date(date_num_char, tz = \"UTC\")"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"User friendly date parsing function — convert_to_date","text":"date_num_char character numeric vector dates tz Time zone indicator. NULL (default), Date object returned. 
Otherwise POSIXct time zone attribute set tz.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"User friendly date parsing function — convert_to_date","text":"vector class Date","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"User friendly date parsing function — convert_to_date","text":"","code":"## ** heterogeneous dates ** dates <- c( 44869, \"22.09.2022\", NA, \"02/27/92\", \"01-19-2022\", \"13-01- 2022\", \"2023\", \"2023-2\", 41750.2, 41751.99, \"11 07 2023\", \"2023-4\" ) convert_to_date(dates) #> [1] \"2022-11-04\" \"2022-09-22\" NA \"1992-02-27\" \"2022-01-19\" #> [6] \"2022-01-13\" \"2023-01-01\" \"2023-02-01\" \"2014-04-21\" \"2014-04-22\" #> [11] \"2023-07-11\" \"2023-04-01\""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":null,"dir":"Reference","previous_headings":"","what":"Fill missing values in a dataframe — fill_missing_values","title":"Fill missing values in a dataframe — fill_missing_values","text":"fill_missing_values() efficient function addresses missing values dataframe. uses imputation function, also known column-based imputation, fill numeric variables mean median, non-numeric variables mode. 
approach ensures accurate consistent replacements derived individual columns, resulting complete reliable dataset improved analysis decision-making.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fill missing values in a dataframe — fill_missing_values","text":"","code":"fill_missing_values(df, use_mean = TRUE)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fill missing values in a dataframe — fill_missing_values","text":"df input dataframe processed. use_mean Logical. TRUE, missing values numeric columns replaced mean. FALSE, missing values numeric columns replaced median.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fill missing values in a dataframe — fill_missing_values","text":"dataframe missing values filled.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fill missing values in a dataframe — fill_missing_values","text":"","code":"library(dplyr) #> #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #> #> filter, lag #> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union # Assuming 'df' is the dataframe you want to process df <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", NA, \"versicolor\", \"setosa\", NA, \"virginica\", \"setosa\") ) # Using mean to fill missing values for numeric variables result_df_mean <- 
fill_missing_values(df, use_mean = TRUE) result_df_mean #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.94 setosa #> 2 5 1.4 0.2 setosa #> 3 5.7 4.2 1.2 versicolor #> 4 5.72 1.4 0.2 setosa #> 5 6.2 3 1.3 setosa #> 6 6.7 5.8 1.8 virginica #> 7 5.5 3.7 0.94 setosa # Using median to fill missing values for numeric variables result_df_median <- fill_missing_values(df, use_mean = FALSE) result_df_median #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 1.2 setosa #> 2 5 1.4 0.2 setosa #> 3 5.7 4.2 1.2 versicolor #> 4 5.6 1.4 0.2 setosa #> 5 6.2 2.6 1.3 setosa #> 6 6.7 5.8 1.8 virginica #> 7 5.5 3.7 1.2 setosa # Impute missing values (NAs) in a grouped data frame # You can do that by using the following: sample_iris <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", \"setosa\", \"versicolor\", \"setosa\", \"virginica\", \"virginica\", \"setosa\") ) sample_iris %>% group_by(Species) %>% group_split() %>% map_df(fill_missing_values) #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.3 setosa #> 2 5 1.4 0.2 setosa #> 3 5.23 1.4 0.2 setosa #> 4 5.5 3.7 0.233 setosa #> 5 5.7 4.2 1.2 versicolor #> 6 6.2 5.8 1.3 virginica #> 7 6.7 5.8 1.8 virginica"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a data dictionary from labelled data — generate_dictionary","title":"Create a data dictionary from labelled data — generate_dictionary","text":"generate_dictionary() creates data dictionary specified data frame. 
function particularly useful understanding documenting structure dataset, similar data dictionaries Stata SPSS.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a data dictionary from labelled data — generate_dictionary","text":"","code":"generate_dictionary(data)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a data dictionary from labelled data — generate_dictionary","text":"data data frame survey object","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a data dictionary from labelled data — generate_dictionary","text":"tibble representing data dictionary. row corresponds variable original data frame, providing detailed information variable's characteristics.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create a data dictionary from labelled data — generate_dictionary","text":"function returns tibble (modern version R's data frame) following columns: position: integer vector indicating column position data frame. variable: character vector containing names variables (columns). description: character vector human-readable description variable. column type: character vector specifying data type (e.g., numeric, character) variable. missing: integer vector indicating count missing values variable. 
levels: list vector containing levels categorical variables, applicable.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a data dictionary from labelled data — generate_dictionary","text":"","code":"# Creating a data dictionary from an SPSS file file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") wage_data <- read_spss_data(file = file_path) generate_dictionary(wage_data) #> # A tibble: 9 × 6 #> position variable description `column type` missing levels #> #> 1 1 id Worker ID dbl 0 #> 2 2 educ Number of years of education dbl 0 #> 3 3 south Live in south fct 0 #> 4 4 sex Gender fct 0 #> 5 5 exper Number of years of work experi… dbl 0 #> 6 6 wage Wage (dollars per hour) dbl 0 #> 7 7 occup Occupation fct 0 #> 8 8 marr Marital status fct 0 #> 9 9 ed Highest education level fct 0 "},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarize missingness in data frame columns — inspect_na","title":"Summarize missingness in data frame columns — inspect_na","text":"inspect_na() summarizes rate missingness column data frame. 
grouped data frame, rate missingness summarized separately group.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarize missingness in data frame columns — inspect_na","text":"","code":"inspect_na(df)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarize missingness in data frame columns — inspect_na","text":"df data frame","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarize missingness in data frame columns — inspect_na","text":"tibble summarizing count percentage columnwise missingness data frame.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Summarize missingness in data frame columns — inspect_na","text":"tibble returned contains columns: col_name, character vector containing column names df1. cnt, integer vector containing number missing values column. 
pcnt, percentage records columns missing.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarize missingness in data frame columns — inspect_na","text":"","code":"library(dplyr) # dataframe summary inspect_na(airquality) #> # A tibble: 6 × 3 #> col_name cnt pcnt #> #> 1 Ozone 37 24.2 #> 2 Solar.R 7 4.58 #> 3 Wind 0 0 #> 4 Temp 0 0 #> 5 Month 0 0 #> 6 Day 0 0 # grouped dataframe summary airquality %>% group_by(Month) %>% inspect_na() #> # A tibble: 25 × 4 #> # Groups: Month [5] #> Month col_name cnt pcnt #> #> 1 5 Ozone 5 16.1 #> 2 5 Solar.R 4 12.9 #> 3 5 Wind 0 0 #> 4 5 Temp 0 0 #> 5 5 Day 0 0 #> 6 6 Ozone 21 70 #> 7 6 Solar.R 0 0 #> 8 6 Wind 0 0 #> 9 6 Temp 0 0 #> 10 6 Day 0 0 #> # ℹ 15 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":null,"dir":"Reference","previous_headings":"","what":"Look for keywords variable names and descriptions in labelled data — look_for","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"look_for() function designed emulate functionality Stata lookfor command R. provides powerful tool searching large datasets, specifically targeting variable names, variable label descriptions, factor levels, value labels. 
function handy users working extensive complex datasets, enabling quickly efficiently locate variables interest.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"","code":"look_for( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c(\"basic\", \"none\", \"full\") )"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"data data frame survey object ... optional list keywords, character string (several character strings), can formatted regular expression suitable base::grep() pattern, vector keywords; displays variables specified labels whether search variable labels (descriptions); TRUE default values whether search within values (factor levels value labels); TRUE default ignore.case whether make keywords case sensitive; TRUE default (case ignored matching) details add details variable (full details time consuming big data frames, FALSE equivalent \"none\" TRUE \"full\")","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"tibble data frame featuring variable position, name description (exists) original data frame.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"","code":"look_for(iris) #> pos variable label col_type missing values #> 1 Sepal.Length — 
dbl 0 #> 2 Sepal.Width — dbl 0 #> 3 Petal.Length — dbl 0 #> 4 Petal.Width — dbl 0 #> 5 Species — fct 0 setosa #> versicolor #> virginica # Look for a single keyword. look_for(iris, \"petal\") #> pos variable label col_type missing values #> 3 Petal.Length — dbl 0 #> 4 Petal.Width — dbl 0 look_for(iris, \"s\") #> pos variable label col_type missing values #> 1 Sepal.Length — dbl 0 #> 2 Sepal.Width — dbl 0 #> 5 Species — fct 0 setosa #> versicolor #> virginica"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/pipe.html","id":null,"dir":"Reference","previous_headings":"","what":"Pipe operator — %>%","title":"Pipe operator — %>%","text":"See magrittr::%>% details.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/pipe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pipe operator — %>%","text":"","code":"lhs %>% rhs"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/pipe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pipe operator — %>%","text":"return value","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":null,"dir":"Reference","previous_headings":"","what":"Reads all CSV files from a directory — read_csv_files_from_dir","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"read_csv_files_from_dir reads csv files \"~/data\" directory returns appended dataframe. 
resulting dataframe order CSV files directory.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"","code":"read_csv_files_from_dir(dir_path = \".\", col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"dir_path Path directory containing CSV files. col_types One NULL, cols() specification, string. See vignette(\"readr\") details. NULL, column types inferred guess_max rows input, interspersed throughout file. convenient (fast), robust. guessed types wrong, need increase guess_max supply correct types . Column specifications created list() cols() must contain one column specification column. want read subset columns, use cols_only(). Alternatively, can use compact string representation character represents one column: c = character = integer n = number d = double l = logical f = factor D = date T = date time t = time ? = guess _ - = skip default, reading file without column specification print message showing readr guessed . remove message, set show_col_types = FALSE set `options(readr.show_col_types = FALSE). .id name column store file path. useful reading multiple input files data file paths, data collection date. NULL (default) extra column created.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"tibble. column type mismatch data frames row binding, error occur. R combine columns different types. 
example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"","code":"directory <- system.file(\"csvfolder\", package = \"bulkreadr\") read_csv_files_from_dir(dir_path = directory, .id = \"cut\") #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 /home/runner/work/_t… 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 /home/runner/work/_t… 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 /home/runner/work/_t… 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 /home/runner/work/_t… 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 /home/runner/work/_t… 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> 6 /home/runner/work/_t… 0.5 E VS2 64.9 56 1397 5.01 4.95 3.23 #> 7 /home/runner/work/_t… 1 E SI1 65.1 61 4435 6.15 6.08 3.98 #> 8 /home/runner/work/_t… 1.09 J VS2 64.6 58 3443 6.48 6.41 4.16 #> 9 /home/runner/work/_t… 0.98 H SI2 67.9 60 2777 6.05 5.97 4.08 #> 10 /home/runner/work/_t… 0.7 F SI1 65.3 54 1974 5.58 5.54 3.63 #> # ℹ 250 more rows # Column types mismatch error -------------------------------------- # If the `read_csv_files_from_dir()` function complains about a data type mismatch, # then set the `col_types` argument to `\"c\"`. 
# This will make all the column types in the resulting dataframe be characters."},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":null,"dir":"Reference","previous_headings":"","what":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"read_excel_files_from_dir() reads Excel workbooks \"~/data\" directory returns appended dataframe.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"","code":"read_excel_files_from_dir(dir_path, col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"dir_path Path directory containing xls/xlsx files. col_types Either NULL guess spreadsheet character vector containing one entry per column options: \"skip\", \"guess\", \"logical\", \"numeric\", \"date\", \"text\" \"list\". exactly one col_type specified, recycled. content cell skipped column never read column appear data frame output. list cell loads column list length 1 vectors, typed using type guessing logic col_types = NULL, cell--cell basis. .id name optional identifier column. Provide string create output column identifies input. column use names available, otherwise use positions.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"tibble. 
column type mismatch data frames row binding, error occur. R combine columns different types. example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"","code":"directory <- system.file(\"xlsxfolder\", package = \"bulkreadr\") read_excel_files_from_dir(dir_path = directory, .id = \"cut\") #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 /home/runner/work/_t… 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 /home/runner/work/_t… 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 /home/runner/work/_t… 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 /home/runner/work/_t… 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 /home/runner/work/_t… 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> 6 /home/runner/work/_t… 0.5 E VS2 64.9 56 1397 5.01 4.95 3.23 #> 7 /home/runner/work/_t… 1 E SI1 65.1 61 4435 6.15 6.08 3.98 #> 8 /home/runner/work/_t… 1.09 J VS2 64.6 58 3443 6.48 6.41 4.16 #> 9 /home/runner/work/_t… 0.98 H SI2 67.9 60 2777 6.05 5.97 4.08 #> 10 /home/runner/work/_t… 0.7 F SI1 65.3 54 1974 5.58 5.54 3.63 #> # ℹ 250 more rows # Column types mismatch error -------------------------------------- # If the `read_excel_files_from_dir()` function complains about a data type mismatch, # then set the `col_types` argument to `\"text\"`. 
# This will make all the column types in the resulting dataframe be characters."},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":null,"dir":"Reference","previous_headings":"","what":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"read_excel_workbook() reads data sheets Excel workbook return appended dataframe.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"","code":"read_excel_workbook(path, col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"path Path xls/xlsx file. col_types Either NULL guess spreadsheet character vector containing one entry per column options: \"skip\", \"guess\", \"logical\", \"numeric\", \"date\", \"text\" \"list\". exactly one col_type specified, recycled. content cell skipped column never read column appear data frame output. list cell loads column list length 1 vectors, typed using type guessing logic col_types = NULL, cell--cell basis. .id name optional identifier column. Provide string create output column identifies input. column use names available, otherwise use positions.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"tibble. column type mismatch data frames row binding, error occur. R combine columns different types. 
example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"","code":"path <- system.file(\"extdata\", \"Diamonds.xlsx\", package = \"bulkreadr\", mustWork = TRUE) read_excel_workbook(path = path, .id = \"Year\") #> # A tibble: 260 × 10 #> Year carat color clarity depth table price x y z #> #> 1 Fair 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 Fair 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 Fair 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 Fair 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 Fair 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> 6 Fair 0.5 E VS2 64.9 56 1397 5.01 4.95 3.23 #> 7 Fair 1 E SI1 65.1 61 4435 6.15 6.08 3.98 #> 8 Fair 1.09 J VS2 64.6 58 3443 6.48 6.41 4.16 #> 9 Fair 0.98 H SI2 67.9 60 2777 6.05 5.97 4.08 #> 10 Fair 0.7 F SI1 65.3 54 1974 5.58 5.54 3.63 #> # ℹ 250 more rows # Column types mismatch error -------------------------------------- # If the `read_excel_workbook()` function complains about a data type mismatch, # then set the `col_types` argument to `\"text\"`. # This will make all the column types in the resulting DataFrame be characters."},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":null,"dir":"Reference","previous_headings":"","what":"Import data from multiple sheets in Google Sheets — read_gsheets","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"read_gsheets() function imports data multiple sheets Google Sheets spreadsheet appends resulting dataframes sheet together create single dataframe. 
function powerful tool data analysis, allows easily combine data multiple sheets single dataset.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"","code":"read_gsheets(ss, col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"ss Something identifies Google Sheet: file id string drive_id URL can recover id one-row dribble, googledrive represents Drive files instance googlesheets4_spreadsheet, gs4_get() returns Processed as_sheets_id(). col_types Column types. Either NULL guess spreadsheet string readr-style shortcodes, one character code per column. exactly one col_type specified, recycled. See Column Specification . .id name optional identifier column. Provide string create output column identifies input. column use names available, otherwise use positions.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"tibble. column type mismatch data frames row binding, error occur. R combine columns different types. 
example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"","code":"if (FALSE) { # googlesheets4::gs4_has_token() sheet_id <- \"1izO0mHu3L9AMySQUXGDn9GPs1n-VwGFSEoAKGhqVQh0\" read_gsheets(ss = sheet_id, .id = \"sheet.name\") # Column types mismatch error -------------------------------------- # If the `read_gsheets()` function complains about a data type mismatch, # then set the `col_types` argument to `\"c\"`. # This will make all the column types in the resulting dataframe be characters. # For example, } if (FALSE) { # googlesheets4::gs4_has_token() sheet_id <- \"1rrjKAV05POre9lDVtHtZePTa8VROf1onVO47cHnhrTU\" try(read_gsheets(ss = sheet_id)) # error, column types mismatch read_gsheets(ss = sheet_id, col_types = \"c\") }"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Read SPSS data file — read_spss_data","title":"Read SPSS data file — read_spss_data","text":"read_spss_data() designed seamlessly import data SPSS data (.sav .zsav) files. converts labelled variables factors, crucial step enhances ease data manipulation analysis within R programming environment.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read SPSS data file — read_spss_data","text":"","code":"read_spss_data(file, label = FALSE)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read SPSS data file — read_spss_data","text":"file path SPSS data file. 
label Logical indicating whether use variable labels column names (default FALSE).","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read SPSS data file — read_spss_data","text":"tibble containing data SPSS file.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read SPSS data file — read_spss_data","text":"","code":"# Read an SPSS data file without converting variable labels as column names file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") data <- read_spss_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High … #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some … #> 3 5 10 lives in South Male 27 4.45 Other Not married Less … #> 4 12 9 lives in South Male 30 6.25 Other Not married Less … #> 5 13 9 lives in South Male 29 20.0 Other Married Less … #> 6 14 12 does not live in South Male 37 7.3 Other Married High … #> 7 17 11 does not live in South Male 16 3.65 Other Not married Less … #> 8 20 12 does not live in South Male 9 3.75 Other Not married High … #> 9 21 11 lives in South Male 14 4.5 Other Married Less … #> 10 23 6 lives in South Male 45 5.75 Other Married Less … #> # ℹ 390 more rows # Read an SPSS data file and convert variable labels as column names data <- read_spss_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> 6 14 12 does not live in South Male #> 7 17 11 does not live in South Male #> 8 20 
12 does not live in South Male #> 9 21 11 lives in South Male #> 10 23 6 lives in South Male #> # ℹ 390 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Read Stata data file — read_stata_data","title":"Read Stata data file — read_stata_data","text":"Read Stata data file","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read Stata data file — read_stata_data","text":"","code":"read_stata_data(file, label = FALSE)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read Stata data file — read_stata_data","text":"file path Stata data file. 
label Logical indicating whether use variable labels column names (default FALSE).","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read Stata data file — read_stata_data","text":"data frame containing Stata data, labeled variables converted factors.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read Stata data file — read_stata_data","text":"","code":"# Read Stata data file without converting variable labels as column names file_path <- system.file(\"extdata\", \"Wages.dta\", package = \"bulkreadr\") data <- read_stata_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High … #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some … #> 3 5 10 lives in South Male 27 4.45 Other Not married Less … #> 4 12 9 lives in South Male 30 6.25 Other Not married Less … #> 5 13 9 lives in South Male 29 20.0 Other Married Less … #> 6 14 12 does not live in South Male 37 7.3 Other Married High … #> 7 17 11 does not live in South Male 16 3.65 Other Not married Less … #> 8 20 12 does not live in South Male 9 3.75 Other Not married High … #> 9 21 11 lives in South Male 14 4.5 Other Married Less … #> 10 23 6 lives in South Male 45 5.75 Other Married Less … #> # ℹ 390 more rows # Read Stata data file and convert variable labels as column names data <- read_stata_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> 6 14 12 does not live in South Male #> 7 17 
11 does not live in South Male #> 8 20 12 does not live in South Male #> 9 21 11 lives in South Male #> 10 23 6 lives in South Male #> # ℹ 390 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/news/index.html","id":"bulkreadr-110-2023-11-13","dir":"Changelog","previous_headings":"","what":"bulkreadr 1.1.0 (2023-11-13)","title":"bulkreadr 1.1.0 (2023-11-13)","text":"CRAN release: 2023-11-16 update includes following new features: generate_dictionary(): function designed automatically create comprehensive data dictionary labelled datasets. generated dictionary provides detailed insights variable, aiding better data understanding management. look_for(): enhances capability efficiently search within labelled datasets. allows users quickly find variable names descriptions searching specific keywords. feature streamlines data exploration analysis, particularly large datasets extensive variables. enhancements aim improve user experience data management exploration within bulkreadr. hope new features assist users effectively navigating understanding labelled datasets.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/news/index.html","id":"bulkreadr-100-2023-09-20","dir":"Changelog","previous_headings":"","what":"bulkreadr 1.0.0 (2023-09-20)","title":"bulkreadr 1.0.0 (2023-09-20)","text":"CRAN release: 2023-09-26 update includes following new features improvements: Developed read_stata_data() import Stata data file (.dta) R data frame, converting labeled variables factors. 
Reduced dependency packages optimize efficiency.","code":""}] +[{"path":"https://gbganalyst.github.io/bulkreadr/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 bulkreadr authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"about-the-package","dir":"Articles","previous_headings":"","what":"About the package","title":"Introduction to bulkreadr","text":"bulkreadr R package designed simplify streamline process reading processing large volumes data. collection functions tailored bulk data operations, package allows users efficiently read multiple sheets Microsoft Excel/Google Sheets workbooks multiple CSV files directory. returns data organized data frames, making convenient analysis manipulation. 
Whether dealing extensive data sets batch processing tasks, “bulkreadr” empowers users effortlessly handle data bulk, saving time effort data preparation workflows.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"Introduction to bulkreadr","text":"can install bulkreadr package CRAN : development version GitHub ","code":"install.packages(\"bulkreadr\") if(!require(\"devtools\")){ install.packages(\"devtools\") } devtools::install_github(\"gbganalyst/bulkreadr\")"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"how-to-load-the-package","dir":"Articles","previous_headings":"","what":"How to load the package","title":"Introduction to bulkreadr","text":"Now installed bulkreadr package, can simply load using:","code":"library(bulkreadr)"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"functions-in-bulkreadr-package","dir":"Articles","previous_headings":"","what":"Functions in bulkreadr package","title":"Introduction to bulkreadr","text":"section provides concise overview different functions available bulkreadr package importing bulk data R.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"note","dir":"Articles","previous_headings":"","what":"Note","title":"Introduction to bulkreadr","text":"majority functions within package, utilize data stored system file bulkreadr, can accessed using system.file() function. 
wish utilize data stored local directory, please ensure set appropriate file path prior using functions provided bulkreadr package.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_excel_workbook","dir":"Articles","previous_headings":"","what":"read_excel_workbook()","title":"Introduction to bulkreadr","text":"read_excel_workbook() reads data sheets Excel workbook return appended dataframe.","code":"# path to the xls/xlsx file. path <- system.file(\"extdata\", \"Diamonds.xlsx\", package = \"bulkreadr\") # read the sheets read_excel_workbook(path = path) #> # A tibble: 260 × 9 #> carat color clarity depth table price x y z #> #> 1 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_excel_files_from_dir","dir":"Articles","previous_headings":"","what":"read_excel_files_from_dir()","title":"Introduction to bulkreadr","text":"read_excel_files_from_dir() reads Excel workbooks \"~/data\" directory returns appended dataframe.","code":"# path to the directory containing the xls/xlsx files. 
directory <- system.file(\"xlsxfolder\", package = \"bulkreadr\") # import the workbooks read_excel_files_from_dir(dir_path = directory) #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 Fair 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 Fair 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 Fair 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 Fair 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 Fair 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_csv_files_from_dir","dir":"Articles","previous_headings":"","what":"read_csv_files_from_dir()","title":"Introduction to bulkreadr","text":"read_csv_files_from_dir() reads csv files \"~/data\" directory returns appended dataframe. resulting dataframe order CSV files directory.","code":"# path to the directory containing the CSV files. directory <- system.file(\"csvfolder\", package = \"bulkreadr\") # import the csv files read_csv_files_from_dir(dir_path = directory) #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 Fair 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 Fair 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 Fair 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 Fair 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 Fair 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/bulkreadr.html","id":"read_gsheets","dir":"Articles","previous_headings":"","what":"read_gsheets()","title":"Introduction to bulkreadr","text":"read_gsheets() function imports data multiple sheets Google Sheets spreadsheet appends resulting dataframes sheet together create single dataframe. 
function powerful tool data analysis, allows easily combine data multiple sheets single dataset.","code":"# Google Sheet ID or the link to the sheet sheet_id <- \"1izO0mHu3L9AMySQUXGDn9GPs1n-VwGFSEoAKGhqVQh0\" # read all the sheets read_gsheets(ss = sheet_id) #> # A tibble: 260 × 9 #> carat color clarity depth table price x y z #> #> 1 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> # ℹ 255 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html","id":"what-is-labelled-data-in-r","dir":"Articles","previous_headings":"","what":"What is labelled data in R?","title":"Introduction to labelled data","text":"Labelled data SPSS Stata refers datasets variable (column) values assigned meaningful labels. labels provide context, descriptions categories, making data easier understand analyze. instance, variable representing gender might numerical codes (1, 2) labels (“Male”, “Female”). feature enhances data analysis allowing researchers work descriptive labels instead deciphering codes numeric values, facilitating clearer interpretation communication statistical results. R ecosystem, packages like foreign haven, facilitates importation labelled data software like SPSS Stata, ensuring smooth transition R. bulkreadr package extends functionality leveraging haven streamline process. automatically converts labelled data R’s factor data type, eliminating need manual recoding. 
enhancement significantly improves efficiency data analysis workflow within R environment.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html","id":"note","dir":"Articles","previous_headings":"What is labelled data in R?","what":"Note","title":"Introduction to labelled data","text":"majority functions within package, utilize data stored system file bulkreadr, can accessed using system.file() function. wish utilize data stored local directory, please ensure set appropriate file path prior using functions provided bulkreadr package.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html","id":"read_spss_data","dir":"Articles","previous_headings":"What is labelled data in R?","what":"read_spss_data()","title":"Introduction to labelled data","text":"read_spss_data() designed seamlessly import data SPSS data (.sav .zsav) files. converts labelled variables factors, crucial step enhances ease data manipulation analysis within R programming environment. 
Read SPSS data file without converting variable labels column names Read SPSS data file convert variable labels column names","code":"library(bulkreadr) file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") data <- read_spss_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High s… #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some c… #> 3 5 10 lives in South Male 27 4.45 Other Not married Less t… #> 4 12 9 lives in South Male 30 6.25 Other Not married Less t… #> 5 13 9 lives in South Male 29 20.0 Other Married Less t… #> # ℹ 395 more rows data <- read_spss_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> # ℹ 395 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html","id":"read_stata_data","dir":"Articles","previous_headings":"What is labelled data in R?","what":"read_stata_data()","title":"Introduction to labelled data","text":"read_stata_data() reads Stata data file (.dta) R data frame, converting labeled variables factors. 
Read Stata data file without converting variable labels column names Read Stata data file convert variable labels column names","code":"file_path <- system.file(\"extdata\", \"Wages.dta\", package = \"bulkreadr\") data <- read_stata_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High s… #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some c… #> 3 5 10 lives in South Male 27 4.45 Other Not married Less t… #> 4 12 9 lives in South Male 30 6.25 Other Not married Less t… #> 5 13 9 lives in South Male 29 20.0 Other Married Less t… #> # ℹ 395 more rows data <- read_stata_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> # ℹ 395 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html","id":"generate_dictionary","dir":"Articles","previous_headings":"What is labelled data in R?","what":"generate_dictionary()","title":"Introduction to labelled data","text":"generate_dictionary() creates data dictionary specified data frame. 
function particularly useful understanding documenting structure dataset, similar data dictionaries Stata SPSS.","code":"# Creating a data dictionary from an SPSS file file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") wage_data <- read_spss_data(file = file_path) generate_dictionary(wage_data) #> # A tibble: 9 × 6 #> position variable description `column type` missing levels #> #> 1 1 id Worker ID dbl 0 #> 2 2 educ Number of years of education dbl 0 #> 3 3 south Live in south fct 0 #> 4 4 sex Gender fct 0 #> 5 5 exper Number of years of work experi… dbl 0 #> # ℹ 4 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html","id":"look_for","dir":"Articles","previous_headings":"What is labelled data in R?","what":"look_for()","title":"Introduction to labelled data","text":"look_for() function designed emulate functionality Stata lookfor command R. provides powerful tool searching large datasets, specifically targeting variable names, variable label descriptions, factor levels, value labels. function handy users working extensive complex datasets, enabling quickly efficiently locate variables interest.","code":"# Look for a single keyword. look_for(wage_data, \"south\") #> pos variable label col_type missing values #> 3 south Live in south fct 0 does not live in South #> lives in South look_for(wage_data, \"^s\") #> pos variable label col_type missing values #> 3 south Live in south fct 0 does not live in South #> lives in South #> 4 sex Gender fct 0 Male #> Female"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/other-functions.html","id":"pull_out","dir":"Articles","previous_headings":"","what":"pull_out()","title":"Other functions in bulkreadr","text":"pull_out() similar [. acts vectors, matrices, arrays lists extract replace parts. 
pleasant use magrittr (⁠%>%⁠) base(|>) operators.","code":"library(bulkreadr) library(dplyr) top_10_richest_nig <- c(\"Aliko Dangote\", \"Mike Adenuga\", \"Femi Otedola\", \"Arthur Eze\", \"Abdulsamad Rabiu\", \"Cletus Ibeto\", \"Orji Uzor Kalu\", \"ABC Orjiakor\", \"Jimoh Ibrahim\", \"Tony Elumelu\") top_10_richest_nig %>% pull_out(c(1, 5, 2)) #> [1] \"Aliko Dangote\" \"Abdulsamad Rabiu\" \"Mike Adenuga\" top_10_richest_nig %>% pull_out(-c(1, 5, 2)) #> [1] \"Femi Otedola\" \"Arthur Eze\" \"Cletus Ibeto\" \"Orji Uzor Kalu\" #> [5] \"ABC Orjiakor\" \"Jimoh Ibrahim\" \"Tony Elumelu\""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/other-functions.html","id":"convert_to_date","dir":"Articles","previous_headings":"","what":"convert_to_date()","title":"Other functions in bulkreadr","text":"convert_to_date() parses input vector POSIXct date-time object. also powerful convert excel date number like 42370 date value like 2016-01-01.","code":"## ** heterogeneous dates ** dates <- c( 44869, \"22.09.2022\", NA, \"02/27/92\", \"01-19-2022\", \"13-01- 2022\", \"2023\", \"2023-2\", 41750.2, 41751.99, \"11 07 2023\", \"2023-4\" ) # Convert to POSIXct or Date object convert_to_date(dates) #> [1] \"2022-11-04\" \"2022-09-22\" NA \"1992-02-27\" \"2022-01-19\" #> [6] \"2022-01-13\" \"2023-01-01\" \"2023-02-01\" \"2014-04-21\" \"2014-04-22\" #> [11] \"2023-07-11\" \"2023-04-01\" # It can also convert date time object to date object convert_to_date(lubridate::now()) #> [1] \"2024-02-26\""},{"path":"https://gbganalyst.github.io/bulkreadr/articles/other-functions.html","id":"inspect_na","dir":"Articles","previous_headings":"","what":"inspect_na()","title":"Other functions in bulkreadr","text":"inspect_na() summarizes rate missingness column data frame. grouped data frame, rate missingness summarized separately group. 
Grouped dataframe summary","code":"# dataframe summary inspect_na(airquality) #> # A tibble: 6 × 3 #> col_name cnt pcnt #> #> 1 Ozone 37 24.2 #> 2 Solar.R 7 4.58 #> 3 Wind 0 0 #> 4 Temp 0 0 #> 5 Month 0 0 #> # ℹ 1 more row airquality %>% group_by(Month) %>% inspect_na() #> # A tibble: 25 × 4 #> # Groups: Month [5] #> Month col_name cnt pcnt #> #> 1 5 Ozone 5 16.1 #> 2 5 Solar.R 4 12.9 #> 3 5 Wind 0 0 #> 4 5 Temp 0 0 #> 5 5 Day 0 0 #> # ℹ 20 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/other-functions.html","id":"fill_missing_values","dir":"Articles","previous_headings":"","what":"fill_missing_values()","title":"Other functions in bulkreadr","text":"fill_missing_values() efficient function addresses missing values dataframe. uses imputation function, meaning replaces missing data numeric variables either mean median, non-numeric variables mode. function takes column-based imputation approach, ensuring replacement values derived respective columns, resulting accurate consistent data. method enhances integrity dataset promotes sound decision-making analysis data processing workflows. 
Using mean fill missing values numeric variables Using median fill missing values numeric variables","code":"df <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Sepal.Width = c(4.1, 3.6, 3, 3, 2.9, 2.5, 2.4), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", NA, \"versicolor\", \"setosa\", NA, \"virginica\", \"setosa\" ) ) df #> # A tibble: 7 × 5 #> Sepal_Length Sepal.Width Petal_Length Petal_Width Species #> #> 1 5.2 4.1 1.5 NA setosa #> 2 5 3.6 1.4 0.2 NA #> 3 5.7 3 4.2 1.2 versicolor #> 4 NA 3 1.4 0.2 setosa #> 5 6.2 2.9 NA 1.3 NA #> # ℹ 2 more rows result_df_mean <- fill_missing_values(df, use_mean = TRUE) result_df_mean #> # A tibble: 7 × 5 #> Sepal_Length Sepal.Width Petal_Length Petal_Width Species #> #> 1 5.2 4.1 1.5 0.94 setosa #> 2 5 3.6 1.4 0.2 setosa #> 3 5.7 3 4.2 1.2 versicolor #> 4 5.72 3 1.4 0.2 setosa #> 5 6.2 2.9 3 1.3 setosa #> # ℹ 2 more rows result_df_median <- fill_missing_values(df, use_mean = FALSE) result_df_median #> # A tibble: 7 × 5 #> Sepal_Length Sepal.Width Petal_Length Petal_Width Species #> #> 1 5.2 4.1 1.5 1.2 setosa #> 2 5 3.6 1.4 0.2 setosa #> 3 5.7 3 4.2 1.2 versicolor #> 4 5.6 3 1.4 0.2 setosa #> 5 6.2 2.9 2.6 1.3 setosa #> # ℹ 2 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/articles/other-functions.html","id":"impute-missing-values-nas-in-a-grouped-data-frame","dir":"Articles","previous_headings":"fill_missing_values()","what":"Impute missing values (NAs) in a grouped data frame","title":"Other functions in bulkreadr","text":"can use fill_missing_values() grouped data frame using grouping map functions. 
example :","code":"sample_iris <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", \"setosa\", \"versicolor\", \"setosa\", \"virginica\", \"virginica\", \"setosa\") ) sample_iris #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.3 setosa #> 2 5 1.4 0.2 setosa #> 3 5.7 4.2 1.2 versicolor #> 4 NA 1.4 0.2 setosa #> 5 6.2 NA 1.3 virginica #> # ℹ 2 more rows sample_iris %>% group_by(Species) %>% group_split() %>% map_df(fill_missing_values) #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.3 setosa #> 2 5 1.4 0.2 setosa #> 3 5.23 1.4 0.2 setosa #> 4 5.5 3.7 0.233 setosa #> 5 5.7 4.2 1.2 versicolor #> # ℹ 2 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Ezekiel Ogundepo. Author, maintainer. Ernest Fokoue. Contributor. Golibe Ezeechesi. Contributor. Fatimo Adebanjo. Contributor. Isaac Ajao. Contributor.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Ogundepo E (2024). bulkreadr: Ultimate Tool Reading Data Bulk. 
R package version 1.1.0, https://gbganalyst.github.io/bulkreadr/, https://github.com/gbganalyst/bulkreadr.","code":"@Manual{, title = {bulkreadr: The Ultimate Tool for Reading Data in Bulk}, author = {Ezekiel Ogundepo}, year = {2024}, note = {R package version 1.1.0, https://gbganalyst.github.io/bulkreadr/}, url = {https://github.com/gbganalyst/bulkreadr}, }"},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"bulkreadr-","dir":"","previous_headings":"","what":"The Ultimate Tool for Reading Data in Bulk","title":"The Ultimate Tool for Reading Data in Bulk","text":"Ultimate Tool Reading Data Bulk","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"about-the-package","dir":"","previous_headings":"","what":"About the package","title":"The Ultimate Tool for Reading Data in Bulk","text":"bulkreadr R package designed simplify streamline process reading processing large volumes data. collection functions tailored bulk data operations, package allows users efficiently read multiple sheets Microsoft Excel/Google Sheets workbooks multiple CSV files directory. returns data organized data frames, making convenient analysis manipulation. Whether dealing extensive data sets batch processing tasks, “bulkreadr” empowers users effortlessly handle data bulk, saving time effort data preparation workflows. Additionally, package seamlessly works labelled data SPSS Stata. quick video tutorial, gave talk International Association Statistical Computing webinar. 
recorded session available webinar resources .","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"The Ultimate Tool for Reading Data in Bulk","text":"can install bulkreadr package CRAN : development version GitHub ","code":"install.packages(\"bulkreadr\") if(!require(\"devtools\")){ install.packages(\"devtools\") } devtools::install_github(\"gbganalyst/bulkreadr\")"},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"how-to-load-the-package","dir":"","previous_headings":"","what":"How to load the package","title":"The Ultimate Tool for Reading Data in Bulk","text":"Now installed bulkreadr package, can simply load using:","code":"library(bulkreadr)"},{"path":"https://gbganalyst.github.io/bulkreadr/index.html","id":"context","dir":"","previous_headings":"","what":"Context","title":"The Ultimate Tool for Reading Data in Bulk","text":"bulkreadr designed integrate augment capabilities established packages readxl, readr, googlesheets4, offering enhanced functionality reading bulk data within R programming environment. readxl tidyverse package reading Excel files (xls xlsx) R data frame. readr tidyverse package reading delimited files (e.g., csv tsv) R data frame. googlesheets4 package interact Google Sheets Sheets API v4 https://developers.google.com/sheets/api.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/aliases.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract or replace parts of an object — pull_out","title":"Extract or replace parts of an object — pull_out","text":"pull_out() similar [. acts vectors, matrices, arrays lists extract replace parts. 
pleasant use magrittr (%>%) base (|>) operators.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/aliases.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract or replace parts of an object — pull_out","text":"pull_out() return object class input object.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/aliases.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract or replace parts of an object — pull_out","text":"","code":"good_choice <- letters %>% pull_out(c(5, 2, 1, 4)) good_choice #> [1] \"e\" \"b\" \"a\" \"d\" iris %>% pull_out(, 1:4) %>% head() #> Sepal.Length Sepal.Width Petal.Length Petal.Width #> 1 5.1 3.5 1.4 0.2 #> 2 4.9 3.0 1.4 0.2 #> 3 4.7 3.2 1.3 0.2 #> 4 4.6 3.1 1.5 0.2 #> 5 5.0 3.6 1.4 0.2 #> 6 5.4 3.9 1.7 0.4"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/bulkreadr-package.html","id":null,"dir":"Reference","previous_headings":"","what":"bulkreadr: The Ultimate Tool for Reading Data in Bulk — bulkreadr-package","title":"bulkreadr: The Ultimate Tool for Reading Data in Bulk — bulkreadr-package","text":"Designed simplify streamline process reading processing large volumes data R, package offers collection functions tailored bulk data operations. enables users efficiently read multiple sheets Microsoft Excel Google Sheets workbooks, well various CSV files directory. data returned organized data frames, facilitating analysis manipulation. Ideal handling extensive data sets batch processing tasks, bulkreadr empowers users manage data bulk effortlessly, saving time effort data preparation workflows. 
Additionally, package seamlessly works labelled data SPSS Stata.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/bulkreadr-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"bulkreadr: The Ultimate Tool for Reading Data in Bulk — bulkreadr-package","text":"Maintainer: Ezekiel Ogundepo gbganalyst@gmail.com (ORCID) contributors: Ernest Fokoue epfeqa@rit.edu (ORCID) [contributor] Golibe Ezeechesi golibe.ezeechesi@gmail.com [contributor] Fatimo Adebanjo adebanjofatimo2000@gmail.com [contributor] Isaac Ajao isaacoluwaseyiajao@gmail.com [contributor]","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":null,"dir":"Reference","previous_headings":"","what":"User friendly date parsing function — convert_to_date","title":"User friendly date parsing function — convert_to_date","text":"convert_to_date() parses input vector POSIXct date object. also powerful convert excel date number like 42370 date value like 2016-01-01.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"User friendly date parsing function — convert_to_date","text":"","code":"convert_to_date(date_num_char, tz = \"UTC\")"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"User friendly date parsing function — convert_to_date","text":"date_num_char character numeric vector dates tz Time zone indicator. NULL (default), Date object returned. 
Otherwise POSIXct time zone attribute set tz.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"User friendly date parsing function — convert_to_date","text":"vector class Date","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/convert_to_date.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"User friendly date parsing function — convert_to_date","text":"","code":"## ** heterogeneous dates ** dates <- c( 44869, \"22.09.2022\", NA, \"02/27/92\", \"01-19-2022\", \"13-01- 2022\", \"2023\", \"2023-2\", 41750.2, 41751.99, \"11 07 2023\", \"2023-4\" ) convert_to_date(dates) #> [1] \"2022-11-04\" \"2022-09-22\" NA \"1992-02-27\" \"2022-01-19\" #> [6] \"2022-01-13\" \"2023-01-01\" \"2023-02-01\" \"2014-04-21\" \"2014-04-22\" #> [11] \"2023-07-11\" \"2023-04-01\""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":null,"dir":"Reference","previous_headings":"","what":"Fill missing values in a dataframe — fill_missing_values","title":"Fill missing values in a dataframe — fill_missing_values","text":"fill_missing_values() efficient function addresses missing values dataframe. uses imputation function, also known column-based imputation, fill numeric variables mean median, non-numeric variables mode. 
approach ensures accurate consistent replacements derived individual columns, resulting complete reliable dataset improved analysis decision-making.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fill missing values in a dataframe — fill_missing_values","text":"","code":"fill_missing_values(df, use_mean = TRUE)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fill missing values in a dataframe — fill_missing_values","text":"df input dataframe processed. use_mean Logical. TRUE, missing values numeric columns replaced mean. FALSE, missing values numeric columns replaced median.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fill missing values in a dataframe — fill_missing_values","text":"dataframe missing values filled.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/fill_missing_values.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fill missing values in a dataframe — fill_missing_values","text":"","code":"library(dplyr) #> #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #> #> filter, lag #> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union # Assuming 'df' is the dataframe you want to process df <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", NA, \"versicolor\", \"setosa\", NA, \"virginica\", \"setosa\") ) # Using mean to fill missing values for numeric variables result_df_mean <- 
fill_missing_values(df, use_mean = TRUE) result_df_mean #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.94 setosa #> 2 5 1.4 0.2 setosa #> 3 5.7 4.2 1.2 versicolor #> 4 5.72 1.4 0.2 setosa #> 5 6.2 3 1.3 setosa #> 6 6.7 5.8 1.8 virginica #> 7 5.5 3.7 0.94 setosa # Using median to fill missing values for numeric variables result_df_median <- fill_missing_values(df, use_mean = FALSE) result_df_median #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 1.2 setosa #> 2 5 1.4 0.2 setosa #> 3 5.7 4.2 1.2 versicolor #> 4 5.6 1.4 0.2 setosa #> 5 6.2 2.6 1.3 setosa #> 6 6.7 5.8 1.8 virginica #> 7 5.5 3.7 1.2 setosa # Impute missing values (NAs) in a grouped data frame # You can do that by using the following: sample_iris <- tibble::tibble( Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5), Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7), Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA), Species = c(\"setosa\", \"setosa\", \"versicolor\", \"setosa\", \"virginica\", \"virginica\", \"setosa\") ) sample_iris %>% group_by(Species) %>% group_split() %>% map_df(fill_missing_values) #> # A tibble: 7 × 4 #> Sepal_Length Petal_Length Petal_Width Species #> #> 1 5.2 1.5 0.3 setosa #> 2 5 1.4 0.2 setosa #> 3 5.23 1.4 0.2 setosa #> 4 5.5 3.7 0.233 setosa #> 5 5.7 4.2 1.2 versicolor #> 6 6.2 5.8 1.3 virginica #> 7 6.7 5.8 1.8 virginica"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a data dictionary from labelled data — generate_dictionary","title":"Create a data dictionary from labelled data — generate_dictionary","text":"generate_dictionary() creates data dictionary specified data frame. 
function particularly useful understanding documenting structure dataset, similar data dictionaries Stata SPSS.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a data dictionary from labelled data — generate_dictionary","text":"","code":"generate_dictionary(data)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a data dictionary from labelled data — generate_dictionary","text":"data data frame survey object","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a data dictionary from labelled data — generate_dictionary","text":"tibble representing data dictionary. row corresponds variable original data frame, providing detailed information variable's characteristics.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Create a data dictionary from labelled data — generate_dictionary","text":"function returns tibble (modern version R's data frame) following columns: position: integer vector indicating column position data frame. variable: character vector containing names variables (columns). description: character vector human-readable description variable. column type: character vector specifying data type (e.g., numeric, character) variable. missing: integer vector indicating count missing values variable. 
levels: list vector containing levels categorical variables, applicable.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/generate_dictionary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a data dictionary from labelled data — generate_dictionary","text":"","code":"# Creating a data dictionary from an SPSS file file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") wage_data <- read_spss_data(file = file_path) generate_dictionary(wage_data) #> # A tibble: 9 × 6 #> position variable description `column type` missing levels #> #> 1 1 id Worker ID dbl 0 #> 2 2 educ Number of years of education dbl 0 #> 3 3 south Live in south fct 0 #> 4 4 sex Gender fct 0 #> 5 5 exper Number of years of work experi… dbl 0 #> 6 6 wage Wage (dollars per hour) dbl 0 #> 7 7 occup Occupation fct 0 #> 8 8 marr Marital status fct 0 #> 9 9 ed Highest education level fct 0 "},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarize missingness in data frame columns — inspect_na","title":"Summarize missingness in data frame columns — inspect_na","text":"inspect_na() summarizes rate missingness column data frame. 
grouped data frame, rate missingness summarized separately group.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarize missingness in data frame columns — inspect_na","text":"","code":"inspect_na(df)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarize missingness in data frame columns — inspect_na","text":"df data frame","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarize missingness in data frame columns — inspect_na","text":"tibble summarizing count percentage columnwise missingness data frame.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Summarize missingness in data frame columns — inspect_na","text":"tibble returned contains columns: col_name, character vector containing column names df1. cnt, integer vector containing number missing values column. 
pcnt, percentage records columns missing.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/inspect_na.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarize missingness in data frame columns — inspect_na","text":"","code":"library(dplyr) # dataframe summary inspect_na(airquality) #> # A tibble: 6 × 3 #> col_name cnt pcnt #> #> 1 Ozone 37 24.2 #> 2 Solar.R 7 4.58 #> 3 Wind 0 0 #> 4 Temp 0 0 #> 5 Month 0 0 #> 6 Day 0 0 # grouped dataframe summary airquality %>% group_by(Month) %>% inspect_na() #> # A tibble: 25 × 4 #> # Groups: Month [5] #> Month col_name cnt pcnt #> #> 1 5 Ozone 5 16.1 #> 2 5 Solar.R 4 12.9 #> 3 5 Wind 0 0 #> 4 5 Temp 0 0 #> 5 5 Day 0 0 #> 6 6 Ozone 21 70 #> 7 6 Solar.R 0 0 #> 8 6 Wind 0 0 #> 9 6 Temp 0 0 #> 10 6 Day 0 0 #> # ℹ 15 more rows"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":null,"dir":"Reference","previous_headings":"","what":"Look for keywords variable names and descriptions in labelled data — look_for","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"look_for() function designed emulate functionality Stata lookfor command R. provides powerful tool searching large datasets, specifically targeting variable names, variable label descriptions, factor levels, value labels. 
function handy users working extensive complex datasets, enabling quickly efficiently locate variables interest.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"","code":"look_for( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c(\"basic\", \"none\", \"full\") )"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"data data frame survey object ... optional list keywords, character string (several character strings), can formatted regular expression suitable base::grep() pattern, vector keywords; displays variables specified labels whether search variable labels (descriptions); TRUE default values whether search within values (factor levels value labels); TRUE default ignore.case whether make keywords case sensitive; TRUE default (case ignored matching) details add details variable (full details time consuming big data frames, FALSE equivalent \"none\" TRUE \"full\")","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"tibble data frame featuring variable position, name description (exists) original data frame.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/look_for.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Look for keywords variable names and descriptions in labelled data — look_for","text":"","code":"look_for(iris) #> pos variable label col_type missing values #> 1 Sepal.Length — 
dbl 0 #> 2 Sepal.Width — dbl 0 #> 3 Petal.Length — dbl 0 #> 4 Petal.Width — dbl 0 #> 5 Species — fct 0 setosa #> versicolor #> virginica # Look for a single keyword. look_for(iris, \"petal\") #> pos variable label col_type missing values #> 3 Petal.Length — dbl 0 #> 4 Petal.Width — dbl 0 look_for(iris, \"s\") #> pos variable label col_type missing values #> 1 Sepal.Length — dbl 0 #> 2 Sepal.Width — dbl 0 #> 5 Species — fct 0 setosa #> versicolor #> virginica"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/pipe.html","id":null,"dir":"Reference","previous_headings":"","what":"Pipe operator — %>%","title":"Pipe operator — %>%","text":"See magrittr::%>% details.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/pipe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pipe operator — %>%","text":"","code":"lhs %>% rhs"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/pipe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pipe operator — %>%","text":"return value","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":null,"dir":"Reference","previous_headings":"","what":"Reads all CSV files from a directory — read_csv_files_from_dir","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"read_csv_files_from_dir reads csv files \"~/data\" directory returns appended dataframe. 
resulting dataframe order CSV files directory.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"","code":"read_csv_files_from_dir(dir_path = \".\", col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"dir_path Path directory containing CSV files. col_types One NULL, cols() specification, string. See vignette(\"readr\") details. NULL, column types inferred guess_max rows input, interspersed throughout file. convenient (fast), robust. guessed types wrong, need increase guess_max supply correct types . Column specifications created list() cols() must contain one column specification column. want read subset columns, use cols_only(). Alternatively, can use compact string representation character represents one column: c = character = integer n = number d = double l = logical f = factor D = date T = date time t = time ? = guess _ - = skip default, reading file without column specification print message showing readr guessed . remove message, set show_col_types = FALSE set `options(readr.show_col_types = FALSE). .id name column store file path. useful reading multiple input files data file paths, data collection date. NULL (default) extra column created.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"tibble. column type mismatch data frames row binding, error occur. R combine columns different types. 
example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_csv_files_from_dir.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Reads all CSV files from a directory — read_csv_files_from_dir","text":"","code":"directory <- system.file(\"csvfolder\", package = \"bulkreadr\") read_csv_files_from_dir(dir_path = directory, .id = \"cut\") #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 /home/runner/work/_t… 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 /home/runner/work/_t… 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 /home/runner/work/_t… 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 /home/runner/work/_t… 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 /home/runner/work/_t… 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> 6 /home/runner/work/_t… 0.5 E VS2 64.9 56 1397 5.01 4.95 3.23 #> 7 /home/runner/work/_t… 1 E SI1 65.1 61 4435 6.15 6.08 3.98 #> 8 /home/runner/work/_t… 1.09 J VS2 64.6 58 3443 6.48 6.41 4.16 #> 9 /home/runner/work/_t… 0.98 H SI2 67.9 60 2777 6.05 5.97 4.08 #> 10 /home/runner/work/_t… 0.7 F SI1 65.3 54 1974 5.58 5.54 3.63 #> # ℹ 250 more rows # Column types mismatch error -------------------------------------- # If the `read_csv_files_from_dir()` function complains about a data type mismatch, # then set the `col_types` argument to `\"c\"`. 
# This will make all the column types in the resulting dataframe be characters."},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":null,"dir":"Reference","previous_headings":"","what":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"read_excel_files_from_dir() reads Excel workbooks \"~/data\" directory returns appended dataframe.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"","code":"read_excel_files_from_dir(dir_path, col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"dir_path Path directory containing xls/xlsx files. col_types Either NULL guess spreadsheet character vector containing one entry per column options: \"skip\", \"guess\", \"logical\", \"numeric\", \"date\", \"text\" \"list\". exactly one col_type specified, recycled. content cell skipped column never read column appear data frame output. list cell loads column list length 1 vectors, typed using type guessing logic col_types = NULL, cell--cell basis. .id name optional identifier column. Provide string create output column identifies input. column use names available, otherwise use positions.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"tibble. 
column type mismatch data frames row binding, error occur. R combine columns different types. example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_files_from_dir.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read Excel Workbooks data from a directory — read_excel_files_from_dir","text":"","code":"directory <- system.file(\"xlsxfolder\", package = \"bulkreadr\") read_excel_files_from_dir(dir_path = directory, .id = \"cut\") #> # A tibble: 260 × 10 #> cut carat color clarity depth table price x y z #> #> 1 /home/runner/work/_t… 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 /home/runner/work/_t… 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 /home/runner/work/_t… 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 /home/runner/work/_t… 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 /home/runner/work/_t… 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> 6 /home/runner/work/_t… 0.5 E VS2 64.9 56 1397 5.01 4.95 3.23 #> 7 /home/runner/work/_t… 1 E SI1 65.1 61 4435 6.15 6.08 3.98 #> 8 /home/runner/work/_t… 1.09 J VS2 64.6 58 3443 6.48 6.41 4.16 #> 9 /home/runner/work/_t… 0.98 H SI2 67.9 60 2777 6.05 5.97 4.08 #> 10 /home/runner/work/_t… 0.7 F SI1 65.3 54 1974 5.58 5.54 3.63 #> # ℹ 250 more rows # Column types mismatch error -------------------------------------- # If the `read_excel_files_from_dir()` function complains about a data type mismatch, # then set the `col_types` argument to `\"text\"`. 
# This will make all the column types in the resulting dataframe be characters."},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":null,"dir":"Reference","previous_headings":"","what":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"read_excel_workbook() reads data sheets Excel workbook return appended dataframe.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"","code":"read_excel_workbook(path, col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"path Path xls/xlsx file. col_types Either NULL guess spreadsheet character vector containing one entry per column options: \"skip\", \"guess\", \"logical\", \"numeric\", \"date\", \"text\" \"list\". exactly one col_type specified, recycled. content cell skipped column never read column appear data frame output. list cell loads column list length 1 vectors, typed using type guessing logic col_types = NULL, cell--cell basis. .id name optional identifier column. Provide string create output column identifies input. column use names available, otherwise use positions.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"tibble. column type mismatch data frames row binding, error occur. R combine columns different types. 
example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_excel_workbook.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import data from multiple sheets of an Excel workbook — read_excel_workbook","text":"","code":"path <- system.file(\"extdata\", \"Diamonds.xlsx\", package = \"bulkreadr\", mustWork = TRUE) read_excel_workbook(path = path, .id = \"Year\") #> # A tibble: 260 × 10 #> Year carat color clarity depth table price x y z #> #> 1 Fair 2 I SI1 65.9 60 13764 7.8 7.73 5.12 #> 2 Fair 0.7 H SI1 65.2 58 2048 5.49 5.55 3.6 #> 3 Fair 1.51 E SI1 58.4 70 11102 7.55 7.39 4.36 #> 4 Fair 0.7 D SI2 65.5 57 1806 5.56 5.43 3.6 #> 5 Fair 0.35 F VVS1 54.6 59 1011 4.85 4.79 2.63 #> 6 Fair 0.5 E VS2 64.9 56 1397 5.01 4.95 3.23 #> 7 Fair 1 E SI1 65.1 61 4435 6.15 6.08 3.98 #> 8 Fair 1.09 J VS2 64.6 58 3443 6.48 6.41 4.16 #> 9 Fair 0.98 H SI2 67.9 60 2777 6.05 5.97 4.08 #> 10 Fair 0.7 F SI1 65.3 54 1974 5.58 5.54 3.63 #> # ℹ 250 more rows # Column types mismatch error -------------------------------------- # If the `read_excel_workbook()` function complains about a data type mismatch, # then set the `col_types` argument to `\"text\"`. # This will make all the column types in the resulting DataFrame be characters."},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":null,"dir":"Reference","previous_headings":"","what":"Import data from multiple sheets in Google Sheets — read_gsheets","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"read_gsheets() function imports data multiple sheets Google Sheets spreadsheet appends resulting dataframes sheet together create single dataframe. 
function powerful tool data analysis, allows easily combine data multiple sheets single dataset.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"","code":"read_gsheets(ss, col_types = NULL, .id = NULL)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"ss Something identifies Google Sheet: file id string drive_id URL can recover id one-row dribble, googledrive represents Drive files instance googlesheets4_spreadsheet, gs4_get() returns Processed as_sheets_id(). col_types Column types. Either NULL guess spreadsheet string readr-style shortcodes, one character code per column. exactly one col_type specified, recycled. See Column Specification . .id name optional identifier column. Provide string create output column identifies input. column use names available, otherwise use positions.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"tibble. column type mismatch data frames row binding, error occur. R combine columns different types. 
example, combine column integers column characters.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_gsheets.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import data from multiple sheets in Google Sheets — read_gsheets","text":"","code":"if (FALSE) { # googlesheets4::gs4_has_token() sheet_id <- \"1izO0mHu3L9AMySQUXGDn9GPs1n-VwGFSEoAKGhqVQh0\" read_gsheets(ss = sheet_id, .id = \"sheet.name\") # Column types mismatch error -------------------------------------- # If the `read_gsheets()` function complains about a data type mismatch, # then set the `col_types` argument to `\"c\"`. # This will make all the column types in the resulting dataframe be characters. # For example, } if (FALSE) { # googlesheets4::gs4_has_token() sheet_id <- \"1rrjKAV05POre9lDVtHtZePTa8VROf1onVO47cHnhrTU\" try(read_gsheets(ss = sheet_id)) # error, column types mismatch read_gsheets(ss = sheet_id, col_types = \"c\") }"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Read SPSS data file — read_spss_data","title":"Read SPSS data file — read_spss_data","text":"read_spss_data() designed seamlessly import data SPSS data (.sav .zsav) files. converts labelled variables factors, crucial step enhances ease data manipulation analysis within R programming environment.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read SPSS data file — read_spss_data","text":"","code":"read_spss_data(file, label = FALSE)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read SPSS data file — read_spss_data","text":"file path SPSS data file. 
label Logical indicating whether use variable labels column names (default FALSE).","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read SPSS data file — read_spss_data","text":"tibble containing data SPSS file.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_spss_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read SPSS data file — read_spss_data","text":"","code":"# Read an SPSS data file without converting variable labels as column names file_path <- system.file(\"extdata\", \"Wages.sav\", package = \"bulkreadr\") data <- read_spss_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High … #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some … #> 3 5 10 lives in South Male 27 4.45 Other Not married Less … #> 4 12 9 lives in South Male 30 6.25 Other Not married Less … #> 5 13 9 lives in South Male 29 20.0 Other Married Less … #> 6 14 12 does not live in South Male 37 7.3 Other Married High … #> 7 17 11 does not live in South Male 16 3.65 Other Not married Less … #> 8 20 12 does not live in South Male 9 3.75 Other Not married High … #> 9 21 11 lives in South Male 14 4.5 Other Married Less … #> 10 23 6 lives in South Male 45 5.75 Other Married Less … #> # ℹ 390 more rows # Read an SPSS data file and convert variable labels as column names data <- read_spss_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> 6 14 12 does not live in South Male #> 7 17 11 does not live in South Male #> 8 20 
12 does not live in South Male #> 9 21 11 lives in South Male #> 10 23 6 lives in South Male #> # ℹ 390 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Read Stata data file — read_stata_data","title":"Read Stata data file — read_stata_data","text":"Read Stata data file","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read Stata data file — read_stata_data","text":"","code":"read_stata_data(file, label = FALSE)"},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read Stata data file — read_stata_data","text":"file path Stata data file. 
label Logical indicating whether use variable labels column names (default FALSE).","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read Stata data file — read_stata_data","text":"data frame containing Stata data, labeled variables converted factors.","code":""},{"path":[]},{"path":"https://gbganalyst.github.io/bulkreadr/reference/read_stata_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read Stata data file — read_stata_data","text":"","code":"# Read Stata data file without converting variable labels as column names file_path <- system.file(\"extdata\", \"Wages.dta\", package = \"bulkreadr\") data <- read_stata_data(file = file_path) data #> # A tibble: 400 × 9 #> id educ south sex exper wage occup marr ed #> #> 1 3 12 does not live in South Male 17 7.5 Other Married High … #> 2 4 13 does not live in South Male 9 13.1 Other Not married Some … #> 3 5 10 lives in South Male 27 4.45 Other Not married Less … #> 4 12 9 lives in South Male 30 6.25 Other Not married Less … #> 5 13 9 lives in South Male 29 20.0 Other Married Less … #> 6 14 12 does not live in South Male 37 7.3 Other Married High … #> 7 17 11 does not live in South Male 16 3.65 Other Not married Less … #> 8 20 12 does not live in South Male 9 3.75 Other Not married High … #> 9 21 11 lives in South Male 14 4.5 Other Married Less … #> 10 23 6 lives in South Male 45 5.75 Other Married Less … #> # ℹ 390 more rows # Read Stata data file and convert variable labels as column names data <- read_stata_data(file = file_path, label = TRUE) data #> # A tibble: 400 × 9 #> `Worker ID` `Number of years of education` `Live in south` Gender #> #> 1 3 12 does not live in South Male #> 2 4 13 does not live in South Male #> 3 5 10 lives in South Male #> 4 12 9 lives in South Male #> 5 13 9 lives in South Male #> 6 14 12 does not live in South Male #> 7 17 
11 does not live in South Male #> 8 20 12 does not live in South Male #> 9 21 11 lives in South Male #> 10 23 6 lives in South Male #> # ℹ 390 more rows #> # ℹ 5 more variables: `Number of years of work experience` , #> # `Wage (dollars per hour)` , Occupation , `Marital status` , #> # `Highest education level` "},{"path":"https://gbganalyst.github.io/bulkreadr/news/index.html","id":"bulkreadr-110-2023-11-13","dir":"Changelog","previous_headings":"","what":"bulkreadr 1.1.0 (2023-11-13)","title":"bulkreadr 1.1.0 (2023-11-13)","text":"CRAN release: 2023-11-16 update includes following new features: generate_dictionary(): function designed automatically create comprehensive data dictionary labelled datasets. generated dictionary provides detailed insights variable, aiding better data understanding management. look_for(): enhances capability efficiently search within labelled datasets. allows users quickly find variable names descriptions searching specific keywords. feature streamlines data exploration analysis, particularly large datasets extensive variables. enhancements aim improve user experience data management exploration within bulkreadr. hope new features assist users effectively navigating understanding labelled datasets.","code":""},{"path":"https://gbganalyst.github.io/bulkreadr/news/index.html","id":"bulkreadr-100-2023-09-20","dir":"Changelog","previous_headings":"","what":"bulkreadr 1.0.0 (2023-09-20)","title":"bulkreadr 1.0.0 (2023-09-20)","text":"CRAN release: 2023-09-26 update includes following new features improvements: Developed read_stata_data() import Stata data file (.dta) R data frame, converting labeled variables factors. 
Reduced dependency packages optimize efficiency.","code":""}] diff --git a/sitemap.xml b/sitemap.xml index 8175bf0..adfd1c5 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -15,6 +15,12 @@ https://gbganalyst.github.io/bulkreadr/articles/index.html + + https://gbganalyst.github.io/bulkreadr/articles/labelled-data.html + + + https://gbganalyst.github.io/bulkreadr/articles/other-functions.html + https://gbganalyst.github.io/bulkreadr/authors.html