Skip to contents

Information on schools players attended, by school

Usage

data(Schools)

Format

A data frame with 1287 observations on the following 5 variables.

schoolID

school ID code

name_full

school name

city

city where school is located

state

state (or province, for schools outside the USA) where school's city is located

country

country where school is located

Source

Lahman, S. (2026) Lahman's Baseball Database, 1871-2025, 2026 version, https://sabr.org/lahman-database/

Examples


# Attach dplyr for the pipelines below (%>%, select(), group_by(), summarise(),
# left_join(), arrange()). library() stops with an error if the package is
# missing, whereas require() only returns FALSE and lets later calls fail.
library(dplyr)

# How many different schools are listed in each state?
table(Schools$state)
#> 
#>        AL        AR        AZ        CA        CO        CT        DC        DE 
#>        38        19        14       140        11        15         7         4 
#>        FL        GA        HI        IA        ID        IL        IN        KS 
#>        62        31         3        26         8        65        24        29 
#>        KY        LA La Habana        MA        MD        ME        MI        MN 
#>        18        21         1        28        27         8        33        15 
#>        MO        MS        NC        ND        NE        NH        NJ        NM 
#>        34        26        46         2        12         5        25         7 
#>        NV        NY        OH        OK        OR        PA        RI        SC 
#>         3        56        42        24        17        75         7        22 
#>        SD        TN        TX        UT        VA        VT        WA        WI 
#>         1        45        89        10        31         4        26        16 
#>        WV        WY 
#>        14         1 
 
# Number of schools recorded for each country
table(Schools[["country"]])
#> 
#> Cuba  USA 
#>    1 1286 

# Top 20 schools, ranked by their number of CollegePlaying records.
# NOTE(review): `players` is the number of CollegePlaying rows per schoolID;
# if a player has several rows for the same school, each row is counted --
# confirm whether distinct players are intended.
schoolInfo <- select(Schools, -country)

schoolCount <- CollegePlaying %>%
  group_by(schoolID) %>%
  summarise(players = n()) %>%
  left_join(schoolInfo, by = "schoolID") %>%
  arrange(desc(players))
head(schoolCount, n = 20)
#> # A tibble: 20 × 5
#>    schoolID   players name_full                                   city     state
#>    <chr>        <int> <chr>                                       <chr>    <chr>
#>  1 texas          265 University of Texas at Austin               Austin   TX   
#>  2 usc            250 University of Southern California           Los Ang… CA   
#>  3 stanford       248 Stanford University                         Palo Al… CA   
#>  4 arizonast      236 Arizona State University                    Tempe    AZ   
#>  5 michigan       192 University of Michigan                      Ann Arb… MI   
#>  6 ucla           180 University of California - Los Angeles      Los Ang… CA   
#>  7 holycross      167 College of the Holy Cross                   Worcest… MA   
#>  8 california     163 University of California - Berkeley         Berkeley CA   
#>  9 arizona        162 University of Arizona                       Tucson   AZ   
#> 10 alabama        155 University of Alabama                       Tuscalo… AL   
#> 11 unc            154 University of North Carolina at Chapel Hill Chapel … NC   
#> 12 floridast      152 Florida State University                    Tallaha… FL   
#> 13 lsu            149 Louisiana State University                  Baton R… LA   
#> 14 illinois       143 University of Illinois at Urbana-Champaign  Champai… IL   
#> 15 clemson        138 Clemson University                          Clemson  SC   
#> 16 florida        138 University of Florida                       Gainesv… FL   
#> 17 gatech         137 Georgia Institute of Technology             Atlanta  GA   
#> 18 oklahoma       135 University of Oklahoma                      Norman   OK   
#> 19 notredame      134 University of Notre Dame                    South B… IN   
#> 20 okstate        132 Oklahoma State University                   Stillwa… OK   

# Roll the per-school counts up to state level: total of `players` and the
# number of schools contributing to each state.
schoolStates <- schoolCount %>%
  group_by(state) %>%
  summarise(
    players = sum(players),
    schools = n()
  )
str(schoolStates)
#> tibble [50 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ state  : chr [1:50] "AL" "AR" "AZ" "CA" ...
#>  $ players: int [1:50] 480 171 525 2958 80 174 109 28 1068 406 ...
#>  $ schools: int [1:50] 34 18 13 127 7 15 5 4 59 18 ...
# Column-wise summary of the state-level table
summary(schoolStates)
#>     state              players           schools      
#>  Length:50          Min.   :   1.00   Min.   :  1.00  
#>  Class :character   1st Qu.:  92.75   1st Qu.:  6.25  
#>  Mode  :character   Median : 215.50   Median : 17.50  
#>                     Mean   : 353.74   Mean   : 22.44  
#>                     3rd Qu.: 469.25   3rd Qu.: 26.50  
#>                     Max.   :2958.00   Max.   :127.00