
Pitching table
Pitching.Rd: Pitching table
Usage
data(Pitching)
Format
A data frame with 57630 observations on the following 30 variables.
playerID: Player ID code
yearID: Year
stint: Player's stint (order of appearances within a season)
teamID: Team; a factor
lgID: League; a factor with levels AA, AL, FL, NL, PL, UA
W: Wins
L: Losses
G: Games
GS: Games Started
CG: Complete Games
SHO: Shutouts
SV: Saves
IPouts: Outs Pitched (innings pitched x 3)
H: Hits
ER: Earned Runs
HR: Homeruns
BB: Walks
SO: Strikeouts
BAOpp: Opponent's Batting Average
ERA: Earned Run Average
IBB: Intentional Walks
WP: Wild Pitches
HBP: Batters Hit By Pitch
BK: Balks
BFP: Batters faced by Pitcher
GF: Games Finished
R: Runs Allowed
SH: Sacrifices by opposing batters
SF: Sacrifice flies by opposing batters
GIDP: Grounded into double plays by opposing batter
Source
Lahman, S. (2026) Lahman's Baseball Database, 1871-2025, 2026 version, http://www.seanlahman.com
Examples
# Pitching data
# library() stops immediately if dplyr is not installed; require() would
# only warn and let the pipeline below fail with a less helpful error.
library(dplyr)
###################################
# cleanup, and add some other stats
###################################
# Restrict to AL and NL data, 1901+
# All data re SH, SF and GIDP are missing, so remove
# Intentional walks (IBB) not recorded until 1955
pitching <- Pitching %>%
  filter(yearID >= 1901 & lgID %in% c("AL", "NL")) %>%
  # Drop by name rather than by position (columns 28:30) so this step
  # keeps removing the intended columns if the column order changes.
  select(-c(SH, SF, GIDP)) %>%
  mutate(
    # loose def'n: hits divided by (hits + outs recorded)
    BAOpp = round(H / (H + IPouts), 3),
    # (hits + walks) per inning, with innings = IPouts / 3
    WHIP = round((H + BB) * 3 / IPouts, 2),
    # strikeouts per non-intentional walk from 1955 on,
    # strikeouts per walk before that
    KperBB = round(ifelse(yearID >= 1955,
                          SO / (BB - IBB), SO / BB), 2)
  )
#####################
# some simple queries
#####################
# Team pitching statistics, Toronto Blue Jays, 1993
# (rows ordered by increasing ERA)
tor93 <- pitching %>%
  filter(teamID == "TOR", yearID == 1993) %>%
  arrange(ERA)
# Career pitching statistics, Greg Maddux
# which() keeps only rows where the comparison is TRUE, so rows with a
# missing playerID are left out, as they are with subset().
pitching[which(pitching$playerID == "maddugr01"), ]
#> playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER
#> 27248 maddugr01 1986 1 CHN NL 2 4 6 5 1 0 0 93 44 19
#> 27249 maddugr01 1987 1 CHN NL 6 14 30 27 1 1 0 467 181 97
#> 27250 maddugr01 1988 1 CHN NL 18 8 34 34 9 3 0 747 230 88
#> 27251 maddugr01 1989 1 CHN NL 19 12 35 35 7 1 0 715 222 78
#> 27252 maddugr01 1990 1 CHN NL 15 15 35 35 8 2 0 711 242 91
#> 27253 maddugr01 1991 1 CHN NL 15 11 37 37 7 2 0 789 232 98
#> 27254 maddugr01 1992 1 CHN NL 20 11 35 35 9 4 0 804 201 65
#> 27255 maddugr01 1993 1 ATL NL 20 10 36 36 8 1 0 801 228 70
#> 27256 maddugr01 1994 1 ATL NL 16 6 25 25 10 3 0 606 150 35
#> 27257 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38
#> 27258 maddugr01 1996 1 ATL NL 15 11 35 35 5 1 0 735 225 74
#> 27259 maddugr01 1997 1 ATL NL 19 4 33 33 5 2 0 698 200 57
#> 27260 maddugr01 1998 1 ATL NL 18 9 34 34 9 5 0 753 201 62
#> 27261 maddugr01 1999 1 ATL NL 19 9 33 33 4 0 0 658 258 87
#> 27262 maddugr01 2000 1 ATL NL 19 9 35 35 6 3 0 748 225 83
#> 27263 maddugr01 2001 1 ATL NL 17 11 34 34 3 3 0 699 220 79
#> 27264 maddugr01 2002 1 ATL NL 16 6 34 34 0 0 0 598 194 58
#> 27265 maddugr01 2003 1 ATL NL 16 11 36 36 1 0 0 655 225 96
#> 27266 maddugr01 2004 1 CHN NL 16 11 33 33 2 1 0 638 218 95
#> 27267 maddugr01 2005 1 CHN NL 13 15 35 35 3 0 0 675 239 106
#> 27268 maddugr01 2006 1 CHN NL 9 11 22 22 0 0 0 409 153 71
#> 27269 maddugr01 2006 2 LAN NL 6 3 12 12 0 0 0 221 66 27
#> 27270 maddugr01 2007 1 SDN NL 14 11 34 34 1 0 0 594 221 91
#> 27271 maddugr01 2008 1 SDN NL 6 9 26 26 0 0 0 460 161 68
#> 27272 maddugr01 2008 2 LAN NL 2 4 7 7 0 0 0 122 43 23
#> HR BB SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
#> 27248 3 11 20 0.321 5.52 2 2 1 0 144 1 20 1.77 2.22
#> 27249 17 74 101 0.279 5.61 13 4 4 7 701 2 111 1.64 1.66
#> 27250 13 81 140 0.235 3.18 16 3 9 6 1047 0 97 1.25 2.15
#> 27251 13 82 135 0.237 2.95 13 5 6 3 1002 0 90 1.28 1.96
#> 27252 11 71 144 0.254 3.46 10 3 4 3 1011 0 116 1.32 2.36
#> 27253 18 66 198 0.227 3.35 9 6 6 3 1070 0 113 1.13 3.47
#> 27254 7 70 199 0.200 2.18 7 5 14 0 1061 0 68 1.01 3.16
#> 27255 14 52 197 0.222 2.36 7 5 6 1 1064 0 85 1.05 4.38
#> 27256 4 31 156 0.198 1.56 3 3 6 1 774 0 44 0.90 5.57
#> 27257 8 23 181 0.189 1.63 3 1 4 0 785 0 39 0.81 9.05
#> 27258 11 28 172 0.234 2.72 11 4 3 0 978 0 85 1.03 10.12
#> 27259 9 20 177 0.223 2.20 6 0 6 0 893 0 58 0.95 12.64
#> 27260 13 45 204 0.211 2.22 10 4 7 0 987 0 75 0.98 5.83
#> 27261 16 37 136 0.282 3.57 8 1 4 0 940 0 103 1.34 4.69
#> 27262 19 42 190 0.231 3.00 12 1 10 2 1012 0 91 1.07 6.33
#> 27263 20 27 173 0.239 3.05 10 2 7 0 927 0 86 1.06 10.18
#> 27264 14 45 118 0.245 2.62 7 1 4 0 820 0 67 1.20 3.11
#> 27265 24 33 124 0.256 3.96 7 3 8 0 901 0 112 1.18 4.77
#> 27266 35 33 151 0.255 4.02 4 2 9 0 872 0 103 1.18 5.21
#> 27267 29 36 136 0.261 4.24 4 8 7 0 936 0 112 1.22 4.25
#> 27268 14 23 81 0.272 4.69 3 0 0 0 572 0 78 1.29 4.05
#> 27269 6 14 36 0.230 3.30 4 0 0 0 290 0 31 1.09 3.60
#> 27270 14 25 104 0.271 4.14 3 5 6 0 830 0 92 1.24 4.73
#> 27271 16 26 80 0.259 3.99 4 2 5 2 638 0 80 1.22 3.64
#> 27272 5 4 18 0.261 5.09 1 0 1 0 166 0 25 1.16 6.00
# Best ERAs for starting pitchers post WWII
# Keeps the five lowest-ERA seasons in each league among seasons with
# at least 600 outs pitched (IPouts is innings pitched x 3).
pitching %>%
  filter(yearID >= 1946 & IPouts >= 600) %>%
  # Sort by league first so the result comes out league by league,
  # lowest ERA first within each league.
  arrange(lgID, ERA) %>%
  group_by(lgID) %>%
  # slice_head() replaces the superseded do(head(., 5)) idiom.
  slice_head(n = 5)
#> # A tibble: 10 × 29
#> # Groups: lgID [2]
#> playerID yearID stint teamID lgID W L G GS CG SHO SV
#> <chr> <int> <int> <fct> <fct> <int> <int> <int> <int> <int> <int> <int>
#> 1 tiantlu01 1968 1 CLE AL 21 9 34 32 19 9 0
#> 2 chancde01 1964 1 LAA AL 20 9 46 35 15 11 4
#> 3 guidrro01 1978 1 NYA AL 25 3 35 35 16 9 0
#> 4 martipe02 2000 1 BOS AL 18 6 29 29 7 4 0
#> 5 mcdowsa01 1968 1 CLE AL 15 14 38 37 11 3 0
#> 6 gibsobo01 1968 1 SLN NL 22 9 34 34 28 13 0
#> 7 goodedw01 1985 1 NYN NL 24 4 35 35 16 8 0
#> 8 maddugr01 1994 1 ATL NL 16 6 25 25 10 3 0
#> 9 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0
#> 10 greinza01 2015 1 LAN NL 19 3 32 32 1 0 0
#> # ℹ 17 more variables: IPouts <int>, H <int>, ER <int>, HR <int>, BB <int>,
#> # SO <int>, BAOpp <dbl>, ERA <dbl>, IBB <int>, WP <int>, HBP <int>, BK <int>,
#> # BFP <int>, GF <int>, R <int>, WHIP <dbl>, KperBB <dbl>
# Best K/BB ratios post-1955 among starters (excludes intentional walks)
# KperBB is recomputed here without rounding before ranking.
pitching %>%
  filter(yearID >= 1955, IPouts >= 600) %>%
  mutate(KperBB = SO / (BB - IBB)) %>%
  arrange(desc(KperBB)) %>%
  head(10)
#> playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR BB
#> 1 maddugr01 1997 1 ATL NL 19 4 33 33 5 2 0 698 200 57 9 20
#> 2 hugheph01 2014 1 MIN AL 16 10 32 32 1 0 0 629 221 82 16 16
#> 3 maddugr01 2001 1 ATL NL 17 11 34 34 3 3 0 699 220 79 20 27
#> 4 maddugr01 1996 1 ATL NL 15 11 35 35 5 1 0 735 225 74 11 28
#> 5 schilcu01 2002 1 ARI NL 23 7 36 35 5 1 0 778 218 93 29 33
#> 6 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38 8 23
#> 7 martipe02 2000 1 BOS AL 18 6 29 29 7 4 0 651 128 42 17 32
#> 8 martipe02 1999 1 BOS AL 23 4 31 29 5 1 0 640 160 49 9 37
#> 9 scherma01 2015 1 WAS NL 14 12 33 33 4 3 0 686 176 71 27 34
#> 10 sheetbe01 2004 1 MIL NL 12 14 34 34 5 0 0 711 201 71 25 32
#> SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
#> 1 177 0.223 2.20 6 0 6 0 893 0 58 0.95 12.642857
#> 2 186 0.260 3.52 1 1 5 0 855 0 88 1.13 12.400000
#> 3 173 0.239 3.05 10 2 7 0 927 0 86 1.06 10.176471
#> 4 172 0.234 2.72 11 4 3 0 978 0 85 1.03 10.117647
#> 5 316 0.219 3.23 1 6 3 0 1017 0 95 0.97 9.875000
#> 6 181 0.189 1.63 3 1 4 0 785 0 39 0.81 9.050000
#> 7 284 0.164 1.74 0 1 14 0 817 0 44 0.74 8.875000
#> 8 313 0.200 2.07 1 6 9 0 835 1 56 0.92 8.694444
#> 9 276 0.204 2.79 2 10 5 1 899 0 74 0.92 8.625000
#> 10 264 0.220 2.70 1 8 4 1 937 0 85 0.98 8.516129
# Best K/BB ratios among relievers post-1950 (min. 20 saves)
# Uses the rounded KperBB column already stored in `pitching`.
pitching %>%
  filter(yearID >= 1950, SV >= 20) %>%
  arrange(desc(KperBB)) %>%
  head(10)
#> playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR BB
#> 1 eckerde01 1990 1 OAK AL 4 2 63 0 0 0 48 220 41 5 2 4
#> 2 hendrli01 2021 1 CHA AL 8 3 69 0 0 0 38 213 45 20 11 7
#> 3 eckerde01 1992 1 OAK AL 7 1 69 0 0 0 51 240 62 17 5 11
#> 4 eckerde01 1989 1 OAK AL 4 0 51 0 0 0 33 173 32 10 5 3
#> 5 janseke01 2017 1 LAN NL 5 0 65 0 0 0 41 205 44 10 5 7
#> 6 eckerde01 1991 1 OAK AL 5 4 67 0 0 0 43 228 60 25 11 9
#> 7 ueharko01 2013 1 BOS AL 4 1 73 0 0 0 21 223 33 9 5 9
#> 8 riverma01 2008 1 NYA AL 6 5 64 0 0 0 39 212 41 11 4 6
#> 9 doolise01 2014 1 OAK AL 2 4 61 0 0 0 22 188 38 19 5 8
#> 10 eckerde01 1996 1 SLN NL 0 6 63 0 0 0 30 180 65 22 8 6
#> SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
#> 1 73 0.157 0.61 1 0 0 0 262 61 9 0.61 24.33
#> 2 113 0.174 2.54 1 6 1 0 267 58 23 0.73 18.83
#> 3 93 0.205 1.91 6 0 1 0 309 65 17 0.91 18.60
#> 4 55 0.156 1.56 0 0 1 0 206 46 10 0.61 18.33
#> 5 109 0.177 1.32 0 2 2 1 258 57 11 0.75 15.57
#> 6 87 0.208 2.96 3 1 1 0 299 59 26 0.91 14.50
#> 7 101 0.129 1.09 2 1 1 0 265 40 10 0.57 14.43
#> 8 77 0.162 1.40 0 1 2 0 259 60 11 0.67 12.83
#> 9 89 0.168 2.73 1 0 0 0 236 40 19 0.73 12.71
#> 10 49 0.265 3.30 2 0 4 0 251 53 26 1.18 12.25
###############################################
# Winningest pitchers in each league each year:
###############################################
# Add name & throws information:
peopleInfo <- People %>%
  select(playerID, nameLast, nameFirst, throws)
# Merge peopleInfo into the pitching data
pitching1 <- right_join(peopleInfo, pitching, by = "playerID")
# Extract the pitcher with the maximum number of wins
# each year, by league (every pitcher tied for the maximum is kept)
winp <- pitching1 %>%
  group_by(yearID, lgID) %>%
  filter(W == max(W)) %>%
  # Select the grouping columns explicitly: otherwise select() re-adds
  # them itself and prints "Adding missing grouping variables".
  select(yearID, lgID, nameLast, nameFirst, teamID, W, throws) %>%
  # Drop the grouping so later steps work on a plain tibble.
  ungroup()
# A simple ANCOVA model of wins vs. year, league and hand (L/R)
# Fit the linear model first, then print its analysis of variance table.
winp_fit <- lm(W ~ yearID + I(yearID^2) + lgID + throws, data = winp)
anova(winp_fit)
#> Analysis of Variance Table
#>
#> Response: W
#> Df Sum Sq Mean Sq F value Pr(>F)
#> yearID 1 2532.12 2532.12 266.2474 < 2.2e-16 ***
#> I(yearID^2) 1 112.21 112.21 11.7986 0.0006701 ***
#> lgID 1 23.43 23.43 2.4634 0.1175064
#> throws 1 36.02 36.02 3.7876 0.0525009 .
#> Residuals 323 3071.86 9.51
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Nature of managing pitching staffs has altered importance of
# wins over time
if (FALSE) { # \dontrun{
  library(ggplot2)
  # compare loess smooth with quadratic fit
  ggplot(winp, aes(x = yearID, y = W)) +
    geom_point(aes(colour = throws, shape = lgID), size = 2) +
    # Line thickness is set with `linewidth`; using `size` for lines is
    # deprecated as of ggplot2 3.4.0.
    geom_smooth(method = "loess", linewidth = 1.5, color = "blue") +
    geom_smooth(method = "lm", se = FALSE, color = "black",
                formula = y ~ poly(x, 2)) +
    ylab("League maximum Wins") + xlab("Year") +
    ggtitle("Maximum pitcher wins by year")
  ## To reinforce this, plot the mean IPouts by year and league,
  ## which gives some idea of pitcher usage. Restrict pitcher
  ## pool to those who pitched at least 100 innings in a year.
  pitching %>% filter(IPouts >= 300) %>% # >= 100 IP
    ggplot(., aes(x = yearID, y = IPouts, color = lgID)) +
    geom_smooth(method = "loess") +
    labs(x = "Year", y = "IPouts")
  ## Another indicator: total number of complete games pitched
  ## (Mirrors the trend from the preceding plot.)
  pitching %>%
    group_by(yearID, lgID) %>%
    # .groups = "drop" returns an ungrouped summary and avoids the
    # message summarise() prints about the remaining grouping.
    summarise(totalCG = sum(CG, na.rm = TRUE), .groups = "drop") %>%
    ggplot(., aes(x = yearID, y = totalCG, color = lgID)) +
    geom_point() +
    geom_path() +
    labs(x = "Year", y = "Number of complete games")
} # }