DEA-Tobit模型的R实现
自动计算不同效率值的回归模型
#加载相关的R包----
library(deaR) # 用于数据包络分析(DEA)的R包
library(magrittr) # 提供管道操作符 (%>%),使代码更简洁
library(openxlsx) # 用于读写Excel文件的R包
library(ggplot2) # 用于数据可视化的强大R包
library(showtext) # 用于在图形中添加和管理字体的R包
library(readxl) # 用于读取Excel文件的R包
library(flextable) # 用于创建可灵活调整的表格的R包
library(officer) # 用于操作Word和PowerPoint文档的R包
library(AER) # 用于应用经济学研究的R包,包括Tobit回归等模型
library(GGally) # 提供ggplot2扩展,用于绘制矩阵图和其他多变量图表
library(dplyr)
# 设置字体和中文支持
showtext_auto(enable = TRUE, record = TRUE)
font_add(family = "苹方",regular = "/System/Library/Fonts/PingFang.ttc")
#font_add(family = "黑体",regular = "/System/Library/Fonts/STHeiti Medium.ttc")
# 读取数据文件
data <- read_excel("Data/数据模板.xlsx")
# 提取年份列表
years <- unique(data$Year)
# 初始化存储所有年份的效率值的数据框
efficiency_results <- list(crs = data.frame(), vrs = data.frame(), scale = data.frame(), sbm = data.frame(), super = data.frame())
#一、循环计算每年的 DEA 效率-----
for (year in years) {
# 提取该年的数据
data_year <- subset(data, Year == year)
# 定义投入和产出指标
inputs <- c("Input1", "Input2", "Input3")
outputs <- c("Output1", "Output2")
# 创建 DEA 数据
dea_data <- make_deadata(datadea = data_year,
inputs = inputs,
outputs = outputs)
# 构建 DEA 模型
models <- list(
crs = model_basic(dea_data, rts = "crs", orientation = "io"),
vrs = model_basic(dea_data, rts = "vrs", orientation = "io"),
sbm = model_sbmeff(dea_data, orientation = "io"),
super = model_supereff(dea_data, rts = "crs", orientation = "io")
)
#计算规模效率
scale_eff <- efficiencies(models$crs) / efficiencies(models$vrs)
#计算效率,并存储结果
efficiency_results$crs <- rbind(efficiency_results$crs, data.frame(Year = year, DMU = data_year$DMU, Efficiency = round(efficiencies(models$crs), 3)))
efficiency_results$vrs <- rbind(efficiency_results$vrs, data.frame(Year = year, DMU = data_year$DMU, Efficiency = round(efficiencies(models$vrs), 3)))
efficiency_results$scale <- rbind(efficiency_results$scale, data.frame(Year = year, DMU = data_year$DMU, Efficiency = round(scale_eff, 3)))
efficiency_results$sbm <- rbind(efficiency_results$sbm, data.frame(Year = year, DMU = data_year$DMU, Efficiency = round(efficiencies(models$sbm), 3)))
efficiency_results$super <- rbind(efficiency_results$super, data.frame(Year = year, DMU = data_year$DMU, Efficiency = round(efficiencies(models$super), 3)))
}
#合并所有模型的效率结果
all_efficiencies <- rbind(
efficiency_results$crs %>% mutate(Model = "CRS"),
efficiency_results$vrs %>% mutate(Model = "VRS"),
efficiency_results$scale %>% mutate(Model = "Scale"),
efficiency_results$sbm %>% mutate(Model = "SBM"),
efficiency_results$super %>% mutate(Model = "Super")
)
#将效率结果与原数据合并
merged_data <- merge(data, all_efficiencies, by = c("Year", "DMU"))
#二、进行 Tobit 回归分析---------
##1.解释变量的相关度分析(散点图)---------
ggpairs(data, columns = c("解释变量1", "Input2", "Input3","Output1","解释变量2"))
#其中值越接近 1 相关性越显著
##2.方差被解释量计算----
# 计算相关系数
#r <- cor(fitted_values, model_data$apt)
#r_squared <- r^2#方差被解释量
#print(paste("VRS R-squared:", r_squared))
##3.Tobit 计算----
tobit_results <- list()
models <- c("CRS", "VRS", "Scale", "SBM", "Super")
for (model in models) {
model_data <- subset(merged_data, Model == model)
tobit_model <- tobit(Efficiency ~ 解释变量1 + 解释变量2 + Input3, data = model_data, left = 0, right = 1)#使用方式:在这里输入你的解释变量名称即可
summary_tobit <- summary(tobit_model)
tobit_results[[model]] <- data.frame(
Model = model,
Variable = rownames(summary_tobit$coefficients),
Coefficient = summary_tobit$coefficients[, "Estimate"],
StdError = summary_tobit$coefficients[, "Std. Error"],
ZValue = summary_tobit$coefficients[, "z value"],
PValue = summary_tobit$coefficients[, "Pr(>|z|)"]
)
}
###打印回归结果
print(tobit_results)
#三、结果导出----
##1.创建表格----
wb <- createWorkbook()
for (model in models) {
addWorksheet(wb, model)
writeData(wb, sheet = model, tobit_results[[model]])
}
# 保存 Excel 文件
saveWorkbook(wb, "Table/DEA_Tobit_Results.xlsx", overwrite = TRUE)
##2.创建三线表,排除截距和 Log(scale)-----
#四、打印回归结果----
print(tobit_results)
#五、作图----
##1.创建残差图,检验模型与数据匹配程度------
# 提取 VRS 模型的 Tobit 回归模型
vrs_model_data <- subset(merged_data, Model == "VRS")
vrs_tobit_model <- tobit(Efficiency ~ 解释变量1 + 解释变量2, data = vrs_model_data, left = 0, right = 1)
# 提取残差和拟合值
tobit_residuals <- residuals(vrs_tobit_model)
fitted_values <- fitted(vrs_tobit_model)
residuals_data <- data.frame(
Fitted = fitted_values,
Residuals = tobit_residuals
)
# 生成残差图
ggplot(residuals_data, aes(x = Fitted, y = Residuals)) +
geom_point() +
geom_smooth(method = "loess", col = "red") +
labs(title = "DEA规模效率-Tobit 模型的残差图",
x = "拟合值",
y = "残差") +
theme_minimal(base_family = "苹方")
## 2.计算模型间的边际效应----
calculate_marginal_effects <- function(model, variable_name, data) {
# 获取模型的系数
coefs <- summary(model)$coefficients
# 计算边际效应(对每个变量的系数)
marginal_effects <- data.frame(
Variable = variable_name,
Effect = coefs[variable_name, "Estimate"]
)
return(marginal_effects)
}
#提取Tobit模型中的边际效应
marginal_effects_list <- list()
for (model in models) {
model_data <- subset(merged_data, Model == model)
tobit_model <- tobit(Efficiency ~ 解释变量1 + 解释变量2 + Input3, data = model_data, left = 0, right = 1)
# 计算每个变量的边际效应
effect1 <- calculate_marginal_effects(tobit_model, "解释变量1", model_data)
effect2 <- calculate_marginal_effects(tobit_model, "解释变量2", model_data)
effect3 <- calculate_marginal_effects(tobit_model, "Input3", model_data)
marginal_effects_list[[model]] <- rbind(effect1, effect2, effect3)
}
# 合并所有模型的边际效应结果
all_marginal_effects <- do.call(rbind, marginal_effects_list)
all_marginal_effects$Model <- rep(models, each = nrow(all_marginal_effects) / length(models))
# 绘制边际效应图
ggplot(all_marginal_effects, aes(x = Variable, y = Effect, fill = Model)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
labs(title = "边际效应图", x = "解释变量", y = "边际效应") +
scale_fill_brewer(palette = "Set1") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))