R实现Malmquist指数及分解
实现Malmquist指数及分解
# ---- 初始化 ----
library(productivity)
library(openxlsx)
library(dplyr)
library(readxl)
library(officer)
library(flextable)
library(ggrepel)
# ---- 数据导入
datafile <- read_excel("Data/数据模板.xlsx")
data <- as.data.frame(datafile)
# ---- 计算Malmquist指数 ----
Malmquist <- malm(data, id.var = "DMU", time.var = "Year",
x.vars = c("Input1", "Input2", "Input3"), #投入指标
y.vars = c("Output1", "解释变量2"), rts = "vrs", orientation = "in")#产出指标 BCC 模型
## ---- 提取并四舍五入效率值 ----
results <- data.frame(
DMU = Malmquist$Changes$DMU,
Year.0 = Malmquist$Changes$Year.0,
Year.1 = Malmquist$Changes$Year.1,
effch = round(Malmquist$Changes$effch, 3),
tech = round(Malmquist$Changes$tech, 3),
pure_inp_effch = round(Malmquist$Changes$pure.inp.effch, 3),
inp_scalech = round(Malmquist$Changes$inp.scalech, 3),
malmquist = round(Malmquist$Changes$malmquist, 3)
)
## ---- 计算相邻年份的平均值 ----
averages <- results %>%
group_by(Year.1, Year.0) %>%
summarise(
effch_avg = round(mean(effch, na.rm = TRUE), 3),
tech_avg = round(mean(tech, na.rm = TRUE), 3),
pure_inp_effch_avg = round(mean(pure_inp_effch, na.rm = TRUE), 3),
inp_scalech_avg = round(mean(inp_scalech, na.rm = TRUE), 3),
malmquist_avg = round(mean(malmquist, na.rm = TRUE), 3)
) %>%
mutate(Years = paste(pmin(Year.0, Year.1), pmax(Year.0, Year.1), sep = "-"))
## ---- 准备结果数据框 ----
results_avg <- data.frame(
Years = averages$Years,
effch = averages$effch_avg,
tech = averages$tech_avg,
pure_inp_effch = averages$pure_inp_effch_avg,
inp_scalech = averages$inp_scalech_avg,
malmquist = averages$malmquist_avg
)
## ---- 计算总体平均值 ----
overall_avg <- results %>%
summarise(
Years = "平均值",
effch = round(mean(effch, na.rm = TRUE), 3),
tech = round(mean(tech, na.rm = TRUE), 3),
pure_inp_effch = round(mean(pure_inp_effch, na.rm = TRUE), 3),
inp_scalech = round(mean(inp_scalech, na.rm = TRUE), 3),
malmquist = round(mean(malmquist, na.rm = TRUE), 3)
)
## ---- 将结果与总体平均值合并 ----
results_avg <- bind_rows(results_avg, overall_avg)
# ---- 计算DMU平均值 ----
dmu_avg <- results %>%
group_by(DMU) %>%
summarise(
effch_avg = round(mean(effch, na.rm = TRUE), 3),
tech_avg = round(mean(tech, na.rm = TRUE), 3),
pure_inp_effch_avg = round(mean(pure_inp_effch, na.rm = TRUE), 3),
inp_scalech_avg = round(mean(inp_scalech, na.rm = TRUE), 3),
malmquist_avg = round(mean(malmquist, na.rm = TRUE), 3)
) %>%
ungroup()
# 添加“平均值”行
dmu_avg <- bind_rows(
dmu_avg,
dmu_avg %>%
summarise(
DMU = "平均值",
effch_avg = round(mean(effch_avg, na.rm = TRUE), 3),
tech_avg = round(mean(tech_avg, na.rm = TRUE), 3),
pure_inp_effch_avg = round(mean(pure_inp_effch_avg, na.rm = TRUE), 3),
inp_scalech_avg = round(mean(inp_scalech_avg, na.rm = TRUE), 3),
malmquist_avg = round(mean(malmquist_avg, na.rm = TRUE), 3)
)
)
## ---- 准备 DMU 结果数据框 ----
results_DMU <- dmu_avg
## ---- 打印 DMU 结果数据框 ----
print(results_DMU)
# ---- 将结果写入Excel ----
wb <- createWorkbook()
addWorksheet(wb, "平均效率指数")
writeData(wb, sheet = "平均效率指数", x = results_avg)
# 添加一个新的工作表用于DMU平均值
addWorksheet(wb, "DMU平均指数")
writeData(wb, sheet = "DMU平均指数", x = dmu_avg)
saveWorkbook(wb, file = "Table/Average_Indices_Across_Years.xlsx", overwrite = TRUE)
# ---- 将结果写入Word ----
#Word创建Flextable
ft1 <- flextable(results_avg) %>%
theme_booktabs() %>%
autofit()
ft2 <- flextable(dmu_avg) %>%
theme_booktabs() %>%
autofit()
## ---- 准备中文Flextable ----
results_avg_cn <- results_avg %>%
mutate(
Years = ifelse(Years == "Overall Average", "平均值", Years)
)
colnames(results_avg_cn) <- c("年份", "技术效率变化指数", "技术进步效率指数", "纯技术效率指数", "规模效率指数", "全要素生产率")
dmu_avg_cn <- dmu_avg %>%
mutate(
DMU = ifelse(DMU == "平均值", "平均值", DMU)
)
colnames(dmu_avg_cn) <- c("地区", "技术效率变化指数", "技术进步效率指数", "纯技术效率指数", "规模效率指数", "全要素生产率")
ft3 <- flextable(results_avg_cn) %>%
theme_booktabs() %>%
autofit()
ft4 <- flextable(dmu_avg_cn) %>%
theme_booktabs() %>%
autofit()
## ---- 创建并保存Word文档 ----
doc <- read_docx() %>%
body_add_par("表 (面板数据期间)我国 xx效率分年度变动指数及其分解指数", style = "centered") %>%
body_add_flextable(ft1) %>%
body_add_par(" ", style = "Normal") %>%
body_add_par("表 (面板数据期间)我国 xx效率分年度变动指数及其分解指数", style = "centered") %>%
body_add_flextable(ft3) %>%
body_add_par(" ", style = "Normal") %>%
body_add_par("表 (面板数据期间)各地区效率整体变动指数及其分解指数", style = "centered") %>%
body_add_flextable(ft2) %>%
body_add_par(" ", style = "Normal") %>%
body_add_par("表 (面板数据期间)各地区效率整体变动指数及其分解指数", style = "centered") %>%
body_add_flextable(ft4)
print(doc, target = "Table/Malmquist指数及其分解指数结果(三线表).docx")
# ---- 绘制图表 ----
# 过滤掉“总体平均”数据点
results_avg_filtered <- results_avg[results_avg$Years != "平均值", ]
## 绘制图表1----
library(ggrepel)
# 绘制图表
p <- ggplot(data = results_avg_filtered, aes(x = factor(Years), group = 1)) +
geom_point(aes(y = effch, shape = "技术效率变化指数"), size = 3, color = "black") +
geom_point(aes(y = tech, shape = "技术进步效率指数"), size = 3, color = "black", alpha = 0.2) +
geom_point(aes(y = pure_inp_effch, shape = "纯技术效率指数"), size = 3, color = "black", alpha = 0.7) +
geom_point(aes(y = inp_scalech, shape = "规模效率指数"), size = 3, color = "black", alpha = 0.5) +
geom_point(aes(y = malmquist, shape = "全要素生产率指数"), size = 3, color = "black") +
geom_text_repel(aes(y = effch, label = round(effch, 3)), color = "black") +
geom_text_repel(aes(y = tech, label = round(tech, 3)), color = "black") +
geom_text_repel(aes(y = pure_inp_effch, label = round(pure_inp_effch, 3)), color = "black") +
geom_text_repel(aes(y = inp_scalech, label = round(inp_scalech, 3)), color = "black") +
geom_text_repel(aes(y = malmquist, label = round(malmquist, 3)), color = "black") +
scale_shape_manual(name = " ", values = c(16, 17, 0, 3, 4)) +
labs(
title = "2013-2022 年民族八省区Malmquist指数均数情况", # 添加标题
x = "年份",
y = "Malmquist指数"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom",
plot.margin = margin(30, 30, 50, 30), # 增加顶部边距
legend.box.margin = margin(10, 10, 10, 10), # 增加图例区域的边距
plot.title = element_text(hjust = 0.5, vjust = 1) # 标题居中
) +
guides(
shape = guide_legend(nrow = 2, byrow = TRUE, override.aes = list(size = 4, color = "black")) # 增加图例形状的大小和颜色
)
# 绘制图表
print(p)
## 绘制图表 p2----
# 过滤掉“总体平均”数据点
results_avg_filtered <- results_avg[results_avg$Years != "平均值", ]
library(ggrepel)
p2 <- ggplot(data = results_avg_filtered, aes(x = factor(Years), group = 1)) +
geom_line(aes(y = effch, linetype = "技术效率变化指数"), size = 1, color = "black") +
geom_line(aes(y = tech, linetype = "技术进步效率指数"), size = 1, color = "black", alpha = 0.2) +
geom_line(aes(y = pure_inp_effch, linetype = "纯技术效率指数"), size = 1, color = "black", alpha = 0.7) +
geom_line(aes(y = inp_scalech, linetype = "规模效率指数"), size = 1, color = "black", alpha = 0.5) +
geom_line(aes(y = malmquist, linetype = "全要素生产率指数"), size = 1, color = "black") +
geom_text_repel(aes(y = effch, label = round(effch, 3)), color = "black") +
geom_text_repel(aes(y = tech, label = round(tech, 3)), color = "black") +
geom_text_repel(aes(y = pure_inp_effch, label = round(pure_inp_effch, 3)), color = "black") +
geom_text_repel(aes(y = inp_scalech, label = round(inp_scalech, 3)), color = "black") +
geom_text_repel(aes(y = malmquist, label = round(malmquist, 3)), color = "black") +
scale_linetype_manual(name = " ", values = c("solid", "twodash", "dashed", "dotted", "dotdash"),
labels = c("技术效率变化指数", "技术进步效率指数", "纯技术效率指数", "规模效率指数", "全要素生产率指数")) +
labs(
title = "效率变动及其分解指数变化趋势图",
x = "年份",
y = "效率值"
) +
theme_bw() +
theme(
legend.position = "bottom",
legend.title = element_blank(), # 去掉图例标题
legend.text = element_text(size = 9), # 设置图例文本大小
legend.key.width = unit(2, "cm"), # 调整图例键的宽度
legend.key.height = unit(0.5, "cm"), # 调整图例键的高度
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, vjust = 1) # 标题居中
) +
guides(linetype = guide_legend(nrow = 2, byrow = TRUE))
# 绘制图表
print(p2)
# ---- 打印分年度变动指数 ----
print(results_avg)
# ---- 打印整体变动指数 ----
print(results_DMU)