Dualizaiton working paper
미정
propensity score를 활용해서 외부자성을 계산해보자.
Important
실험적 단계의 연구논문으로, 아직 미완성된 게시글입니다.
1 Rawdata
Code
p23_raw <- read_dta("../../../rawdata/klips23p.dta")
p05_raw <- read_dta("../../../rawdata/klips05p.dta")
h23_raw <- read_dta("../../../rawdata/klips23h.dta") %>%
mutate(wage = h232102 / sqrt(h230150)) %>%
select(hhid23, wage) %>%
drop_na()
h05_raw <- read_dta("../../../rawdata/klips05h.dta") %>%
mutate(wage = h052102 / sqrt(h050150)) %>%
select(hhid05, wage) %>%
drop_na()
Code
p23 <- p23_raw %>%
left_join(h23_raw, by = "hhid23") %>%
mutate(
empsta = ifelse(is.na(p230314), NA, p230314),
male = ifelse(p230101 == 1, 1, 0) %>% as.factor(),
age = p230107,
youth = case_when(
age >= 15 & age < 35 ~ 1,
age >= 35 & age <= 64 ~ 0,
TRUE ~ NA
) %>% as.factor(),
) %>%
mutate(
partim = ifelse(p230315 == 1, 1, 0) %>% as.factor(),
fixter_1 = ifelse(p230501 == 1, 1, 0) %>% as.factor(),
fixter_2 = case_when(
p230501 == 2 & p230601 == 1 & p230602 == 2 ~ 1,
p230501 == 2 & p230601 == 2 & (p230605>=1 & p230605<=6) ~ 1,
TRUE ~ 0
) %>% as.factor(),
fixter_3 = ifelse(empsta == 2, 1, 0) %>% as.factor(),
fixter = ifelse(fixter_1 == 1 | fixter_2 == 1 | fixter_3 == 2, 1, 0) %>% as.factor(),
daiwor_1 = if_else(p230508 == 1, 1, 0, 0) %>% as.factor(),
daiwor_2 = ifelse(empsta == 3, 1, 0) %>% as.factor(),
daiwor = ifelse(daiwor_1 == 1 | daiwor_2 == 2, 1, 0) %>% as.factor(),
agewor_1 = ifelse(p230611 == 2, 1, 0) %>% as.factor(),
agewor_2 = ifelse(p230611 == 3, 1, 0) %>% as.factor(),
agewor = ifelse(agewor_1 == 1 | agewor_2 == 1, 1, 0) %>% as.factor(),
domwor = ifelse(p230613 == 1, 1, 0) %>% as.factor(),
speemp = ifelse(p230612 == 1, 1, 0) %>% as.factor(),
natpen = ifelse(p232101 == 1, 1, 0) %>% as.factor(),
spepen = ifelse(p232102 == 1, 1, 0) %>% as.factor(),
totpen = ifelse(natpen == 1 | spepen == 1, 1, 0) %>% as.factor(),
nathel = ifelse(p232103 == 1, 1, 0) %>% as.factor(),
empins = ifelse(p232104 == 1 | spepen == 1, 1, 0) %>% as.factor(),
indins = ifelse(p232105 == 1 | spepen == 1, 1, 0) %>% as.factor(),
retben = ifelse(p234101 == 1, 1, 0) %>% as.factor(),
) %>%
mutate(
standard = ifelse(partim == 0
& fixter == 0
& daiwor == 0
& agewor == 0
& domwor == 0
& speemp == 0,
1,
0) %>% as.factor(),
socins = ifelse(totpen == 1
& nathel == 1
& empins == 1
& indins == 1,
1,
0) %>% as.factor(),
) %>%
mutate(
KSCO5=p230350 %>% str_sub(1, 3) %>% as.factor(),
class_serv = case_when(
p230350 >= 140 & p230350 < 160 ~ 1,
p230350 >= 170 & p230350 < 190 ~ 1,
p230350 >= 240 & p230350 < 260 ~ 1,
p230350 >= 270 & p230350 < 290 ~ 1,
p230350 >= 310 & p230350 < 540 ~ 1,
TRUE ~ 0
),
class3 = case_when(
(p230350 >= 400 & p230350<600) | p230350 >= 900 ~ 1,
(p230350 >= 300 & p230350<400) | (p230350 >= 700 & p230350<900) ~ 2,
p230350 >= 0 & p230350<300 ~ 3
),
class5 = case_when(
class_serv == 1 & class3 == 1 ~ 1,
class_serv == 0 & class3 == 1 ~ 2,
class_serv == 1 & class3 == 2 ~ 3,
class_serv == 0 & class3 == 2 ~ 4,
class3 == 3 ~ 5,
TRUE ~ NA
) %>% as.factor(),
large_firm = case_when(
(p230402 >= 1 & p230402 <= 299) | p230403 <= 7 ~ 1,
p230402 >= 300 | (p230403 >= 8 & p230403 <= 10) ~ 2, # | p230401 == 5
is.na(p230402) == TRUE & is.na(p230403) == TRUE ~ NA,
TRUE ~ NA
) %>% as.factor(),
firm_group = case_when(
(p230402 >= 1 & p230402 <= 4) | p230403 <= 1 ~ 1,
(p230402 >= 5 & p230402 <= 29) | (p230403 >= 2 & p230403 <= 3) ~ 2,
(p230402 >= 30 & p230402 <= 99) | (p230403 >= 4 & p230403 <= 6) ~ 3,
(p230402 >= 100 & p230402 <= 299) | (p230403 >= 7 & p230403 <= 7) ~ 4,
p230402 >= 300 | (p230403 >= 8 & p230403 <= 10) ~ 5,
TRUE ~ NA
) %>% as.factor(),
firmsize = case_when(
(p230402 >= 1 & p230402 <= 4) | p230403 == 1 ~ 1,
(p230402 >= 5 & p230402 <= 9) | p230403 == 2 ~ 2,
(p230402 >= 10 & p230402 <= 29) | p230403 == 3 ~ 3,
(p230402 >= 30 & p230402 <= 49) | p230403 == 4 ~ 4,
(p230402 >= 50 & p230402 <= 69) | p230403 == 5 ~ 5,
(p230402 >= 70 & p230402 <= 99) | p230403 == 6 ~ 6,
(p230402 >= 100 & p230402 <= 299) | p230403 == 7 ~ 7,
(p230402 >= 300 & p230402 <= 499) | p230403 == 8 ~ 8,
(p230402 >= 500 & p230402 <= 999) | p230403 == 9 ~ 9,
p230402 >= 1000 | p230403 == 10 ~ 10,
TRUE ~ NA
) %>% as.factor()
) %>%
filter(empsta <= 3) %>%
drop_na(male, age, youth, wage, class5, firm_group, large_firm, KSCO5)
Code
p05 <- p05_raw %>%
left_join(h05_raw, by = "hhid05") %>%
mutate(
empsta = ifelse(is.na(p050314), NA, p050314),
male = ifelse(p050101==1, 1, 0) %>% as.factor(),
age = p050107,
youth = case_when(
age >= 15 & age < 35 ~ 1,
age >= 35 & age<=64 ~ 0,
TRUE ~ NA
) %>% as.factor(),
) %>%
mutate(
partim = ifelse(p050315 == 1, 1, 0) %>% as.factor(),
fixter_1 = ifelse(p050501 == 1, 1, 0) %>% as.factor(),
fixter_2 = case_when(p050501 == 2 &
p050601 == 2 &
(p050605 >= 1 & p050605 <= 6) ~ 1, TRUE ~ 0) %>% as.factor(),
fixter_3 = ifelse(empsta == 2, 1, 0) %>% as.factor(),
fixter = ifelse(fixter_1 == 1 | fixter_2 == 1 | fixter_3 == 2, 1, 0) %>% as.factor(),
daiwor_1 = if_else(p050508 == 1, 1, 0, 0) %>% as.factor(),
daiwor_2 = ifelse(empsta == 3, 1, 0) %>% as.factor(),
daiwor = ifelse(daiwor_1 == 1 | daiwor_2 == 2, 1, 0) %>% as.factor(),
agewor_1= ifelse(p050611 == 2, 1, 0) %>% as.factor(),
agewor_2 = ifelse(p050611 == 3, 1, 0) %>% as.factor(),
agewor = ifelse(agewor_1 == 1 | agewor_2 == 1, 1, 0) %>% as.factor(),
domwor = ifelse(p050613 == 1, 1, 0) %>% as.factor(),
speemp = ifelse(p050612 == 1, 1, 0) %>% as.factor(),
natpen = ifelse(p052101 == 1, 1, 0) %>% as.factor(),
spepen = ifelse(p052102 == 1, 1, 0) %>% as.factor(),
totpen = ifelse(natpen == 1 | spepen == 1, 1, 0) %>% as.factor(),
nathel = ifelse(p052103 == 1, 1, 0) %>% as.factor(),
empins = ifelse(p052104 == 1 | spepen == 1, 1, 0) %>% as.factor(),
indins = ifelse(p052105 == 1 | spepen == 1, 1, 0) %>% as.factor(),
retben = ifelse(p054101 == 1, 1, 0) %>% as.factor(),
) %>%
mutate(
standard = ifelse(partim == 0
& fixter == 0
& daiwor == 0
& agewor == 0
& domwor == 0
& speemp == 0,
1,
0),
socins = ifelse(totpen == 1
& nathel == 1
& empins == 1
& indins == 1,
1,
0)
) %>%
mutate(
KSCO5=p050350 %>% str_sub(1, 3) %>% as.factor(),
class_serv=case_when(
p050350 >= 140 & p050350 < 160 ~ 1,
p050350 >= 170 & p050350 < 190 ~ 1,
p050350 >= 240 & p050350 < 260 ~ 1,
p050350 >= 270 & p050350 < 290 ~ 1,
p050350 >= 310 & p050350 < 540 ~ 1,
TRUE ~ 0
) %>% as.factor(),
class3=case_when(
(p050350 >= 400 & p050350 < 600) | p050350 >= 900 ~ 1,
(p050350 >= 300 & p050350 < 400) | (p050350 >= 700 & p050350 < 900) ~ 2,
p050350>=0 & p050350<300 ~ 3
) %>% as.factor(),
class5 = case_when(
class_serv == 1 & class3 == 1 ~ 1,
class_serv == 0 & class3 == 1 ~ 2,
class_serv == 1 & class3 == 2 ~ 3,
class_serv == 0 & class3 == 2 ~ 4,
class3 == 3 ~ 5,
TRUE ~ NA
) %>% as.factor(),
large_firm = case_when(
(p050402 >= 1 & p050402 <= 299) | p050403 <= 7 ~ 1,
p050402 >= 300 | (p050403 >= 8 & p050403 <= 10) ~ 2, # | p050401 == 5
is.na(p050402) == TRUE & is.na(p050403) == TRUE ~ NA,
TRUE ~ NA
) %>% as.factor(),
firm_group = case_when(
(p050402 >= 1 & p050402 <= 4) | p050403 <= 1 ~ 1,
(p050402 >= 5 & p050402 <= 29) | (p050403 >= 2 & p050403 <= 3) ~ 2,
(p050402 >= 30 & p050402 <= 99) | (p050403 >= 4 & p050403 <= 6) ~ 3,
(p050402 >= 100 & p050402 <= 299) | (p050403 >= 7 & p050403 <= 7) ~ 4,
p050402 >= 300 | (p050403 >= 8 & p050403 <= 10) ~ 5,
TRUE ~ NA
) %>% as.factor(),
firmsize = case_when(
(p050402 >= 1 & p050402 <= 4) | p050403 == 1 ~ 1,
(p050402 >= 5 & p050402 <= 9) | p050403 == 2 ~ 2,
(p050402 >= 10 & p050402 <= 29) | p050403 == 3 ~ 3,
(p050402 >= 30 & p050402 <= 49) | p050403 == 4 ~ 4,
(p050402 >= 50 & p050402 <= 69) | p050403 == 5 ~ 5,
(p050402 >= 70 & p050402 <= 99) | p050403 == 6 ~ 6,
(p050402 >= 100 & p050402 <= 299) | p050403 == 7 ~ 7,
(p050402 >= 300 & p050402 <= 499) | p050403 == 8 ~ 8,
(p050402 >= 500 & p050402 <= 999) | p050403 == 9 ~ 9,
p050402 >= 1000 | p050403 == 10 ~ 10,
TRUE ~ NA
) %>% as.factor()
) %>%
filter(empsta <= 3) %>%
drop_na(male, age, youth, wage, class5, firm_group, large_firm, KSCO5)
2 Scoring The Propensity
Code
staemp23 <- glm(standard ~ male + age + log(wage) + class5,
family = binomial,
data = p23,
na.action = na.exclude)
# socins23 <- glm(socins ~ male + age + log(wage) + class5 + firmsize,
# family = binomial,
# data = p23,
# na.action = na.exclude)
socins23 <- glm(socins ~ male + age + log(wage) +
partim +
fixter_1 +
fixter_2 +
fixter_3 +
daiwor_1 +
daiwor_2 +
agewor_1 +
agewor_2 +
domwor +
speemp,
family = binomial,
data = p23,
na.action = na.exclude)
x23 <- predict(staemp23, type = "response")
y23 <- predict(socins23, type = "response")
staemp05 <- glm(standard ~ male + age + log(wage) + class5,
family = binomial,
data = p05,
na.action = na.exclude)
# socins05 <- glm(socins ~ male + age + wage + class5 + firmsize,
# family = binomial,
# data = p05,
# na.action = na.exclude)
socins05 <- glm(socins ~ male + age + log(wage) + class5 +
partim +
fixter_1 +
fixter_2 +
fixter_3 +
daiwor_1 +
daiwor_2 +
agewor_1 +
agewor_2 +
domwor +
speemp,
family = binomial,
data = p05,
na.action = na.exclude)
x05 <- predict(staemp05, type = "response")
y05 <- predict(socins05, type = "response")
df23 <- p23 %>%
mutate(x = x23,
y = y23) %>%
select(pid, x, y, standard, socins,
male, age, youth, wage,
class5, firm_group, large_firm)
g23 <- ggplot(data = df23, aes(x = x, y = y)) +
scale_x_continuous(limits = c(0, 1)) +
scale_y_continuous(limits = c(0, 1))
df05 <- p05 %>%
mutate(x = x05,
y = y05) %>%
select(pid, x, y, standard, socins,
male, age, youth, wage,
class5, firm_group, large_firm)
g05 <- ggplot(data = df05, aes(x = x, y = y)) +
scale_x_continuous(limits = c(0, 1)) +
scale_y_continuous(limits = c(0, 1))
아래 왼쪽은 2005년, 오른쪽은 2023년
구분 | ’05년 중소기업 | ’05년 대기업 | ’23년 중소기업 | ’23년 대기업 |
---|---|---|---|---|
절편 | -0.986902 | -1.0469062 | 0.456389 | 0.638658 |
계수 | 2.0717141 | 2.2160975 | 0.5373582 | 0.3605782 |
Code
grid.arrange(
g23 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_smooth(data = df23 %>% filter(large_firm == 1),
method = "lm",
color = "#d0473e") +
geom_point(data = df23 %>% filter(large_firm == 1) %>% slice_sample(n = 100),
size = 3,
color = "#d0473e",
alpha = 0.1) +
geom_point(data = df23 %>% filter(large_firm == 2) %>% slice_sample(n = 100),
size = 3,
color = "#306ec0",
alpha = 0.01),
g23 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_smooth(data = df23 %>% filter(large_firm == 2),
method = "lm",
color = "#306ec0") +
geom_point(data = df23 %>% filter(large_firm == 2) %>% slice_sample(n = 100),
size = 3,
color = "#306ec0",
alpha = 0.1) +
geom_point(data = df23 %>% filter(large_firm == 1) %>% slice_sample(n = 100),
size = 3,
color = "#d0473e",
alpha = 0.01),
g23 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_point(aes(color = class5), size = 3, alpha = 0.1),
g23 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_point(aes(color = male), size = 3, alpha = 0.1),
nrow = 2,
ncol = 2
)
Code
grid.arrange(
g05 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_smooth(data = df05 %>% filter(large_firm == 1),
method = "lm",
color = "#d0473e") +
geom_point(data = df05 %>% filter(large_firm == 1) %>% slice_sample(n = 100),
size = 3,
color = "#d0473e",
alpha = 0.1) +
geom_point(data = df05 %>% filter(large_firm == 2) %>% slice_sample(n = 100),
size = 3,
color = "#306ec0",
alpha = 0.01),
g05 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_smooth(data = df05 %>% filter(large_firm == 2),
method = "lm",
color = "#306ec0") +
geom_point(data = df05 %>% filter(large_firm == 2) %>% slice_sample(n = 100),
size = 3,
color = "#306ec0",
alpha = 0.1) +
geom_point(data = df05 %>% filter(large_firm == 1) %>% slice_sample(n = 100),
size = 3,
color = "#d0473e",
alpha = 0.01),
g05 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_point(aes(color = class5), size = 3, alpha = 0.1),
g05 +
theme(legend.position = c(0.2, 0.5),
legend.background = element_rect(fill = alpha(0.1))) +
geom_point(aes(color = male), size = 3, alpha = 0.1),
nrow = 2,
ncol = 2
)
어렵다 어려워. 직종을 줄이니 차이가 거의 없어짐. 대기업효과 보다는 직종, 성별 효과가 더 큰걸까? 만약 그게 맞다면, 뭐하러 이 그래프를 그리냐? 그냥 상호작용 항으로 선형모델에 적합시키면 되지.
df23 <- df23 %>%
mutate(
x.q = case_when(
x > summary(df23$x)[5] ~ "1/4",
x > summary(df23$x)[3] & x <= summary(df23$x)[5] ~ "2/4",
x > summary(df23$x)[2] & x <= summary(df23$x)[3] ~ "3/4",
x <= summary(df23$x)[2] ~ "4/4",
),
y.q = case_when(
y > summary(df23$y)[5] ~ "1/4",
y > summary(df23$y)[3] & y <= summary(df23$y)[5] ~ "2/4",
y > summary(df23$y)[2] & y <= summary(df23$y)[3] ~ "3/4",
y <= summary(df23$y)[2] ~ "4/4",
)
)
mytable_sub(x.q ~ male + youth + class5 + large_firm + standard + socins, df23)
Descriptive Statistics by 'x.q'
—————————————————————————————————————————————————————————————————————
1/4 2/4 3/4 4/4 p
(N=1621) (N=1620) (N=1620) (N=1621)
—————————————————————————————————————————————————————————————————————
male 0.000
- 0 87 ( 5.4%) 581 (35.9%) 844 (52.1%) 1217 (75.1%)
- 1 1534 (94.6%) 1039 (64.1%) 776 (47.9%) 404 (24.9%)
youth 0.000
- 0 1198 (73.9%) 1203 (74.3%) 1305 (80.6%) 1396 (86.1%)
- 1 423 (26.1%) 417 (25.7%) 315 (19.4%) 225 (13.9%)
class5 0.000
- 1 1 ( 0.1%) 16 ( 1.0%) 190 (11.7%) 764 (47.1%)
- 2 0 ( 0.0%) 0 ( 0.0%) 8 ( 0.5%) 595 (36.7%)
- 3 827 (51.0%) 420 (25.9%) 190 (11.7%) 7 ( 0.4%)
- 4 92 ( 5.7%) 679 (41.9%) 608 (37.5%) 201 (12.4%)
- 5 701 (43.2%) 505 (31.2%) 624 (38.5%) 54 ( 3.3%)
large_firm 0.000
- 1 1054 (65.0%) 1238 (76.4%) 1407 (86.9%) 1475 (91.0%)
- 2 567 (35.0%) 382 (23.6%) 213 (13.1%) 146 ( 9.0%)
standard 0.000
- 0 183 (11.3%) 309 (19.1%) 523 (32.3%) 905 (55.8%)
- 1 1438 (88.7%) 1311 (80.9%) 1097 (67.7%) 716 (44.2%)
socins 0.000
- 0 113 ( 7.0%) 205 (12.7%) 371 (22.9%) 701 (43.2%)
- 1 1508 (93.0%) 1415 (87.3%) 1249 (77.1%) 920 (56.8%)
—————————————————————————————————————————————————————————————————————
Descriptive Statistics by 'y.q'
—————————————————————————————————————————————————————————————————————
1/4 2/4 3/4 4/4 p
(N=1621) (N=1620) (N=1620) (N=1621)
—————————————————————————————————————————————————————————————————————
male 0.000
- 0 548 (33.8%) 480 (29.6%) 725 (44.8%) 976 (60.2%)
- 1 1073 (66.2%) 1140 (70.4%) 895 (55.2%) 645 (39.8%)
youth 0.000
- 0 714 (44.0%) 1493 (92.2%) 1544 (95.3%) 1351 (83.3%)
- 1 907 (56.0%) 127 ( 7.8%) 76 ( 4.7%) 270 (16.7%)
class5 0.000
- 1 121 ( 7.5%) 140 ( 8.6%) 212 (13.1%) 498 (30.7%)
- 2 45 ( 2.8%) 53 ( 3.3%) 147 ( 9.1%) 358 (22.1%)
- 3 483 (29.8%) 474 (29.3%) 337 (20.8%) 150 ( 9.3%)
- 4 314 (19.4%) 409 (25.2%) 494 (30.5%) 363 (22.4%)
- 5 658 (40.6%) 544 (33.6%) 430 (26.5%) 252 (15.5%)
large_firm 0.000
- 1 1106 (68.2%) 1188 (73.3%) 1386 (85.6%) 1494 (92.2%)
- 2 515 (31.8%) 432 (26.7%) 234 (14.4%) 127 ( 7.8%)
standard 0.000
- 0 104 ( 6.4%) 137 ( 8.5%) 291 (18.0%) 1388 (85.6%)
- 1 1517 (93.6%) 1483 (91.5%) 1329 (82.0%) 233 (14.4%)
socins 0.000
- 0 74 ( 4.6%) 92 ( 5.7%) 248 (15.3%) 976 (60.2%)
- 1 1547 (95.4%) 1528 (94.3%) 1372 (84.7%) 645 (39.8%)
—————————————————————————————————————————————————————————————————————