# aggregatedata_emp ----
# Economy-wide annual totals of employment (emp) and total assets (at) from
# comp.funda, restricted to firms that report emp in every row of the
# 2001-2017 window, plus the year-over-year log changes of both totals.
#
# The result set is released explicitly: a result opened with dbSendQuery()
# stays open on the connection until dbClearResult() is called, so the fetch
# is wrapped in tryCatch(finally = ...) to free it even when the fetch fails.
emp_query <- dbSendQuery(wrds,
"select gvkey, fyear, emp, at from comp.funda
where DATAFMT='STD' and POPSRC='D' and CONSOL='C' and FYEAR > 2000
and FYEAR < 2018 and INDFMT = 'INDL' and FIC='USA'")
emp_raw <- tryCatch(dbFetch(emp_query), finally = dbClearResult(emp_query))

aggregatedata_emp <- emp_raw %>%
  filter(!is.na(emp)) %>%
  # Balanced panel: keep only firms with 17 non-missing emp rows, i.e. the
  # number of fiscal years between 2001 and 2017 inclusive
  # (presumably one row per firm-year -- TODO confirm).
  group_by(gvkey) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count == 17) %>%
  # NOTE(review): only emp is NA-filtered; a missing `at` makes that year's
  # asset total NA. Confirm whether na.rm = TRUE is wanted here.
  group_by(fyear) %>%
  summarise(emp = sum(emp),
            at = sum(at)) %>%
  # First differences of logs; the first year has no predecessor and is NA.
  mutate(emp_log = log(emp),
         emp_log_d1 = emp_log - lag(emp_log, 1),
         at_log = log(at),
         at_log_d1 = at_log - lag(at_log, 1)) %>%
  # Drop the intermediate log levels, keeping the *_d1 growth columns.
  select(-ends_with("_log"))
# aggregatedata_emp2 ----
# Same balanced-panel aggregation as the economy-wide series, but split by
# the first two characters of the historical NAICS code (naicsh), giving one
# row per (naics2dig, fyear) with totals and within-industry log changes.
#
# The result set is released explicitly: a result opened with dbSendQuery()
# stays open on the connection until dbClearResult() is called, so the fetch
# is wrapped in tryCatch(finally = ...) to free it even when the fetch fails.
emp2_query <- dbSendQuery(wrds,
"select gvkey, fyear, emp, at, funda.naicsh from comp.funda
where DATAFMT='STD' and POPSRC='D' and CONSOL='C' and FYEAR > 2000
and FYEAR < 2018 and INDFMT = 'INDL' and FIC='USA'")
emp2_raw <- tryCatch(dbFetch(emp2_query), finally = dbClearResult(emp2_query))

aggregatedata_emp2 <- emp2_raw %>%
  # Two-character industry prefix taken from the NAICS code.
  mutate(naics2dig = as.factor(substr(naicsh, 1, 2))) %>%
  filter(!is.na(emp)) %>%
  # Balanced panel: keep only firms with 17 non-missing emp rows, i.e. the
  # number of fiscal years between 2001 and 2017 inclusive
  # (presumably one row per firm-year -- TODO confirm).
  group_by(gvkey) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count == 17) %>%
  # NOTE(review): only emp is NA-filtered; a missing `at` makes that
  # industry-year's asset total NA. Confirm whether na.rm = TRUE is wanted.
  group_by(naics2dig, fyear) %>%
  summarise(emp = sum(emp),
            at = sum(at)) %>%
  # summarise() peels off the last grouping level (fyear) by default, so the
  # data are still grouped by naics2dig and lag() stays within an industry.
  mutate(emp_log = log(emp),
         emp_log_d1 = emp_log - lag(emp_log, 1),
         at_log = log(at),
         at_log_d1 = at_log - lag(at_log, 1)) %>%
  # Drop the intermediate log levels, keeping the *_d1 growth columns.
  select(-ends_with("_log")) %>%
  ungroup()
# Roll the industry-level panel back up to one row per fiscal year and
# recompute the log changes on the yearly totals. The result is printed;
# when knitting to HTML it is shown as an interactive datatable instead.
aggregatedata_emp2 %>%
  group_by(fyear) %>%
  summarise(emp = sum(emp),
            at = sum(at)) %>%
  # First differences of logs; the first year has no predecessor and is NA.
  mutate(emp_log = log(emp),
         emp_log_d1 = emp_log - lag(emp_log, 1),
         at_log = log(at),
         at_log_d1 = at_log - lag(at_log, 1)) %>%
  select(-ends_with("_log")) %>%
  # Spell out FALSE: the `F` alias is an ordinary variable that can be
  # reassigned, which would silently flip this argument.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}
# Line chart of the yearly log change in aggregate employment. Rows where
# the change is NA (no previous year to difference against) are dropped
# before plotting.
ggplot(
  data = na.omit(select(aggregatedata_emp, fyear, emp_log_d1)),
  mapping = aes(x = fyear, y = emp_log_d1)
) +
  geom_line() +
  theme_bw()
# V1 V2 V3 V4
# 1: 4 11111 Soybean Farming
# 2: 5 111110 Soybean Farming