There are many datasets on WRDS which you may explore directly using R. You first need to load some packages: tidyverse
for data manipulation and PostGreSQL
to establish an SQL connection using PostGreSQL. knitr
and kableExtra
are also loaded to produce pretty tables on this tutorial, and scales
to add features to ggplot2
.
You may open a connection loading your WRDS username and password into wrds_username
and wrds_password
, whose details will be loaded in a PqConnection
object named wrds
.
The list of variables for most datasets is available here: https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm
Next, you may list all datasets which are available with WRDS using the following dbSendQuery
request.
dbSendQuery(wrds, "select distinct table_schema
from information_schema.tables
where table_type ='VIEW'
or table_type ='FOREIGN TABLE'
order by table_schema") %>%
dbFetch %>%
print_table_long
table_schema |
---|
aha |
ahasamp |
audit |
blab |
block |
boardex |
boardsmp |
bvd |
bvdsamp |
calcbnch |
cboe |
centris |
ciq |
ciqsamp |
cisdm |
cisdmsmp |
clrvt |
clrvtsmp |
comp |
compa |
compb |
compdcur |
compg |
comph |
compm |
compmcur |
compsamp |
compseg |
compsnap |
comscore |
contrib |
crsp |
crspa |
crspm |
crspq |
crspsamp |
csmar |
dealscan |
djones |
dmef |
doe |
emdb |
etfg |
etfgsamp |
eureka |
execcomp |
factset |
ff |
fisd |
fisdsamp |
frb |
fssamp |
ftse |
ginsight |
gmi |
govpx |
govpxsmp |
gsi |
hbsamp |
hfr |
hfrsamp |
ibes |
ibescorp |
ibeskpi |
ifgr |
ifgrsamp |
ims |
imssamp |
information_schema |
iri |
kld |
ktmine |
ktsamp |
levin |
lspd |
lvnsamp |
macrofin |
markit |
mfl |
mrktsamp |
msfanly |
msfinst |
msrb |
msrbsamp |
omtrial |
optionm |
otc |
pg_catalog |
phlx |
ppublica |
ppubsamp |
public |
pwt |
rent |
reprisk |
repsamp |
revere |
risk |
risksamp |
rpna |
rpnasamp |
sdc |
sdcsamp |
secsamp |
snapsamp |
snl |
snlsamp |
sprat |
sustain |
sustsamp |
taqmsamp |
taqmsec |
taqsamp |
tass |
tfn |
totalq |
toyo |
trace |
trown |
trsamp |
trsdcgs |
trws |
twoiq |
twoiqsmp |
wrdsapps |
wrdsrpts |
wrdssec |
zacks |
zacksamp |
Only one database here. For instance, here is the variables in table cboe
of database cboe
.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='cboe'
and table_name='cboe'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "date" "vix" "vixh" "vixl" "vixo" "vxd" "vxdh" "vxdl" "vxdo" "vxn"
# [11] "vxnh" "vxnl" "vxno" "vxo" "vxoh" "vxol" "vxoo"
For instance, here is the table names in database crsp
.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='crsp' order by table_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "acti" "asia"
# [3] "asib" "asic"
# [5] "asio" "asix"
# [7] "bmdebt" "bmheader"
# [9] "bmpaymts" "bmquotes"
# [11] "bmyield" "bndprt06"
# [13] "bndprt12" "bxcalind"
# [15] "bxdlyind" "bxmthind"
# [17] "bxquotes" "bxyield"
# [19] "cap" "ccm_lookup"
# [21] "ccm_qvards" "ccmxpf_linktable"
# [23] "ccmxpf_lnkhist" "ccmxpf_lnkrng"
# [25] "ccmxpf_lnkused" "comphead"
# [27] "comphist" "compmaster"
# [29] "contact_info" "crsp_cik_map"
# [31] "crsp_daily_data" "crsp_header"
# [33] "crsp_monthly_data" "crsp_names"
# [35] "crsp_portno_map" "crsp_ziman_daily_index"
# [37] "crsp_ziman_monthly_index" "cs20yr"
# [39] "cs5yr" "cs90d"
# [41] "cst_hist" "daily_nav"
# [43] "daily_nav_ret" "daily_returns"
# [45] "dividends" "dport1"
# [47] "dport2" "dport3"
# [49] "dport4" "dport5"
# [51] "dport6" "dport7"
# [53] "dport8" "dport9"
# [55] "dsbc" "dsbo"
# [57] "dse" "dse62"
# [59] "dse62delist" "dse62dist"
# [61] "dse62exchdates" "dse62names"
# [63] "dse62nasdin" "dse62shares"
# [65] "dseall" "dseall62"
# [67] "dsedelist" "dsedist"
# [69] "dseexchdates" "dsenames"
# [71] "dsenasdin" "dseshares"
# [73] "dsf" "dsf62"
# [75] "dsfhdr" "dsfhdr62"
# [77] "dsi" "dsi62"
# [79] "dsia" "dsib"
# [81] "dsic" "dsio"
# [83] "dsir" "dsix"
# [85] "dsiy" "dsp500"
# [87] "dsp500list" "dsp500p"
# [89] "dssc" "dsso"
# [91] "eod_cap" "eod_sector"
# [93] "eod_vg" "erdport1"
# [95] "erdport2" "erdport3"
# [97] "erdport4" "erdport5"
# [99] "erdport6" "erdport7"
# [101] "erdport8" "erdport9"
# [103] "ermport1" "ermport2"
# [105] "ermport3" "ermport4"
# [107] "ermport5" "fbpri"
# [109] "fbyld" "front_load"
# [111] "front_load_det" "front_load_grp"
# [113] "fund_fees" "fund_flows"
# [115] "fund_hdr" "fund_hdr_hist"
# [117] "fund_names" "fund_style"
# [119] "fund_summary" "fund_summary2"
# [121] "fwdask06" "fwdask12"
# [123] "fwdave06" "fwdave12"
# [125] "fwdbid06" "fwdbid12"
# [127] "hldask06" "hldask12"
# [129] "hldave06" "hldave12"
# [131] "hldbid06" "hldbid12"
# [133] "holdings" "index_descriptions"
# [135] "index_type_map" "mbi"
# [137] "mbmdat" "mbmhdr"
# [139] "mbx" "mbxid"
# [141] "mcti" "mfdbname"
# [143] "mhista" "mhistn"
# [145] "mhistq" "monthly_nav"
# [147] "monthly_returns" "monthly_tna"
# [149] "monthly_tna_ret_nav" "mport1"
# [151] "mport2" "mport3"
# [153] "mport4" "mport5"
# [155] "mse" "mse62"
# [157] "mse62delist" "mse62dist"
# [159] "mse62exchdates" "mse62names"
# [161] "mse62nasdin" "mse62shares"
# [163] "mseall" "mseall62"
# [165] "msedelist" "msedist"
# [167] "mseexchdates" "msenames"
# [169] "msenasdin" "mseshares"
# [171] "msf" "msf62"
# [173] "msfhdr" "msfhdr62"
# [175] "msi" "msi62"
# [177] "msia" "msib"
# [179] "msic" "msio"
# [181] "msir" "msix"
# [183] "msiy" "msp500"
# [185] "msp500list" "msp500p"
# [187] "portnomap" "priask06"
# [189] "priask12" "priave06"
# [191] "priave12" "pribid06"
# [193] "pribid12" "price_type"
# [195] "property_type" "qcti"
# [197] "qsia" "qsib"
# [199] "qsic" "qsio"
# [201] "qsix" "rear_load"
# [203] "rear_load_det" "rear_load_grp"
# [205] "rebala" "rebaln"
# [207] "rebalq" "reit_type"
# [209] "riskfree" "s6z_agg_ann"
# [211] "s6z_agg_mth" "s6z_agg_qtr"
# [213] "s6z_del" "s6z_dind"
# [215] "s6z_dis" "s6z_dp_dly"
# [217] "s6z_ds_dly" "s6z_hdr"
# [219] "s6z_indhdr" "s6z_mdel"
# [221] "s6z_mind" "s6z_mth"
# [223] "s6z_nam" "s6z_ndi"
# [225] "s6z_shr" "saz_agg_ann"
# [227] "saz_agg_mth" "saz_agg_qtr"
# [229] "saz_del" "saz_dind"
# [231] "saz_dis" "saz_dp_dly"
# [233] "saz_ds_dly" "saz_hdr"
# [235] "saz_indhdr" "saz_mdel"
# [237] "saz_mind" "saz_mth"
# [239] "saz_nam" "saz_ndi"
# [241] "saz_shr" "sechead"
# [243] "sechist" "sector"
# [245] "sfz_dind" "sfz_indhdr"
# [247] "sfz_mbr" "sfz_mind"
# [249] "sfz_portd" "sfz_portm"
# [251] "sfz_rb" "stock_qvards"
# [253] "stocknames" "stocknames62"
# [255] "sub_property_type" "tfz_dly"
# [257] "tfz_dly_cd" "tfz_dly_cpi"
# [259] "tfz_dly_ft" "tfz_dly_rf2"
# [261] "tfz_dly_ts2" "tfz_idx"
# [263] "tfz_iss" "tfz_mast"
# [265] "tfz_mth" "tfz_mth_bp"
# [267] "tfz_mth_cd" "tfz_mth_cpi"
# [269] "tfz_mth_fb" "tfz_mth_ft"
# [271] "tfz_mth_rf" "tfz_mth_rf2"
# [273] "tfz_mth_ts" "tfz_mth_ts2"
# [275] "tfz_pay" "vg"
# [277] "yldask06" "yldask12"
# [279] "yldave06" "yldave12"
# [281] "yldbid06" "yldbid12"
# [283] "ziman_reit_info" "zr_hdrnames"
For instance, here is the variables in table msf
of database crsp
.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='crsp'
and table_name='msf'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "altprc" "altprcdt" "ask" "askhi" "bid" "bidlo"
# [7] "cfacpr" "cfacshr" "cusip" "date" "hexcd" "hsiccd"
# [13] "issuno" "permco" "permno" "prc" "ret" "retx"
# [19] "shrout" "spread" "vol"
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='dealscan' order by table_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "borrowerbase" "chars"
# [3] "company" "currfacpricing"
# [5] "dealamendment" "dealpurposecomment"
# [7] "facility" "facilityamendment"
# [9] "facilitydates" "facilityguarantor"
# [11] "facilitypaymentschedule" "facilityrepaymentcomment"
# [13] "facilitysecurity" "facilitysponsor"
# [15] "financialcovenant" "financialratios"
# [17] "lendershares" "lins"
# [19] "marketsegment" "networthcovenant"
# [21] "organizationtype" "package"
# [23] "packageassignmentcomment" "packageprepaymentcomment"
# [25] "performancepricing" "performancepricingcomments"
# [27] "sublimits"
For instance, here is the variables in table borrowerbase
.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='dealscan'
and table_name='borrowerbase'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "borrowerbasepercentage" "borrowerbasetype" "comment"
# [4] "facilityid"
For instance, here is the table names in database execcomp
.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='execcomp' order by table_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "anncomp" "codirfin" "colev"
# [4] "coperol" "deferredcomp" "directorcomp"
# [7] "ex_black" "ex_header" "exnames"
# [10] "ltawdtab" "outstandingawards" "pension"
# [13] "person" "planbasedawards" "stgrttab"
For instance, here is the variables in table msf
of database crsp
.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='execcomp'
and table_name='anncomp'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "address" "age"
# [3] "allothpd" "allothtot"
# [5] "becameceo" "bonus"
# [7] "ceoann" "cfoann"
# [9] "chg_ctrl_pymt" "city"
# [11] "co_per_rol" "comment"
# [13] "coname" "cusip"
# [15] "defer_balance_tot" "defer_contrib_co_tot"
# [17] "defer_contrib_exec_tot" "defer_earnings_tot"
# [19] "defer_rpt_as_comp_tot" "defer_withdr_tot"
# [21] "eip_unearn_num" "eip_unearn_val"
# [23] "exchange" "exec_fname"
# [25] "exec_fullname" "exec_lname"
# [27] "exec_mname" "execdir"
# [29] "execid" "execrank"
# [31] "execrankann" "gender"
# [33] "gvkey" "inddesc"
# [35] "interlock" "joined_co"
# [37] "leftco" "leftofc"
# [39] "ltip" "naics"
# [41] "naicsdesc" "nameprefix"
# [43] "noneq_incent" "old_datafmt_flag"
# [45] "opt_exer_num" "opt_exer_val"
# [47] "opt_unex_exer_est_val" "opt_unex_exer_num"
# [49] "opt_unex_unexer_est_val" "opt_unex_unexer_num"
# [51] "option_awards" "option_awards_blk_value"
# [53] "option_awards_fv" "option_awards_num"
# [55] "option_awards_rpt_value" "othann"
# [57] "othcomp" "page"
# [59] "pceo" "pcfo"
# [61] "pension_chg" "pension_pymts_tot"
# [63] "pension_value_tot" "reason"
# [65] "rejoin" "releft"
# [67] "reprice" "ret_yrs"
# [69] "rstkgrnt" "rstkvyrs"
# [71] "sal_pct" "salary"
# [73] "shrown_excl_opts" "shrown_excl_opts_pct"
# [75] "shrown_tot" "shrown_tot_pct"
# [77] "shrs_vest_num" "shrs_vest_val"
# [79] "sic" "sicdesc"
# [81] "spcode" "spindex"
# [83] "state" "stock_awards"
# [85] "stock_awards_fv" "stock_unvest_num"
# [87] "stock_unvest_val" "sub_tele"
# [89] "tdc1" "tdc1_pct"
# [91] "tdc2" "tdc2_pct"
# [93] "tele" "term_pymt"
# [95] "ticker" "title"
# [97] "titleann" "total_alt1"
# [99] "total_alt1_pct" "total_alt2"
# [101] "total_alt2_pct" "total_curr"
# [103] "total_curr_pct" "total_sec"
# [105] "total_sec_pct" "year"
# [107] "zip"
For instance, here is the table names in database crsp
.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='frb' order by table_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "fx_daily" "fx_monthly" "rates_daily" "rates_monthly"
For instance, here is the variables in table msf
of database crsp
.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='frb'
and table_name='fx_daily'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "date" "exalus" "exauus" "exbeus" "exbzus" "excaus" "exchus"
# [8] "exdnus" "execus" "exeuus" "exfnus" "exfrus" "exgeus" "exgrus"
# [15] "exhkus" "exinus" "exirus" "exitus" "exjpus" "exkous" "exmaus"
# [22] "exmxus" "exneus" "exnous" "exnzus" "expous" "exsdus" "exsfus"
# [29] "exsius" "exslus" "exspus" "exszus" "extaus" "exthus" "exukus"
# [36] "exusal" "exusec" "exuseu" "exusir" "exusnz" "exusuk" "exvzus"
# [43] "indexgx" "twexb" "twexm" "twexo"
For instance, here is the table names in database crsp
.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='trace' order by table_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "absmasterfile" "cmomasterfile"
# [3] "masterfile" "mbsmasterfile"
# [5] "tbamasterfile" "trace"
# [7] "trace_agency" "trace_agency_names"
# [9] "trace_btds144a" "trace_btds144a_names"
# [11] "trace_enhanced" "trace_enhanced_names"
# [13] "trace_names" "trace_spds144a_abs"
# [15] "trace_spds144a_abs_names" "trace_spds144a_cmo"
# [17] "trace_spds144a_cmo_names" "trace_spds_abs"
# [19] "trace_spds_abs_names" "trace_spds_cmo"
# [21] "trace_spds_cmo_names" "trace_spds_mbs"
# [23] "trace_spds_mbs_names" "trace_spds_tba"
# [25] "trace_spds_tba_names" "trade_summary"
# [27] "trade_summary_agency" "trade_summary_btds144a"
# [29] "trade_summary_spds144a_abs" "trade_summary_spds144a_cmo"
# [31] "trade_summary_spds_abs" "trade_summary_spds_cmo"
# [33] "trade_summary_spds_mbs" "trade_summary_spds_tba"
For instance, here is the variables in table msf
of database crsp
.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='trace'
and table_name='trace'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname
# [1] "ascii_rptd_vol_tx" "asof_cd" "ats_indicator"
# [4] "bond_sym_id" "bsym" "chng_cd"
# [7] "cmsn_trd" "company_symbol" "contra_party_type"
# [10] "cusip_id" "days_to_sttl_ct" "diss_rptg_side_cd"
# [13] "frmt_cd" "function" "high_yld_pt"
# [16] "high_yld_sign_cd" "low_yld_pt" "low_yld_sign_cd"
# [19] "lsal_yld_pt" "lsal_yld_sign_cd" "msg_seq_nb"
# [22] "orig_dis_dt" "orig_msg_seq_nb" "remuneration"
# [25] "rptd_high_pr" "rptd_last_pr" "rptd_low_pr"
# [28] "rptd_pr" "rptg_party_type" "sale_cndtn2_cd"
# [31] "sale_cndtn_cd" "side" "spcl_trd_fl"
# [34] "sttl_dt" "sub_prd_type" "trans_dt"
# [37] "trc_st" "trd_exctn_dt" "trd_exctn_tm"
# [40] "wis_fl" "yld_pt" "yld_sign_cd"