There are many datasets on WRDS which you may explore directly using R. You first need to load some packages: tidyverse for data manipulation and PostGreSQL to establish an SQL connection using PostGreSQL. knitr and kableExtra are also loaded to produce pretty tables on this tutorial, and scales to add features to ggplot2.
You may open a connection loading your WRDS username and password into wrds_username and wrds_password, whose details will be loaded in a PqConnection object named wrds.
The list of variables for most datasets is available here: https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm
Next, you may list all datasets which are available with WRDS using the following dbSendQuery request.
dbSendQuery(wrds, "select distinct table_schema
from information_schema.tables
where table_type ='VIEW'
or table_type ='FOREIGN TABLE'
order by table_schema") %>%
dbFetch %>%
print_table_long| table_schema |
|---|
| aha |
| ahasamp |
| audit |
| blab |
| block |
| boardex |
| boardsmp |
| bvd |
| bvdsamp |
| calcbnch |
| cboe |
| centris |
| ciq |
| ciqsamp |
| cisdm |
| cisdmsmp |
| clrvt |
| clrvtsmp |
| comp |
| compa |
| compb |
| compdcur |
| compg |
| comph |
| compm |
| compmcur |
| compsamp |
| compseg |
| compsnap |
| comscore |
| contrib |
| crsp |
| crspa |
| crspm |
| crspq |
| crspsamp |
| csmar |
| dealscan |
| djones |
| dmef |
| doe |
| emdb |
| etfg |
| etfgsamp |
| eureka |
| execcomp |
| factset |
| ff |
| fisd |
| fisdsamp |
| frb |
| fssamp |
| ftse |
| ginsight |
| gmi |
| govpx |
| govpxsmp |
| gsi |
| hbsamp |
| hfr |
| hfrsamp |
| ibes |
| ibescorp |
| ibeskpi |
| ifgr |
| ifgrsamp |
| ims |
| imssamp |
| information_schema |
| iri |
| kld |
| ktmine |
| ktsamp |
| levin |
| lspd |
| lvnsamp |
| macrofin |
| markit |
| mfl |
| mrktsamp |
| msfanly |
| msfinst |
| msrb |
| msrbsamp |
| omtrial |
| optionm |
| otc |
| pg_catalog |
| phlx |
| ppublica |
| ppubsamp |
| public |
| pwt |
| rent |
| reprisk |
| repsamp |
| revere |
| risk |
| risksamp |
| rpna |
| rpnasamp |
| sdc |
| sdcsamp |
| secsamp |
| snapsamp |
| snl |
| snlsamp |
| sprat |
| sustain |
| sustsamp |
| taqmsamp |
| taqmsec |
| taqsamp |
| tass |
| tfn |
| totalq |
| toyo |
| trace |
| trown |
| trsamp |
| trsdcgs |
| trws |
| twoiq |
| twoiqsmp |
| wrdsapps |
| wrdsrpts |
| wrdssec |
| zacks |
| zacksamp |
Only one database here. For instance, here is the variables in table cboe of database cboe.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='cboe'
and table_name='cboe'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "date" "vix" "vixh" "vixl" "vixo" "vxd" "vxdh" "vxdl" "vxdo" "vxn"
# [11] "vxnh" "vxnl" "vxno" "vxo" "vxoh" "vxol" "vxoo"
For instance, here is the table names in database crsp.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='crsp' order by table_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "acti" "asia"
# [3] "asib" "asic"
# [5] "asio" "asix"
# [7] "bmdebt" "bmheader"
# [9] "bmpaymts" "bmquotes"
# [11] "bmyield" "bndprt06"
# [13] "bndprt12" "bxcalind"
# [15] "bxdlyind" "bxmthind"
# [17] "bxquotes" "bxyield"
# [19] "cap" "ccm_lookup"
# [21] "ccm_qvards" "ccmxpf_linktable"
# [23] "ccmxpf_lnkhist" "ccmxpf_lnkrng"
# [25] "ccmxpf_lnkused" "comphead"
# [27] "comphist" "compmaster"
# [29] "contact_info" "crsp_cik_map"
# [31] "crsp_daily_data" "crsp_header"
# [33] "crsp_monthly_data" "crsp_names"
# [35] "crsp_portno_map" "crsp_ziman_daily_index"
# [37] "crsp_ziman_monthly_index" "cs20yr"
# [39] "cs5yr" "cs90d"
# [41] "cst_hist" "daily_nav"
# [43] "daily_nav_ret" "daily_returns"
# [45] "dividends" "dport1"
# [47] "dport2" "dport3"
# [49] "dport4" "dport5"
# [51] "dport6" "dport7"
# [53] "dport8" "dport9"
# [55] "dsbc" "dsbo"
# [57] "dse" "dse62"
# [59] "dse62delist" "dse62dist"
# [61] "dse62exchdates" "dse62names"
# [63] "dse62nasdin" "dse62shares"
# [65] "dseall" "dseall62"
# [67] "dsedelist" "dsedist"
# [69] "dseexchdates" "dsenames"
# [71] "dsenasdin" "dseshares"
# [73] "dsf" "dsf62"
# [75] "dsfhdr" "dsfhdr62"
# [77] "dsi" "dsi62"
# [79] "dsia" "dsib"
# [81] "dsic" "dsio"
# [83] "dsir" "dsix"
# [85] "dsiy" "dsp500"
# [87] "dsp500list" "dsp500p"
# [89] "dssc" "dsso"
# [91] "eod_cap" "eod_sector"
# [93] "eod_vg" "erdport1"
# [95] "erdport2" "erdport3"
# [97] "erdport4" "erdport5"
# [99] "erdport6" "erdport7"
# [101] "erdport8" "erdport9"
# [103] "ermport1" "ermport2"
# [105] "ermport3" "ermport4"
# [107] "ermport5" "fbpri"
# [109] "fbyld" "front_load"
# [111] "front_load_det" "front_load_grp"
# [113] "fund_fees" "fund_flows"
# [115] "fund_hdr" "fund_hdr_hist"
# [117] "fund_names" "fund_style"
# [119] "fund_summary" "fund_summary2"
# [121] "fwdask06" "fwdask12"
# [123] "fwdave06" "fwdave12"
# [125] "fwdbid06" "fwdbid12"
# [127] "hldask06" "hldask12"
# [129] "hldave06" "hldave12"
# [131] "hldbid06" "hldbid12"
# [133] "holdings" "index_descriptions"
# [135] "index_type_map" "mbi"
# [137] "mbmdat" "mbmhdr"
# [139] "mbx" "mbxid"
# [141] "mcti" "mfdbname"
# [143] "mhista" "mhistn"
# [145] "mhistq" "monthly_nav"
# [147] "monthly_returns" "monthly_tna"
# [149] "monthly_tna_ret_nav" "mport1"
# [151] "mport2" "mport3"
# [153] "mport4" "mport5"
# [155] "mse" "mse62"
# [157] "mse62delist" "mse62dist"
# [159] "mse62exchdates" "mse62names"
# [161] "mse62nasdin" "mse62shares"
# [163] "mseall" "mseall62"
# [165] "msedelist" "msedist"
# [167] "mseexchdates" "msenames"
# [169] "msenasdin" "mseshares"
# [171] "msf" "msf62"
# [173] "msfhdr" "msfhdr62"
# [175] "msi" "msi62"
# [177] "msia" "msib"
# [179] "msic" "msio"
# [181] "msir" "msix"
# [183] "msiy" "msp500"
# [185] "msp500list" "msp500p"
# [187] "portnomap" "priask06"
# [189] "priask12" "priave06"
# [191] "priave12" "pribid06"
# [193] "pribid12" "price_type"
# [195] "property_type" "qcti"
# [197] "qsia" "qsib"
# [199] "qsic" "qsio"
# [201] "qsix" "rear_load"
# [203] "rear_load_det" "rear_load_grp"
# [205] "rebala" "rebaln"
# [207] "rebalq" "reit_type"
# [209] "riskfree" "s6z_agg_ann"
# [211] "s6z_agg_mth" "s6z_agg_qtr"
# [213] "s6z_del" "s6z_dind"
# [215] "s6z_dis" "s6z_dp_dly"
# [217] "s6z_ds_dly" "s6z_hdr"
# [219] "s6z_indhdr" "s6z_mdel"
# [221] "s6z_mind" "s6z_mth"
# [223] "s6z_nam" "s6z_ndi"
# [225] "s6z_shr" "saz_agg_ann"
# [227] "saz_agg_mth" "saz_agg_qtr"
# [229] "saz_del" "saz_dind"
# [231] "saz_dis" "saz_dp_dly"
# [233] "saz_ds_dly" "saz_hdr"
# [235] "saz_indhdr" "saz_mdel"
# [237] "saz_mind" "saz_mth"
# [239] "saz_nam" "saz_ndi"
# [241] "saz_shr" "sechead"
# [243] "sechist" "sector"
# [245] "sfz_dind" "sfz_indhdr"
# [247] "sfz_mbr" "sfz_mind"
# [249] "sfz_portd" "sfz_portm"
# [251] "sfz_rb" "stock_qvards"
# [253] "stocknames" "stocknames62"
# [255] "sub_property_type" "tfz_dly"
# [257] "tfz_dly_cd" "tfz_dly_cpi"
# [259] "tfz_dly_ft" "tfz_dly_rf2"
# [261] "tfz_dly_ts2" "tfz_idx"
# [263] "tfz_iss" "tfz_mast"
# [265] "tfz_mth" "tfz_mth_bp"
# [267] "tfz_mth_cd" "tfz_mth_cpi"
# [269] "tfz_mth_fb" "tfz_mth_ft"
# [271] "tfz_mth_rf" "tfz_mth_rf2"
# [273] "tfz_mth_ts" "tfz_mth_ts2"
# [275] "tfz_pay" "vg"
# [277] "yldask06" "yldask12"
# [279] "yldave06" "yldave12"
# [281] "yldbid06" "yldbid12"
# [283] "ziman_reit_info" "zr_hdrnames"
For instance, here is the variables in table msf of database crsp.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='crsp'
and table_name='msf'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "altprc" "altprcdt" "ask" "askhi" "bid" "bidlo"
# [7] "cfacpr" "cfacshr" "cusip" "date" "hexcd" "hsiccd"
# [13] "issuno" "permco" "permno" "prc" "ret" "retx"
# [19] "shrout" "spread" "vol"
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='dealscan' order by table_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "borrowerbase" "chars"
# [3] "company" "currfacpricing"
# [5] "dealamendment" "dealpurposecomment"
# [7] "facility" "facilityamendment"
# [9] "facilitydates" "facilityguarantor"
# [11] "facilitypaymentschedule" "facilityrepaymentcomment"
# [13] "facilitysecurity" "facilitysponsor"
# [15] "financialcovenant" "financialratios"
# [17] "lendershares" "lins"
# [19] "marketsegment" "networthcovenant"
# [21] "organizationtype" "package"
# [23] "packageassignmentcomment" "packageprepaymentcomment"
# [25] "performancepricing" "performancepricingcomments"
# [27] "sublimits"
For instance, here is the variables in table borrowerbase.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='dealscan'
and table_name='borrowerbase'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "borrowerbasepercentage" "borrowerbasetype" "comment"
# [4] "facilityid"
For instance, here is the table names in database execcomp.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='execcomp' order by table_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "anncomp" "codirfin" "colev"
# [4] "coperol" "deferredcomp" "directorcomp"
# [7] "ex_black" "ex_header" "exnames"
# [10] "ltawdtab" "outstandingawards" "pension"
# [13] "person" "planbasedawards" "stgrttab"
For instance, here is the variables in table msf of database crsp.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='execcomp'
and table_name='anncomp'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "address" "age"
# [3] "allothpd" "allothtot"
# [5] "becameceo" "bonus"
# [7] "ceoann" "cfoann"
# [9] "chg_ctrl_pymt" "city"
# [11] "co_per_rol" "comment"
# [13] "coname" "cusip"
# [15] "defer_balance_tot" "defer_contrib_co_tot"
# [17] "defer_contrib_exec_tot" "defer_earnings_tot"
# [19] "defer_rpt_as_comp_tot" "defer_withdr_tot"
# [21] "eip_unearn_num" "eip_unearn_val"
# [23] "exchange" "exec_fname"
# [25] "exec_fullname" "exec_lname"
# [27] "exec_mname" "execdir"
# [29] "execid" "execrank"
# [31] "execrankann" "gender"
# [33] "gvkey" "inddesc"
# [35] "interlock" "joined_co"
# [37] "leftco" "leftofc"
# [39] "ltip" "naics"
# [41] "naicsdesc" "nameprefix"
# [43] "noneq_incent" "old_datafmt_flag"
# [45] "opt_exer_num" "opt_exer_val"
# [47] "opt_unex_exer_est_val" "opt_unex_exer_num"
# [49] "opt_unex_unexer_est_val" "opt_unex_unexer_num"
# [51] "option_awards" "option_awards_blk_value"
# [53] "option_awards_fv" "option_awards_num"
# [55] "option_awards_rpt_value" "othann"
# [57] "othcomp" "page"
# [59] "pceo" "pcfo"
# [61] "pension_chg" "pension_pymts_tot"
# [63] "pension_value_tot" "reason"
# [65] "rejoin" "releft"
# [67] "reprice" "ret_yrs"
# [69] "rstkgrnt" "rstkvyrs"
# [71] "sal_pct" "salary"
# [73] "shrown_excl_opts" "shrown_excl_opts_pct"
# [75] "shrown_tot" "shrown_tot_pct"
# [77] "shrs_vest_num" "shrs_vest_val"
# [79] "sic" "sicdesc"
# [81] "spcode" "spindex"
# [83] "state" "stock_awards"
# [85] "stock_awards_fv" "stock_unvest_num"
# [87] "stock_unvest_val" "sub_tele"
# [89] "tdc1" "tdc1_pct"
# [91] "tdc2" "tdc2_pct"
# [93] "tele" "term_pymt"
# [95] "ticker" "title"
# [97] "titleann" "total_alt1"
# [99] "total_alt1_pct" "total_alt2"
# [101] "total_alt2_pct" "total_curr"
# [103] "total_curr_pct" "total_sec"
# [105] "total_sec_pct" "year"
# [107] "zip"
For instance, here is the table names in database crsp.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='frb' order by table_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "fx_daily" "fx_monthly" "rates_daily" "rates_monthly"
For instance, here is the variables in table msf of database crsp.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='frb'
and table_name='fx_daily'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "date" "exalus" "exauus" "exbeus" "exbzus" "excaus" "exchus"
# [8] "exdnus" "execus" "exeuus" "exfnus" "exfrus" "exgeus" "exgrus"
# [15] "exhkus" "exinus" "exirus" "exitus" "exjpus" "exkous" "exmaus"
# [22] "exmxus" "exneus" "exnous" "exnzus" "expous" "exsdus" "exsfus"
# [29] "exsius" "exslus" "exspus" "exszus" "extaus" "exthus" "exukus"
# [36] "exusal" "exusec" "exuseu" "exusir" "exusnz" "exusuk" "exvzus"
# [43] "indexgx" "twexb" "twexm" "twexo"
For instance, here is the table names in database crsp.
dbSendQuery(wrds,
"select distinct table_name from information_schema.columns
where table_schema='trace' order by table_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "absmasterfile" "cmomasterfile"
# [3] "masterfile" "mbsmasterfile"
# [5] "tbamasterfile" "trace"
# [7] "trace_agency" "trace_agency_names"
# [9] "trace_btds144a" "trace_btds144a_names"
# [11] "trace_enhanced" "trace_enhanced_names"
# [13] "trace_names" "trace_spds144a_abs"
# [15] "trace_spds144a_abs_names" "trace_spds144a_cmo"
# [17] "trace_spds144a_cmo_names" "trace_spds_abs"
# [19] "trace_spds_abs_names" "trace_spds_cmo"
# [21] "trace_spds_cmo_names" "trace_spds_mbs"
# [23] "trace_spds_mbs_names" "trace_spds_tba"
# [25] "trace_spds_tba_names" "trade_summary"
# [27] "trade_summary_agency" "trade_summary_btds144a"
# [29] "trade_summary_spds144a_abs" "trade_summary_spds144a_cmo"
# [31] "trade_summary_spds_abs" "trade_summary_spds_cmo"
# [33] "trade_summary_spds_mbs" "trade_summary_spds_tba"
For instance, here is the variables in table msf of database crsp.
dbSendQuery(wrds, "select column_name
from information_schema.columns
where table_schema='trace'
and table_name='trace'
order by column_name") %>%
dbFetch %>%
unlist %>%
unname# [1] "ascii_rptd_vol_tx" "asof_cd" "ats_indicator"
# [4] "bond_sym_id" "bsym" "chng_cd"
# [7] "cmsn_trd" "company_symbol" "contra_party_type"
# [10] "cusip_id" "days_to_sttl_ct" "diss_rptg_side_cd"
# [13] "frmt_cd" "function" "high_yld_pt"
# [16] "high_yld_sign_cd" "low_yld_pt" "low_yld_sign_cd"
# [19] "lsal_yld_pt" "lsal_yld_sign_cd" "msg_seq_nb"
# [22] "orig_dis_dt" "orig_msg_seq_nb" "remuneration"
# [25] "rptd_high_pr" "rptd_last_pr" "rptd_low_pr"
# [28] "rptd_pr" "rptg_party_type" "sale_cndtn2_cd"
# [31] "sale_cndtn_cd" "side" "spcl_trd_fl"
# [34] "sttl_dt" "sub_prd_type" "trans_dt"
# [37] "trc_st" "trd_exctn_dt" "trd_exctn_tm"
# [40] "wis_fl" "yld_pt" "yld_sign_cd"