# End-to-end CEDARS walkthrough: install the package, create a throw-away
# project on a public test MongoDB cluster, load fictitious EHR corpora,
# annotate them, review them in the GUI, download the results and clean up.
devtools::install_github("simon-hans/CEDARS", upgrade = "never")

# The CEDARS functions below are called unqualified, so the package must be
# attached before use
library(CEDARS)

# The code below creates an instance of a CEDARS project on a public test
# MongoDB cluster, populated with fictitious EHR corpora.

# MongoDB credentials
db_user_name <- "testUser"
# NOTE(review): password assumed for the public test cluster - confirm against
# the CEDARS documentation before relying on it
db_user_pw <- "testPW"
db_host <- "cedars.yvjp6.mongodb.net"
# NOTE(review): replica set and port are assumed to be unused (NA) with the
# standard URI format on this cluster - confirm
db_replica_set <- NA
db_port <- NA

# Using standard MongoDB URL format
uri_fun <- mongo_uri_standard

# Name for the MongoDB database which will contain the CEDARS project
# In this case we generate a random name
mongo_database <- find_project_name()

# We create the database and all required collections on the test cluster
create_project(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database, "CEDARS Example Project", "Dr Smith"
)

# Adding one CEDARS end user; these are the credentials entered in the GUI
# further below
add_end_user(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database, "John", "strongpassword"
)

# Negex is included with CEDARS and required for assessment of negation
negex_upload(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database
)

# Uploading the small simulated collection of EHR corpora
# NOTE(review): `simulated_patients` is assumed to be the example dataset
# shipped with CEDARS - confirm the object name
upload_notes(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database, simulated_patients
)

# This is a simple query which will report all sentences with a word starting
# in "bleed" or "hem", or an exact match for "bled"
search_query <- "bleed* OR hem* OR bled"
# NOTE(review): the three query options below were undefined in the script;
# TRUE is assumed for each - confirm the intended settings
use_negation <- TRUE
hide_duplicates <- TRUE
skip_after_event <- TRUE
save_query(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database, search_query, use_negation, hide_duplicates,
  skip_after_event
)

# Running the NLP annotations on EHR corpora
# We are only using one core; for large datasets parallel processing is faster
automatic_NLP_processor(
  NA, "latin1", "udpipe", uri_fun, db_user_name, db_user_pw, db_host,
  db_replica_set, db_port, mongo_database,
  max_n_grams_length = 0, negex_depth = 6, select_cores = 1
)

# Pre-searching based on query
# This is optional but will speed up the interface
pre_search(
  patient_vect = NA, uri_fun, db_user_name, db_user_pw, db_host,
  db_replica_set, db_port, mongo_database
)

# Start the CEDARS GUI locally
# Your user name is "John", password is "strongpassword"
# Once you have entered those credentials, click on button "ENTER NEW DATE"
# and CEDARS will seek the first record to annotate
# Try out the interface: adjudicating sentences, entering event dates and
# comments, moving between sentences and searching for records
# Once you have entered some data, close the GUI
start_local(
  db_user_name, db_user_pw, db_host, db_replica_set, db_port, mongo_database
)

# Obtaining events and info associated with data entry
# The annotations entered in the GUI are now available in this data frame
event_output <- download_events(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database
)

# Remove the project from MongoDB
terminate_project(
  uri_fun, db_user_name, db_user_pw, db_host, db_replica_set, db_port,
  mongo_database,
  fast = TRUE
)