Skip to contents

Creating a codelist for osteoarthritis

For this example we are going to generate a candidate codelist for osteoarthritis, looking at the impact of alternative search strategies.

# Postgres connection details, read from environment variables so that
# credentials are not hard-coded in the script.
serverDbi <- Sys.getenv("server")
user <- Sys.getenv("user")
password <- Sys.getenv("password")
port <- Sys.getenv("port")
host <- Sys.getenv("host")

# Open the database connection. dbConnect() is namespace-qualified, in the
# same way as RPostgres::Postgres(), so the call does not depend on DBI
# having been attached with library().
db <- DBI::dbConnect(
  RPostgres::Postgres(),
  dbname = serverDbi,
  host = host,
  port = port,
  user = user,
  password = password
)

# Schema name handed to the cdm reference as its cdm schema
vocabularyDatabaseSchema <- "vocabulary"

# Build the cdm reference from the open database connection
cdm <- CDMConnector::cdm_from_con(
  cdm_schema = vocabularyDatabaseSchema,
  con = db
)

Search strategies

Condition domain, without searching synonyms, with exclusions, without including descendants or the direct ancestor

To start we will search for “osteoarthritis”, while excluding “post-infection” and “post-traumatic”, but without searching synonyms, without searching via non-standard codes, and without including descendants or the direct ancestor of the included concepts.

# Baseline search: condition domain only, with synonym search, non-standard
# search, descendants and ancestor all switched off.
oaCodes1 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchNonStandard = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What is the candidate codelist?

# Inspect the structure of the baseline candidate codelist
glimpse(oaCodes1)
#> Rows: 151
#> Columns: 6
#> $ concept_id       <int> 72993, 73840, 75036, 77631, 78505, 79904, 80180, 8018…
#> $ found_from       <chr> "From initial search", "From initial search", "From i…
#> $ concept_name     <chr> "Localized, primary osteoarthritis", "Localized, prim…
#> $ domain_id        <chr> "Condition", "Condition", "Condition", "Condition", "…
#> $ vocabulary_id    <chr> "SNOMED", "SNOMED", "SNOMED", "SNOMED", "SNOMED", "SN…
#> $ standard_concept <chr> "standard", "standard", "standard", "standard", "stan…

Including descendants

Now we will also include the descendants of included concepts.

# Same search as the baseline, except that includeDescendants is switched on.
oaCodes2 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchNonStandard = FALSE,
  includeDescendants = TRUE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the baseline codelist with the descendants-included codelist and
# keep only the rows labelled "Only codelist 2", then drop the label column.
newCodes1To2 <- compareCodelists(oaCodes1, oaCodes2) |>
  filter(codelist == "Only codelist 2") |>
  select(-"codelist")

# Inspect the additional concepts
glimpse(newCodes1To2)
#> Rows: 262
#> Columns: 2
#> $ concept_id   <int> 72401, 72405, 72709, 72990, 73287, 73550, 73554, 74132, 7…
#> $ concept_name <chr> "Kashin-Bek disease", "Degenerative joint disease of shou…

Including observation domain

Now we will search the observation domain as well as the condition domain.

# Same search as the baseline, except that the observation domain is searched
# in addition to the condition domain.
oaCodes3 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = c("Condition", "Observation"),
  searchInSynonyms = FALSE,
  searchNonStandard = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the baseline codelist with the two-domain codelist and keep only
# the rows labelled "Only codelist 2", then drop the label column.
newCodes1To3 <- compareCodelists(oaCodes1, oaCodes3) |>
  filter(codelist == "Only codelist 2") |>
  select(-"codelist")

# Inspect the additional concepts
glimpse(newCodes1To3)
#> Rows: 18
#> Columns: 2
#> $ concept_id   <int> 1988221, 1988793, 2101879, 2102817, 2108437, 2617599, 261…
#> $ concept_name <chr> "Knee injury and osteoarthritis outcome score - physical …

Search synonyms

Now we will search the concept synonym table to identify concepts to include.

# Same search as the baseline, except that searchInSynonyms is switched on.
oaCodes4 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = TRUE,
  searchNonStandard = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the baseline codelist with the synonym-search codelist and keep
# only the rows labelled "Only codelist 2", then drop the label column.
newCodes1To4 <- compareCodelists(oaCodes1, oaCodes4) |>
  filter(codelist == "Only codelist 2") |>
  select(-"codelist")

# Inspect the additional concepts
glimpse(newCodes1To4)
#> Rows: 20
#> Columns: 2
#> $ concept_id   <int> 72405, 75617, 78227, 80494, 80809, 4143463, 4153359, 4183…
#> $ concept_name <chr> "Degenerative joint disease of shoulder region", "Degener…

Search via non-standard

Now we will also search via non-standard codes to identify concepts to include.

# Same search as the baseline, except that searchNonStandard is switched on.
oaCodes5 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchNonStandard = TRUE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the baseline codelist with the non-standard-search codelist and
# keep only the rows labelled "Only codelist 2", then drop the label column.
newCodes1To5 <- compareCodelists(oaCodes1, oaCodes5) |>
  filter(codelist == "Only codelist 2") |>
  select(-"codelist")

# Inspect the additional concepts (none in this run)
glimpse(newCodes1To5)
#> Rows: 0
#> Columns: 2
#> $ concept_id   <int> 
#> $ concept_name <chr>

Include ancestor

Now we include the direct ancestor of included terms.

# Same search as the baseline, except that includeAncestor is switched on.
oaCodes8 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchNonStandard = FALSE,
  includeDescendants = FALSE,
  includeAncestor = TRUE
)

What new codes do we pick up?

# Compare the baseline codelist with the ancestor-included codelist and keep
# only the rows labelled "Only codelist 2", then drop the label column.
newCodes1To8 <- compareCodelists(oaCodes1, oaCodes8) |>
  filter(codelist == "Only codelist 2") |>
  select(-"codelist")

# Inspect the additional concepts
glimpse(newCodes1To8)
#> Rows: 99
#> Columns: 2
#> $ concept_id   <int> 72405, 73553, 75620, 75897, 76777, 78227, 80494, 81937, 4…
#> $ concept_name <chr> "Degenerative joint disease of shoulder region", "Arthrop…