71 lines
2.7 KiB
R
71 lines
2.7 KiB
R
# Copyright 2017 Observational Health Data Sciences and Informatics
|
|
#
|
|
# This file is part of DDLGeneratr
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
#' Parse Wiki files
#'
#' @description
#' Parses all .md files in the specified location (or any subfolders),
#' extracting definitions of the Common Data Model.
#'
#' @details
#' A file contributes a table definition when it contains exactly one header
#' row of the form `field | required | type | description` (matched
#' case-insensitively). The line directly below the header is skipped, and
#' every following line up to the next blank line (or the end of the file) is
#' read as one field definition. The table name is the lower-cased file name
#' without its `.md` extension. Files without such a header are ignored; a
#' file with more than one header raises an error. A header that is not
#' followed by any field rows produces a warning and the file is skipped.
#'
#' @param mdFilesLocation Path to the root folder of the Wiki repository.
#' @param output_file Path to where the output CSV file should be written.
#' @return Called for its side effect: a CSV file with the columns `field`,
#'   `required`, `type`, `description` and `table` is written to
#'   `output_file`.
#' @importFrom utils write.csv
#' @export
parseWiki <- function(mdFilesLocation, output_file) {
  # Anchor the extension so that e.g. "page.md.bak" or "page.mdx" is skipped.
  files <- list.files(mdFilesLocation,
                      pattern = "\\.md$",
                      recursive = TRUE,
                      full.names = TRUE)

  # Turn one markdown table row into a one-row data frame.
  parseTableRow <- function(row) {
    cells <- stringr::str_trim(stringr::str_split(row, "\\|")[[1]])
    # A leading pipe yields an empty first cell; take the four cells after it.
    if (startsWith(row, "|")) {
      cells <- cells[2:5]
    }
    # Rows with fewer than four cells yield NA in the missing columns.
    data.frame(field = tolower(cells[1]),
               required = cells[2],
               type = toupper(cells[3]),
               description = cells[4],
               stringsAsFactors = FALSE)
  }

  # Extract the field table from a single markdown file, or NULL if the file
  # holds no usable table.
  parseMdFile <- function(file) {
    lines <- stringr::str_trim(readLines(file, warn = FALSE))
    headerRow <- grep("\\s*field\\s*\\|\\s*required\\s*\\|\\s*type\\s*\\|\\s*description\\s*",
                      tolower(lines))
    if (length(headerRow) > 1) {
      stop("More than one table definition found in ", file)
    }
    if (length(headerRow) == 0) {
      return(NULL)
    }

    tableName <- tolower(sub("\\.md$", "", basename(file)))
    writeLines(paste("Parsing table", tableName))

    # Skip the header itself and the line directly below it.
    firstRow <- headerRow + 2
    # The table stops at the first blank line at or after the first data row;
    # the position just past the last line acts as a sentinel.
    stops <- c(which(lines == ""), length(lines) + 1)
    stops <- stops[stops >= firstRow]
    lastRow <- if (length(stops) > 0) min(stops) - 1 else firstRow - 1
    if (lastRow < firstRow) {
      warning("No field rows found after the table header in ", file,
              call. = FALSE)
      return(NULL)
    }

    tableDefinition <- do.call(rbind,
                               lapply(lines[firstRow:lastRow], parseTableRow))
    tableDefinition$table <- tableName
    tableDefinition
  }

  # rbind() drops the NULL entries produced by files without a table.
  tableDefinitions <- do.call(rbind, lapply(files, parseMdFile))
  if (is.null(tableDefinitions)) {
    # Still write a well-formed (header only) CSV when nothing was found.
    warning("No table definitions found in ", mdFilesLocation, call. = FALSE)
    tableDefinitions <- data.frame(field = character(0),
                                   required = character(0),
                                   type = character(0),
                                   description = character(0),
                                   table = character(0),
                                   stringsAsFactors = FALSE)
  }
  write.csv(tableDefinitions, output_file, row.names = FALSE)
}
|