Skip to content

Commit

Permalink
feat: make db connection more generic; add larger dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
pieterlukasse committed Oct 1, 2024
1 parent 1d7d42f commit 7c37186
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 5 deletions.
4 changes: 2 additions & 2 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@
"filename": "utils/dsn.go",
"hashed_secret": "347cd9c53ff77d41a7b22aa56c7b4efaf54658e3",
"is_verified": false,
"line_number": 23
"line_number": 36
}
]
},
"generated_at": "2022-04-19T19:51:52Z"
"generated_at": "2024-10-01T14:58:26Z"
}
120 changes: 120 additions & 0 deletions tests/data_generator/example_test_data_config2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# list of "global" concepts:
concepts:
- concept: HARE-group
id: 2000007027
concept_name: 'HARE'
value_type: concept
- concept: HARE-group
id: 2000007027
value_type: concept
possible_values: [2000007028]
concept_value_name: 'Hispanic'
- concept: HARE-group
id: 2000007027
value_type: concept
possible_values: [2000007029]
concept_value_name: 'non-Hispanic Asian'
- concept: HARE-group
id: 2000007027
value_type: concept
possible_values: [2000007030]
concept_value_name: 'non-Hispanic Black'
- concept: HARE-group
id: 2000007027
value_type: concept
possible_values: [2000007031]
concept_value_name: 'non-Hispanic White'
- concept: height
id: 2000006000
value_type: number
- concept: weight
id: 2000006001
value_type: number


# list of cohorts to be created:
cohorts:
- cohort: small-cohort
number_of_persons: 120
# this will repeat the cohort defined below 2 times:
clone_count: 2
# concepts that will be created and linked to persons in this cohort via observations:
concepts:
- concept: concept-name
value_type: number
# this means ~1/3 of the persons in the cohort will have this concept associated with them in the observation table:
ratio_of_persons: 0.34
# makes 2 copies of this concept:
clone_count: 2
- concept: height
value_type: number
# this means 90% of the persons in the cohort will have height filled in:
ratio_of_persons: 0.9
id: 2000006000
- concept: weight
value_type: number
# this means 90% of the persons in the cohort will have weight filled in:
ratio_of_persons: 0.9
id: 2000006001
- concept: HARE-group
value_type: concept
# we want this concept to have/use a specific id...
id: 2000007027
# ...and populate with one of these ids (picks a random one):
possible_values: [2000007028,2000007029,2000007030,2000007031]
- cohort: medium-cohort
number_of_persons: 10000
# this will repeat the cohort defined below 2 times:
clone_count: 2
# concepts that will be created and linked to persons in this cohort via observations:
concepts:
- concept: concept-name
value_type: number
# this means ~1/3 of the persons in the cohort will have this concept associated with them in the observation table:
ratio_of_persons: 0.34
# makes 2 copies of this concept:
clone_count: 2
- concept: height
value_type: number
# this means 90% of the persons in the cohort will have height filled in:
ratio_of_persons: 0.9
id: 2000006000
- concept: weight
value_type: number
# this means 90% of the persons in the cohort will have weight filled in:
ratio_of_persons: 0.9
id: 2000006001
- concept: HARE-group
value_type: concept
# we want this concept to have/use a specific id...
id: 2000007027
# ...and populate with one of these ids (picks a random one):
possible_values: [2000007028,2000007029,2000007030,2000007031]
- cohort: large-cohort
number_of_persons: 50000
# this will repeat the cohort defined below 2 times:
clone_count: 2
# concepts that will be created and linked to persons in this cohort via observations:
concepts:
- concept: concept-name
value_type: number
# this means ~1/3 of the persons in the cohort will have this concept associated with them in the observation table:
ratio_of_persons: 0.34
# makes 2 copies of this concept:
clone_count: 2
- concept: height
value_type: number
# this means 90% of the persons in the cohort will have height filled in:
ratio_of_persons: 0.9
id: 2000006000
- concept: weight
value_type: number
# this means 90% of the persons in the cohort will have weight filled in:
ratio_of_persons: 0.9
id: 2000006001
- concept: HARE-group
value_type: concept
# we want this concept to have/use a specific id...
id: 2000007027
# ...and populate with one of these ids (picks a random one):
possible_values: [2000007028,2000007029,2000007030,2000007031]
19 changes: 16 additions & 3 deletions utils/dsn.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,22 @@ func GenerateDsn(sourceConnectionString string) string {
log.Printf("Found db vendor %s", dbVendor)
host := sourceConnectionParts[2]
port := sourceConnectionParts[3]
dbname := sourceConnectionParts[5]
username := sourceConnectionParts[7]
password := sourceConnectionParts[9]
dbname := ""
username := ""
password := ""
if len(sourceConnectionParts) == 9 {
// expecting a string like this: jdbc:postgresql://hostname.com:5432/dbname?user=username&etc
dbname = sourceConnectionParts[4]
username = sourceConnectionParts[6]
password = sourceConnectionParts[8]
} else if len(sourceConnectionParts) == 10 {
// expecting a string like this: jdbc:sqlserver://hostname.com:1433;databaseName=dbname;user=username;etc
dbname = sourceConnectionParts[5]
username = sourceConnectionParts[7]
password = sourceConnectionParts[9]
} else {
panic("connection string format not supported")
}

dsn := fmt.Sprintf(dbVendor+"://%s:%s@%s:%s?database=%s",
username,
Expand Down

0 comments on commit 7c37186

Please sign in to comment.