-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinput_parameters.yml
39 lines (33 loc) · 2.79 KB
/
input_parameters.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
####################################################################################################
####################################################################################################
## This is the input parameter file for the Nanopore consensus pipeline.
## The file is written in the YAML format. A nice description of the
## format can be found at http://docs.ansible.com/ansible/YAMLSyntax.html
##
## Unless otherwise mentioned, all fields must be filled.
##
####################################################################################################
####################################################################################################
# Full paths to all files and directories must be provided.
# Only one fastq file can be analyzed at a time.
# NOTE: the paths below are machine-specific; adjust them to your environment.
fastq: /home/ada/Desktop/16S_alignments/scripts/16S_alignment/reads_clustering_pipeline/mock_datasets/mock5_1000_varying.fastq
tmp: /home/ada/Desktop/16S_alignments/scripts/16S_alignment/reads_clustering_pipeline/main_pipeline/src/tmp
dependencies: /home/ada/Desktop/16S_alignments/scripts/16S_alignment/reads_clustering_pipeline/main_pipeline/src/dependencies
results: /home/ada/Desktop/16S_alignments/scripts/16S_alignment/reads_clustering_pipeline/main_pipeline/result_files
blast_db: /media/data/home/blast/ref_prok_rep_genomes_db
db_name: ref_prok_rep_genomes

# Set to true to include a refinement of the clusters based on the MSA guide tree.
refinement: true

# Parameters used for clustering and refinement.
# The keys below must stay indented so that they are nested under `parameters`.
parameters:
  # Length (bp) of the kmers used for the kmer frequency matrix.
  kmer_len: 5
  # Number of dimensions for UMAP clustering.
  umap_n_components: 10
  # Controls how UMAP balances local versus global structure in the data.
  umap_n_neighbors: 5
  # Minimum number of reads that will create a cluster.
  hdbscan_min_cluster_size: 500
  # Determines how flat clusters are selected from the cluster tree
  # hierarchy (eom or leaf).
  hdbscan_cluster_selection_method: leaf
  # Number of reads that will be analyzed at once.
  reads_per_job: 30000
  # Standard deviation of branch lengths that determines whether or not
  # a cluster is split.
  std_ref: 0.0015
  # Standard deviation of branch lengths that determines how the clusters
  # are split.
  std_t: 0.001
#####
# time python3 /home/ada/Desktop/16S_alignments/scripts/16S_alignment/reads_clustering_pipeline/main_pipeline/main.py --config_file /home/ada/Desktop/16S_alignments/scripts/16S_alignment/reads_clustering_pipeline/main_pipeline/input_parameters.yml
# Same command when running on the cluster:
# python3 /home/amadejska/16s_project/Nanopore_consensus_pipeline/src/main.py --config_file /home/amadejska/16s_project/Nanopore_consensus_pipeline/input_parameters.yml