➤ mkdir -p ~/.conda/envs
➤ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
➤ bash Mambaforge-Linux-x86_64.sh
# set the install path to ~/.conda/envs/mambaforge
# then activate mambaforge
➤ conda activate mambaforge
# install snakemake, fd-find and seqkit
➤ conda install -c bioconda -c conda-forge snakemake fd-find seqkit
➤ echo "export PYTHONPATH=/path/to/qiimepi:$PYTHONPATH" >> ~/.bashrc
# log in again so the updated PYTHONPATH takes effect
➤ conda activate mambaforge
➤ python /path/to/qiimepi/run_qiimepi.py --help
usage: qiimepi [-h] [-v] ...
βββββββ ββββββββββ βββββββββββββββββββ βββ
ββββββββββββββββββββ ββββββββββββββββββββββββ
βββ ββββββββββββββββββββββββββ βββββββββββ
βββββ ββββββββββββββββββββββββββ βββββββ βββ
ββββββββββββββββββ βββ ββββββββββββββ βββ
βββββββ βββββββββ ββββββββββββββ βββ
Omics for All, Open Source for All
Quantitative Insights Into Microbial Ecology
optional arguments:
-h, --help show this help message and exit
-v, --version print software version and exit
available subcommands:
init init project
amplicon_wf amplicon data analysis pipeline using QIIME2
# for snakemake
➤ conda activate mambaforge
➤ mkdir -p test
➤ cd test
➤ wget -c https://mothur.s3.us-east-2.amazonaws.com/wiki/miseqsopdata.zip
➤ unzip miseqsopdata.zip
➤ gzip MiSeq_SOP/*.fastq
➤ fd fastq.gz /full/path/to/MiSeq_SOP | \
sort | uniq | paste - - | \
awk -F'[/_]' \
'BEGIN {print "sample-id\tforward-absolute-filepath\treverse-absolute-filepath"};{print $(NF-4) "\t" $0}' \
> samples.tsv
➤ cd test
➤ python /path/to/qiimepi/run_qiimepi.py init -d . -s samples.tsv
➤ ll
config.yaml
envs
profiles
results
samples.miseq_sop.tsv
➤ cat config.yaml
params:
samples: "samples.tsv"
# https://docs.qiime2.org/2024.2/data-resources/
database:
taxonomy_classifiers:
silva_138_99_OTUs_full_length_sequences:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-classifier.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/silva-138-99-nb-classifier.qza"
diverse_weighted_silva_138_99_OTUs_full_length_sequences:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-diverse-weighted-classifier.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/silva-138-99-nb-diverse-weighted-classifier.qza"
human_stool_weighted_silva_138_99_OTUs_full_length_sequences:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-human-stool-weighted-classifier.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/silva-138-99-nb-human-stool-weighted-classifier.qza"
gtdb_classifier_r220:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/gtdb/gtdb_classifier_r220.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/gtdb_classifier_r220.qza"
diverse_weighted_gtdb_classifier_r220:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/gtdb/gtdb_diverse_weighted_classifier_r220.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/gtdb_diverse_weighted_classifier_r220.qza"
human_stool_weighted_gtdb_classifier_r220:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/gtdb/gtdb_human_stool_weighted_classifier_r220.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/gtdb_human_stool_weighted_classifier_r220.qza"
greengenes2_2024_09_full_length_sequences:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/greengenes2/2024.09.backbone.full-length.nb.sklearn-1.4.2.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/2024.09.backbone.full-length.nb.sklearn-1.4.2.qza"
greengenes2_2024_09_from_515F_806R_region_of_sequences:
remote: "https://data.qiime2.org/classifiers/sklearn-1.4.2/greengenes2/2024.09.backbone.v4.nb.sklearn-1.4.2.qza"
local: "/databases/ecogenomics/QIIME2/2024.5/taxonomic_classifier/2024.09.backbone.v4.nb.sklearn-1.4.2.qza"
# https://docs.qiime2.org/2023.9/tutorials/importing/
import:
type: "SampleData[PairedEndSequencesWithQuality]"
# EMPSingleEndSequences
# EMPPairedEndSequences
# MultiplexedSingleEndBarcodeInSequence
# MultiplexedPairedEndBarcodeInSequence
# SampleData[SequencesWithQuality]
# SampleData[PairedEndSequencesWithQuality]
# FeatureTable[Frequency]
# Phylogeny[Unrooted]
format: "PairedEndFastqManifestPhred33V2"
# CasavaOneEightSingleLanePerSampleDirFmt
# SingleEndFastqManifestPhred33V2
# SingleEndFastqManifestPhred64V2
# PairedEndFastqManifestPhred33V2
# PairedEndFastqManifestPhred64V2
# BIOMV100Format
# BIOMV210Format
denoise:
threads: 8
dada2:
do: True
paired:
trunc_len_f: 280
trunc_len_r: 250
trim_left_f: 0
trim_left_r: 0
single:
trunc_len: 120
trim_left: 0
deblur:
do: False
trim_len: 280
left_trim_len: 0
taxonomic:
threads: 8
classifiers: ["silva_138_99_OTUs_full_length_sequences", "gtdb_classifier_r220", "greengenes2_2024_09_full_length_sequences"]
# silva_138_99_OTUs_full_length_sequences
# diverse_weighted_silva_138_99_OTUs_full_length_sequences
# human_stool_weighted_silva_138_99_OTUs_full_length_sequences
# gtdb_classifier_r220
# diverse_weighted_gtdb_classifier_r220
# human_stool_weighted_gtdb_classifier_r220
# greengenes2_2024_09_full_length_sequences
# greengenes2_2024_09_from_515F_806R_region_of_sequences
phylotree:
threads: 8
sepp_db: /home/jiezhu/databases/ecogenomics/QIIME2/2023.9/sepp_reference_databases/sepp-refs-silva-128.qza
# /home/jiezhu/databases/ecogenomics/QIIME2/2023.9/sepp_reference_databases/sepp-refs-gg-13-8.qza
function:
threads: 8
output:
tmp: "results/tmp"
database: "results/00.database"
import: "results/00.import"
denoise: "results/01.denoise"
taxonomic: "results/02.taxonomic"
phylotree: "results/03.phylotree"
function: "results/04.function"
envs:
qiime2: "envs/qiime2.yaml"
picrust2: "envs/picrust2.yaml"
➤ python /path/to/qiimepi/run_qiimepi.py amplicon_wf all --use-conda --dry-run
Job stats:
job count min threads max threads
------------------------------------------- ------- ------------- -------------
all 1 1 1
qiime2_denoise_dada2 1 8 8
qiime2_denoise_dada2_export 1 1 1
qiime2_denoise_dada2_visualization 1 1 1
qiime2_denoise_dada2_visualization_export 1 1 1
qiime2_feature_table_export 1 1 1
qiime2_feature_table_summarize 1 1 1
qiime2_feature_table_tabulate 1 1 1
qiime2_feature_table_tabulate_export 1 1 1
qiime2_function_picrust2 1 8 8
qiime2_function_picrust2_add_descriptions 1 1 1
qiime2_import 1 1 1
qiime2_import_summarize 1 1 1
qiime2_import_summarize_export 1 1 1
qiime2_phylotree_align 1 8 8
qiime2_phylotree_align_export 1 1 1
qiime2_phylotree_align_visualization 1 1 1
qiime2_phylotree_align_visualization_export 1 1 1
qiime2_phylotree_sepp 1 8 8
qiime2_phylotree_sepp_export 1 1 1
qiime2_taxonomic_barplot 1 1 1
qiime2_taxonomic_barplot_export 1 1 1
qiime2_taxonomic_classification 1 8 8
qiime2_taxonomic_classification_export 1 1 1
qiime2_taxonomic_krona 1 1 1
qiime2_taxonomic_krona_export 1 1 1
qiime2_taxonomic_visualization 1 1 1
qiime2_taxonomic_visualization_export 1 1 1
total 28 1 8
➤ python /path/to/qiimepi/run_qiimepi.py \
amplicon_wf all \
--use-conda \
--run-local \
--jobs 10 \
--cores 10
➤ tree results
results/
├── 00.import
│   ├── demux.qza
│   └── demux.qzv
├── 01.denoise
│   ├── dada2
│   │   ├── denoise_stats.qza
│   │   ├── denoise_stats_qza
│   │   │   └── stats.tsv
│   │   ├── denoise_stats.qzv
│   │   ├── rep_seqs.qza
│   │   ├── rep_seqs_qza
│   │   │   └── dna-sequences.fasta
│   │   ├── rep_seqs.qzv
│   │   ├── table.qza
│   │   ├── table_qza
│   │   │   ├── feature-table.biom
│   │   │   └── feature-table.tsv
│   │   └── table.qzv
├── 02.taxonomic
│   ├── dada2
│   │   ├── krona.qzv
│   │   ├── taxonomy_barplot.qzv
│   │   ├── taxonomy.qza
│   │   ├── taxonomy_qza
│   │   │   └── taxonomy.tsv
│   │   └── taxonomy.qzv
├── 03.phylotree
│   ├── dada2
│   │   ├── align
│   │   │   ├── alignment.qza
│   │   │   ├── empress_tree.qzv
│   │   │   ├── masked_alignment.qza
│   │   │   ├── rooted_tree.qza
│   │   │   ├── rooted_tree_qza
│   │   │   │   └── tree.nwk
│   │   │   ├── tree.qza
│   │   │   └── tree_qza
│   │   │       └── tree.nwk
│   │   └── sepp
│   │       ├── tree_placements.qza
│   │       ├── tree_placements_qza
│   │       │   └── placements.json
│   │       ├── tree.qza
│   │       └── tree_qza
│   │           └── tree.nwk
├── 04.function
│   ├── dada2
│   │   └── picrust2
│   │       ├── done
│   │       ├── done_description
│   │       ├── EC_metagenome_out
│   │       │   ├── pred_metagenome_contrib.tsv.gz
│   │       │   ├── pred_metagenome_unstrat_descrip.tsv.gz
│   │       │   ├── pred_metagenome_unstrat.tsv.gz
│   │       │   ├── seqtab_norm.tsv.gz
│   │       │   └── weighted_nsti.tsv.gz
│   │       ├── EC_predicted.tsv.gz
│   │       ├── KO_metagenome_out
│   │       │   ├── pred_metagenome_contrib.tsv.gz
│   │       │   ├── pred_metagenome_unstrat_descrip.tsv.gz
│   │       │   ├── pred_metagenome_unstrat.tsv.gz
│   │       │   ├── seqtab_norm.tsv.gz
│   │       │   └── weighted_nsti.tsv.gz
│   │       ├── KO_predicted.tsv.gz
│   │       ├── marker_predicted_and_nsti.tsv.gz
│   │       ├── out.tre
│   │       └── pathways_out
│   │           ├── path_abun_contrib.tsv.gz
│   │           ├── path_abun_unstrat_descrip.tsv.gz
│   │           └── path_abun_unstrat.tsv.gz
➤ seqkit convert xx.fq.gz | head
Phred Score table
| Quality system name | Phred Score | Coordinates |
|---|---|---|
| S - Sanger | Phred+33 | raw reads typically (0, 40) |
| X - Solexa | Solexa+64 | raw reads typically (-5, 40) |
| I - Illumina 1.3+ | Phred+64 | raw reads typically (0, 40) |
| J - Illumina 1.5+ | Phred+64 | raw reads typically (3, 41) |
| L - Illumina 1.8+ | Phred+33 | raw reads typically (0, 41) |
| P - PacBio | Phred+33 | HiFi reads typically (0, 93) |