Manifest file (saved as manifest.tsv; the columns must be separated by tabs)
sample-id forward-absolute-filepath reverse-absolute-filepath
sample-1 $PWD/some/filepath/sample1_R1.fastq.gz $PWD/some/filepath/sample1_R2.fastq.gz
sample-2 $PWD/some/filepath/sample2_R1.fastq.gz $PWD/some/filepath/sample2_R2.fastq.gz
based on: https://docs.qiime2.org/2022.2/tutorials/importing/
Collect files into QIIME2 artifact
# Activate the QIIME 2 conda environment, then import the paired-end FASTQ
# files listed in manifest.tsv into a single demultiplexed artifact
# (paired-end-demux.qza).
conda activate qiime2_2022.2
qiime tools import \
  --input-path manifest.tsv \
  --input-format PairedEndFastqManifestPhred33V2 \
  --type 'SampleData[PairedEndSequencesWithQuality]' \
  --output-path paired-end-demux.qza
Trim primers
# Remove the forward (--p-front-f) and reverse (--p-front-r) primer sequences
# from the start of the reads. Reads in which no primer was found are
# discarded (--p-discard-untrimmed), as are reads shorter than 100 nt after
# trimming (--p-minimum-length).
qiime cutadapt trim-paired \
  --i-demultiplexed-sequences paired-end-demux.qza \
  --o-trimmed-sequences paired-end-demux-trimmed.qza \
  --p-front-f CCTACGGGNGGCWGCAG \
  --p-front-r GACTACHVGGGTATCTAATCC \
  --p-discard-untrimmed \
  --p-minimum-length 100 \
  --p-cores 20
Denoising with DADA2
# Denoise the primer-trimmed read pairs with DADA2.
# Forward reads are truncated at position 270 and reverse reads at 220.
# The feature table and the representative sequences are written to the
# explicitly named artifacts; outputs that are not named explicitly are
# written into ./dada2_output.
conda activate qiime2_2022.2
qiime dada2 denoise-paired \
  --i-demultiplexed-seqs paired-end-demux-trimmed.qza \
  --p-trunc-len-f 270 \
  --p-trunc-len-r 220 \
  --p-n-threads 20 \
  --o-table table.qza \
  --o-representative-sequences representative_sequences.qza \
  --output-dir ./dada2_output
Build phylogenetic trees
# Build a rooted phylogenetic tree from the representative sequences:
# align -> mask the alignment -> infer the tree -> midpoint-root it.
conda activate qiime2_2022.2

# Multiple sequence alignment with MAFFT. The input is the artifact written by
# the DADA2 step (representative_sequences.qza; note the .qza extension).
qiime alignment mafft \
  --i-sequences representative_sequences.qza \
  --o-alignment aligned.qza \
  --p-n-threads 20

# Mask (filter) the alignment before tree inference.
qiime alignment mask \
  --i-alignment aligned.qza \
  --o-masked-alignment masked.qza

# Infer an unrooted tree from the masked alignment with FastTree.
qiime phylogeny fasttree \
  --i-alignment masked.qza \
  --o-tree unrooted-tree.qza \
  --p-n-threads 20

# Root the tree at its midpoint; rooted-tree.qza is what the phylogenetic
# beta-diversity step consumes.
qiime phylogeny midpoint-root \
  --i-tree unrooted-tree.qza \
  --o-rooted-tree rooted-tree.qza
Rarefaction
# Rarefy the feature table: subsample each sample to a depth of 10000.
# NOTE(review): the only output appears to be named explicitly via
# --o-rarefied-table, so --output-dir looks redundant here -- confirm before
# removing it.
conda activate qiime2_2022.2
qiime feature-table rarefy \
  --i-table table.qza \
  --p-sampling-depth 10000 \
  --o-rarefied-table feature-table_rarefied.qza \
  --output-dir output_rarefaction
Alpha-diversity
# Compute one alpha-diversity metric on the rarefied table.
# <MEASURE> is a placeholder: substitute the metric name in BOTH places before
# running (left as-is, the shell would interpret < and > as redirections).
conda activate qiime2_2022.2
qiime diversity alpha \
  --i-table feature-table_rarefied.qza \
  --p-metric <MEASURE> \
  --o-alpha-diversity <MEASURE>_vector.qza
Beta-diversity
# Compute a phylogenetic beta-diversity distance matrix from the rarefied
# table and the rooted tree.
# <MEASURE> is a placeholder: substitute the metric name in BOTH places before
# running (left as-is, the shell would interpret < and > as redirections).
conda activate qiime2_2022.2
qiime diversity beta-phylogenetic \
  --i-table feature-table_rarefied.qza \
  --i-phylogeny rooted-tree.qza \
  --p-metric <MEASURE> \
  --o-distance-matrix <MEASURE>_dmat.qza
Principal coordinate analysis
# Run principal coordinate analysis on a distance matrix produced by the
# beta-diversity step.
# <MEASURE> is a placeholder: use the same metric name that was used to name
# the distance matrix.
conda activate qiime2_2022.2
qiime diversity pcoa \
  --i-distance-matrix <MEASURE>_dmat.qza \
  --o-pcoa <MEASURE>_pcoa_result.qza
Taxonomy annotation
# Assign taxonomy to the representative sequences with a scikit-learn
# classifier artifact.
# <ClassifierModel> is a placeholder for the classifier file name (without the
# .qza extension); substitute it before running.
conda activate qiime2_2022.2
qiime feature-classifier classify-sklearn \
  --i-classifier <ClassifierModel>.qza \
  --i-reads representative_sequences.qza \
  --p-n-jobs 20 \
  --o-classification taxonomy.qza
LEfSe analysis
Input: a tab-delimited table containing the numeric features, a class vector and, optionally, subclass and subject vectors
- features: read counts or abundance floating-point values
- the first field: name of the feature
- Class, subclass and subject vectors have a name (the first field) and a list of non-numerical strings
# Set up a dedicated conda environment for LEfSe.
# The environment is activated BEFORE installing so that the package lands in
# "lefse" rather than in whatever environment happens to be active.
conda create -n lefse
conda activate lefse
conda install -c bioconda lefse

# Run the LEfSe analysis.
# NOTE(review): run_lefse.py is invoked without arguments here; it presumably
# needs the formatted input file and the output .res file (the plotting steps
# below read hmp_aerobiosis_small.res) -- confirm and fill in.
run_lefse.py

# Plot the LEfSe results as a PNG.
plot_res.py hmp_aerobiosis_small.res hmp_aerobiosis_small.png

# Plot the results as a cladogram in PNG format.
plot_cladogram.py hmp_aerobiosis_small.res hmp_aerobiosis_small.cladogram.png --format png

# Alternative to the conda installation: open an interactive shell in the
# biobakery/lefse Docker image.
docker run -it biobakery/lefse bash