Generate microbial feature table

Manifest file

sample-id     forward-absolute-filepath       reverse-absolute-filepath
sample-1      $PWD/some/filepath/sample1_R1.fastq.gz  $PWD/some/filepath/sample1_R2.fastq.gz
sample-2      $PWD/some/filepath/sample2_R1.fastq.gz  $PWD/some/filepath/sample2_R2.fastq.gz

based on: https://docs.qiime2.org/2022.2/tutorials/importing/

Collect files into QIIME2 artifact

conda activate qiime2_2022.2
qiime tools import \
  --type 'SampleData[PairedEndSequencesWithQuality]' \
  --input-path manifest.tsv \
  --output-path paired-end-demux.qza \
  --input-format PairedEndFastqManifestPhred33V2 

Trim primers

qiime cutadapt trim-paired \
  --i-demultiplexed-sequences paired-end-demux.qza \
  --p-cores 20 \
  --p-front-f CCTACGGGNGGCWGCAG \
  --p-front-r GACTACHVGGGTATCTAATCC \
  --p-minimum-length 100 \
  --p-discard-untrimmed \
  --o-trimmed-sequences paired-end-demux-trimmed.qza

Denoising with DADA2

conda activate qiime2_2022.2
qiime dada2 denoise-paired \
  --i-demultiplexed-seqs paired-end-demux-trimmed.qza \
  --p-trunc-len-f 270 \
  --p-trunc-len-r 220 \
  --o-representative-sequences representative_sequences.qza \
  --p-n-threads 20 \
  --o-table table.qza \
  --output-dir ./dada2_output

Build phylogenetic trees

conda activate qiime2_2022.2
qiime alignment mafft   --i-sequences representative_sequences.qza --o-alignment aligned.qza --p-n-threads 20
qiime alignment mask --i-alignment aligned.qza --o-masked-alignment masked.qza
qiime phylogeny fasttree --i-alignment masked.qza --o-tree unrooted-tree.qza --p-n-threads 20
qiime phylogeny midpoint-root --i-tree unrooted-tree.qza --o-rooted-tree rooted-tree.qza

Rarefaction

conda activate qiime2_2022.2
qiime feature-table rarefy --i-table table.qza \
  --p-sampling-depth 10000 \
  --o-rarefied-table feature-table_rarefied.qza \
  --output-dir output_rarefaction

Alpha-diversity

conda activate qiime2_2022.2
qiime diversity alpha --i-table feature-table_rarefied.qza --p-metric <MEASURE> --o-alpha-diversity <MEASURE>_vector.qza

Beta-diversity

conda activate qiime2_2022.2
qiime diversity beta-phylogenetic --i-table feature-table_rarefied.qza --i-phylogeny rooted-tree.qza --p-metric <MEASURE> --o-distance-matrix <MEASURE>_dmat.qza

Principal coordinate analysis

conda activate qiime2_2022.2
qiime diversity pcoa --i-distance-matrix <MEASURE>_dmat.qza --o-pcoa <MEASURE>_pcoa_result.qza

Taxonomy annotation

conda activate qiime2_2022.2
qiime feature-classifier classify-sklearn --i-classifier <ClassifierModel>.qza --i-reads representative_sequences.qza --o-classification taxonomy.qza --p-n-jobs 20

LEfSe analysis

Input: a tab-delimited file containing the numeric features, the class vector, and optionally the subclass and subject vectors

  • features: read counts or abundance floating-point values
  • the first field: name of the feature
  • Class, subclass and subject vectors have a name (the first field) and a list of non-numerical strings
lefse
conda create -n lefse
conda install -n lefse -c bioconda lefse
conda activate lefse
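format_input.py hmp_aerobiosis_small.txt hmp_aerobiosis_small.in -c 1 -s 2 -u 3 -o 1000000
run_lefse.py hmp_aerobiosis_small.in hmp_aerobiosis_small.res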
plot_res.py hmp_aerobiosis_small.res hmp_aerobiosis_small.png
plot_cladogram.py hmp_aerobiosis_small.res hmp_aerobiosis_small.cladogram.png --format png
docker run -it biobakery/lefse bash

Leave a Comment

Your email address will not be published. Required fields are marked *

Scroll to Top