Mean sequence length
# Mean sequence length: total characters on non-header lines divided by the
# number of lines containing ">".
awk '
  />/ { seqs++; next }
      { bases += length() }
  END { print bases / seqs }
' file.fasta
# Rewrite each header as <part1>_<part2>_<file-prefix>, where part1/part2 are
# the first two "_"-separated pieces of the header's first field and
# file-prefix is the input file name up to its first "_".
# All other lines are passed through unchanged.
awk '
  /^>/ {
    split(FILENAME, fname, "_")
    split($1, id, "_")
    print id[1] "_" id[2] "_" fname[1]
    next
  }
  { print }
' viral_seqs.fasta
# Keep only the first whitespace-separated field of every line.
#   stage 1: replaces a leading ">" with itself (no-op)
#   stage 2: prefixes the first field with ">" (lines with no field pass through)
#   stage 3: strips one leading ">" again
sed 's/^>/>/g' test.fasta \
  | awk '{ split($0, field, " "); if (field[1]) print ">" field[1]; else print }' \
  | sed 's/^>//g' \
  > res.fasta
Note: I couldn't make this work with a single `>` because awk was adding one to every line.
# List zero-size entries directly inside the current directory.
# The explicit "." start path is required by BSD/macOS find and is what GNU
# find assumes by default.
find . -maxdepth 1 -size 0 -print
# Extract the records whose header (without ">") is listed in ids.txt.
# First file: remember every id as ">id". Second file: when a header is in the
# list, hold it and print it together with the line that follows it.
awk '
  NR == FNR    { wanted[">" $0]; next }
  hdr          { print hdr ORS $0; hdr = "" }
  $0 in wanted { hdr = $0 }
' ids.txt contig_sequences.fa > output.fa
# Print, for each matching line, only the part up to the last "virus".
grep --only-matching -e '.*virus' in_file.fasta > out_file.fasta
# Split a multi-FASTA into one file per record, named <header text>.fasta.
# The header line itself is the first line written to each file (the previous
# version wrote a stray line containing a single space before it).
awk '
  /^>/ { s = substr($0, 2) ".fasta" }
  s    { print > s }
' multi.fasta
# Count the lines containing ">" in each file given.
# NOTE(review): the unquoted $1 is only meaningful inside a script or function;
# when it is unset it expands to nothing and only file_name is searched — confirm intent.
grep -c ">" $1 file_name
# Count header lines (lines starting with ">").
# Plain ASCII quotes are required: with typographic quotes the shell treats
# ">" as an output redirection instead of part of the pattern.
grep -c '^>' filename #faster version
# For every non-header line print "<current header without '>'> <line length>".
awk '
  /^>/ { name = substr($0, 2); next }
       { print name, length($0) }
' file.fas > lenghts.txt
# Same idea with seqkit: tabular output of name and length, with a header line.
seqkit fx2tab -l -n -H foo.fasta
# For each header line print the full header plus its 4th and 6th
# "_"-separated fields, tab-separated, paged through more.
awk -F _ -v OFS='\t' '/^>/ { print $0, $4, $6 }' contigs.fasta | more
# Split a FASTA file into one file per record, named "<header text>.fasta".
# With -F '>' the text after the leading ">" is field $2.
# Sequence lines are written INTO the current file (the previous version
# printed the file name to stdout instead); each file is closed when the next
# header starts, so only one output file is open at a time.
awk -F '>' '
  /^>/ {
    if (F != "") close(F)
    F = sprintf("%s.fasta", $2)
    print > F
    next
  }
  F != "" { print > F }
' < FASTAFILE.fa
# Split a FASTA file into 1.fasta, 2.fasta, ... (one file per record).
# Replace inputFile with the real path; the literal "<inputFile>" form is a
# shell syntax error. The previous output file is closed at each new header,
# and lines before the first header are skipped instead of being written to
# an empty file name.
awk '
  /^>/ {
    if (s != "") close(s)
    s = (++d) ".fasta"
  }
  s != "" { print > s }
' < inputFile
# Replace every space with "_" in place (applies to all lines, not only headers).
sed -i 's/ /_/g' FASTA_file
# Append ".csv" to every file named <digit>file (0file ... 9file).
for entry in [0-9]file
do
  mv "$entry" "${entry}.csv"
done
# Print only the matches of: word characters, a literal ".", one digit from
# 1 to 3, then more word characters.
# [1-3] is a range; the previous [1:3] matched "1", ":" or "3".
grep -oh '\w*\.[1-3]\w*' file_name
Regex explained:
`\w*` = any run of word characters (acts as a wildcard), followed by a literal `.` and a digit from 1 to 3, followed by another `\w*` wildcard.
# Drop records whose header's first field has already been seen; the first
# occurrence of each is kept.
awk '
  /^>/ { keep = !($1 in seen); seen[$1] = 1 }
  keep
' input.fa > output.fa
# Keep only the CONTIG and Sbjct (sequence) lines.
grep -e 'CONTIG' -e 'Sbjct' input_file.txt > output_file.fa
# Remove the "Sbjct:<digits>" label; prints to stdout (use `sed -i.bak` to edit the file itself).
sed -e 's/Sbjct:[0-9]*//g' input_file.fa
# Keep only the second column.
awk '{ print $2 }' input_file.fa > output_file.fasta
- e - go to the end of the current word.
- E - go to the end of the current WORD.
- b - go to the previous (before) word.
- B - go to the previous (before) WORD.
- w - go to the next word.
- W - go to the next WORD.
# Show every resource limit currently in effect for this shell.
ulimit -a
# Append ".fa" to the name of every entry in the current directory.
for entry in *
do
  mv -- "$entry" "${entry}.fa"
done
# Change the extension of every .txt file to .fa.
for f in *.txt
do
  new_name="${f%.txt}.fa"
  mv -- "$f" "$new_name"
done
The part ${f%.txt}
uses shell parameter expansion to remove the .txt
suffix from the filename.
# Rename every .txt file so that it ends in _2019.txt
# (e.g. report.txt -> report_2019.txt).
for filename in *.txt
do
  # When nothing matches, the glob stays literal; skip instead of failing in mv.
  [ -e "$filename" ] || continue
  # Strip the .txt suffix with parameter expansion (no basename call needed).
  name=${filename%.txt}
  mv -- "$filename" "${name}_2019.txt"
done
Now the files are saved as ${name}_2019.txt
# Undo the _2019 renaming: report_2019.txt -> report.txt.
for filename in *_2019.txt
do
  # When nothing matches, the glob stays literal; skip instead of failing in mv.
  [ -e "$filename" ] || continue
  # Strip the _2019.txt suffix with parameter expansion.
  name=${filename%_2019.txt}
  mv -- "$filename" "${name}.txt"
done
# Cut each line right after the first "virus", then "viroid", then "satellite"
# (the three substitutions are applied in that order to every line).
sed \
  -e 's/virus.*/virus/' \
  -e 's/viroid.*/viroid/' \
  -e 's/satellite.*/satellite/' \
  file.fasta > output_file.fasta
# For each *.fa file, replace every header line with ">" + the file name
# (without its .fa/.fasta extension) and write the result to
# outdir/changed_<file>.
mkdir -p outdir
for FILE in *.fa;
do
  # When nothing matches, the glob stays literal; skip it.
  [ -e "$FILE" ] || continue
  # The name is derived once per file from a copy of FILENAME (FILENAME itself
  # is left untouched); the dot in the extension is matched literally.
  awk '
    FNR == 1 { name = FILENAME; sub(/\.fa(sta)?$/, "", name) }
    /^>/     { printf(">%s\n", name); next }
             { print }
  ' "$FILE" > "outdir/changed_${FILE}"
done
#!/bin/bash
# Print the 4th line of every .out file in the current directory.
for FILE in *.out;
do
  # When nothing matches, the glob stays literal; skip it.
  [ -e "$FILE" ] || continue
  sed -n 4p "$FILE";
done
# Pull the text between <Textseq-id_accession> tags: take the matched element,
# keep what follows the first ">", then what precedes the next "<".
grep -o "<Textseq-id_accession>.*</Textseq-id_accession>" file.xml \
  | cut -d '>' -f 2 \
  | cut -d '<' -f 1
https://github.com/crazyhottommy/bioinformatics-one-liners/blob/master/README.md
http://blog.shenwei.me/manipulation-on-fasta-sequence/
Getting the size in a human-readable way
# Total size of file_name in human-readable units.
# The placeholder is a single argument ("file name" would be read as two paths).
du -sh file_name
Good bash script example Bash-Beginners-Guide
- Create a new file
~/.zprofile
- Type there
source ~/.bash_profile
- Save and close
- Run a new terminal session
# Run the R script once for every line of file_with_lines.txt.
# $line is expanded unquoted, so a line with several words is passed as
# several arguments (and glob characters in it are expanded).
while IFS= read -r line
do
  Rscript --vanilla ~/folder/file.R $line
done < file_with_lines.txt
– 49-ejemplos-en-una-sola-linea
# Coloured prompt (user@host:cwd$) plus coloured, classified, human-readable ls.
PS1="\[\033[36m\]\u\[\033[m\]@\[\033[32m\]\h:\[\033[33;1m\]\w\[\033[m\]\$ "
CLICOLOR=1
LSCOLORS=ExFxBxDxCxegedabagacad
export PS1 CLICOLOR LSCOLORS
alias ls='ls -GFh'
#### change the prompt
# Alternatives tried (kept for reference):
#export PS1='==>>'
#export PS1='\u@\h: '
#export PS1='\u@ '
# Active prompt: user@ in light grey, current directory name in bold red,
# "$" in green, then back to light grey.
# Every colour escape is wrapped in \[ ... \] so bash does not count it towards
# the prompt width (the green escape was missing its opening \[).
export PS1='\[\033[0;37m\]\u@\[\033[1;31m\]\W\[\033[0;32m\]\$\[\033[0;37m\]'
export CLICOLOR=1
#export LSCOLORS=ExFxBxDxCxegedabagacad
export LSCOLORS=BxFxGxDxCxegedabagaced