Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 4 May 2019 15:09:10 +0000 (UTC)
From:      "Jason W. Bacon" <jwb@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r500812 - in head/biology/ddocent: . files
Message-ID:  <201905041509.x44F9AwT057411@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jwb
Date: Sat May  4 15:09:10 2019
New Revision: 500812
URL: https://svnweb.freebsd.org/changeset/ports/500812

Log:
  biology/ddocent: Upgrade to 2.7.8
  
  Reported by:    portscout

Added:
  head/biology/ddocent/files/patch-scripts_ReferenceOpt.sh   (contents, props changed)
Modified:
  head/biology/ddocent/Makefile
  head/biology/ddocent/distinfo
  head/biology/ddocent/files/ddocent-assembly-test
  head/biology/ddocent/files/patch-dDocent
  head/biology/ddocent/pkg-plist

Modified: head/biology/ddocent/Makefile
==============================================================================
--- head/biology/ddocent/Makefile	Sat May  4 13:00:03 2019	(r500811)
+++ head/biology/ddocent/Makefile	Sat May  4 15:09:10 2019	(r500812)
@@ -1,10 +1,9 @@
 # $FreeBSD$
 
-PORTNAME=	dDocent
+PORTNAME=		dDocent
 DISTVERSIONPREFIX=	v
-DISTVERSION=	2.2.25
-PORTREVISION=	1
-CATEGORIES=	biology java
+DISTVERSION=		2.7.8
+CATEGORIES=		biology java perl5 python
 
 MAINTAINER=	jwb@FreeBSD.org
 COMMENT=	Bash pipeline for RAD sequencing
@@ -32,29 +31,21 @@ RUN_DEPENDS=	unzip>=0:archivers/unzip \
 		bedtools>=2.26.0:biology/bedtools \
 		pear-merger>=0:biology/pear-merger \
 		vcflib>=0:biology/vcflib \
-		freebayes:biology/freebayes
+		freebayes:biology/freebayes \
+		fastp:biology/fastp
 
 USES=		perl5 python shebangfix
-SHEBANG_FILES=	dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
 USE_JAVA=	yes
 USE_GITHUB=	yes
+
+SHEBANG_FILES=	dDocent scripts/*.sh scripts/*.pl scripts/dDocent_filters
 GH_ACCOUNT=	jpuritz
 
 NO_BUILD=	yes
 NO_ARCH=	yes
 
-# These are on top of patch-dDocent, so don't apply them within the source
-# tree, or they'll get picked up by patch generators, and hard-code PREFIX.
-post-install:
-	${REINPLACE_CMD} -i '' \
-		-e 's|%%PREFIX%%|${PREFIX}|g' \
-		-e 's|%%JAVAJARDIR%%|${JAVAJARDIR}|g' \
-		-e 's|%%BASH%%|${LOCALBASE}/bin/bash|g' \
-		-e 's|python|${PYTHON_CMD}|g' \
-		${STAGEDIR}${PREFIX}/bin/dDocent
-
 do-install:
-	${MKDIR} ${STAGEDIR}${PREFIX}/bin
+	@${MKDIR} ${STAGEDIR}${PREFIX}/bin
 	${INSTALL_SCRIPT} \
 		${WRKSRC}/dDocent \
 		${WRKSRC}/*.sh \
@@ -64,5 +55,13 @@ do-install:
 		${WRKSRC}/scripts/*.pl \
 		${WRKSRC}/scripts/dDocent_filters \
 		${STAGEDIR}${PREFIX}/bin
+
+# These are on top of patch-dDocent, so don't apply them within the source
+# tree, or they'll get picked up by patch generators, and hard-code PREFIX.
+post-install:
+	@${REINPLACE_CMD} -i '' \
+		-e 's|SHELL=bash|SHELL=${LOCALBASE}/bin/bash|g' \
+		-e 's|python|${PYTHON_CMD}|g' \
+		${STAGEDIR}${PREFIX}/bin/dDocent
 
 .include <bsd.port.mk>

Modified: head/biology/ddocent/distinfo
==============================================================================
--- head/biology/ddocent/distinfo	Sat May  4 13:00:03 2019	(r500811)
+++ head/biology/ddocent/distinfo	Sat May  4 15:09:10 2019	(r500812)
@@ -1,3 +1,3 @@
-TIMESTAMP = 1520345850
-SHA256 (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 903c3010b29b2ca95f7fe6099925948e4d3f21655668caff653df97dfa7ecf44
-SIZE (jpuritz-dDocent-v2.2.25_GH0.tar.gz) = 336804
+TIMESTAMP = 1556888100
+SHA256 (jpuritz-dDocent-v2.7.8_GH0.tar.gz) = 02aa297f602b55587782c959379cada8d8b0570973da75eb9f5786089a3ed485
+SIZE (jpuritz-dDocent-v2.7.8_GH0.tar.gz) = 345571

Modified: head/biology/ddocent/files/ddocent-assembly-test
==============================================================================
--- head/biology/ddocent/files/ddocent-assembly-test	Sat May  4 13:00:03 2019	(r500811)
+++ head/biology/ddocent/files/ddocent-assembly-test	Sat May  4 15:09:10 2019	(r500812)
@@ -135,15 +135,11 @@ rm *rem*
 { set +x; } 2>/dev/null
 pause
 
-rm -f Rename_for_dDocent.sh     # Always get the latest
-set -x
-curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/Rename_for_dDocent.sh
-more Rename_for_dDocent.sh
 { set +x; } 2>/dev/null
 pause
 
 set -x
-bash Rename_for_dDocent.sh SimRAD.barcodes
+Rename_for_dDocent.sh SimRAD.barcodes
 { set +x; } 2>/dev/null
 
 set -x
@@ -312,20 +308,11 @@ cd-hit-est -i rainbow.fasta -o referenceRC.fasta -M 0 
 { set +x; } 2>/dev/null
 pause
 
-rm -f remake_reference.sh
-set -x
-curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/remake_reference.sh
-more remake_reference.sh
-#fix_bash_path remake_reference.sh
-
-bash remake_reference.sh 4 4 0.90 PE 2
+remake_reference.sh 4 4 0.90 PE 2
 { set +x; } 2>/dev/null
 pause
 
-rm -f ReferenceOpt.sh
-set -x
-curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/ReferenceOpt.sh
-more ReferenceOpt.sh
+ReferenceOpt.sh
 
 bash ReferenceOpt.sh 4 8 4 8 PE 16
 { set +x; } 2>/dev/null
@@ -357,7 +344,6 @@ printf "Bonus Section: Optimize reference assemblies? 
 read bonus
 if [ 0$bonus = 0y ]; then
     set -x
-    curl -L -O https://raw.githubusercontent.com/jpuritz/dDocent/master/scripts/RefMapOpt.sh
     { set +x; } 2>/dev/null
     printf "Running dDocent to trim reads.\n"
     pause
@@ -372,7 +358,7 @@ no
 no
 bacon@uwm.edu
 EOM
-    bash RefMapOpt.sh 4 8 4 8 0.9 64 PE
+    RefMapOpt.sh 4 8 4 8 0.9 64 PE
     { set +x; } 2>/dev/null
     pause
     more mapping.results

Modified: head/biology/ddocent/files/patch-dDocent
==============================================================================
--- head/biology/ddocent/files/patch-dDocent	Sat May  4 13:00:03 2019	(r500811)
+++ head/biology/ddocent/files/patch-dDocent	Sat May  4 15:09:10 2019	(r500812)
@@ -1,44 +1,13 @@
---- dDocent.orig	2018-04-20 00:10:34 UTC
+--- dDocent.orig	2019-05-03 12:59:20 UTC
 +++ dDocent
-@@ -1,6 +1,9 @@
+@@ -1,5 +1,6 @@
  #!/usr/local/bin/bash
  export LC_ALL=en_US.UTF-8
- 
 +# GNU Parallel uses $SHELL and has issues with [t]csh
-+export SHELL=%%BASH%%
-+
- ##########dDocent##########
- VERSION='2.2.25'
- #This script serves as an interactive bash wrapper to QC, assemble, map, and call SNPs from double digest RAD (SE or PE), ezRAD (SE or PE) data, or SE RAD data.
-@@ -27,15 +30,15 @@ do
- 	fi
- done
+ export SHELL=bash
  
--if find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null| grep -q 'trim' ; then
--	TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
-+if [ -e %%JAVAJARDIR%%/trimmomatic.jar ]; then
-+       TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
- 	else
-     echo "The dependency trimmomatic is not installed or is not in your" '$PATH'"."
-     NUMDEP=$((NUMDEP + 1))
- 	fi
- 
--if find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | grep -q 'Tru' ; then
--	ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
-+if [ -e %%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa ]; then
-+       ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
- 	else
-     echo "The file listing adapters (included with trimmomatic) is not installed or is not in your" '$PATH'"."
-     NUMDEP=$((NUMDEP + 1))
-@@ -80,6 +83,7 @@ FREEB=(`freebayes | grep -oh 'v[0-9].*' 
-         	exit 1
-         fi         	
- VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$' | sed 's/[a-z)]//')
-+	echo $VCFTV
- 	if [ "$VCFTV" -lt "10" ]; then
-         	echo "The version of VCFtools installed in your" '$PATH' "is not optimized for dDocent."
-         	echo "Please install at least version 0.1.11"
-@@ -89,7 +93,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '
+ ##########dDocent##########
+@@ -83,7 +84,7 @@ VCFTV=$(vcftools | grep VCF | grep -oh '[0-9]*[a-z]*)$
          elif [ "$VCFTV" -ge "12" ]; then
                  VCFGTFLAG="--max-missing"
  	fi
@@ -47,88 +16,58 @@
  	if [ "$BWAV" -lt "13" ]; then
          	echo "The version of bwa installed in your" '$PATH' "is not optimized for dDocent."
          	echo "Please install at least version 0.7.13"
-@@ -107,13 +111,12 @@ BTC=$( bedtools --version | mawk '{print
- 		exit 1	
- 	fi
- 		
--if ! awk --version | fgrep -v GNU &>/dev/null; then
-+if ! awk --version | fgrep GNU &>/dev/null; then
-          awk=gawk
-     else
-          awk=awk
- fi
+@@ -481,7 +482,7 @@ if [ "$SNP" != "no" ]; then
+         		if ( cov < cutoff) {x="mapped."i".bed";print $1"\t"$2"\t"$3 > x}
+         		else {i=i+1; x="mapped."i".bed"; print $1"\t"$2"\t"$3 > x; cov=0}
+         	}'
+-                ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos2 --memfree $MAXMemory -j 4 --no-notice "call_genos2 {} 2> /dev/null"
++                ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=1 --env call_genos2 --memfree $MAXMemory -j 4 --no-notice "call_genos2 {} 2> /dev/null"
+                 if [ -f "freebayes.error" ]; then
+                         echo -e "\n\n\nFreeBayes has failed when trying to finish a previously failed instance.  Memory and processor settings need to be drastically reconfigured"
+                         ERROR3=1
+@@ -505,7 +506,7 @@ if [ "$SNP" != "no" ]; then
+ 	
+ 	rm freebayes.error freebayes.log &> /dev/null
+ 	
+-	ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice "call_genos {} 2> /dev/null"
++	ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice "call_genos {} 2> /dev/null"
  
--
- if [ $NUMDEP -gt 0 ]; then
- 	echo -e "\nPlease install all required software before running dDocent again."
- 	exit 1
-@@ -291,9 +294,9 @@ echo "Using BWA to map reads."
-         	for i in "${NAMES[@]}"
-         	do
-         	if [ -f "$i.R2.fq.gz" ]; then
--        		bwa mem reference.fasta $i.R1.fq.gz $i.R2.fq.gz -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
-+        		bwa mem -L 20,5 -I $INSERT,$SD,$INSERTH,$INSERTL -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz $i.R2.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
-         	else
--        		bwa mem reference.fasta $i.R1.fq.gz -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
-+        		bwa mem -L 20,5 -t $NUMProc -a -M -T 10 -A $optA -B $optB -O $optO -R "@RG\tID:$i\tSM:$i\tPL:Illumina" reference.fasta $i.R1.fq.gz 2> bwa.$i.log | mawk '$6 !~/[2-9].[SH]/ && $6 !~ /[1-9][0-9].[SH]/' | samtools view -@$NUMProc -q 1 -SbT reference.fasta - > $i.bam 2>$i.bam.log
-         	fi
-         	samtools sort -@$NUMProc $i.bam -o $i.bam 
- 		mv $i.bam $i-RG.bam
-@@ -388,10 +391,10 @@ if [ "$SNP" != "no" ]; then
- 	}
- 	export -f call_genos
  
--	ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
-+	ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --env call_genos --memfree $MAXMemory -j $NUMProc --no-notice call_genos {}
- ####	
--	#ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
--	#ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
-+	#ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice --delay 1 freebayes -L bamlist.list -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10
-+	#ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --memfree $MAXMemory -j $FB1 --no-notice "samtools view -b -L mapped.{}.bed | freebayes -c -t mapped.{}.bed -v raw.{}.vcf -f reference.fasta -m 5 -q 5 -E 3 --min-repeat-entropy 1 -V --populations popmap -n 10"
+ 	if [ -f "freebayes.error" ]; then
+@@ -541,7 +542,7 @@ if [ "$SNP" != "no" ]; then
+ 			echo "Using FreeBayes to call SNPs again"
+ 			NumP=$(( $NUMProc / 4 ))
+ 			NumP=$(( $NumP * 3 ))
+-			ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null" 
++			ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=5 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null" 
+         fi
+     fi
  
+@@ -575,7 +576,7 @@ if [ "$SNP" != "no" ]; then
+             	NumP=$(( $NumP / 4 ))
+                 NumP=$(( $NumP * 3 ))
+ 			echo "Using FreeBayes to call SNPs again"
+-            ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | shuf | parallel --bar --halt now,fail=1 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
++            ls mapped.*.bed | sed 's/mapped.//g' | sed 's/.bed//g' | gshuf | parallel --bar --halt now,fail=1 --env call_genos --memfree $MAXMemory -j $NumP --no-notice "call_genos {} 2> /dev/null"
+         fi
+ 	fi
  
- 	rm mapped.*.bed 
-@@ -447,8 +450,8 @@ fi
+@@ -1132,6 +1133,8 @@ fi
  
- #Function for trimming reads using trimmomatic
- trim_reads(){
--	TRIMMOMATIC=$(find ${PATH//:/ } -maxdepth 1 -name trimmomatic*jar 2> /dev/null | head -1)
--    ADAPTERS=$(find ${PATH//:/ } -maxdepth 1 -name TruSeq2-PE.fa 2> /dev/null | head -1)
-+       TRIMMOMATIC=%%JAVAJARDIR%%/trimmomatic.jar
-+       ADAPTERS=%%PREFIX%%/share/trimmomatic/adapters/TruSeq2-PE.fa
- 
- 	if [ -f $1.R.fq.gz ]; then	
- 		java -Xmx2g -jar $TRIMMOMATIC PE -threads 2 -phred33 $1.F.fq.gz $1.R.fq.gz $1.R1.fq.gz $1.unpairedF.fq.gz $1.R2.fq.gz $1.unpairedR.fq.gz ILLUMINACLIP:$ADAPTERS:2:30:10 LEADING:20 TRAILING:20 SLIDINGWINDOW:5:10 $TW &> $1.trim.log
-@@ -747,7 +750,14 @@ else
+ if [[ "$OSTYPE" == "darwin"* ]]; then
+ 	NUMProc=( `sysctl hw.ncpu | cut -f2 -d " " `)
++elif [[ "$OSTYPE" == "FreeBSD" ]]; then
++	NUMProc=( `sysctl -n hw.ncpu` )
+ else
+ 	NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` ) 
  fi
- 
- #Tries to get number of processors, if not asks user
--NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` ) 
-+if [ `uname` = Linux ]; then
-+    NUMProc=( `grep -c ^processor /proc/cpuinfo 2> /dev/null` ) 
-+elif [ `uname` = FreeBSD ]; then
-+    NUMProc=( `sysctl -n hw.ncpu` ) 
-+else
-+    printf "Unsupported platform: `uname`\n"
-+    exit 1
-+fi
- NUMProc=$(($NUMProc + 0)) 
- 
- echo "dDocent detects $NUMProc processors available on this system."
-@@ -764,7 +774,15 @@ if [ $NUMProc -lt 1 ]; then
- fi
- 
+@@ -1154,6 +1157,9 @@ fi
  #Tries to get maximum system memory, if not asks user
--MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
-+if [ `uname` = Linux ]; then
-+    MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))G
-+elif [ `uname` = FreeBSD ]; then
-+    MAXMemory=`sysctl -n hw.realmem`
-+    MAXMemory=$((MAXMemory / 1073741824))G
-+else
-+    printf "Unsupported platform: `uname`\n"
-+    exit 1
-+fi
+ if [[ "$OSTYPE" == "darwin"* ]]; then
+ 	MAXMemory=0
++elif [[ "$OSTYPE" == "FreeBSD" ]]; then
++	MAXMemory=`sysctl -n hw.realmem`
++	MAXMemory=$((MAXMemory / 1073741824))G
+ else
+ 	MAXMemory=$(($(grep -Po '(?<=^MemTotal:)\s*[0-9]+' /proc/meminfo | tr -d " ") / 1048576))
  
- echo "dDocent detects $MAXMemory maximum memory available on this system."
- echo "Please enter the maximum memory to use for this analysis. The size can be postfixed with 

Added: head/biology/ddocent/files/patch-scripts_ReferenceOpt.sh
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/biology/ddocent/files/patch-scripts_ReferenceOpt.sh	Sat May  4 15:09:10 2019	(r500812)
@@ -0,0 +1,11 @@
+--- scripts/ReferenceOpt.sh.orig	2019-05-03 12:58:47 UTC
++++ scripts/ReferenceOpt.sh
+@@ -400,7 +400,7 @@ done
+ 
+ cut -f4 -d " " kopt.data > plot.kopt.data
+ gnuplot << \EOF
+-set terminal dumb size 120, 30
++set terminal dumb size 80, 30
+ set autoscale
+ unset label
+ set title "Histogram of number of reference contigs"

Modified: head/biology/ddocent/pkg-plist
==============================================================================
--- head/biology/ddocent/pkg-plist	Sat May  4 13:00:03 2019	(r500811)
+++ head/biology/ddocent/pkg-plist	Sat May  4 15:09:10 2019	(r500812)
@@ -1,6 +1,5 @@
 bin/ErrorCount.sh
 bin/RefMapOpt.sh
-bin/ReferenceOpt.hyb.sh
 bin/ReferenceOpt.sh
 bin/Rename_SequenceFiles.sh
 bin/Rename_for_dDocent.sh



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905041509.x44F9AwT057411>