1
0
mirror of https://git.FreeBSD.org/ports.git synced 2024-12-12 03:00:28 +00:00

biology/haplohseq: Switch to distfile with example data

Add script to run on example input
Minor fix to report inadequate VCF input
This commit is contained in:
Jason W. Bacon 2019-11-17 15:00:13 +00:00
parent fbc9e5d03a
commit c8eb3c6e6c
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=517828
8 changed files with 131 additions and 9 deletions

View File

@ -2,10 +2,9 @@
PORTNAME=	haplohseq
DISTVERSION=	0.1.2
PORTREVISION=	1
CATEGORIES=	biology
# No DISTNAME override: the default ${PORTNAME}-${DISTVERSION} name is what
# the haplohseq-0.1.2.tar.xz entry in distinfo expects.
MASTER_SITES=	https://acadix.biz/Ports/distfiles/

MAINTAINER=	jwb@FreeBSD.org
COMMENT=	Identify regions of allelic imbalance

LICENSE=	MIT

LIB_DEPENDS=	libboost_system.so:devel/boost-libs
RUN_DEPENDS=	R-cran-optparse>0:devel/R-cran-optparse

# python:2.7 is needed by the example scripts, which are patched to invoke
# python2.7; tar:xz matches the .tar.xz distfile recorded in distinfo.
USES=		compiler:c++11-lang gmake localbase:ldflags python:2.7 tar:xz

OPTIONS_DEFINE=	EXAMPLES

MAKEFILE=	makefile
WRKSRC=		${WRKDIR}/haplohseq_source
LDFLAGS+=	-lpthread
MAKE_ENV=	STRIP=${STRIP}
# The packing list lives in pkg-plist (which already lists bin/haplohseq),
# so no PLIST_FILES entry is set here.
SUB_FILES=	haplohseq-example

# With EXAMPLES enabled, install the haplohseq-example wrapper generated from
# SUB_FILES and copy the example, ldmap and scripts trees into EXAMPLESDIR.
post-install-EXAMPLES-on:
	${INSTALL_SCRIPT} ${WRKDIR}/haplohseq-example ${STAGEDIR}${PREFIX}/bin
	(cd ${WRKSRC} && ${COPYTREE_SHARE} "example ldmap scripts" ${STAGEDIR}${EXAMPLESDIR})

# Run the port's test driver from the build/test directory of the work tree.
do-test:
	@(cd ${WRKSRC}/build/test && ${FILESDIR}/run-tests)

View File

@ -1,3 +1,3 @@
TIMESTAMP = 1567118170
SHA256 (haplohseq_source-0.1.2.zip) = 35f8fe6718020e6eacdc309d28fc5dd3cc7c6e8400da1d962fdf6fb52d4e90c5
SIZE (haplohseq_source-0.1.2.zip) = 63620
TIMESTAMP = 1573660978
SHA256 (haplohseq-0.1.2.tar.xz) = 0e7474f6af6b41c4b38f5f4549344714ec022e2adfc4aa9d00e0d420483e260a
SIZE (haplohseq-0.1.2.tar.xz) = 8108896

View File

@ -0,0 +1,37 @@
#!/bin/sh -e
##########################################################################
# Script description:
# Run haplohseq example
#
# https://sites.google.com/site/integrativecancergenomics/software/haplohseq
#
# History:
# Date Name Modification
# 2019-11-13 Jason Bacon Begin
##########################################################################
# Print a one-line usage summary to stderr and terminate with exit status 1.
usage()
{
    # Pass $0 as an argument instead of embedding it in the format string,
    # so a '%' or backslash in the script path is printed literally.
    printf 'Usage: %s directory\n' "$0" >&2
    exit 1
}
##########################################################################
# Main
##########################################################################
# Exactly one argument is required: the directory to create and populate.
if [ "$#" -ne 1 ]; then
    usage
fi

dir="$1"

# Refuse to touch anything that already exists at the destination path.
if [ -e "$dir" ]; then
    # The user-supplied name is passed as a printf argument, never as part
    # of the format string, so '%' or '\' in it cannot corrupt the message.
    printf "'%s' already exists. Please remove it or specify another.\n" "$dir" >&2
    exit 1
fi

# Copy the installed examples tree to the new directory, then run the
# bundled example from inside the copy's example/ subdirectory.
# (The script runs under "sh -e", so a failing step aborts the run.)
cp -R "%%EXAMPLESDIR%%" "$dir"
cd "$dir/example"
sh ./example_run.sh

View File

@ -0,0 +1,24 @@
--- example/example_run.sh.orig 2019-11-13 15:45:57 UTC
+++ example/example_run.sh
@@ -1,4 +1,4 @@
-#! /bin/bash
+#!/bin/sh
# Example:
# Identify allelic imbalance (AI) given a tumor
@@ -6,13 +6,13 @@
# of the GATK. This involves the following 3 steps.
printf "STEP 1: PHASING 1KG HET SITES ...\n"
-python ../scripts/simple_phaser.py \
+python2.7 ../scripts/simple_phaser.py \
--ldmap ../ldmap/hg19.exome.ldmap \
--vcf example_input/tumor_exome.vcf \
-o example_output/tumor_exome
printf "\nSTEP 2: IDENTIFYING REGIONS OF AI ...\n"
-../haplohseq \
+haplohseq \
--vcf example_output/tumor_exome.hap.vcf \
--phased example_output/tumor_exome.hap \
--event_prevalence 0.1 \

View File

@ -0,0 +1,8 @@
--- scripts/ldmap.py.orig 2019-11-13 15:51:02 UTC
+++ scripts/ldmap.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python2.7
+
import argparse
import sys
import random

View File

@ -0,0 +1,8 @@
--- scripts/simple_phaser.py.orig 2019-11-13 15:52:02 UTC
+++ scripts/simple_phaser.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python2.7
+
import argparse
import random
import sys

View File

@ -0,0 +1,29 @@
--- src/FreqPhase.cpp.orig 2019-11-13 14:19:06 UTC
+++ src/FreqPhase.cpp
@@ -5,6 +5,7 @@
* Email: sanlucas@gmail.com
*/
+#include <sysexits.h>
#include "FreqPhase.h"
namespace haplohseq {
@@ -180,10 +181,17 @@ double FreqPhase::meanValue(const std::v
double FreqPhase::medianValue(const std::vector<double>& values) {
double median;
size_t size = values.size();
+
+ // We can probably detect this condition earlier while loading the VCF
+ if ( size == 0 ) {
+ std::cerr << "FreqPhase::medianValue(): values vector is empty." << std::endl;
+ std::cerr << "Make sure your VCF has all of GT:AD:DP in the FORMAT column." << std::endl;
+ exit(EX_DATAERR);
+ }
std::vector<double> tempFreqs(values);
sort(tempFreqs.begin(), tempFreqs.end());
- if (size % 2 == 0) {
+ if (size % 2 == 0) {
median = (tempFreqs[size / 2 - 1] + tempFreqs[size / 2]) / 2;
}
else {

View File

@ -0,0 +1,11 @@
bin/haplohseq
%%PORTEXAMPLES%%bin/haplohseq-example
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/example/example_input/tumor_exome.vcf
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/example/example_run.sh
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/ldmap/hg19.exome.ldmap
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/haplohseq_plot.R
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/ldmap.py
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/ldmap.py.orig
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/simple_phaser.py
%%PORTEXAMPLES%%%%EXAMPLESDIR%%/scripts/simple_phaser.py.orig
%%PORTEXAMPLES%%@dir %%EXAMPLESDIR%%/example/example_output