#!/bin/bash

function doStats4PairedEnd
{
  echo $doStats4PairedEnd
  iFileN=$1
  oFileN=$2
  echo 'For pairend read, the shell only format the result:'
  echo ""   
  cat $iFileN | tr '\r' '\n' | grep 'Reads:\|Sub0\|Pairs\|Single' | sed 's/, $/, \$/g' |\
  tr -cd "[:print:]\t\n" | tr '\$' '\n' | tr ',' '\t' | sed 's/^\t//g'  | tr '\t' ' '
}

if [ $# != 2 ]; then
	echo "This SHELL/R script sum the total mapping statistics for PerM's log."	
	echo "after multiple reads sets are mapped." 
	echo "Ex: After you use PerM ref.fa reads.fa -v 5 | tee my.log"
	echo "Use the script with command  ./PerMStats.sh my.log mapStats.txt"
	echo "The 1st ptr is the input log file."
	echo "The 2st ptr is the ouput file"	
	exit
fi

iFileN=$1
oFileN=$2

bPair=$(grep 'Pairs' $iFileN | wc -l)
if [[ $bPair == '0' ]]; then
	tmpFileN='tmp.tmp'
	bVersion23=$(grep 'Kept' $iFileN | wc -l)
	bVersion27=$(grep 'MultiMappped>' $iFileN | wc -l)
	if [[ $bVersion23 == '0' ]]; then
		#selected="1,2,3,5,7,9,12,14,16,18,20,22,24,26"
		# cut -f $selected
		cat $iFileN | tr '\r' '\n' | grep 'Read#\|Sub0' | sed 's/, $/, \$/g' | tr '\n' ',' | tr -cd "[:print:]\t\n" | tr '\$' '\n' | tr -d ' ' | tr ',' '\t' | sed 's/^\t//g' > $tmpFileN
		mv $tmpFileN $oFileN
		exit
	elif [[ $bVersion27 == '0' ]]; then
		selected="1,4,6,8,10,13,15,17,19,21,23,25,27,29"
		cat $iFileN | tr '\r' '\n' | grep 'Reads:\|Sub0' | sed 's/, $/, \$/g' | tr '\n' ',' | tr -cd "[:print:]\t\n" | tr '\$' '\n' | tr -d ' ' | tr ',' '\t' | sed 's/^\t//g' | cut -f $selected > $tmpFileN
	else
		selected="1,4,6,8,10,12,15,17,19,21,23,25,27,29,31"
		cat $iFileN | tr '\r' '\n' | grep 'Reads:\|Sub0' | sed 's/, $/, \$/g' | tr '\n' ',' | tr -cd "[:print:]\t\n" | tr '\$' '\n' | tr -d ' ' | tr ',' '\t' | sed 's/^\t//g' | cut -f $selected > $tmpFileN
	fi 	
	if test -s $tmpFileN; then 				
R --slave --vanilla --no-save  <<EEE
	setwd("$PWD")
	data=read.table("$tmpFileN", header=FALSE)   
	colNum=dim(data)[2]
	colName = c("ReadsFileN", "Filtered", "Kept", "Mapped", "MultiMapped", "MutiMapped>T")
	colName = c(colName, paste("Sub", 0:(colNum - length(colName)-1), sep=''))
	names(data) = colName    
	total=c("Total", apply(data[,2:colNum],2, sum))
	#rbind(data,total)
	#print(data)
	print(total)
    write.table(data, "$oFileN")    
EEE
	\rm $tmpFileN
	fi
else 
	doStats4PairedEnd $iFileN $oFileN
fi 

