# to be used with wavesurfer tool
# http://www.speech.kth.se/wavesurfer
# 
# You have to create a label file and an f0 file with wavesurfer
# and save both in "ESPS" format. Then you can run this script,
# giving the f0 file as the first and the label file as the second argument.
# The resulting pho-file will be written to stdout.
#
# arguments: file.f0 file.label
# the label file should be formatted like this:
# first field: start time; third field: phon
# example:
#
#  signal test.sd
#  separator ;
#  nfields 1
#  #
#     0.255552   -1 I
#     0.351947   -1 n
#     0.451644   -1 s
#     0.522950   -1 i:
#     0.652624   -1 b
#     0.705443   -1 n
#  ...

#
# version 0.3
#
# changelog:
# 0.3 15/7/05 forgot the "o"
# 0.2 14/7/05 added Greek phonemes (gr2)

# adaptations should be made to:
# - the conformity between the personal sampa-dialect and the respective
#   mbrola diphone database (step 1)
# - the spacing of the calculated f0-values (step 2)
# - consistent with that spacing, the precision of all time
#   specifications (all steps)


# step 1: read in the label-file,
# control conformity to mbrola input
# and calculate duration
#
# Output (<base>.f1), one line per record of the label file:
#   start-time <TAB> phon <space><TAB> duration
# The duration of a record is the start time of the following record minus
# its own start time, so a line can only be completed once the next record
# has been read. The last record has no successor; its line is closed in END
# with the most recently computed difference.
# Every input line is treated as a record, including any header lines
# (a non-numeric first field evaluates to 0).
awk '
{
    # gap between this record and the previous one
    # (t1 is still unset, i.e. 0, on the first record)
    time = $1 - t1;

    # finish the line that was opened for the previous record
    if (NR > 1)
        printf("%.2f\n", time);

    # open the line for the current record
    printf("%.2f\t%s \t", $1, $3);
    t1 = $1;
}
END {
    printf("%.2f\n", time);
}
' "$2" > "$(basename "$1" .sd).f1"

# step 2: extract times and f0-vals from sd-file
#
# Writes one line per input line to <base>.f2:
#   time <TAB> value
# where time is the line number times 0.01 and value is the product of the
# first two fields, truncated to an integer by %d.
# NOTE(review): presumably field 1 is f0 and field 2 the voicing flag, so
# unvoiced frames become 0 - confirm against the ESPS f0 format.
awk '{ printf "%.2f\t%d\n", FNR * 0.01, $1 * $2 }' < "$1" > "$(basename "$1" .sd).f2"

# step 3: join them
#
# Attach each label record (<base>.f1) to the f0 frame (<base>.f2) that has
# the same time stamp; frames without a label are passed through unchanged.
# The output (<base>.f3) has the same layout as "join -a1 f2 f1":
#   time f0 [phon duration]
#
# join(1) requires both inputs to be sorted as strings on the key, but the
# time stamps are in numeric order ("10.00" sorts before "9.99"), so with
# join every label from 10 s onwards was dropped. The lookup below has no
# ordering requirement.
awk '
FILENAME == ARGV[1] {
    # label file: remember the fields after the key, per time stamp
    # (several labels may round to the same time stamp)
    rest = "";
    for (k = 2; k <= NF; k++)
        rest = rest " " $k;
    labels[$1, ++count[$1]] = rest;
    next;
}
{
    # f0 frame: rebuild it with single-space separators
    frame = $1;
    for (k = 2; k <= NF; k++)
        frame = frame " " $k;

    if ($1 in count) {
        for (n = 1; n <= count[$1]; n++)
            print frame labels[$1, n];
    } else {
        print frame;
    }
}
' "$(basename "$1" .sd).f1" "$(basename "$1" .sd).f2" > "$(basename "$1" .sd).f3"

# step 4: collect the f0vals
#
# Input (<base>.f3): "time f0 [phon duration]" per frame.
# Output (<base>.f4), one line per phon:
#   voiced phon:   phon <TAB> duration-in-ms <TAB> f0 f0 f0 ...
#   unvoiced phon: phon <TAB> duration-in-ms
# The f0 values of all frames up to the next phon are appended to the line
# of the current voiced phon. Frames whose third field is in neither list
# are treated as continuation frames.
awk '
BEGIN {
    # The caret in E^ a^ 9^ must be escaped: an unescaped ^ inside an
    # extended regular expression is an anchor, so these three symbols
    # could never match before.
    voiced = "^(b|d|D|g|G|v|V|z|Z|m|n|N|l|L|R|r|6|j|J|aI|OY|aU|@|i:|i|I|y:|y|Y|e:|e|E|E:|2:|9|u:|u|U|o:|o|O|a:|a|A|E\\^|a\\^|9\\^)$";
    unvoiced = "^(p|t|k|q|T|f|s|S|x|X|C|c|h|pf|ts|tS|_)$";
    v = 0;    # 1 while the line of a voiced phon is still open
}
{
    # Durations are rounded with %.0f rather than truncated with %d:
    # e.g. 2.01 * 1000 evaluates to just below 2010 in floating point.
    if ($3 ~ voiced) {
        if (v == 1)
            printf("\n");
        printf("%s\t%.0f\t", $3, $4 * 1000);
        v = 1;
    } else if ($3 ~ unvoiced) {
        if (v == 1) {
            printf("\n");
            v = 0;
        }
        printf("%s\t%.0f\n", $3, $4 * 1000);
    }

    # inside a voiced phon every frame contributes its f0 value
    if (v == 1)
        printf("%d ", $2);
}
END {
    # terminate the line of a trailing voiced phon
    if (v == 1)
        printf("\n");
}
' "$(basename "$1" .sd).f3" > "$(basename "$1" .sd).f4"

# step 5: write mbrola-file
#
# Reads <base>.f4 and writes the pho lines to stdout.
# Lines whose third field contains a digit carry f0 values:
#   phon duration f0_1 f0_2 ... f0_n
# They are written as "phon  duration" followed by one "  position f0" pair
# for every non-zero f0 value, where position is the index of the value as
# a percentage of n (0 for the first value). All other lines are copied
# unchanged.
awk '
$3 ~ /[0-9]/ {
    n = NF - 2;    # number of f0 values on this line
    printf("%s  %d", $1, $2);
    for (i = 0; i < n; i++) {
        if ($(3 + i) != 0) {
            # i * 100 / n instead of i * (100 / n): an exact percentage
            # must not be truncated by %d because 100 / n was rounded.
            printf("  %d %d", i * 100 / n, $(3 + i));
        }
    }
    printf("\n");
    next;
}
{
    print;
}
' "$(basename "$1" .sd).f4"

# clean up
# Remove only the four intermediate files written by steps 1-4. The former
# ".f?" glob also matched unrelated files with the same base name, such as
# <base>.f0.
for n in 1 2 3 4; do
    rm -f "$(basename "$1" .sd).f$n"
done