|
２．音源識別アルゴリズム
|
$B?^#1$N:8?^$N$h$&$K%^%$%/%m%[%s%"%l!<$KBP$7$F@5LLJ}8~$+$i2;@l9g$r9M$($k!#$3$3$G9b2;u67$K$*$$$F2;8;J}8~$r?dDj$9$k$3$H$,$G$-$F$b!"OC

図１ 音源識別アルゴリズム
$B?^#2$Kr7o$r<($9!#@5LLJ}8~$KL\(B
的音源、側面に雑音源が存在する状況を想定する。この実験条件下においてシ
$B%s%0%k%^%$%/%m%[%s$*$h$S%^%$%/%m%[%s%"%l!<$rMQ$$$F@5LLJ}8~$K%S!<%`%U%)!<(B
$B%_%s%0$7$?$H$-$N!"3F(BSNR$B$K$*$1$k2;8;<1JL@-G=$rI>2A$7$?!#$^$?!"2;@<$KBP(B
$B$7$F$O(BIPA(Imformation-technology Promotion Agency) 1998$BG/EY$N2;6A%b%G(B
$B%k$rMQ$$$F2;@

$B?^#2!'
|
表１： 実験条件
| マイクロホンアレー |
素子数14, 素子間隔2.83 cm |
| Beamformer |
遅延和アレー |
| サンプリング周波数 |
16 kHz |
| フレーム長 |
32 msec.（ハミング窓） |
| 特徴ベクトル |
MFCC, ΔMFCC, Δパワー |
| $B<1JLMQ(BHMM |
Gaussian mixture$B7?(B |
| 音響モデル数 |
音声 1モデル
非音声 1モデル |
| $B2;@<(BDB |
ATR$B2;@<%G!<%?%Y!<%9(BSetA |
| 音声モデル学習 |
男女各8名 400語 |
| $BHs2;@<(BDB |
RWCP-DB |
| 非音声モデル学習 |
$B4D6-2;(B92$B
|
| $B%F%9%H(B($B%*!<%W%s(B) |
$B2;@<(B: MHT$B2;1$%P%i%s%9(B216$B8l(B
$BHs2;@<(B: $B4D6-2;(B92$B
|
| 音響伝達特性 |
RWCP-DB |
| 残響時間 |
0.0, 0.3, 1.3 sec |
| SNR |
-5, 0, 5, 10, 15, 20, 25, 30, clean dB |
|
$BK\2A$9$k!#$^$?2;@2A$r9T$&!#(B
図３(a)にシングルマイクロホン、(b)にマイクロホンアレーを用いた場合
$B$Ne$7$F$$$k!#$3$l(B
$B$h$j%^%$%/%m%[%s%"%l!<$NM-8z@-$,3NG'$G$-$k!#$5$i$K!"2;8;<1JLN($O(BSNR$B$,(B
0dB$B$N7k2L$H(B20dB$B$N7k2L$O$[$H$s$IJQ$o$i$J$$$3$H$+$i!"(BSNR$B$,Dc$$4D6-2<$G$b(B
$B9b@:EY$J2;8;<1JL$,2DG=$G$"$k$3$H$,$o$+$k!#$7$+$7!$(BSNR$B$,(B0dB$B$G$"$k>l9g$N(B
$B2;@l9g$h$j$bBg$-$/Nt2=$7$F$$$k!#:#8e!"$h$j9b2;2A$7$?>l9g!"9b;D6A2<(B($B;D6A2DJQ<<(B
[T60=1.3 sec.])$B$K$*$$$FDc(BSNR$B$N4D6-$G$b9b<1JL@-G=$G$"$k$3$H$+$i!"2;8;$N(B
$B0LCV$,M=$a$o$+$C$F$$$k$J$i$P!"$=$N2;8;$,OC


(a) A single microphone
|

(b) A microphone array
|
$B?^(B3 $B2;8;<1JLN($H2;@
$BK\8&5f$G$OOCl9g$K$D$$$F8!F$$9$kM=Dj$"$k!#(B
[$B]
RWCP Sound Scene Database in Real Acoustical Environments
Copyright (c) 1998-2001 Satoshi Nakamura, and Takanobu Nishiura, ATR Spoken Language Translation Research Laboratories.