|
$BB??t%/%i%9$KBP1~$9$k35G04V$N4X78$r9MN8$7$?0lHLJ*BNG'<1(B
$B>>ED(B $BM5;J(B
2011$BG/(B 2$B7n(B 8$BF|(B
1 $B$O$8$a$K(B
$B%*%V%8%'%/%H$d%7!<%s$H$$$C$?%+%F%4%j!<$NG'<1$O%3%s%T%e!<%?!<%S%8%g%s$NJ,(B
$BLn$K$*$$$F=EMW$J2]Bj$G$O$"$k$,!"G'<1BP>]$H$J$k%+%F%4%j$,Hs>o$KB?$$$3$H$d!"(B
$BF10l%+%F%4%jFb$NBP>]$N%"%T%"%i%s%9$NJQ2=$,6K$a$FBg$-$$$3$H$J$I$+$i!"Hs>o(B
$B$K:$Fq$JLdBj$H$J$C$F$$$k!#(B
$B$3$l$^$G$N8&5f$G$O!";k3PE*FCD'$,Hf3SE*Bg$-$/0[$J$k?tI4$N%+%F%4%j$NG'<1$,(B
$B
2 $BL\E*(B
$BK\8&5f$G$O!"(BVisual ontology[1]$B$rMQ$$$F!"35G04V$N4X78$rMxMQ$7(B
$B$?B??t%/%i%9$X$N2hA|J,N`$r9T$&$3$H$rL\E*$H$7$F$$$k!#(B
Ontology$B$H$O!"7W;;5!$K?M4V$,M}2r$7$F$$$k$h$&$JJ*;v$N4X78@-$rM}2r$5$;$k$3(B
$B$H$G$"$j!"(BVisual ontology$B$O$=$l$r;k3PE*FCD'$rMQ$$$F
3 $BDs0F<jK!(B
$B=hM}$NN.$l(B
- $B3X=,2hA|$*$h$S%F%9%H2hA|$+$i(BSIFT$B!"?'FCD'$rCj=P$9$k!#(B
- $B3X=,2hA|$N(BSIFT$B$+$i%3!<%I%V%C%/$r:n@.$9$k!#(B
$B$H(B $BNN0h$K$D$$$F!"(BBoK$B$H%+%i!<%R%9%H%0%i%`$r:n@.(B
$B$7!":G=*E*$J%Y%/%H%kI=8=$rF@$k!#(B
- $B3X=,2hA|$N%Y%/%H%kI=8=$rMQ$$$F!"3F35G04V$N5wN%$r7W;;$9$k!#(B
- SVM$B$K$h$j3X=,$r9T$&!#(B
- SVM$B$H35G04V$N5wN%$rMQ$$$FJ,N`$r9T$&!#(B
|
3.1 $BFCD'Cj=P(B
$BK\
3.1.1 $B?'FCD'(B
$B?'FCD'$O(BRGB$B?'6u4V$N%+%i!<%R%9%H%0%i%`$rMQ$$$k!#(B
3.1.2 SIFT
SIFT$B$O!"(BD.Lowe$B$K$h$j9M0F$5$l$?!"FCD'E@<~$j$N6I=j2hA|%Q%?!<%s$r(B128$B$d%9%1!<%kJQ2=$KITJQ$G$"$j!">HL@(B
$BJQ2=$d%"%U%#%sJQ49(B($B;kE@0\F0(B)$B$K$b4h7r$G$"$k!#(B
3.2 $B2hA|$N%Y%/%H%kI=8=(B
$B?'FCD'$O(B64$B?'$K8:?'$7$?(B64$BpJs$rL5;k$7$F$7$^$C$F$$$k$?$a!"(BSpatial Pyramid
matching$B$rMxMQ$9$k!#(B
$B$3$l$O2hA|$r%0%j%C%IJ,3d$7!"$=$l$>$l$NNN0h$+$i%R%9%H%0%i%`$r:n@.$9$k$H$$(B
$B$&$B$*$h$S(B $BNN0h$+$i$=$l$>$l%+%i!<(B
$B%R%9%H%0%i%`$H(BBoK$B$r:n@.$7!"$3$l$i$r7k9g$7$?$b$N$r2hA|$N%Y%/%H%kI=8=$H$9(B
$B$k!#(B
3.3 Visual ontology$B$N:n@.(B
$BK\8&5f$G$O=)4V$i$K$h$C$FDs0F$5$l$?(BVisual ontology[1]$B$rMQ$$$k!#(B
$B$^$:!"2hA|$N%Y%/%H%kI=8=$KBP$7$F!"3F2hA|(B $B$N3F%H%T%C%/(B $B$X$N5"B03NN((B
$B$r(BpLSA$B$K$h$j5a$a$k!#(BpLSA$B$O3NN(E*%/%i%9%?%j%s%0
$B$B$rMQ$$$F!"3F35G0$rI=$9%Y%/%H%kI=8=$r:n@.$9$k!#(B
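$B35G0$N%Y%/%H%k$O!$<0(B1$B$N$h$&$K!$$=$N35G0$KB0$9$k2hA|$NI=8=%Y%/%H%k$NJ?(B
$B6Q$GI=8=$5$l$k!#(B
$$ \boldsymbol{v}_c = \frac{1}{N_c} \sum_{i \in c} \boldsymbol{x}_i \qquad (1) $$
$B$?$@$7!"(B$N_c$ $B$O35G0(B $c$ $B$KB0$9$k2hA|$N?t!"(B$\boldsymbol{x}_i$ $B$O2hA|(B $i$ $B$NI=8=%Y%/%H%k$G$"$k!#(B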
$B35G04V$N5wN%$O!"35G0%Y%/%H%k4V$N(BJS$B%@%$%P!<%8%'%s%9$G7W;;$9$k!#(B
JS$B%@%$%P!<%8%'%s%9$O(B2$B$D$N3NN(J,I[4V$N5wN%<\EY$G$"$j!"$3$NCM$,>.$5$$$[$I(B
$BN`;w$9$k35G0$G$"$k$H$$$&$3$H$K$J$k!#(B
$B$3$N35G04V$N5wN%$r4X78@-$H$7$F(BVisual ontology$B$r
3.4 $BJ,N`(B
$BJ,N`4o$H$7$F(BSVM(Support Vector Machine)$B$rMQ$$$k!#(B
SVM$B$O65;U$"$j$NG'<1
$BG'<1$9$k%+%F%4%j!<$N?t$r(BN$B!"3F%+%F%4%j!<$NJ,N`4o$N=PNOCM$r(B
$B!"35G04V$N5wN%$r(B $B!"(Bg$B$r@5Dj?t$H$7$F!"DL>o$N(B
1-vs-rest$BJ,N`$O<0(B4$B!"(BVisual ontology$B$rMQ$$$?J,N`$O<0(B
5$B$N$h$&$K$J$k!#(B
4 $B<B83(B
4.1 $B%G!<%?%;%C%H(B
$BK\8&5f$G$O!"@-G=I>2A$K(B
ILSVRC2010(ImageNet Large Scale Visual Recognition Challenge
2010)1$B$N%G!<%?(B
$B%;%C%H$rMQ$$$k!#$3$N%G!<%?%;%C%H$O(BImageNet[2]$B$N2hA|$G9=@.$5$l$F$*$j!"(B1000$B
4.2 $BI>2AJ}K!(B
$BK\8&5f$G$O!"@-G=I>2A$K(BILSVRC2010$B$G;HMQ$5$l$kI>2AJ}K!$r:NMQ$9$k!#(B
ILSVRC2010$B$G$O!"3F2hA|Kh$K:GBg$G8^$D$N%i%Y%k$rJ,N`7k2L$H$7$F=PNO$7!"$=$l(B
$B$KBP$7$F0J2<$NFs$D$N4QE@$+$iI>2A$,$J$5$l$k!#(B
-
- Flat cost
- $B@52r$,4^$^$l$F$$$J$$3d9g(B
- Hierarchical cost
- $B@52r$N%+%F%4%j!<$H$I$N$/$i$$N%$l$F$$$k$+(B
|
$B%7%9%F%`A4BN$O!"$9$Y$F$N%F%9%H2hA|$KBP$9$k%(%i!<%9%3%"$NJ?6Q$GI>2A$5$l!"(B
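$B2hA|0lKg$4$H$NI>2A$O!"%7%9%F%`$N=PNO$9$k%i%Y%k$r(B
$l_j \ (j = 1, \dots, 5)$$B!"(B
$B2hA|$N@52r%i%Y%k$r(B
$gt$
$B$H$9$k$H!"0J2<$N<0(B6
$B$N$h$&$K$J$k!#(B
$$ e = \min_{j} d(l_j, gt) \qquad (6) $$
$B$3$3$G!"<0(B6$B$N(B
$d(x, y)$
$B$O!"(BFlat cost$B$H(BHierarchical cost$B$H$G$O7W;;J}K!$,0[$J$j!"(BFlat cost$B$G$O(B
$$ d(x, y) = \begin{cases} 0 & (x = y) \\ 1 & (x \neq y) \end{cases} \qquad (7) $$
Hierarchical cost$B$G$O(B
$$ d(x, y) = H(x, y) \qquad (8) $$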
$B$N$h$&$K5a$a$i$l$k!#(B
$B$?$@$7!"(BH(x,y)$B$O(BWordNet$B3,AX$G:G$b6a$$(Bx$B$H(By$B6&DL$NAD@h$^$G$N9b$5$rI=$9!#(B
$B$3$NI>2A$O%(%i!
5 $B<B837k2L(B
SVM$B$H(BVisual ontology$B$rMxMQ$7$?>l9g$NHf3S$r9T$&!#(B
5.1 $BA4BN$NI>2A(B
$B<B837k2L$rI=(B1$B$K<($9!#(B
$BI=(B 1:
$B
Method |
flat cost |
hierarchical cost |
SVM |
0.8912 |
9.2475 |
Visual ontology (g = 1) |
0.9950 |
10.5593 |
Visual ontology (g = 10) |
0.9950 |
10.5593 |
Visual ontology (g = 100) |
0.9025 |
9.4636 |
$B<0(B5$B$N(Bg$B$NCM$rJQ2=$5$;$F$$$/$H!"(B $B$G$O=E$_$,$[$\(B
$BC10L9TNs$H$J$C$F$7$^$$!"DL>o$N(B1-vs-rest$B$K$h$kJ,N`$HF1$87k2L$K$J$C$F$$$k!#(B
5.2 $B%+%F%4%j!2A(B
$B2A$r9T$&!#(B
Visual ontology$B$rMQ$$$?7k2L$N>e0L(B10$BC18l$N(Bflat cost$B!"(BSVM$B$G$N(Bflat cost$B$rI=(B
2$B$K<($9!#(B
$B?^(B1$B$OI=(B2$B$NC18l$N%F%9%H2hA|$N0lIt$G$"(B
$B$k!#(B
$BI=(B 2:
$B%+%F%4%j!
$B=g0L(B |
$BC18l(B |
flat cost |
flat cost(SVM) |
1 |
garden pink |
0.0537 |
0.0671 |
2 |
lunar crater |
0.0738 |
0.0738 |
3 |
odometer |
0.1007 |
0.0940 |
4 |
rapeseed |
0.1081 |
0.1081 |
5 |
wood anemone |
0.1224 |
0.1293 |
6 |
sand dune |
0.1275 |
0.1477 |
7 |
scanner |
0.1419 |
0.1622 |
8 |
yellow chamomile |
0.1477 |
0.1544 |
9 |
upright piano |
0.1486 |
0.1554 |
10 |
sunflower |
0.1554 |
0.1554 |
$B?^(B 1:
$BJ,N`@:EY$N9b$$%+%F%4%j!<$N2hA|(B
|
6 $B9M;!(B
$BDs0F2A$G$OI=(B1$B$K$"$k$h$&$K(BSVM$B$h$j$b0-$/(B
$B$J$k$H$$$&7k2L$K$J$C$?!#(B
$B860x$H$7$F$O!";k3PE*$KN`;w$9$k%+%F%4%j!<$KB0$9$k2hA|F1;N$O!"J,N`$N7k2L$H(B
$B$7$FF1MM$N%+%F%4%j!<$KJ,N`$5$l$k$3$H$r4|BT$7$F$$$k$,!":#2s;HMQ$7$?FCD'$d(B
$B%Y%/%H%kI=8=$G$O(BSVM$B$K$h$kJ,N`$,$&$^$/$$$+$J$+$C$?$?$a!"IT@52r$N%+%F%4%j!<(B
$B$G$"$k3NN($,9b$/$J$C$F$7$^$C$?$H$$$&$3$H$,9M$($i$l$k!#(B
$B$3$l$r2r>C$9$k$?$a$K$O!"JL$NFCD'$d%Y%/%H%kI=8=$rMQ$$$F!"$=$l$@$1$G$"$kDx(B
$BEY$N@:EY$r;}$DJ,N`4o$r:n@.$9$kI,MW$,$"$k!#(B
$B%+%F%4%j!<$4$H$N7k2L$G$O!"I=(B2$B$N$h$&$K(BSVM$B$K$h$kJ,(B
$BN`$,@53N$K9T$o$l$F$$$k%+%F%4%j!<$K$D$$$F$O!"(BVisual ontology$B$rMQ$$$?>l9g(B
$B$K$b$[$H$s$IF1$8=g0L$G$"$k$,!">e0L(B10$B%+%F%4%j!<Cf(B6$B%+%F%4%j!<$G(Bflat cost$B$,8:>/$7$F$$$k$N$,3NG'$G$-$k!#$3$l$O(BSVM$B$G@53N$KJ,N`$,=PMh$F$$$k(B
$B%+%F%4%j!<$K$D$$$F$O!"%+%F%4%j!<4V$N4X78@-$rMQ$$$k$3$H$K$h$C$F!"6O$+$G$O(B
$B$"$k$,J,N`@:EY$,8~>e$9$k$3$H$r<($7$F$$$k!#(B
$B$3$N$?$a4{B8$NBgNL$N%+%F%4%j!<$NJ,N`$K$*$$$F@.2L$r<}$a$F$$$k
7 $B$^$H$a(B
$BK\8&5f$G$O!"(BVisual ontology$B$rMQ$$$F35G04V$N4X78@-$r9MN8$7$?0lHLJ*BNG'<1(B
$BZ$7$?7k2L!"(BSVM$B$K$h(B
$B$k7k2L$h$j0-$/$J$k$H$$$&7k2L$H$J$C$?!#(B
$B$7$+$7!"J,N`@:EY$,>e0L$N0lIt$N%+%F%4%j!<$K4X$7$F$O!"35G04V$N4X78@-$rMQ$$(B
$B$k$3$H$G@:EY$N8~>e$,8+$i$l$?!#(B
8 $B:#8e$N2]Bj(B
$B:#8e$N2]Bj$H$7$F$O!":#2s$N$K$b!"%+%F%4%j!<$r8BDj$7!"(BVisual ontology$B$rMQ$$$k$3$H$,M-8z$G$"$k%+%F(B
$B%4%j!<$ND4::$b8!F$$9$k!#(B
$BJ88%L\O?(B
- 1
-
$B=)4VM:B@(B, $B@n5WJ]=(IR(B, $BLx0f7<;J(B.
Folksonomy$B$K$h$k3,AX9=B$2hA|%G!<%?%Y!<%9$N9=C[(B.
$B2hA|$NG'<1!&M}2r%7%s%]%8%&%`(B (MIRU 2010), 2010.
- 2
-
J. Deng, W. Dong, R. Socher, J. Li, K. Li, and L. Fei-Fei.
ImageNet: A large-scale hierarchical image database.
In Proc. of IEEE Computer Vision and Pattern Recognition, 2009.
$B5SCm(B
- 1
- http://www.image-net.org/challenges/LSVRC/2010/
|