Диссертация (1137066), страница 15
Текст из файла (страница 15)
random . s e t [ [ t a r g e t ] ]# Get r a n g e f o r t a r g e t :t a r g e t . r a n g e 0 = range ( g . random . s e t [ [ t a r g e t ] ] )# F i n d image f o r d e s c r i p t i o n ( p o t e n t i a l p r e m i s e ) :image . s e t = image . f i n d ( d e s c 0 , d a t a = t r [ , v a r s ] )# Save a l l t a r g e t v a l u e s f o r h i s t o g r a m :h1 = t r [ image . s e t , t a r g e t ]# Get r a n g e f o r t a r g e t :t a r g e t .
r a n g e 1 = range ( t r [ image . s e t , t a r g e t ] )median . h1 = median ( h1 )# Calculate a n t i . support f o r the c l u s t e r with desc0 :s h a r e . o f . b r e a k s = sum ( t a r g e t . r a n g e 0 [ 1 ] − t r [ image . s e t , t a r g e t ] > ( median .h1 − t a r g e t . r a n g e 0 [ 1 ] ) ∗ a l l o w e d . d r o p o u t |t r [ image . s e t , t a r g e t ] − t a r g e t . r a n g e 0 [ 2 ] > ( t a r g e t.
r a n g e 0 [ 2 ] − median . h1 ) ∗ a l l o w e d . d r o p o u t ) /l e n g t h ( image . s e t )i f ( s h a r e . o f . b r e a k s <= a l p h a . t h r e s h o l d ) {i f ( capped ) {h 1 c a p p e d = h1 [ h1 >= t a r g e t . r a n g e 0 [ 1 ] & h1 <= t a r g e t . r a n g e 0 [ 2 ] ]87} else{h 1 c a p p e d = h1}p r e m i s e s [ [ l e n g t h ( p r e m i s e s ) + 1 ] ] = l i s t ( e x t e n t = image . s e t ,a n t i .
support = share . of . breaks ,t a r g e t . d i s t r i b u t i o n = h1capped )}}# ####################################################### # F o r e c a s t f o r t e s t o b j e c t g t b a s e d on mined p r e m i s e s :# ######################################################f i n a l . d i s t r i b u t i o n = d a t a . frame ( t a r g e t .
v a l u e = 0 , a n t i . s u p p o r t = 0 , s t d e v =0 , s t r i n g s A s F a c t o r s = FALSE ) [ − 1 , ]for ( p in premises ) {f i n a l . d i s t r i b u t i o n = rbind ( f i n a l . d i s t r i b u t i o n ,d a t a . frame ( p$ t a r g e t . d i s t r i b u t i o n ,rep ( p $ a n t i .s u p p o r t , l e n g t h ( p $ t a r g e t . d i s t r i b u t i o n ) ) , rep ( sd ( p $ t a r g e t .
d i s t r i b u t i o n ), l e n g t h ( p$ t a r g e t . d i s t r i b u t i o n ) ) ) )}names ( f i n a l . d i s t r i b u t i o n ) = c ( " t a r g e t . v a l u e " , " a n t i . s u p p o r t " , " s t d e v " )i f ( p e n a l i z e . for . high . d e v i a t i o n & account . for . a n t i . support ) f i n a l .d i s t r i b u t i o n [ [ " weight " ] ] = (1 − f i n a l . d i s t r i b u t i o n [ [ " a n t i . support " ] ] ) ∗f i n a l . d i s t r i b u t i o n [[ " stdev " ]]i f ( p e n a l i z e . for .
high . d e v i a t i o n & ( ! account . for . a n t i . support ) ) f i n a l .d i s t r i b u t i o n [ [ " weight " ] ] = 1 / f i n a l . d i s t r i b u t i o n [ [ " stdev " ] ]i f ( ( ! p e n a l i z e . for . high . d e v i a t i o n ) & account . for . a n t i . support ) f i n a l .d i s t r i b u t i o n [ [ " weight " ] ] = 1 − f i n a l .
d i s t r i b u t i o n [ [ " a n t i . support " ] ]i f ( ( ! p e n a l i z e . f o r . h i g h . d e v i a t i o n ) & ( ! a c c o u n t . f o r . a n t i . s u p p o r t ) & nrow (f i n a l . d i s t r i b u t i o n ) >0) f i n a l . d i s t r i b u t i o n [ [ " w e i g h t " ] ] = 1p r e d i c t i o n . v e c t o r = c ( wtd . mean ( f i n a l . d i s t r i b u t i o n $ t a r g e t . v a l u e , w e i g h t s =f i n a l . d i s t r i b u t i o n $ weight ) ,wtd .
q u a n t i l e ( f i n a l . d i s t r i b u t i o n $ t a r g e t . v a l u e ,probs = c(0.01 ,0.05 ,0.1 ,0.25 ,0.5 ,0.75 ,0.9 ,0.95 ,0.99),w e i g h t s = 1000 ∗ f i n a l . d i s t r i b u t i o n $ w e i g h t88))v l [ k , " avg " ] = p r e d i c t i o n . v e c t o r [ 1 ]v l [ k , " q1 " ] = p r e d i c t i o n . v e c t o r [ 2 ]v l [ k , " q2 " ] = p r e d i c t i o n . v e c t o r [ 3 ]v l [ k , " q3 " ] = p r e d i c t i o n .
v e c t o r [ 4 ]v l [ k , " q4 " ] = p r e d i c t i o n . v e c t o r [ 5 ]v l [ k , " median " ] = p r e d i c t i o n . v e c t o r [ 6 ]v l [ k , " q6 " ] = p r e d i c t i o n . v e c t o r [ 7 ]v l [ k , " q7 " ] = p r e d i c t i o n . v e c t o r [ 8 ]v l [ k , " q8 " ] = p r e d i c t i o n . v e c t o r [ 9 ]v l [ k , " q9 " ] = p r e d i c t i o n . v e c t o r [ 1 0 ]v l [ k , " num_ p r e m i s e s " ] = l e n g t h ( p r e m i s e s )i f ( l e n g t h ( p r e m i s e s ) ==0) {v l [ k , " avg " ] = n a i v e _ avgv l [ k , " avg " ] = n a i v e _med}}t 1 = Sys .
t i m e ( )num_ p r e m i s e s = mean ( v l $num_ p r e m i s e s [ v l $num_ p r e m i s e s > 0 ] ) # c o n d i t i o n a l a v e r a g enum_ n a i v e _ p r e d s = sum ( v l $num_ p r e m i s e s == 0 )mad . avg = mean ( abs ( v l $ avg − v l [ [ t a r g e t ] ] ) , na . rm = TRUE)mad . median = mean ( abs ( v l $ median − v l [ [ t a r g e t ] ] ) , na . rm = TRUE). G l o b a l E n v $ g l o b a l . i n f o . t a b [ nrow ( . G l o b a l E n v $ g l o b a l .
i n f o . t a b ) + 1 , ] =c(subsample . size , n . i t e r , alpha . t hr es ho ld ,a l l o w e d . d r o p o u t , capped , a c c o u n t . f o r . a n t i . s u p p o r t , p e n a l i z e . f o r . h i g h .d e v i a t i o n , num_ p r e m i s e s , num_ n a i v e _ p r e d s , mad . avg , mad .
median , a s .numeric ( t 1 − t 0 ) / nrow ( v l ) )v l [ , c ( " avg " , " q1 " , " q2 " , " q3 " , " q4 " , " median " , " q6 " , " q7 " , " q8 " , " q9 " ) ]}
# ---------------------------------------------------------------------------
# Grid search over the lazy.evaluator hyper-parameters.
#
# Starts a local parallel cluster, creates the (initially empty) summary table
# global.info.tab, and then, for every combination of
#   subsample.size x n.iter x allowed.dropout x capped x
#   account.for.anti.support x penalize.for.high.deviation,
# evaluates all alpha.threshold values in parallel with foreach/%dopar%.
# Each successful run is saved to an .Rdata file and collected in finalRes
# (one list element per parameter combination, holding one entry per
# alpha.threshold; failed runs yield NULL).
#
# Expects vl1, tr1, vars and lazy.evaluator to exist in the calling environment.
#
# NOTE(review): lazy.evaluator appends a row to .GlobalEnv$global.info.tab.
# When it runs on a cluster worker that write targets the worker's own global
# environment - confirm that the master's global.info.tab is filled as intended.
# ---------------------------------------------------------------------------
library(doParallel)

# Leave one core free, but never request fewer than one worker
# (detectCores() may return 1 or NA).
cores = max(1, detectCores() - 1, na.rm = TRUE)
cluster_name = 'myCluster'
assign(x = cluster_name, value = makeCluster(cores), envir = .GlobalEnv)
registerDoParallel(get(cluster_name, envir = .GlobalEnv))

# Zero-row template: the single dummy row only fixes the column names/types
# and is dropped immediately by [-1, ].
global.info.tab = data.frame(subsample.size = 0.01, n.iter = 1000, alpha.threshold = 0.01,
                             allowed.dropout = 0, capped = TRUE,
                             account.for.anti.support = TRUE, penalize.for.high.deviation = TRUE,
                             average.number.of.premises = 0,
                             number.of.naive.preds = 0,
                             mad.avg = 0,
                             mad.median = 0,
                             time_seconds = 0,
                             stringsAsFactors = FALSE)[-1, ]

finalRes = list()

# tryCatch(..., finally = ...) guarantees the worker processes are shut down
# even if the grid search aborts half-way.
tryCatch({
  for (ss in c(0.05, 0.1)) {                      # subsample.size
    for (ni in c(100, 500, 1000, 2000)) {         # n.iter
      for (ad in c(0, 0.1, 0.5, 1, 1.5)) {        # allowed.dropout
        for (cp in c(TRUE, FALSE)) {              # capped
          for (as in c(TRUE, FALSE)) {            # account.for.anti.support
            for (pd in c(TRUE, FALSE)) {          # penalize.for.high.deviation
              # alpha.threshold grid. The original listed 0.05 twice
              # (0, 0.05, 0.01, 0.015, 0.02, 0.05), which made two tasks write
              # the same output file; the 0.005 step pattern indicates the
              # first one was meant to be 0.005.
              # NOTE(review): confirm 0.005 is the intended value.
              #
              # 'at' is the foreach iteration variable and is handed to the
              # body by foreach itself, so it is not listed in .export.
              outRes = foreach(at = c(0, 0.005, 0.01, 0.015, 0.02, 0.05),
                               .export = c('vl1', 'tr1', 'vars', 'ss', 'ni', 'ad',
                                           'cp', 'as', 'pd', 'lazy.evaluator')) %dopar% {
                # Capture the value of try() itself: on failure it is a
                # "try-error" object, which is what the check below tests.
                test.lazy = try(lazy.evaluator(vl = vl1, tr = tr1, vars = vars,
                                               subsample.size = ss,
                                               n.iter = ni,
                                               alpha.threshold = at,
                                               allowed.dropout = ad,
                                               capped = cp,
                                               account.for.anti.support = as,
                                               penalize.for.high.deviation = pd))
                if (inherits(test.lazy, "try-error")) {
                  NULL                            # failed run contributes NULL
                } else {
                  # Tag the prediction columns with the alpha.threshold used.
                  names(test.lazy) = paste0(names(test.lazy), "_", at)
                  save(test.lazy, file = paste0("C:/Users/Masyutin-AA/Desktop/CLA 2016/",
                                                paste(c(ss, ni, ad, cp, as, pd, at), collapse = "_"),
                                                ".Rdata"))
                  test.lazy
                }
              }
              finalRes[[length(finalRes) + 1]] = outRes
            }
          }
        }
      }
    }
  }
}, finally = {
  # stopCluster needs the cluster object, not the name under which it is stored.
  stopCluster(get(cluster_name, envir = .GlobalEnv))
})
90
The program for Query-Based Classification ("lazy" classification) is provided in the R language as a prototype.
# Functions Definition :gnew=1v o t i n g =" count "w e i g h t . a l p h a =FALSEalpha =0.02subsample . s i z e =0.03num_ i t e r =100unknown . d a t a<− t e s t .
d a t al a z y . p r e d i c t<− f u n c t i o n ( gnew , unknown . data , pos . data , neg . data , v o t i n g = " c o u n t " ,s u b s a m p l e . s i z e = 0 . 0 0 5 , num_ i t e r s =100 , a l p h a =0 , w e i g h t . a l p h a =FALSE ) {c o n c e p t . s e t<− l i s t ( )v a r s _new<−unknown . d a t a [ gnew , ]# Loop f o r p o s i t i v e c o n t e x t# f o r ( g i n 1 : nrow ( p o s . d a t a ) ) {f o r ( i i n 1 : num_ i t e r s ) {g<−sample ( 1 : nrow ( pos . d a t a ) , s i z e = s u b s a m p l e . s i z e ∗nrow ( pos . d a t a ) )K<− l e n g t h ( c o n c e p t .
s e t )v a r s<−pos . d a t a [ g , ]# i f ( nrow ( v a r s ) ==0) { p r i n t ( v a r s )#print (g)#p r i n t ( pos . data [ g , ] ) }p e r e s e k<−r b i n d ( v a r s , v a r s _new )s e p<−Reduce ( f = rbind , l a p p l y ( names ( v a r s ) , f u n c t i o n ( v ) c ( min ( p e r e s e k [ [ v ] ] ) , max (peresek [[ v ] ] ) ) ) )wi<−d a t a . frame ( low .
b o r d e r = s e p [ , 1 ] , h i g h . b o r d e r = s e p [ , 2 ] )names ( wi )<−c ( " low . b o r d e r " , " h i g h . b o r d e r " )row . names ( wi )<−names ( v a r s )neg . image<−what . e x t e n t ( d a t a = neg . data , b o r d e r . frame = wi )i f ( l e n g t h ( neg . image ) <= a l p h a ∗nrow ( neg . d a t a ) ) {pos .
image<−what . e x t e n t ( d a t a = pos . data , b o r d e r . frame = wi )c o n c e p t . s e t [K+ 1 ]<− l i s t ( l i s t ( I d =row . names ( unknown . d a t a ) [ gnew ] ,Rownumber=gnew ,Type= " P o s i t i v e " ,E x t e n t =c ( row . names ( pos . d a t a ) [ pos . image ] , row .91names ( unknown .
d a t a ) [ gnew ] ) ,E x t e n t _ i n d e x = l i s t ( pos . image= pos . image , gnew=gnew),I n t e n t =wi ,Alpha = l e n g t h ( neg . image ) / nrow ( neg . d a t a ) ) )}}# Loop f o r n e g a t i v e c o n t e x t# f o r ( g i n 1 : nrow ( neg . d a t a ) ) {f o r ( i i n 1 : num_ i t e r s ) {g<−sample ( 1 : nrow ( neg . d a t a ) , s i z e = s u b s a m p l e . s i z e ∗nrow ( neg .