SlideShare a Scribd company logo
1 of 78
Download to read offline
7




Aggregate Data Analysis
Data                Data                Data



mapper              mapper              mapper

   mapper              mapper              mapper

           mapper              mapper              mapper
#$%&'()*'                          -'.            #$%   0
           1"23                45)667'
      &'()*'           0      1"23        0    1"        3
                   "
"
Welcome to My HomePage.
      Thank you.
 Where is your house? ....




                                                                  "
                  " !+/"-'.                         "
"
mapper

Big Data   mapper

           mapper
map: (k1, v1) → [(k2, v2)]  // emits a list of intermediate (k2, v2) pairs

//word count
// Word count, map side: emit the pair (term, 1) for every term occurrence
// in the document; the framework groups these by term for the reducer.
class Mapper
   method Map(docid a, doc d)
      // One intermediate pair per occurrence of each term.
      for all term t ∈ doc d do
         Emit(term t, count 1)
> require 'msgpack'
> msg = [1,2,3].to_msgpack
                      #=> "\x93\x01\x02\x03"
> MessagePack.unpack(msg)  #=> [1,2,3]
// word count
// Word count, combiner: pre-sum the counts for one term on the map side,
// before they cross the network. Same shape as the reducer — summing is
// associative and commutative, so partial sums are safe.
class Combiner
   method Combine(string t, counts [c1, c2, . . .])
      sum ← 0
      for all count c ∈ counts [c1, c2, . . .] do
         sum ← sum + c
      // Emit a single partial count for term t.
      Emit(string t, count sum)
reduce: (k2, [v2]) → [(k3, v3)]

//word count
// Word count, reduce side: sum all (partial) counts received for a term
// and emit the final total.
class Reducer
   method Reduce(term t, counts [c1, c2, . . .])
      sum ← 0
      for all count c ∈ counts [c1,c2,...] do
         sum ← sum + c
      // Final (term, total) pair for the output.
      Emit(term t, count sum)
30   CHAPTER 2. MAPREDUCE BASICS

                          !       "       #       $       %         &




                 '())*+
                   ))              '())*+
                                     ))                 '())*+
                                                          ))                 '())*+
                                                                               ))


                ( -   , .         / 0     / 1         ( 2     / .           , 3     / 4

                 /5',67*+          /5',67*+             /5',67*+             /5',67*+


                ( -   , .               / 8           ( 2     / .           , 3     / 4

                )
                )(+969657*+       )
                                  )(+969657*+         )
                                                      )(+969657*+           )
                                                                            )(+969657*+

                         :;<==>*?(7@?:5+9A (BB+*B(9*?C(><*D?,E?F*ED

                              (   - 2            ,    . 3               /   . 8 4



                        +*@</*+               +*@</*+            +*@</*+


                            G 2                 H 3                 I 8
Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
Hadoop Tutorial Series, Issue #2:
Getting Started With (Customized) Partitioning
Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
package com.philippeadjiman.hadooptraining; 
package com.philippeadjiman.hadooptraining;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;
 
public class MyPartitioner implements Partitioner<IntWritable,Text> {
" @Override
" public int getPartition(IntWritable key, Text value, int numPartitions) {
" " /* Pretty ugly hard coded partitioning function. Don't do that in practice,
it is just for the sake of understanding. */
" " int nbOccurences = key.get();
 
" " if( nbOccurences < 3 )
" " " return 0;
" " else
" " " return 1;
" }
 
" @Override
" public void configure(JobConf arg0) {
 
" }
}                      Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
x + y = y + x
x    y = y     x

(x + y) + z = x + (y + z)
(x    y)     z = x   (y   z)
// In-mapper combining: aggregate (k2, v2) pairs locally in a hash buffer
// and emit them only when the buffer fills up or the map task finishes,
// reducing the volume of intermediate data shuffled to reducers.
class Mapper {
   buffer   // in-memory hash of partial aggregates: k2 -> summed v2

   init() {
       buffer = HashMap.new
   }

   map(id, data) {
       elements = process(data)
       for each element {
            ....
            check_and_put(buffer, k2, v2)
       }
   }

   check_and_put(buffer, k2, v2) {
        buffer.incrby(k2, v2) // H[k2] += v2
        // Bug fix: the original flushed *instead of* inserting when the
        // buffer was full — the incoming pair was dropped and the buffer
        // was never cleared. Insert first, then flush and reset when full.
        if buffer.full {
            for each k2 in buffer.keys {
                emit(k2, buffer[k2])
            }
            buffer.clear()
        }
    }

    // Flush whatever partial aggregates remain at the end of the task.
    close() {
        for each k2 in buffer.keys {
            emit(k2, buffer[k2])
        }
    }
}                                  Designing algorithms for Map Reduce
!           !        !         !"#                                 !             !             !                       2,%




                                                             6"#
                                                                                "#            "#            "#                      !"#$%&'
             "           "        "         $%&'(%)*'+




                                                                                                                                                    5,%
                                                                                '(            '(            '(                      ()*+*,-./0$1/

  !         ! !#!                           !+(,+       !         !             !
                                                                                $             $ 2,%$                                :*8+"*6$+/
                             !#!"#



                                              6"#
                                                                                                    !"#$%&'
  "         "   "                $%&'(%)*'+ -(.*#/0 "#           "#             "#




                                                                                                                      5,%
                     #       #




                                                             (+2*3+
                                                    '(            '(            '(                  ()*+*,-./0$1/




                                                                                                                                                    ,33"/3,+*#)9+"//
                                                                                                    !"           !"                 2/"3/
                     $       $ !+(,+        1+2*3+      $         $             $                   :*8+"*6$+/
      !#       !#                                                                                   "%           "%                 !"#$%&'
                             % -(.*#/0      4.5&*6+(
      #          #%                                                                                     &        &                  4#56*)/
                                              (+2*3+




                                                                                                                      ,33"/3,+*#)9+"//
                                                                           !"            !"         2/"3/
                             1+2*3+
       $ Figure 1: Distributed execution plan for MapReduce
              $
           when reduce cannot be decomposed to perform partial             "%            "%         !"#$%&'
       %   aggregation.
                 %            4.5&*6+(                                           !"            !"                                   2/"3/
                                                                            &            &          4#56*)/




                                                                                                                                                    "/0$1/
Figure 1: Distributed execution plan for function, and merge and
              With this user-defined MapReduce                                    "%                "%                               !"#$%&'
when reduce cannot beoperators provided by partial
           grouping decomposed to perform the system, it is pos-
aggregation.
            sible to execute a simple distributed computation as
                                                         !"           !"             )             )2/"3/                           7*),-./0$1/
            shown in Figure 1. The computation has exactly




                                                                                                                      "/0$1/
                                                          "%
   With this user-definedthe first phase merge anda Map function
            two phases: function, and executes                         "%            *             *!"#$%&'                         4#)8$5/"
grouping operatorsinputs to by the system, and pos-
            on the provided extract keys it is records, then per-
                                                           )
sible to execute a simple distributed computation as based on the
            forms a partitioning of these outputs
                                                                       )                            7*),-./0$1/
                                                                            Figure 2: Distributed execution plan for MapReduce
shown in Figureof the records. The second phase collects and
            keys 1. The computation has exactly                             when reduce supports partial aggregation. The imple-
two phases: the first phase executes a Map function         *           *                    4#)8$5/"
                                                                            mentation of GroupBy in the first stage may be different to
Def. 1
  x: data items, x1 ⊕ x2: concatenation of x1, x2.
  A function H is decomposable if there exist two functions — an initial
  reducer I and a combiner C — satisfying:

1) ∀x1, x2 : H(x1 ⊕ x2) = C(I(x1 ⊕ x2)) = C(I(x1) ⊕ I(x2))
2) ∀x1, x2 : I(x1 ⊕ x2) = I(x2 ⊕ x1)
3) ∀x1, x2 : C(x1 ⊕ x2) = C(x2 ⊕ x1)

Def. 2
  A function H is associative-decomposable if it is decomposable
  (conditions 1–3 of Def. 1) and, in addition, C satisfies:

4) ∀x1, x2, x3 : C(C(x1 ⊕ x2) ⊕ x3) = C(x1 ⊕ C(x2 ⊕ x3))
( i.e. C is associative )
// Combiner that accumulates partial sums into an external shared space
// (e.g. a key/value store) instead of shipping the data downstream; the
// reducer later fetches the totals using the connection info.
class Combiner {
   share_space
   init(share_space_info) {
       share_space = conn(share_space_info)
   }
   combine(key, elements) {
       // Locally sum the values for this key.
       sum = 0
       for each element {
              ...
              sum += v
       } //
// Add the local sum to the shared store, then emit only the store's
// connection info so the reducer knows where to fetch the totals from.
share_space.incrby(key, sum)
        emit(key, share_space_info)
    } // end combine()
}
// Pull-style reducer for the shared-space pattern: the reducer receives
// only connection info per key and fetches the partial aggregates from
// each external store itself.
class Reducer {
    reduce(key, list_of_share_space_info) {
        for each share_space_info {
            share_space = conn(share_space_info)
            sum = 0
            elements = share_space.hget(key)
            // Fixed the typo ("elemnt") and the missing closing brace of
            // this inner loop in the original slide text.
            for each element {
                ...
            }
        }
    }
}
// Range partitioner: split the key space [KEY_MIN, KEY_MAX] into equal
// ranges, one per reducer, so reducer output is globally ordered.
partition(key) {
   range = (KEY_MAX - KEY_MIN) / NUM_OF_REDUCERS
   reducer_no = (key - KEY_MIN) / range
   // Off-by-one fix: key == KEY_MAX yields NUM_OF_REDUCERS, one past the
   // last valid reducer index. Clamp into [0, NUM_OF_REDUCERS - 1].
   if reducer_no >= NUM_OF_REDUCERS {
       reducer_no = NUM_OF_REDUCERS - 1
   }
   return reducer_no
}                                    Designing algorithms for Map Reduce
(t1, m1, r80521), (t1, m2, r14209), (t1, m3, r76042),
(t2, m1, r21823), (t2, m2, r66508), (t2, m3, r98347),...




 map: m1 → (t1, r80521)  // key = machine id

 // reducer input: values for one machine arrive in arbitrary t1,t2,t3,... order
 (m1) → [(t1, r80521), (t3, r146925), (t2, r21823)]
 (m2) → [(t2, r66508), (t1, r14209), (t3, r14720)]
map: (m1, t1) → r80521  // value-to-key conversion: move the timestamp into the key




(m1, t1) → [(r80521)]  // values now arrive sorted by t1,t2,t3,...
(m1, t2) → [(r21823)]
(m1, t3) → [(r146925)]
// Local-max pattern: batch incoming numbers and emit one local maximum
// per batch under the single key 1, so the lone reducer only compares
// a small number of candidates.
class Mapper {
          buffer

          map(id, number) {
             buffer.append(number)
             if (buffer.is_full) {
                   max = compute_max(buffer)
                   emit(1, max)
                   // Bug fix: reset the batch — the original never cleared
                   // the buffer, so it stayed full and every later call
                   // recomputed the max over the same stale data.
                   buffer.clear()
             }
      }

      // Bug fix: numbers left in a partially filled buffer at the end of
      // the task were silently dropped in the original; flush them here.
      close() {
          if (not buffer.is_empty) {
              emit(1, compute_max(buffer))
          }
      }
}                                        Designing algorithms for Map Reduce
// Global-max reducer: all local maxima arrive under the single key 1;
// take the largest of them.
class Reducer {
    reduce(key, list_of_local_max) {
        // Bug fix: seeding with 0 returned 0 whenever every local max was
        // negative; seed with negative infinity instead.
        global_max = -infinity
        for local_max in list_of_local_max {
            if local_max > global_max {
                global_max = local_max
            }
        }
        emit(1, global_max)
    }
}                                  Designing algorithms for Map Reduce
// Combiner for the global-max job: collapse one mapper's local maxima
// into a single value. Max is associative and commutative, so the
// combiner can safely mirror the reducer's logic.
class Combiner {
    combine(key, list_of_local_max) {
       local_max = maximum(list_of_local_max)
       emit(1, local_max)
    } // Max() can be applied repeatedly without changing the final result

}                               Designing algorithms for Map Reduce
// Random sampling in the mapper: keep roughly 10% of the records; each
// record is retained independently with probability 0.1.
class Mapper {
    map(id, data) {
        key, value = process(data)
        if rand() < 0.1 {   //rand() ∈ [0.0, 1.0)
            emit(key, value)
        }
    }
}
Map Reduce and Stream Processing
# Called once for each incoming hit record (streaming analog of map()).
 map(k1, hitRecord) {
     site = hitRecord.site
     # Look up the current time slice for this key (= site).

     slice = lookupSlice(site)
     if (slice.time - now > 60.minutes) {
         # NOTE(review): this comparison looks inverted — a slice is
         # normally advanced once it is *older* than 60 minutes
         # (now - slice.time); confirm against the original article.
         # Notify reducer whole slice of site is sent
         advance(site, slice)
         slice = lookupSlice(site)
     }
     emitIntermediate(site, slice, 1)
 }                                      Map Reduce and Stream Processing
combine(site, slice, countList) {
    hitCount = 0
    for count in countList {
        hitCount += count
    }
    # Send the message to the downstream node
    emitIntermediate(site, slice, hitCount)
}                                     Map Reduce and Stream Processing
#       mapper   slice

# Runs once the mapper signals that a whole slice for a site has been
# sent; folds the per-combiner counts into a single slice value.
reduce(site, slice, countList) {
    # Sum the hit counts received for this (site, slice).
    hitCount = 0
    for count in countList {
        hitCount += count
    }
    sv = SliceValue.new
    sv.hitCount = hitCount
    return sv
}                                  Map Reduce and Stream Processing
# Window aggregation: create the zeroed running total for a new window.
init(slice) {
    rangeValue = RangeValue.new
    rangeValue.hitCount = 0
    return rangeValue
}
# Reduce step: fold a finished slice's count into the window total.
merge(rangeValue, slice, sliceValue) {
    rangeValue.hitCount += sliceValue.hitCount
}
# Sliding-window update: subtract a slice that has slid out of the window.
unmerge(rangeValue, slice, sliceValue) {
    rangeValue.hitCount -= sliceValue.hitCount
}                                 Map Reduce and Stream Processing
5&4.)1*,!,);3-00+*0-1*,!&/*+!*-58!.-$*9!-$%!@+&22,!).A!18*!          -!:2*=#;2*!'-<!1&!4&$#1&+!,1+*-4#$0!%-1-6!!
.-$*3-00+*0-1*,! 1&! 5&4.)1*! '#$%&'3-00+*0-1*,6! >)+! *=3           R)++*$1!.+&.&,-2,!:&+!*/-2)-1#$0!,2#%#$03'#$%&'!-00+*0-1*!
.*+#4*$1-2! ,1)%<! ,8&',! 18-1! ),#$0! .-$*,! 8-,! ,#0$#:#5-$1!      ()*+#*,!;)::*+!*-58!#$.)1!1).2*!)$1#2!#1!#,!$&!2&$0*+!$**%*%!
.*+:&+4-$5*!;*$*:#1,6!!                                              INP6! D#$5*! *-58! #$.)1! 1).2*! ;*2&$0,! 1&! 4)21#.2*! '#$%&',9!
                                                                     ,)58!-..+&-58*,!;)::*+!-!1).2*!)$1#2!#1!#,!.+&5*,,*%!:&+!18*!
'(# )*+,-./0+1-*2                                                    -00+*0-1*! &/*+! 18*! 2-,1! '#$%&'! 1&! '8#58! #1! ;*2&$0,6! -58!
B-$<! -..2#5-1#&$,! $**%! 1&! .+&5*,,! ,1+*-4,9! :&+! *=-4.2*9!      #$.)1! 1).2*! #,! -55*,,*%! 4)21#.2*! 1#4*,9! &$5*! :&+! *-58! '#$3
:#$-$5#-2! %-1-! -$-2<,#,9! $*1'&+C! 1+-::#5! 4&$#1&+#$09! -$%!      %&'!18-1!#1!.-+1#5#.-1*,!#$6!!!
1*2*5&44)$#5-1#&$! 4&$#1&+#$06! D*/*+-2! %-1-;-,*! +*,*-+58!
0+&).,! -+*! ;)#2%#$0! --1-! .1+*-4! /-$-0*4*$1! .<,1*4,!            "*! ,**! 1'&! .+&;2*4,! '#18! ,)58! -..+&-58*,6! W#+,1! 18*!
EFDBDG!,&!18-1!-..2#5-1#&$,!5-$!#,,)*!()*+#*,!1&!0*1!1#4*2<!         ;)::*+!,#H*!+*()#+*%!#,!)$;&)$%*%T!Q1!-$<!1#4*!#$,1-$19!-22!
#$:&+4-1#&$! :+&4! ,1+*-4,6! B-$-0#$0! -$%! .+&5*,,#$0!              1).2*,! 5&$1-#$*%! #$! 18*! 5)++*$1! '#$%&'! -+*! #$! 18*! ;)::*+9!
,1+*-4,!0#/*,!+#,*!1&!58-22*$0*,!18-1!8-/*!;**$!*=1*$,#/*2<!         -$%!,&!18*!,#H*!&:!18*!+*()#+*%!;)::*+,!#,!%*1*+4#$*%!;<!18*!
%#,5),,*%!-$%!+*5&0$#H*%!IJ9!K9!L9!M9!NOP6!!                         '#$%&'!+-$0*!-$%!18*!%-1-!-++#/-2!+-1*6!D*5&$%9!.+&5*,,#$0!
                                                                     *-58!#$.)1!1).2*!4)21#.2*!1#4*,!2*-%,!1&!-!8#08!5&4.)1-1#&$!
Q$!#4.&+1-$1!52-,,!&:!()*+#*,!&/*+!%-1-!,1+*-4,!#,!,2#%#$03          5&,16!W&+!*=-4.2*!#$!X)*+<!N9!*-58!#$.)1!1).2*!#,!.+&5*,,*%!
'#$%&'!-00+*0-1*!()*+#*,6!R&$,#%*+!-$!&$2#$*!-)51#&$!,<,3            :&)+!1#4*,6!Q,!18*!+-1#&!&:!YQZ[!&/*+!D]7F!#$5+*-,*,9!
1*4!#$!'8#58!;#%,!&$!-)51#&$!#1*4,!-+*!,1+*-4*%!#$1&!-!5*$3          ,&!%&*,!18*!$)4;*+!&:!1#4*,!*-58!1).2*!#,!.+&5*,,*%6!R&$3
1+-2!-)51#&$!.+&5*,,#$0!,<,1*46!S8*!,58*4-!&:!*-58!;#%!#,T!          ,#%*+#$0!18*!2-+0*!/&2)4*!-$%!:-,1!-++#/-2!+-1*!&:!,1+*-4#$0!
U#1*43#%9! ;#%3.+#5*9! 1#4*,1-4.V6! W&+! *-,*! &:! .+*,*$1-1#&$9!    %-1-9!+*%)5#$0!18*!-4&)$1!&:!+*()#+*%!;)::*+!,.-5*!E#%*-22<!
'*!-,,)4*!18-1!;#%,!-++#/*!#$!&+%*+!&$!18*#+!1#4*,1-4.!-13           1&!-!5&$,1-$1!;&)$%G!-$%!5&4.)1-1#&$!1#4*!#,!-$!#4.&+1-$1!
1+#;)1*6! E"*! -+*! -51#/*2<! #$/*,1#0-1#$0! .+&5*,,#$0! %#,&+3
%*+*%!%-1-!,1+*-4,G!X)*+<!N!,8&',!-$!*=-4.2*!&:!-!,2#%#$03
'#$%&'!-00+*0-1*!()*+<6!
3/4,52'T!@W#$%!18*!4-=#4)4!;#%!.+#5*!:&+!18*!.-,1!K!4#$3
)1*,!-$%!).%-1*!18*!+*,)21!*/*+<!N!4#$)1*6A!
!"#"$%&'()*+,-./0,123&
4567&+,-89:;%%5&<,'28<('/&
&&&&&&&&&&5;=>"&?&',@A<28&
&&&&&&&&&&!#BC"&D&',@A<2E&
7$! 18*! ()*+<! -;&/*9! '*! #$1+&%)5*! -! '#$%&'! ,.*5#:#5-1#&$!
'#18!18+**!.-+-4*1*+,T!YQZ[!,.*5#:#*,!18*!'#$%&'!,#H*9!
D]7F! ,.*5#:#*,! 8&'! 18*! '#$%&'! 4&/*,9! -$%! "QSSY!
,.*5#:#*,! 18*! '#$%&'#$0! -11+#;)1*! &$! '8#58! 18-1! 18*!
YQZ[! -$%! D]7F! .-+-4*1*+,! -+*! %*:#$*%6! S8*! '#$%&'!
,.*5#:#5-1#&$! &:! X)*+<! N! ;+*-C,! 18*! ;#%! ,1+*-4! #$1&! &/*+3
2-..#$0!K34#$)1*!,);3,1+*-4,!18-1!,1-+1!*/*+<!4#$)1*9!'#18!
+*,.*51! 1&! 18*! 1#4*,1-4.! -11+#;)1*6! S8*,*! &/*+2-..#$0! ,);3
,1+*-4,!-+*!5-22*%!!"#$#%&0(#%$)(!6!X)*+<!N!5-25)2-1*,!18*!                      617/,42'8291*.-:;2&-<=-;4.2->26-/,2?@*4;2
                                                              No Pane, No Gain: Efficient Evaluation of Sliding-Window
                                                                          Aggregates over Data Streams
K-Means Clustering in Map Reduce
Figure 2: MapReduce Classifier Training and Evaluation Procedure




                                A Comparison of Approaches for Large-Scale Data Mining
Google Pregel Graph Processing
Google Pregel Graph Processing
Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜

More Related Content

What's hot

Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)NTT DATA OSS Professional Services
 
Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理Cloudera Japan
 
本当は恐ろしい分散システムの話
本当は恐ろしい分散システムの話本当は恐ろしい分散システムの話
本当は恐ろしい分散システムの話Kumazaki Hiroki
 
初心者向けMongoDBのキホン!
初心者向けMongoDBのキホン!初心者向けMongoDBのキホン!
初心者向けMongoDBのキホン!Tetsutaro Watanabe
 
アーキテクチャから理解するPostgreSQLのレプリケーション
アーキテクチャから理解するPostgreSQLのレプリケーションアーキテクチャから理解するPostgreSQLのレプリケーション
アーキテクチャから理解するPostgreSQLのレプリケーションMasahiko Sawada
 
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)NTT DATA Technology & Innovation
 
Hadoopの概念と基本的知識
Hadoopの概念と基本的知識Hadoopの概念と基本的知識
Hadoopの概念と基本的知識Ken SASAKI
 
マルチテナントのアプリケーション実装〜実践編〜
マルチテナントのアプリケーション実装〜実践編〜マルチテナントのアプリケーション実装〜実践編〜
マルチテナントのアプリケーション実装〜実践編〜Yoshiki Nakagawa
 
RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門Yuki Morishita
 
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜Takahiko Ito
 
BuildKitの概要と最近の機能
BuildKitの概要と最近の機能BuildKitの概要と最近の機能
BuildKitの概要と最近の機能Kohei Tokunaga
 
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組みYuta Shimada
 
Pythonによる黒魔術入門
Pythonによる黒魔術入門Pythonによる黒魔術入門
Pythonによる黒魔術入門大樹 小倉
 
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~Miki Shimogai
 
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)Noritaka Sekiyama
 
Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-Yuki Gonda
 
トランザクションの設計と進化
トランザクションの設計と進化トランザクションの設計と進化
トランザクションの設計と進化Kumazaki Hiroki
 

What's hot (20)

Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
 
Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理
 
本当は恐ろしい分散システムの話
本当は恐ろしい分散システムの話本当は恐ろしい分散システムの話
本当は恐ろしい分散システムの話
 
初心者向けMongoDBのキホン!
初心者向けMongoDBのキホン!初心者向けMongoDBのキホン!
初心者向けMongoDBのキホン!
 
アーキテクチャから理解するPostgreSQLのレプリケーション
アーキテクチャから理解するPostgreSQLのレプリケーションアーキテクチャから理解するPostgreSQLのレプリケーション
アーキテクチャから理解するPostgreSQLのレプリケーション
 
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
 
Hadoopの概念と基本的知識
Hadoopの概念と基本的知識Hadoopの概念と基本的知識
Hadoopの概念と基本的知識
 
MapReduce入門
MapReduce入門MapReduce入門
MapReduce入門
 
マルチテナントのアプリケーション実装〜実践編〜
マルチテナントのアプリケーション実装〜実践編〜マルチテナントのアプリケーション実装〜実践編〜
マルチテナントのアプリケーション実装〜実践編〜
 
RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門
 
Spark SQL - The internal -
Spark SQL - The internal -Spark SQL - The internal -
Spark SQL - The internal -
 
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
 
BuildKitの概要と最近の機能
BuildKitの概要と最近の機能BuildKitの概要と最近の機能
BuildKitの概要と最近の機能
 
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組み
 
Pythonによる黒魔術入門
Pythonによる黒魔術入門Pythonによる黒魔術入門
Pythonによる黒魔術入門
 
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
 
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
 
Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-
 
トランザクションの設計と進化
トランザクションの設計と進化トランザクションの設計と進化
トランザクションの設計と進化
 
Apache Hadoopの未来 3系になって何が変わるのか?
Apache Hadoopの未来 3系になって何が変わるのか?Apache Hadoopの未来 3系になって何が変わるのか?
Apache Hadoopの未来 3系になって何が変わるのか?
 

Similar to Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜

Prepositions made easy-xpert
Prepositions made easy-xpertPrepositions made easy-xpert
Prepositions made easy-xperthudaalmabadi
 
Moosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimizedMoosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimizedHeinrich Seeger
 
Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~Takahiro Inoue
 
LAMP_TRAINING_SESSION_6
LAMP_TRAINING_SESSION_6LAMP_TRAINING_SESSION_6
LAMP_TRAINING_SESSION_6umapst
 
Sample portfolio1
Sample portfolio1Sample portfolio1
Sample portfolio1mkboudewyns
 
IASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, ChinaIASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, ChinaIlkka Kakko
 
Low Carbon Housing for Non-experts
Low Carbon Housing for Non-expertsLow Carbon Housing for Non-experts
Low Carbon Housing for Non-expertsurbed
 
Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012DFIE Lyon
 
Bren Poster Presentation Workshop
Bren Poster Presentation WorkshopBren Poster Presentation Workshop
Bren Poster Presentation WorkshopMonica Bulger
 
Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)Open Network Lab
 
School safety india handbook
School safety india handbookSchool safety india handbook
School safety india handbookKunal Ashar
 
Apresentação 4Q09
Apresentação 4Q09Apresentação 4Q09
Apresentação 4Q09CR2
 
DiplomadoIABMex_Equipo6
DiplomadoIABMex_Equipo6DiplomadoIABMex_Equipo6
DiplomadoIABMex_Equipo6jorgemacias23
 

Similar to Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜 (20)

Rate4
Rate4Rate4
Rate4
 
Prepositions made easy-xpert
Prepositions made easy-xpertPrepositions made easy-xpert
Prepositions made easy-xpert
 
Moosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimizedMoosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimized
 
Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~
 
Coanda Effect UAV
Coanda Effect UAVCoanda Effect UAV
Coanda Effect UAV
 
Import o matic_higher_ed
Import o matic_higher_edImport o matic_higher_ed
Import o matic_higher_ed
 
LAMP_TRAINING_SESSION_6
LAMP_TRAINING_SESSION_6LAMP_TRAINING_SESSION_6
LAMP_TRAINING_SESSION_6
 
Sample portfolio1
Sample portfolio1Sample portfolio1
Sample portfolio1
 
IASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, ChinaIASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, China
 
#ThisIsHappening
#ThisIsHappening#ThisIsHappening
#ThisIsHappening
 
Rothke Press
Rothke PressRothke Press
Rothke Press
 
Low Carbon Housing for Non-experts
Low Carbon Housing for Non-expertsLow Carbon Housing for Non-experts
Low Carbon Housing for Non-experts
 
IWRM National Dialogues
IWRM National DialoguesIWRM National Dialogues
IWRM National Dialogues
 
Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012
 
Bren Poster Presentation Workshop
Bren Poster Presentation WorkshopBren Poster Presentation Workshop
Bren Poster Presentation Workshop
 
Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)
 
Mv10 all oneslides-100408
Mv10 all oneslides-100408Mv10 all oneslides-100408
Mv10 all oneslides-100408
 
School safety india handbook
School safety india handbookSchool safety india handbook
School safety india handbook
 
Apresentação 4Q09
Apresentação 4Q09Apresentação 4Q09
Apresentação 4Q09
 
DiplomadoIABMex_Equipo6
DiplomadoIABMex_Equipo6DiplomadoIABMex_Equipo6
DiplomadoIABMex_Equipo6
 

More from Takahiro Inoue

Treasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTreasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTakahiro Inoue
 
トレジャーデータとtableau実現する自動レポーティング
トレジャーデータとtableau実現する自動レポーティングトレジャーデータとtableau実現する自動レポーティング
トレジャーデータとtableau実現する自動レポーティングTakahiro Inoue
 
Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Takahiro Inoue
 
トレジャーデータのバッチクエリとアドホッククエリを理解する
トレジャーデータのバッチクエリとアドホッククエリを理解するトレジャーデータのバッチクエリとアドホッククエリを理解する
トレジャーデータのバッチクエリとアドホッククエリを理解するTakahiro Inoue
 
20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューションTakahiro Inoue
 
トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方Takahiro Inoue
 
オンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータオンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータTakahiro Inoue
 
事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612Takahiro Inoue
 
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)Takahiro Inoue
 
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜Takahiro Inoue
 
Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Takahiro Inoue
 
Hadoop and the Data Scientist
Hadoop and the Data ScientistHadoop and the Data Scientist
Hadoop and the Data ScientistTakahiro Inoue
 
MongoDB: Intro & Application for Big Data
MongoDB: Intro & Application  for Big DataMongoDB: Intro & Application  for Big Data
MongoDB: Intro & Application for Big DataTakahiro Inoue
 
An Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsAn Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsTakahiro Inoue
 
An Introduction to Tinkerpop
An Introduction to TinkerpopAn Introduction to Tinkerpop
An Introduction to TinkerpopTakahiro Inoue
 
An Introduction to Neo4j
An Introduction to Neo4jAn Introduction to Neo4j
An Introduction to Neo4jTakahiro Inoue
 
The Definition of GraphDB
The Definition of GraphDBThe Definition of GraphDB
The Definition of GraphDBTakahiro Inoue
 
Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Takahiro Inoue
 
Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Takahiro Inoue
 

More from Takahiro Inoue (20)

Treasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTreasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC Demo
 
トレジャーデータとtableau実現する自動レポーティング
トレジャーデータとtableau実現する自動レポーティングトレジャーデータとtableau実現する自動レポーティング
トレジャーデータとtableau実現する自動レポーティング
 
Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界
 
トレジャーデータのバッチクエリとアドホッククエリを理解する
トレジャーデータのバッチクエリとアドホッククエリを理解するトレジャーデータのバッチクエリとアドホッククエリを理解する
トレジャーデータのバッチクエリとアドホッククエリを理解する
 
20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション
 
トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方トレジャーデータ流,データ分析の始め方
トレジャーデータ流,データ分析の始め方
 
オンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータオンラインゲームソリューション@トレジャーデータ
オンラインゲームソリューション@トレジャーデータ
 
事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612
 
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
 
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
 
Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!
 
Hadoop and the Data Scientist
Hadoop and the Data ScientistHadoop and the Data Scientist
Hadoop and the Data Scientist
 
MongoDB: Intro & Application for Big Data
MongoDB: Intro & Application  for Big DataMongoDB: Intro & Application  for Big Data
MongoDB: Intro & Application for Big Data
 
An Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsAn Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB Plugins
 
An Introduction to Tinkerpop
An Introduction to TinkerpopAn Introduction to Tinkerpop
An Introduction to Tinkerpop
 
An Introduction to Neo4j
An Introduction to Neo4jAn Introduction to Neo4j
An Introduction to Neo4j
 
The Definition of GraphDB
The Definition of GraphDBThe Definition of GraphDB
The Definition of GraphDB
 
Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)
 
Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)
 
Advanced MongoDB #1
Advanced MongoDB #1Advanced MongoDB #1
Advanced MongoDB #1
 

Recently uploaded

Advanced Computer Architecture – An Introduction
Advanced Computer Architecture – An IntroductionAdvanced Computer Architecture – An Introduction
Advanced Computer Architecture – An IntroductionDilum Bandara
 
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteDianaGray10
 
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupFlorian Wilhelm
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfAlex Barbosa Coqueiro
 
H2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo Day
H2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo DayH2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo Day
H2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo DaySri Ambati
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebUiPathCommunity
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLScyllaDB
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Scott Keck-Warren
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024Stephanie Beckett
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Commit University
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxNavinnSomaal
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...Fwdays
 
DevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache MavenDevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache MavenHervé Boutemy
 
CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):comworks
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Manik S Magar
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsMiki Katsuragi
 
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Enterprise Knowledge
 

Recently uploaded (20)

Advanced Computer Architecture – An Introduction
Advanced Computer Architecture – An IntroductionAdvanced Computer Architecture – An Introduction
Advanced Computer Architecture – An Introduction
 
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test Suite
 
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project Setup
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdf
 
H2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo Day
H2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo DayH2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo Day
H2O.ai CEO/Founder: Sri Ambati Keynote at Wells Fargo Day
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio Web
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQL
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!
 
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptx
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
 
DevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache MavenDevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache Maven
 
CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering Tips
 
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024
 

Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜

  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 13.
  • 14. Data Data Data mapper mapper mapper mapper mapper mapper mapper mapper mapper
  • 15.
  • 16.
  • 17.
  • 18.
  • 19. #$%&'()*' -'. #$% 0 1"23 45)667' &'()*' 0 1"23 0 1" 3 " " Welcome to My HomePage. Thank you. Where is your house? .... " " !+/"-'. " "
  • 20.
  • 21. mapper Big Data mapper mapper
  • 22. map: (k1, v1) ! [(k2, v2)] // [] //word count class Mapper method Map(docid a, doc d) for all term t ∈ doc d do Emit(term t, count 1)
  • 23. > require 'msgpack' > msg = [1,2,3].to_msgpack  #=> "\x93\x01\x02\x03" > MessagePack.unpack(msg)  #=> [1,2,3]
  • 24. // word count class Combiner method Combine(string t, counts [c1, c2, . . .]) sum ← 0 for all count c ∈ counts [c1, c2, . . .] do sum ← sum + c Emit(string t, count sum)
  • 25.
  • 26.
  • 27. reduce: (k2, [v2]) ! [(k3, v3)] //word count class Reducer method Reduce(term t, counts [c1, c2, . . .]) sum ← 0 for all count c ∈ counts [c1,c2,...] do sum ← sum + c Emit(term t, count sum)
  • 28. 30 CHAPTER 2. MAPREDUCE BASICS ! " # $ % & '())*+ )) '())*+ )) '())*+ )) '())*+ )) ( - , . / 0 / 1 ( 2 / . , 3 / 4 /5',67*+ /5',67*+ /5',67*+ /5',67*+ ( - , . / 8 ( 2 / . , 3 / 4 ) )(+969657*+ ) )(+969657*+ ) )(+969657*+ ) )(+969657*+ :;<==>*?(7@?:5+9A (BB+*B(9*?C(><*D?,E?F*ED ( - 2 , . 3 / . 8 4 +*@</*+ +*@</*+ +*@</*+ G 2 H 3 I 8
  • 29.
  • 30.
  • 31.
  • 32.
  • 33. Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 34. Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 35. Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 36. package com.philippeadjiman.hadooptraining;  package com.philippeadjiman.hadooptraining; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Partitioner;   public class MyPartitioner implements Partitioner<IntWritable,Text> { " @Override " public int getPartition(IntWritable key, Text value, int numPartitions) { " " /* Pretty ugly hard coded partitioning function. Don't do that in practice, it is just for the sake of understanding. */ " " int nbOccurences = key.get();   " " if( nbOccurences < 3 ) " " " return 0; " " else " " " return 1; " }   " @Override " public void configure(JobConf arg0) {   " } } Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 37. x + y = y + x x y = y x (x + y) + z = x + (y + z) (x y) z = x (y z)
  • 38.
  • 39.
  • 40.
  • 41.
  • 42. class Mapper { buffer init() { buffer = HashMap.new } map(id, data) { elements = process(data) for each element { .... check_and_put(buffer, k2, v2) } } // Designing algorithms for Map Reduce
  • 43. check_and_put(buffer, k2, v2) { if buffer.full { for each k2 in buffer.keys { emit(k2, buffer[k2]) } } else { buffer.incrby(k2, v2) // H[k2]+=v2 } } close() { for each k2 in buffer.keys { emit(k2, buffer[k2]) } } } Designing algorithms for Map Reduce
  • 44. ! ! ! !"# ! ! ! 2,% 6"# "# "# "# !"#$%&' " " " $%&'(%)*'+ 5,% '( '( '( ()*+*,-./0$1/ ! ! !#! !+(,+ ! ! ! $ $ 2,%$ :*8+"*6$+/ !#!"# 6"# !"#$%&' " " " $%&'(%)*'+ -(.*#/0 "# "# "# 5,% # # (+2*3+ '( '( '( ()*+*,-./0$1/ ,33"/3,+*#)9+"// !" !" 2/"3/ $ $ !+(,+ 1+2*3+ $ $ $ :*8+"*6$+/ !# !# "% "% !"#$%&' % -(.*#/0 4.5&*6+( # #% & & 4#56*)/ (+2*3+ ,33"/3,+*#)9+"// !" !" 2/"3/ 1+2*3+ $ Figure 1: Distributed execution plan for MapReduce $ when reduce cannot be decomposed to perform partial "% "% !"#$%&' % aggregation. % 4.5&*6+( !" !" 2/"3/ & & 4#56*)/ "/0$1/ Figure 1: Distributed execution plan for function, and merge and With this user-defined MapReduce "% "% !"#$%&' when reduce cannot beoperators provided by partial grouping decomposed to perform the system, it is pos- aggregation. sible to execute a simple distributed computation as !" !" ) )2/"3/ 7*),-./0$1/ shown in Figure 1. The computation has exactly "/0$1/ "% With this user-definedthe first phase merge anda Map function two phases: function, and executes "% * *!"#$%&' 4#)8$5/" grouping operatorsinputs to by the system, and pos- on the provided extract keys it is records, then per- ) sible to execute a simple distributed computation as based on the forms a partitioning of these outputs ) 7*),-./0$1/ Figure 2: Distributed execution plan for MapReduce shown in Figureof the records. The second phase collects and keys 1. The computation has exactly when reduce supports partial aggregation. The imple- two phases: the first phase executes a Map function * * 4#)8$5/" mentation of GroupBy in the first stage may be different to
  • 45.
  • 46. Def. 1 x: data items, x1 ⊕ x2: concatenation of x1, x2. H decomposable 2 I C : 1) ∀x1, x2 : H(x1 ⊕ x2) = C(I(x1 ⊕ x2)) = C(I(x1) ⊕ I(x2)) 2) ∀x1, x2 : I(x1 ⊕ x2) = I(x2 ⊕ x1) 3) ∀x1, x2 : C(x1 ⊕ x2) = C(x2 ⊕ x1) Def. 2 H associative-decomposable Def.1 1-3 C 4) ∀x1, x2, x3 : C(C(x1 ⊕ x2) ⊕ x3) = C(x1 ⊕ C(x2 ⊕ x3)) ( i.e. C is associative )
  • 47.
  • 48.
  • 49. class Combiner { share_space init(share_space_info) { share_space = conn(share_space_info) } combine(key, elements) { sum = 0 for each element { ... sum += v } //
  • 50. share_space.incrby(key, sum) emit(key, share_space_info) } // end combine() } class Reducer { reduce(key, list_of_share_space_info) { for each share_space_info { share_space = conn(share_space_info) sum = 0 elements = share_space.hget(key) for each element { ... } } }
  • 51. partition(key) { range = (KEY_MAX - KEY_MIN) / NUM_OF_REDUCERS reducer_no = (key - KEY_MIN) / range return reducer_no } Designing algorithms for Map Reduce
  • 52. (t1, m1, r80521), (t1, m2, r14209), (t1, m3, r76042), (t2, m1, r21823), (t2, m2, r66508), (t2, m3, r98347),... map: m1 ! (t1, r80521) // // t1,t2,t3,... (m1) ! [(t1, r80521), (t3, r146925), (t2, r21823)] (m2) ! [(t2, r66508), (t1, r14209), (t3, r14720)]
  • 53. map: (m1, t1) ! r80521 (m1, t1) ! [(r80521)] // t1,t2,t3,... (m1, t2) ! [(r21823)] (m1, t3) ! [(r146925)]
  • 54. class Mapper { buffer map(id, number) { buffer.append(number) if (buffer.is_full) { max = compute_max(buffer) emit(1, max) } } } Designing algorithms for Map Reduce
  • 55. class Reducer { reduce(key, list_of_local_max) { global_max = 0 for local_max in list_of_local_max { if local_max > global_max { global_max = local_max } } emit(1, global_max) } } Designing algorithms for Map Reduce
  • 56. class Combiner { combine(key, list_of_local_max) { local_max = maximum(list_of_local_max) emit(1, local_max) } // Max() } Designing algorithms for Map Reduce
  • 57. class Mapper { map(id, data) { key, value = process(data) if rand() < 0.1 { //rand() ∈ [0.0, 1.0) emit(key, value) } } }
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63. Map Reduce and Stream Processing
  • 64.
  • 65.
  • 66. # Call at each hit record map(k1, hitRecord) { site = hitRecord.site # key(=site) slice slice = lookupSlice(site) if (slice.time - now > 60.minutes) { # Notify reducer whole slice of site is sent advance(site, slice) slice = lookupSlice(site) } emitIntermediate(site, slice, 1) } Map Reduce and Stream Processing
  • 67. combine(site, slice, countList) { hitCount = 0 for count in countList { hitCount += count } # Send the message to the downstream node emitIntermediate(site, slice, hitCount) } Map Reduce and Stream Processing
  • 68. # mapper slice reduce(site, slice, countList) { hitCount = 0 for count in countList { hitCount += count } sv = SliceValue.new sv.hitCount = hitCount return sv } Map Reduce and Stream Processing
  • 69. # Window init(slice) { rangeValue = RangeValue.new rangeValue.hitCount = 0 return rangeValue } # Reduce merge(rangeValue, slice, sliceValue) { rangeValue.hitCount += sliceValue.hitCount } # slice slicing window unmerge(rangeValue, slice, sliceValue) { rangeValue.hitCount -= sliceValue.hitCount } Map Reduce and Stream Processing
  • 70. 5&4.)1*,!,);3-00+*0-1*,!&/*+!*-58!.-$*9!-$%!@+&22,!).A!18*! -!:2*=#;2*!'-<!1&!4&$#1&+!,1+*-4#$0!%-1-6!! .-$*3-00+*0-1*,! 1&! 5&4.)1*! '#$%&'3-00+*0-1*,6! >)+! *=3 R)++*$1!.+&.&,-2,!:&+!*/-2)-1#$0!,2#%#$03'#$%&'!-00+*0-1*! .*+#4*$1-2! ,1)%<! ,8&',! 18-1! ),#$0! .-$*,! 8-,! ,#0$#:#5-$1! ()*+#*,!;)::*+!*-58!#$.)1!1).2*!)$1#2!#1!#,!$&!2&$0*+!$**%*%! .*+:&+4-$5*!;*$*:#1,6!! INP6! D#$5*! *-58! #$.)1! 1).2*! ;*2&$0,! 1&! 4)21#.2*! '#$%&',9! ,)58!-..+&-58*,!;)::*+!-!1).2*!)$1#2!#1!#,!.+&5*,,*%!:&+!18*! '(# )*+,-./0+1-*2 -00+*0-1*! &/*+! 18*! 2-,1! '#$%&'! 1&! '8#58! #1! ;*2&$0,6! -58! B-$<! -..2#5-1#&$,! $**%! 1&! .+&5*,,! ,1+*-4,9! :&+! *=-4.2*9! #$.)1! 1).2*! #,! -55*,,*%! 4)21#.2*! 1#4*,9! &$5*! :&+! *-58! '#$3 :#$-$5#-2! %-1-! -$-2<,#,9! $*1'&+C! 1+-::#5! 4&$#1&+#$09! -$%! %&'!18-1!#1!.-+1#5#.-1*,!#$6!!! 1*2*5&44)$#5-1#&$! 4&$#1&+#$06! D*/*+-2! %-1-;-,*! +*,*-+58! 0+&).,! -+*! ;)#2%#$0! --1-! .1+*-4! /-$-0*4*$1! .<,1*4,! "*! ,**! 1'&! .+&;2*4,! '#18! ,)58! -..+&-58*,6! W#+,1! 18*! EFDBDG!,&!18-1!-..2#5-1#&$,!5-$!#,,)*!()*+#*,!1&!0*1!1#4*2<! ;)::*+!,#H*!+*()#+*%!#,!)$;&)$%*%T!Q1!-$<!1#4*!#$,1-$19!-22! #$:&+4-1#&$! :+&4! ,1+*-4,6! B-$-0#$0! -$%! .+&5*,,#$0! 1).2*,! 5&$1-#$*%! #$! 18*! 5)++*$1! '#$%&'! -+*! #$! 18*! ;)::*+9! ,1+*-4,!0#/*,!+#,*!1&!58-22*$0*,!18-1!8-/*!;**$!*=1*$,#/*2<! -$%!,&!18*!,#H*!&:!18*!+*()#+*%!;)::*+,!#,!%*1*+4#$*%!;<!18*! %#,5),,*%!-$%!+*5&0$#H*%!IJ9!K9!L9!M9!NOP6!! '#$%&'!+-$0*!-$%!18*!%-1-!-++#/-2!+-1*6!D*5&$%9!.+&5*,,#$0! *-58!#$.)1!1).2*!4)21#.2*!1#4*,!2*-%,!1&!-!8#08!5&4.)1-1#&$! Q$!#4.&+1-$1!52-,,!&:!()*+#*,!&/*+!%-1-!,1+*-4,!#,!,2#%#$03 5&,16!W&+!*=-4.2*!#$!X)*+<!N9!*-58!#$.)1!1).2*!#,!.+&5*,,*%! '#$%&'!-00+*0-1*!()*+#*,6!R&$,#%*+!-$!&$2#$*!-)51#&$!,<,3 :&)+!1#4*,6!Q,!18*!+-1#&!&:!YQZ[!&/*+!D]7F!#$5+*-,*,9! 1*4!#$!'8#58!;#%,!&$!-)51#&$!#1*4,!-+*!,1+*-4*%!#$1&!-!5*$3 ,&!%&*,!18*!$)4;*+!&:!1#4*,!*-58!1).2*!#,!.+&5*,,*%6!R&$3 1+-2!-)51#&$!.+&5*,,#$0!,<,1*46!S8*!,58*4-!&:!*-58!;#%!#,T! 
,#%*+#$0!18*!2-+0*!/&2)4*!-$%!:-,1!-++#/-2!+-1*!&:!,1+*-4#$0! U#1*43#%9! ;#%3.+#5*9! 1#4*,1-4.V6! W&+! *-,*! &:! .+*,*$1-1#&$9! %-1-9!+*%)5#$0!18*!-4&)$1!&:!+*()#+*%!;)::*+!,.-5*!E#%*-22<! '*!-,,)4*!18-1!;#%,!-++#/*!#$!&+%*+!&$!18*#+!1#4*,1-4.!-13 1&!-!5&$,1-$1!;&)$%G!-$%!5&4.)1-1#&$!1#4*!#,!-$!#4.&+1-$1! 1+#;)1*6! E"*! -+*! -51#/*2<! #$/*,1#0-1#$0! .+&5*,,#$0! %#,&+3 %*+*%!%-1-!,1+*-4,G!X)*+<!N!,8&',!-$!*=-4.2*!&:!-!,2#%#$03 '#$%&'!-00+*0-1*!()*+<6! 3/4,52'T!@W#$%!18*!4-=#4)4!;#%!.+#5*!:&+!18*!.-,1!K!4#$3 )1*,!-$%!).%-1*!18*!+*,)21!*/*+<!N!4#$)1*6A! !"#"$%&'()*+,-./0,123& 4567&+,-89:;%%5&<,'28<('/& &&&&&&&&&&5;=>"&?&',@A<28& &&&&&&&&&&!#BC"&D&',@A<2E& 7$! 18*! ()*+<! -;&/*9! '*! #$1+&%)5*! -! '#$%&'! ,.*5#:#5-1#&$! '#18!18+**!.-+-4*1*+,T!YQZ[!,.*5#:#*,!18*!'#$%&'!,#H*9! D]7F! ,.*5#:#*,! 8&'! 18*! '#$%&'! 4&/*,9! -$%! "QSSY! ,.*5#:#*,! 18*! '#$%&'#$0! -11+#;)1*! &$! '8#58! 18-1! 18*! YQZ[! -$%! D]7F! .-+-4*1*+,! -+*! %*:#$*%6! S8*! '#$%&'! ,.*5#:#5-1#&$! &:! X)*+<! N! ;+*-C,! 18*! ;#%! ,1+*-4! #$1&! &/*+3 2-..#$0!K34#$)1*!,);3,1+*-4,!18-1!,1-+1!*/*+<!4#$)1*9!'#18! +*,.*51! 1&! 18*! 1#4*,1-4.! -11+#;)1*6! S8*,*! &/*+2-..#$0! ,);3 ,1+*-4,!-+*!5-22*%!!"#$#%&0(#%$)(!6!X)*+<!N!5-25)2-1*,!18*! 617/,42'8291*.-:;2&-<=-;4.2->26-/,2?@*4;2 No Pane, No Gain: Efficient Evaluation of Sliding-Window Aggregates over Data Streams
  • 71.
  • 72.
  • 73.
  • 74. K-Means Clustering in Map Reduce
  • 75. Figure 2: MapReduce Classifier Training and Evaluation Procedure A Comparison of Approaches for Large-Scale Data Mining
  • 76. Google Pregel Graph Processing
  • 77. Google Pregel Graph Processing