SlideShare a Scribd company logo
1 of 78
Download to read offline

Aggregate Data Analysis
Data                Data                Data

mapper              mapper              mapper

   mapper              mapper              mapper

           mapper              mapper              mapper
#$%&'()*'                          -'.            #$%   0
           1"23                45)667'
      &'()*'           0      1"23        0    1"        3
Welcome to My HomePage.
      Thank you.
 Where is your house? ....

                  " !+/"-'.                         "

Big Data   mapper

map: (k1, v1) → [(k2, v2)] // [] denotes a list

//word count
class Mapper
   method Map(docid a, doc d)
      for all term t ∈ doc d do
         Emit(term t, count 1)
> require 'msgpack'
> msg = [1,2,3].to_msgpack 
> MessagePack.unpack(msg)  #=> [1,2,3]
// word count
class Combiner
   method Combine(string t, counts [c1, c2, . . .])
      sum ← 0
      for all count c ∈ counts [c1, c2, . . .] do
         sum ← sum + c
      Emit(string t, count sum)
reduce: (k2, [v2]) → [(k3, v3)]

//word count
class Reducer
   method Reduce(term t, counts [c1, c2, . . .])
      sum ← 0
      for all count c ∈ counts [c1,c2,...] do
         sum ← sum + c
      Emit(term t, count sum)

                          !       "       #       $       %         &

                   ))              '())*+
                                     ))                 '())*+
                                                          ))                 '())*+

                ( -   , .         / 0     / 1         ( 2     / .           , 3     / 4

                 /5',67*+          /5',67*+             /5',67*+             /5',67*+

                ( -   , .               / 8           ( 2     / .           , 3     / 4

                )(+969657*+       )
                                  )(+969657*+         )
                                                      )(+969657*+           )

                         :;<==>*?(7@?:5+9A (BB+*B(9*?C(><*D?,E?F*ED

                              (   - 2            ,    . 3               /   . 8 4

                        +*@</*+               +*@</*+            +*@</*+

                            G 2                 H 3                 I 8
Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
Hadoop Tutorial Series, Issue #2:
Getting Started With (Customized) Partitioning
Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
package com.philippeadjiman.hadooptraining;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;
public class MyPartitioner implements Partitioner<IntWritable,Text> {
" @Override
" public int getPartition(IntWritable key, Text value, int numPartitions) {
" " /* Pretty ugly hard coded partitioning function. Don't do that in practice,
it is just for the sake of understanding. */
" " int nbOccurences = key.get();
" " if( nbOccurences < 3 )
" " " return 0;
" " else
" " " return 1;
" }
" @Override
" public void configure(JobConf arg0) {
" }
}                      Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
x + y = y + x
x × y = y × x

(x + y) + z = x + (y + z)
(x × y) × z = x × (y × z)
class Mapper {
   init() {
       buffer =
   map(id, data) {
       elements = process(data)
       for each element {
            check_and_put(buffer, k2, v2)
   } //                           Designing algorithms for Map Reduce
check_and_put(buffer, k2, v2) {
        if buffer.full {
            for each k2 in buffer.keys {
                emit(k2, buffer[k2])
        } else {
            buffer.incrby(k2, v2) // H[k2]+=v2
    close() {
        for each k2 in buffer.keys {
            emit(k2, buffer[k2])
}                                  Designing algorithms for Map Reduce
!           !        !         !"#                                 !             !             !                       2,%

                                                                                "#            "#            "#                      !"#$%&'
             "           "        "         $%&'(%)*'+

                                                                                '(            '(            '(                      ()*+*,-./0$1/

  !         ! !#!                           !+(,+       !         !             !
                                                                                $             $ 2,%$                                :*8+"*6$+/

  "         "   "                $%&'(%)*'+ -(.*#/0 "#           "#             "#

                     #       #

                                                    '(            '(            '(                  ()*+*,-./0$1/

                                                                                                    !"           !"                 2/"3/
                     $       $ !+(,+        1+2*3+      $         $             $                   :*8+"*6$+/
      !#       !#                                                                                   "%           "%                 !"#$%&'
                             % -(.*#/0      4.5&*6+(
      #          #%                                                                                     &        &                  4#56*)/

                                                                           !"            !"         2/"3/
Figure 1: Distributed execution plan for MapReduce
when reduce cannot be decomposed to perform partial
aggregation.

With this user-defined function, and merge and
grouping operators provided by the system, it is pos-
sible to execute a simple distributed computation as
shown in Figure 1. The computation has exactly
two phases: the first phase executes a Map function
on the inputs to extract keys and records, then per-
forms a partitioning of these outputs based on the
keys of the records. The second phase collects and

Figure 2: Distributed execution plan for MapReduce
when reduce supports partial aggregation. The imple-
mentation of GroupBy in the first stage may be different to

[Diagram residue in this span decodes to the stage labels Merge, GroupBy, Combine, FinalReduce and Consumer.]
Def. 1
  x: data items, x1 ⊕ x2: concatenation of x1, x2.
  A function H is decomposable if there exist 2 functions I and C satisfying:


1) ∀x1, x2 : H(x1 ⊕ x2) = C(I(x1 ⊕ x2)) = C(I(x1) ⊕ I(x2))
2) ∀x1, x2 : I(x1 ⊕ x2) = I(x2 ⊕ x1)
3) ∀x1, x2 : C(x1 ⊕ x2) = C(x2 ⊕ x1)

Def. 2
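  A function H is associative-decomposable if there exist functions I and C
  satisfying conditions 1-3 of Def. 1 and, in addition, C satisfies: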

4) ∀x1, x2, x3 : C(C(x1 ⊕ x2) ⊕ x3) = C(x1 ⊕ C(x2 ⊕ x3))
( i.e. C is associative )
class Combiner {
   init(share_space_info) {
       share_space = conn(share_space_info)
   combine(key, elements) {
       sum = 0
       for each element {
              sum += v
       } //
share_space.incrby(key, sum)
        emit(key, share_space_info)
    } // end combine()
class Reducer {
    reduce(key, list_of_share_space_info) {
        for each share_space_info {
            share_space = conn(share_space_info)
            sum = 0
            elements = share_space.hget(key)
            for each element {
partition(key) {
   reducer_no = (key - KEY_MIN) / range
   return reducer_no
}                                    Designing algorithms for Map Reduce
(t1, m1, r80521), (t1, m2, r14209), (t1, m3, r76042),
(t2, m1, r21823), (t2, m2, r66508), (t2, m3, r98347),...

 map: m1 → (t1, r80521) //

 // t1,t2,t3,...
 (m1) → [(t1, r80521), (t3, r146925), (t2, r21823)]
 (m2) → [(t2, r66508), (t1, r14209), (t3, r14720)]
map: (m1, t1) → r80521

(m1, t1) → [(r80521)] // t1,t2,t3,...
(m1, t2) → [(r21823)]
(m1, t3) → [(r146925)]
class Mapper {
          map(id, number) {
             if (buffer.is_full) {
                   max = compute_max(buffer)
                   emit(1, max)
}                                        Designing algorithms for Map Reduce
class Reducer {
    reduce(key, list_of_local_max) {
        global_max = 0
        for local_max in list_of_local_max {
            if local_max > global_max {
                global_max = local_max
        emit(1, global_max)
}                                  Designing algorithms for Map Reduce
class Combiner {
    combine(key, list_of_local_max) {
       local_max = maximum(list_of_local_max)
       emit(1, local_max)
    } // Max()

}                               Designing algorithms for Map Reduce
class Mapper {
    map(id, data) {
        key, value = process(data)
        if rand() < 0.1 {   //rand() ∈ [0.0, 1.0)
            emit(key, value)
Map Reduce and Stream Processing
# Call at each hit record
 map(k1, hitRecord) {
     site =
     #     key(=site)    slice

     slice = lookupSlice(site)
     if (slice.time - now > 60.minutes) {
         # Notify reducer whole slice of site is sent
         advance(site, slice)
         slice = lookupSlice(site)
     emitIntermediate(site, slice, 1)
 }                                      Map Reduce and Stream Processing
combine(site, slice, countList) {
    hitCount = 0
    for count in countList {
        hitCount += count
    # Send the message to the downstream node
    emitIntermediate(site, slice, hitCount)
}                                     Map Reduce and Stream Processing
#       mapper   slice

reduce(site, slice, countList) {
    hitCount = 0
    for count in countList {
        hitCount += count
    sv =
    sv.hitCount = hitCount
    return sv
}                                  Map Reduce and Stream Processing
# Window
init(slice) {
    rangeValue =
    rangeValue.hitCount = 0
    return rangeValue
# Reduce
merge(rangeValue, slice, sliceValue) {
    rangeValue.hitCount += sliceValue.hitCount
#     slice   slicing window
unmerge(rangeValue, slice, sliceValue) {
    rangeValue.hitCount -= sliceValue.hitCount
}                                 Map Reduce and Stream Processing
computes sub-aggregates over each pane, and “rolls up” the            a flexible way to monitor streaming data.
pane-aggregates to compute window-aggregates. Our ex-                 Current proposals for evaluating sliding-window aggregate
perimental study shows that using panes has significant               queries buffer each input tuple until it is no longer needed
performance benefits.                                                 [1]. Since each input tuple belongs to multiple windows,
1. INTRODUCTION                                                       aggregate over the last window to which it belongs. Each
Many applications need to process streams, for example,               input tuple is accessed multiple times, once for each win-
financial data analysis, network traffic monitoring, and              dow that it participates in.
telecommunication monitoring. Several database research
groups are building Data Stream Management Systems                    We see two problems with such approaches. First the
(DSMS) so that applications can issue queries to get timely           buffer size required is unbounded: At any time instant, all
information from streams. Managing and processing                     tuples contained in the current window are in the buffer,
streams gives rise to challenges that have been extensively           and so the size of the required buffers is determined by the
discussed and recognized [?, ?, ?, ?, 1?].                            window range and the data arrival rate. Second, processing
An important class of queries over data streams is sliding-           cost. For example in Query 1, each input tuple is processed
window aggregate queries. Consider an online auction sys-             four times. As the ratio of RANGE over SLIDE increases,
tem in which bids on auction items are streamed into a cen-           so does the number of times each tuple is processed. Con-
tral auction processing system. The schema of each bid is:            sidering the large volume and fast arrival rate of streaming
<item-id, bid-price, timestamp>. For ease of presentation,            data, reducing the amount of required buffer space (ideally
we assume that bids arrive in order on their timestamp at-            to a constant bound) and computation time is an important
tribute. (We are actively investigating processing disor-
In the query above, we introduce a window specification
SLIDE specifies how the window moves, and WATTR
specifies the windowing attribute on which that the
RANGE and SLIDE parameters are defined. The window
specification of Query 1 breaks the bid stream into over-
respect to the timestamp attribute. These overlapping sub-
streams are called sliding windows. Query 1 calculates the            Figure 1: Windows Composed of Four Panes
                                                              No Pane, No Gain: Efficient Evaluation of Sliding-Window
                                                                          Aggregates over Data Streams
K-Means Clustering in Map Reduce
Figure 2: MapReduce Classifier Training and Evaluation Procedure

                                A Comparison of Approaches for Large-Scale Data Mining
Google Pregel Graph Processing
Google Pregel Graph Processing
Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜

More Related Content

What's hot

Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)NTT DATA OSS Professional Services
Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理Cloudera Japan
本当は恐ろしい分散システムの話Kumazaki Hiroki
初心者向けMongoDBのキホン!Tetsutaro Watanabe
アーキテクチャから理解するPostgreSQLのレプリケーションMasahiko Sawada
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)NTT DATA Technology & Innovation
Hadoopの概念と基本的知識Ken SASAKI
マルチテナントのアプリケーション実装〜実践編〜Yoshiki Nakagawa
RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門Yuki Morishita
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜Takahiko Ito
BuildKitの概要と最近の機能Kohei Tokunaga
【CNDT2022】SIerで実践!クラウドネイティブを普及させる取り組みYuta Shimada
Pythonによる黒魔術入門大樹 小倉
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~Miki Shimogai
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)Noritaka Sekiyama
Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-Yuki Gonda
トランザクションの設計と進化Kumazaki Hiroki

What's hot (20)

Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Sparkに手を出してヤケドしないための基本 ~「Apache Spark入門より」~ (デブサミ 2016 講演資料)
Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理Apache Hadoop YARNとマルチテナントにおけるリソース管理
Apache Hadoop YARNとマルチテナントにおけるリソース管理
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
Memoizeの仕組み(第41回PostgreSQLアンカンファレンス@オンライン 発表資料)
RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門RDB開発者のためのApache Cassandra データモデリング入門
RDB開発者のためのApache Cassandra データモデリング入門
Spark SQL - The internal -
Spark SQL - The internal -Spark SQL - The internal -
Spark SQL - The internal -
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
Elasticsearch の検索精度のチューニング 〜テストを作って高速かつ安全に〜
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
PostgreSQLクエリ実行の基礎知識 ~Explainを読み解こう~
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop/Spark で Amazon S3 を徹底的に使いこなすワザ (Hadoop / Spark Conference Japan 2019)
Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-Hadoop -NameNode HAの仕組み-
Hadoop -NameNode HAの仕組み-
Apache Hadoopの未来 3系になって何が変わるのか?
Apache Hadoopの未来 3系になって何が変わるのか?Apache Hadoopの未来 3系になって何が変わるのか?
Apache Hadoopの未来 3系になって何が変わるのか?

Similar to Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜

Prepositions made easy-xpert
Prepositions made easy-xpertPrepositions made easy-xpert
Prepositions made easy-xperthudaalmabadi
Moosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimizedMoosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimizedHeinrich Seeger
Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~Takahiro Inoue
Sample portfolio1
Sample portfolio1Sample portfolio1
Sample portfolio1mkboudewyns
IASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, ChinaIASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, ChinaIlkka Kakko
Low Carbon Housing for Non-experts
Low Carbon Housing for Non-expertsLow Carbon Housing for Non-experts
Low Carbon Housing for Non-expertsurbed
Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012DFIE Lyon
Bren Poster Presentation Workshop
Bren Poster Presentation WorkshopBren Poster Presentation Workshop
Bren Poster Presentation WorkshopMonica Bulger
Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)Open Network Lab
School safety india handbook
School safety india handbookSchool safety india handbook
School safety india handbookKunal Ashar
Apresentação 4Q09
Apresentação 4Q09Apresentação 4Q09
Apresentação 4Q09CR2

Similar to Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜 (20)

Prepositions made easy-xpert
Prepositions made easy-xpertPrepositions made easy-xpert
Prepositions made easy-xpert
Moosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimizedMoosecon native apps_blackberry_10-optimized
Moosecon native apps_blackberry_10-optimized
Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~Map Reduce ~Continuous Map Reduce Design~
Map Reduce ~Continuous Map Reduce Design~
Coanda Effect UAV
Coanda Effect UAVCoanda Effect UAV
Coanda Effect UAV
Import o matic_higher_ed
Import o matic_higher_edImport o matic_higher_ed
Import o matic_higher_ed
Sample portfolio1
Sample portfolio1Sample portfolio1
Sample portfolio1
IASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, ChinaIASP World Conference, 2005 Beijing, China
IASP World Conference, 2005 Beijing, China
Rothke Press
Rothke PressRothke Press
Rothke Press
Low Carbon Housing for Non-experts
Low Carbon Housing for Non-expertsLow Carbon Housing for Non-experts
Low Carbon Housing for Non-experts
IWRM National Dialogues
IWRM National DialoguesIWRM National Dialogues
IWRM National Dialogues
Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012Carnet des innovations 20 fev 2012
Carnet des innovations 20 fev 2012
Bren Poster Presentation Workshop
Bren Poster Presentation WorkshopBren Poster Presentation Workshop
Bren Poster Presentation Workshop
Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)Open Network Lab (At Tokyo 2point0)
Open Network Lab (At Tokyo 2point0)
Mv10 all oneslides-100408
Mv10 all oneslides-100408Mv10 all oneslides-100408
Mv10 all oneslides-100408
School safety india handbook
School safety india handbookSchool safety india handbook
School safety india handbook
Apresentação 4Q09
Apresentação 4Q09Apresentação 4Q09
Apresentação 4Q09

More from Takahiro Inoue

Treasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTreasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTakahiro Inoue
トレジャーデータとtableau実現する自動レポーティングTakahiro Inoue
Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Takahiro Inoue
トレジャーデータのバッチクエリとアドホッククエリを理解するTakahiro Inoue
20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューションTakahiro Inoue
トレジャーデータ流,データ分析の始め方Takahiro Inoue
オンラインゲームソリューション@トレジャーデータTakahiro Inoue
事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612Takahiro Inoue
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)Takahiro Inoue
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜Takahiro Inoue
Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Takahiro Inoue
Hadoop and the Data Scientist
Hadoop and the Data ScientistHadoop and the Data Scientist
Hadoop and the Data ScientistTakahiro Inoue
MongoDB: Intro & Application for Big Data
MongoDB: Intro & Application  for Big DataMongoDB: Intro & Application  for Big Data
MongoDB: Intro & Application for Big DataTakahiro Inoue
An Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsAn Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsTakahiro Inoue
An Introduction to Tinkerpop
An Introduction to TinkerpopAn Introduction to Tinkerpop
An Introduction to TinkerpopTakahiro Inoue
An Introduction to Neo4j
An Introduction to Neo4jAn Introduction to Neo4j
An Introduction to Neo4jTakahiro Inoue
The Definition of GraphDB
The Definition of GraphDBThe Definition of GraphDB
The Definition of GraphDBTakahiro Inoue
Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Takahiro Inoue
Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Takahiro Inoue

More from Takahiro Inoue (20)

Treasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC DemoTreasure Data × Wave Analytics EC Demo
Treasure Data × Wave Analytics EC Demo
Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界Tableauが魅せる Data Visualization の世界
Tableauが魅せる Data Visualization の世界
20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション20140708 オンラインゲームソリューション
20140708 オンラインゲームソリューション
事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612事例で学ぶトレジャーデータ 20140612
事例で学ぶトレジャーデータ 20140612
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)トレジャーデータ株式会社について(for all Data_Enthusiast!!)
トレジャーデータ株式会社について(for all Data_Enthusiast!!)
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜この Visualization がすごい2014 〜データ世界を彩るツール6選〜
この Visualization がすごい2014 〜データ世界を彩るツール6選〜
Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!Treasure Data Intro for Data Enthusiast!!
Treasure Data Intro for Data Enthusiast!!
Hadoop and the Data Scientist
Hadoop and the Data ScientistHadoop and the Data Scientist
Hadoop and the Data Scientist
MongoDB: Intro & Application for Big Data
MongoDB: Intro & Application  for Big DataMongoDB: Intro & Application  for Big Data
MongoDB: Intro & Application for Big Data
An Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB PluginsAn Introduction to Fluent & MongoDB Plugins
An Introduction to Fluent & MongoDB Plugins
An Introduction to Tinkerpop
An Introduction to TinkerpopAn Introduction to Tinkerpop
An Introduction to Tinkerpop
An Introduction to Neo4j
An Introduction to Neo4jAn Introduction to Neo4j
An Introduction to Neo4j
The Definition of GraphDB
The Definition of GraphDBThe Definition of GraphDB
The Definition of GraphDB
Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(完全版)
Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)Large-Scale Graph Processing〜Introduction〜(LT版)
Large-Scale Graph Processing〜Introduction〜(LT版)
Advanced MongoDB #1
Advanced MongoDB #1Advanced MongoDB #1
Advanced MongoDB #1

Recently uploaded

Advanced Computer Architecture – An Introduction
Advanced Computer Architecture – An IntroductionAdvanced Computer Architecture – An Introduction
Advanced Computer Architecture – An IntroductionDilum Bandara
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteDianaGray10
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupFlorian Wilhelm
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfAlex Barbosa Coqueiro CEO/Founder: Sri Ambati Keynote at Wells Fargo Day CEO/Founder: Sri Ambati Keynote at Wells Fargo CEO/Founder: Sri Ambati Keynote at Wells Fargo Day CEO/Founder: Sri Ambati Keynote at Wells Fargo DaySri Ambati
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebUiPathCommunity
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLScyllaDB
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Scott Keck-Warren
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024Stephanie Beckett
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Commit University
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxNavinnSomaal
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...Fwdays
DevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache MavenDevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache MavenHervé Boutemy
CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):comworks
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Manik S Magar
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsMiki Katsuragi
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Enterprise Knowledge

Recently uploaded (20)

Advanced Computer Architecture – An Introduction
Advanced Computer Architecture – An IntroductionAdvanced Computer Architecture – An Introduction
Advanced Computer Architecture – An Introduction
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test Suite
Streamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project SetupStreamlining Python Development: A Guide to a Modern Project Setup
Streamlining Python Development: A Guide to a Modern Project Setup
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdf CEO/Founder: Sri Ambati Keynote at Wells Fargo Day CEO/Founder: Sri Ambati Keynote at Wells Fargo CEO/Founder: Sri Ambati Keynote at Wells Fargo Day CEO/Founder: Sri Ambati Keynote at Wells Fargo Day
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio Web
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
Developer Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQLDeveloper Data Modeling Mistakes: From Postgres to NoSQL
Developer Data Modeling Mistakes: From Postgres to NoSQL
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!
SAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptxSAP Build Work Zone - Overview L2-L3.pptx
SAP Build Work Zone - Overview L2-L3.pptx
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
DevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache MavenDevoxxFR 2024 Reproducible Builds with Apache Maven
DevoxxFR 2024 Reproducible Builds with Apache Maven
CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):CloudStudio User manual (basic edition):
CloudStudio User manual (basic edition):
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering Tips
Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024Designing IA for AI - Information Architecture Conference 2024
Designing IA for AI - Information Architecture Conference 2024

Map Reduce 〜入門編:仕組みの理解とアルゴリズムデザイン〜

  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 13.
  • 14. Data Data Data mapper mapper mapper mapper mapper mapper mapper mapper mapper
  • 15.
  • 16.
  • 17.
  • 18.
  • 19. #$%&'()*' -'. #$% 0 1"23 45)667' &'()*' 0 1"23 0 1" 3 " " Welcome to My HomePage. Thank you. Where is your house? .... " " !+/"-'. " "
  • 20.
  • 21. mapper Big Data mapper mapper
  • 22. map: (k1, v1) → [(k2, v2)] // [] //word count class Mapper method Map(docid a, doc d) for all term t ∈ doc d do Emit(term t, count 1)
  • 23. > require 'msgpack' > msg = [1,2,3].to_msgpack  #=>"\x93\x01\x02\x03" > MessagePack.unpack(msg)  #=> [1,2,3]
  • 24. // word count class Combiner method Combine(string t, counts [c1, c2, . . .]) sum ← 0 for all count c ∈ counts [c1, c2, . . .] do sum ← sum + c Emit(string t, count sum)
  • 25.
  • 26.
  • 27. reduce: (k2, [v2]) → [(k3, v3)] //word count class Reducer method Reduce(term t, counts [c1, c2, . . .]) sum ← 0 for all count c ∈ counts [c1,c2,...] do sum ← sum + c Emit(term t, count sum)
  • 28. 30 CHAPTER 2. MAPREDUCE BASICS ! " # $ % & '())*+ )) '())*+ )) '())*+ )) '())*+ )) ( - , . / 0 / 1 ( 2 / . , 3 / 4 /5',67*+ /5',67*+ /5',67*+ /5',67*+ ( - , . / 8 ( 2 / . , 3 / 4 ) )(+969657*+ ) )(+969657*+ ) )(+969657*+ ) )(+969657*+ :;<==>*?(7@?:5+9A (BB+*B(9*?C(><*D?,E?F*ED ( - 2 , . 3 / . 8 4 +*@</*+ +*@</*+ +*@</*+ G 2 H 3 I 8
  • 29.
  • 30.
  • 31.
  • 32.
  • 33. Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 34. Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 35. Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 36. package com.philippeadjiman.hadooptraining; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Partitioner; public class MyPartitioner implements Partitioner<IntWritable,Text> { @Override public int getPartition(IntWritable key, Text value, int numPartitions) { /* Pretty ugly hard coded partitioning function. Don't do that in practice, it is just for the sake of understanding. */ int nbOccurences = key.get(); if( nbOccurences < 3 ) return 0; else return 1; } @Override public void configure(JobConf arg0) { } } Hadoop Tutorial Series, Issue #2: Getting Started With (Customized) Partitioning
  • 37. x + y = y + x, x × y = y × x; (x + y) + z = x + (y + z), (x × y) × z = x × (y × z)
  • 38.
  • 39.
  • 40.
  • 41.
  • 42. class Mapper { buffer init() { buffer = } map(id, data) { elements = process(data) for each element { .... check_and_put(buffer, k2, v2) } } // Designing algorithms for Map Reduce
  • 43. check_and_put(buffer, k2, v2) { if buffer.full { for each k2 in buffer.keys { emit(k2, buffer[k2]) } } else { buffer.incrby(k2, v2) // H[k2]+=v2 } } close() { for each k2 in buffer.keys { emit(k2, buffer[k2]) } } } Designing algorithms for Map Reduce
  • 44. ! ! ! !"# ! ! ! 2,% 6"# "# "# "# !"#$%&' " " " $%&'(%)*'+ 5,% '( '( '( ()*+*,-./0$1/ ! ! !#! !+(,+ ! ! ! $ $ 2,%$ :*8+"*6$+/ !#!"# 6"# !"#$%&' " " " $%&'(%)*'+ -(.*#/0 "# "# "# 5,% # # (+2*3+ '( '( '( ()*+*,-./0$1/ ,33"/3,+*#)9+"// !" !" 2/"3/ $ $ !+(,+ 1+2*3+ $ $ $ :*8+"*6$+/ !# !# "% "% !"#$%&' % -(.*#/0 4.5&*6+( # #% & & 4#56*)/ (+2*3+ ,33"/3,+*#)9+"// !" !" 2/"3/ 1+2*3+ $ Figure 1: Distributed execution plan for MapReduce $ when reduce cannot be decomposed to perform partial "% "% !"#$%&' % aggregation. % 4.5&*6+( !" !" 2/"3/ & & 4#56*)/ "/0$1/ Figure 1: Distributed execution plan for function, and merge and With this user-defined MapReduce "% "% !"#$%&' when reduce cannot beoperators provided by partial grouping decomposed to perform the system, it is pos- aggregation. sible to execute a simple distributed computation as !" !" ) )2/"3/ 7*),-./0$1/ shown in Figure 1. The computation has exactly "/0$1/ "% With this user-definedthe first phase merge anda Map function two phases: function, and executes "% * *!"#$%&' 4#)8$5/" grouping operatorsinputs to by the system, and pos- on the provided extract keys it is records, then per- ) sible to execute a simple distributed computation as based on the forms a partitioning of these outputs ) 7*),-./0$1/ Figure 2: Distributed execution plan for MapReduce shown in Figureof the records. The second phase collects and keys 1. The computation has exactly when reduce supports partial aggregation. The imple- two phases: the first phase executes a Map function * * 4#)8$5/" mentation of GroupBy in the first stage may be different to
  • 45.
  • 46. Def. 1 x: data items, x1 ⊕ x2: concatenation of x1, x2. H is decomposable if there exist 2 functions I and C satisfying: 1) ∀x1, x2 : H(x1 ⊕ x2) = C(I(x1 ⊕ x2)) = C(I(x1) ⊕ I(x2)) 2) ∀x1, x2 : I(x1 ⊕ x2) = I(x2 ⊕ x1) 3) ∀x1, x2 : C(x1 ⊕ x2) = C(x2 ⊕ x1) Def. 2 H is associative-decomposable if conditions 1-3 of Def. 1 hold and C also satisfies: 4) ∀x1, x2, x3 : C(C(x1 ⊕ x2) ⊕ x3) = C(x1 ⊕ C(x2 ⊕ x3)) ( i.e. C is associative )
  • 47.
  • 48.
  • 49. class Combiner { share_space init(share_space_info) { share_space = conn(share_space_info) } combine(key, elements) { sum = 0 for each element { ... sum += v } //
  • 50. share_space.incrby(key, sum) emit(key, share_space_info) } // end combine() } class Reducer { reduce(key, list_of_share_space_info) { for each share_space_info { share_space = conn(share_space_info) sum = 0 elements = share_space.hget(key) for each element { ... } } }
  • 51. partition(key) { range = (KEY_MAX - KEY_MIN) / NUM_OF_REDUCERS reducer_no = (key - KEY_MIN) / range return reducer_no } Designing algorithms for Map Reduce
  • 52. (t1, m1, r80521), (t1, m2, r14209), (t1, m3, r76042), (t2, m1, r21823), (t2, m2, r66508), (t2, m3, r98347),... map: m1 → (t1, r80521) // // t1,t2,t3,... (m1) → [(t1, r80521), (t3, r146925), (t2, r21823)] (m2) → [(t2, r66508), (t1, r14209), (t3, r14720)]
  • 53. map: (m1, t1) → r80521 (m1, t1) → [(r80521)] // t1,t2,t3,... (m1, t2) → [(r21823)] (m1, t3) → [(r146925)]
  • 54. class Mapper { buffer map(id, number) { buffer.append(number) if (buffer.is_full) { max = compute_max(buffer) emit(1, max) } } } Designing algorithms for Map Reduce
  • 55. class Reducer { reduce(key, list_of_local_max) { global_max = 0 for local_max in list_of_local_max { if local_max > global_max { global_max = local_max } } emit(1, global_max) } } Designing algorithms for Map Reduce
  • 56. class Combiner { combine(key, list_of_local_max) { local_max = maximum(list_of_local_max) emit(1, local_max) } // Max() } Designing algorithms for Map Reduce
  • 57. class Mapper { map(id, data) { key, value = process(data) if rand() < 0.1 { //rand() ∈ [0.0, 1.0) emit(key, value) } } }
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63. Map Reduce and Stream Processing
  • 64.
  • 65.
  • 66. # Call at each hit record map(k1, hitRecord) { site = # key(=site) slice slice = lookupSlice(site) if (slice.time - now > 60.minutes) { # Notify reducer whole slice of site is sent advance(site, slice) slice = lookupSlice(site) } emitIntermediate(site, slice, 1) } Map Reduce and Stream Processing
  • 67. combine(site, slice, countList) { hitCount = 0 for count in countList { hitCount += count } # Send the message to the downstream node emitIntermediate(site, slice, hitCount) } Map Reduce and Stream Processing
  • 68. # mapper slice reduce(site, slice, countList) { hitCount = 0 for count in countList { hitCount += count } sv = sv.hitCount = hitCount return sv } Map Reduce and Stream Processing
  • 69. # Window init(slice) { rangeValue = rangeValue.hitCount = 0 return rangeValue } # Reduce merge(rangeValue, slice, sliceValue) { rangeValue.hitCount += sliceValue.hitCount } # slice slicing window unmerge(rangeValue, slice, sliceValue) { rangeValue.hitCount -= sliceValue.hitCount } Map Reduce and Stream Processing
  • 70. 5&4.)1*,!,);3-00+*0-1*,!&/*+!*-58!.-$*9!-$%!@+&22,!).A!18*! -!:2*=#;2*!'-<!1&!4&$#1&+!,1+*-4#$0!%-1-6!! .-$*3-00+*0-1*,! 1&! 5&4.)1*! '#$%&'3-00+*0-1*,6! >)+! *=3 R)++*$1!.+&.&,-2,!:&+!*/-2)-1#$0!,2#%#$03'#$%&'!-00+*0-1*! .*+#4*$1-2! ,1)%<! ,8&',! 18-1! ),#$0! .-$*,! 8-,! ,#0$#:#5-$1! ()*+#*,!;)::*+!*-58!#$.)1!1).2*!)$1#2!#1!#,!$&!2&$0*+!$**%*%! .*+:&+4-$5*!;*$*:#1,6!! INP6! D#$5*! *-58! #$.)1! 1).2*! ;*2&$0,! 1&! 4)21#.2*! '#$%&',9! ,)58!-..+&-58*,!;)::*+!-!1).2*!)$1#2!#1!#,!.+&5*,,*%!:&+!18*! '(# )*+,-./0+1-*2 -00+*0-1*! &/*+! 18*! 2-,1! '#$%&'! 1&! '8#58! #1! ;*2&$0,6! -58! B-$<! -..2#5-1#&$,! $**%! 1&! .+&5*,,! ,1+*-4,9! :&+! *=-4.2*9! #$.)1! 1).2*! #,! -55*,,*%! 4)21#.2*! 1#4*,9! &$5*! :&+! *-58! '#$3 :#$-$5#-2! %-1-! -$-2<,#,9! $*1'&+C! 1+-::#5! 4&$#1&+#$09! -$%! %&'!18-1!#1!.-+1#5#.-1*,!#$6!!! 1*2*5&44)$#5-1#&$! 4&$#1&+#$06! D*/*+-2! %-1-;-,*! +*,*-+58! 0+&).,! -+*! ;)#2%#$0! --1-! .1+*-4! /-$-0*4*$1! .<,1*4,! "*! ,**! 1'&! .+&;2*4,! '#18! ,)58! -..+&-58*,6! W#+,1! 18*! EFDBDG!,&!18-1!-..2#5-1#&$,!5-$!#,,)*!()*+#*,!1&!0*1!1#4*2<! ;)::*+!,#H*!+*()#+*%!#,!)$;&)$%*%T!Q1!-$<!1#4*!#$,1-$19!-22! #$:&+4-1#&$! :+&4! ,1+*-4,6! B-$-0#$0! -$%! .+&5*,,#$0! 1).2*,! 5&$1-#$*%! #$! 18*! 5)++*$1! '#$%&'! -+*! #$! 18*! ;)::*+9! ,1+*-4,!0#/*,!+#,*!1&!58-22*$0*,!18-1!8-/*!;**$!*=1*$,#/*2<! -$%!,&!18*!,#H*!&:!18*!+*()#+*%!;)::*+,!#,!%*1*+4#$*%!;<!18*! %#,5),,*%!-$%!+*5&0$#H*%!IJ9!K9!L9!M9!NOP6!! '#$%&'!+-$0*!-$%!18*!%-1-!-++#/-2!+-1*6!D*5&$%9!.+&5*,,#$0! *-58!#$.)1!1).2*!4)21#.2*!1#4*,!2*-%,!1&!-!8#08!5&4.)1-1#&$! Q$!#4.&+1-$1!52-,,!&:!()*+#*,!&/*+!%-1-!,1+*-4,!#,!,2#%#$03 5&,16!W&+!*=-4.2*!#$!X)*+<!N9!*-58!#$.)1!1).2*!#,!.+&5*,,*%! '#$%&'!-00+*0-1*!()*+#*,6!R&$,#%*+!-$!&$2#$*!-)51#&$!,<,3 :&)+!1#4*,6!Q,!18*!+-1#&!&:!YQZ[!&/*+!D]7F!#$5+*-,*,9! 1*4!#$!'8#58!;#%,!&$!-)51#&$!#1*4,!-+*!,1+*-4*%!#$1&!-!5*$3 ,&!%&*,!18*!$)4;*+!&:!1#4*,!*-58!1).2*!#,!.+&5*,,*%6!R&$3 1+-2!-)51#&$!.+&5*,,#$0!,<,1*46!S8*!,58*4-!&:!*-58!;#%!#,T! 
,#%*+#$0!18*!2-+0*!/&2)4*!-$%!:-,1!-++#/-2!+-1*!&:!,1+*-4#$0! U#1*43#%9! ;#%3.+#5*9! 1#4*,1-4.V6! W&+! *-,*! &:! .+*,*$1-1#&$9! %-1-9!+*%)5#$0!18*!-4&)$1!&:!+*()#+*%!;)::*+!,.-5*!E#%*-22<! '*!-,,)4*!18-1!;#%,!-++#/*!#$!&+%*+!&$!18*#+!1#4*,1-4.!-13 1&!-!5&$,1-$1!;&)$%G!-$%!5&4.)1-1#&$!1#4*!#,!-$!#4.&+1-$1! 1+#;)1*6! E"*! -+*! -51#/*2<! #$/*,1#0-1#$0! .+&5*,,#$0! %#,&+3 %*+*%!%-1-!,1+*-4,G!X)*+<!N!,8&',!-$!*=-4.2*!&:!-!,2#%#$03 '#$%&'!-00+*0-1*!()*+<6! 3/4,52'T!@W#$%!18*!4-=#4)4!;#%!.+#5*!:&+!18*!.-,1!K!4#$3 )1*,!-$%!).%-1*!18*!+*,)21!*/*+<!N!4#$)1*6A! !"#"$%&'()*+,-./0,123& 4567&+,-89:;%%5&<,'28<('/& &&&&&&&&&&5;=>"&?&',@A<28& &&&&&&&&&&!#BC"&D&',@A<2E& 7$! 18*! ()*+<! -;&/*9! '*! #$1+&%)5*! -! '#$%&'! ,.*5#:#5-1#&$! '#18!18+**!.-+-4*1*+,T!YQZ[!,.*5#:#*,!18*!'#$%&'!,#H*9! D]7F! ,.*5#:#*,! 8&'! 18*! '#$%&'! 4&/*,9! -$%! "QSSY! ,.*5#:#*,! 18*! '#$%&'#$0! -11+#;)1*! &$! '8#58! 18-1! 18*! YQZ[! -$%! D]7F! .-+-4*1*+,! -+*! %*:#$*%6! S8*! '#$%&'! ,.*5#:#5-1#&$! &:! X)*+<! N! ;+*-C,! 18*! ;#%! ,1+*-4! #$1&! &/*+3 2-..#$0!K34#$)1*!,);3,1+*-4,!18-1!,1-+1!*/*+<!4#$)1*9!'#18! +*,.*51! 1&! 18*! 1#4*,1-4.! -11+#;)1*6! S8*,*! &/*+2-..#$0! ,);3 ,1+*-4,!-+*!5-22*%!!"#$#%&0(#%$)(!6!X)*+<!N!5-25)2-1*,!18*! 617/,42'8291*.-:;2&-<=-;4.2->26-/,2?@*4;2 No Pane, No Gain: Efficient Evaluation of Sliding-Window Aggregates over Data Streams
  • 71.
  • 72.
  • 73.
  • 74. K-Means Clustering in Map Reduce
  • 75. Figure 2: MapReduce Classifier Training and Evaluation Procedure A Comparison of Approaches for Large-Scale Data Mining
  • 76. Google Pregel Graph Processing
  • 77. Google Pregel Graph Processing