1
+ /**
2
+ A parallel word-frequency counting program.
3
+
4
+ This is meant primarily to demonstrate Rust's MapReduce framework.
5
+
6
+ It takes a list of files on the command line and outputs a list of
7
+ words along with how many times each word is used.
8
+
9
+ */
10
+
11
+ use std;
12
+
13
+ import std:: io;
14
+ import option = std:: option:: t;
15
+ import std:: option:: some;
16
+ import std:: option:: none;
17
+ import std:: str;
18
+ import std:: vec;
19
+ import std:: map;
20
+
21
+ mod map_reduce {
22
+ export putter;
23
+ export getter;
24
+ export mapper;
25
+ export reducer;
26
+ export map_reduce;
27
+
28
+ type putter = fn ( str , str ) -> ( ) ;
29
+
30
+ type mapper = fn ( str , putter ) ;
31
+
32
+ type getter = fn ( ) -> option[ str ] ;
33
+
34
+ type reducer = fn ( str , getter ) ;
35
+
36
+
37
+ fn map_reduce ( vec[ str] inputs ,
38
+ mapper f,
39
+ reducer reduce) {
40
+ auto intermediates = map:: new_str_hash[ vec[ str] ] ( ) ;
41
+
42
+ fn emit ( & map:: hashmap[ str, vec[ str] ] im ,
43
+ str key, str val) {
44
+ auto old = [ ] ;
45
+ alt ( im. remove ( key) ) {
46
+ case ( some ( ?v) ) {
47
+ old = v;
48
+ }
49
+ case ( none) { }
50
+ }
51
+
52
+ im. insert ( key, old + [ val] ) ;
53
+ }
54
+
55
+ for ( str i in inputs) {
56
+ f( i, bind emit( intermediates, _, _) ) ;
57
+ }
58
+
59
+ fn get ( vec[ str] vals , & mutable uint i) -> option[ str ] {
60
+ i += 1 u;
61
+ if ( i <= vec:: len ( vals) ) {
62
+ some ( vals. ( i - 1 u) )
63
+ }
64
+ else {
65
+ none
66
+ }
67
+ }
68
+
69
+ for each ( @tup( str , vec[ str ] ) kv in intermediates. items( ) ) {
70
+ auto i = 0 u;
71
+ reduce( kv. _0, bind get( kv. _1, i) ) ;
72
+ }
73
+ }
74
+ }
75
+
76
+ fn main ( vec[ str] argv ) {
77
+ if ( vec:: len ( argv) < 2 u) {
78
+ auto out = io:: stdout ( ) ;
79
+
80
+ out. write_line ( #fmt ( "Usage: %s <filename> ..." , argv. ( 0 ) ) ) ;
81
+ fail;
82
+ }
83
+
84
+ fn map ( str filename , map_reduce:: putter emit) {
85
+ auto f = io:: file_reader ( filename) ;
86
+
87
+ while ( true ) {
88
+ alt ( read_word ( f) ) {
89
+ case ( some ( ?w) ) {
90
+ emit ( w, "1" ) ;
91
+ }
92
+ case ( none) {
93
+ break ;
94
+ }
95
+ }
96
+ }
97
+ }
98
+
99
+ fn reduce ( str word , map_reduce:: getter get) {
100
+ auto count = 0 ;
101
+
102
+ while ( true ) {
103
+ alt ( get ( ) ) {
104
+ case ( some ( _) ) { count += 1 }
105
+ case ( none) { break }
106
+ }
107
+ }
108
+
109
+ auto out = io:: stdout ( ) ;
110
+ out. write_line ( #fmt ( "%s: %d" , word, count) ) ;
111
+ }
112
+
113
+ map_reduce:: map_reduce ( vec:: slice ( argv, 1 u, vec:: len ( argv) ) , map, reduce) ;
114
+ }
115
+
116
+ fn read_word ( io:: reader r) -> option[ str ] {
117
+ auto w = "" ;
118
+
119
+ while ( !r. eof ( ) ) {
120
+ auto c = r. read_char ( ) ;
121
+
122
+ if ( is_word_char ( c) ) {
123
+ w += str:: from_char ( c) ;
124
+ }
125
+ else {
126
+ if ( w != "" ) {
127
+ ret some ( w) ;
128
+ }
129
+ }
130
+ }
131
+ ret none;
132
+ }
133
+
134
+ fn is_digit ( char c) -> bool {
135
+ alt ( c) {
136
+ case ( '0' ) { true }
137
+ case ( '1' ) { true }
138
+ case ( '2' ) { true }
139
+ case ( '3' ) { true }
140
+ case ( '4' ) { true }
141
+ case ( '5' ) { true }
142
+ case ( '6' ) { true }
143
+ case ( '7' ) { true }
144
+ case ( '8' ) { true }
145
+ case ( '9' ) { true }
146
+ case ( _) { false }
147
+ }
148
+ }
149
+
150
+ fn is_alpha_lower ( char c) -> bool {
151
+ alt ( c) {
152
+ case ( 'a' ) { true }
153
+ case ( 'b' ) { true }
154
+ case ( 'c' ) { true }
155
+ case ( 'd' ) { true }
156
+ case ( 'e' ) { true }
157
+ case ( 'f' ) { true }
158
+ case ( 'g' ) { true }
159
+ case ( 'h' ) { true }
160
+ case ( 'i' ) { true }
161
+ case ( 'j' ) { true }
162
+ case ( 'k' ) { true }
163
+ case ( 'l' ) { true }
164
+ case ( 'm' ) { true }
165
+ case ( 'n' ) { true }
166
+ case ( 'o' ) { true }
167
+ case ( 'p' ) { true }
168
+ case ( 'q' ) { true }
169
+ case ( 'r' ) { true }
170
+ case ( 's' ) { true }
171
+ case ( 't' ) { true }
172
+ case ( 'u' ) { true }
173
+ case ( 'v' ) { true }
174
+ case ( 'w' ) { true }
175
+ case ( 'x' ) { true }
176
+ case ( 'y' ) { true }
177
+ case ( 'z' ) { true }
178
+ case ( _) { false }
179
+ }
180
+ }
181
+
182
+ fn is_alpha_upper ( char c) -> bool {
183
+ alt ( c) {
184
+ case ( 'A' ) { true }
185
+ case ( 'B' ) { true }
186
+ case ( 'C' ) { true }
187
+ case ( 'D' ) { true }
188
+ case ( 'E' ) { true }
189
+ case ( 'F' ) { true }
190
+ case ( 'G' ) { true }
191
+ case ( 'H' ) { true }
192
+ case ( 'I' ) { true }
193
+ case ( 'J' ) { true }
194
+ case ( 'K' ) { true }
195
+ case ( 'L' ) { true }
196
+ case ( 'M' ) { true }
197
+ case ( 'N' ) { true }
198
+ case ( 'O' ) { true }
199
+ case ( 'P' ) { true }
200
+ case ( 'Q' ) { true }
201
+ case ( 'R' ) { true }
202
+ case ( 'S' ) { true }
203
+ case ( 'T' ) { true }
204
+ case ( 'U' ) { true }
205
+ case ( 'V' ) { true }
206
+ case ( 'W' ) { true }
207
+ case ( 'X' ) { true }
208
+ case ( 'Y' ) { true }
209
+ case ( 'Z' ) { true }
210
+ case ( _) { false }
211
+ }
212
+ }
213
+
214
+ fn is_alpha ( char c) -> bool {
215
+ is_alpha_upper ( c) || is_alpha_lower ( c)
216
+ }
217
+
218
+ fn is_word_char ( char c) -> bool {
219
+ is_alpha ( c) || is_digit ( c) || c == '_'
220
+ }
0 commit comments