|
| 1 | +// multi tasking k-nucleotide |
| 2 | + |
| 3 | +import io::reader_util; |
| 4 | + |
| 5 | +use std; |
| 6 | +import std::map; |
| 7 | +import std::map::hashmap; |
| 8 | +import std::sort; |
| 9 | + |
| 10 | +// given a map, print a sorted version of it |
| 11 | +fn sort_and_fmt(mm: hashmap<[u8], uint>, total: uint) -> str { |
| 12 | + fn pct(xx: uint, yy: uint) -> float { |
| 13 | + ret (xx as float) * 100f / (yy as float); |
| 14 | + } |
| 15 | + |
| 16 | + fn le_by_val<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool { |
| 17 | + let (_, v0) = kv0; |
| 18 | + let (_, v1) = kv1; |
| 19 | + ret v0 >= v1; |
| 20 | + } |
| 21 | + |
| 22 | + fn le_by_key<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool { |
| 23 | + let (k0, _) = kv0; |
| 24 | + let (k1, _) = kv1; |
| 25 | + ret k0 <= k1; |
| 26 | + } |
| 27 | + |
| 28 | + // sort by key, then by value |
| 29 | + fn sortKV<TT: copy, UU: copy>(orig: [(TT,UU)]) -> [(TT,UU)] { |
| 30 | + ret sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig)); |
| 31 | + } |
| 32 | + |
| 33 | + let mut pairs = []; |
| 34 | + |
| 35 | + // map -> [(k,%)] |
| 36 | + mm.each(fn&(key: [u8], val: uint) -> bool { |
| 37 | + pairs += [(key, pct(val, total))]; |
| 38 | + ret true; |
| 39 | + }); |
| 40 | + |
| 41 | + let pairs_sorted = sortKV(pairs); |
| 42 | + |
| 43 | + let mut buffer = ""; |
| 44 | + |
| 45 | + pairs_sorted.each(fn&(kv: ([u8], float)) -> bool unsafe { |
| 46 | + let (k,v) = kv; |
| 47 | + buffer += (#fmt["%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v]); |
| 48 | + ret true; |
| 49 | + }); |
| 50 | + |
| 51 | + ret buffer; |
| 52 | +} |
| 53 | + |
| 54 | +// given a map, search for the frequency of a pattern |
| 55 | +fn find(mm: hashmap<[u8], uint>, key: str) -> uint { |
| 56 | + alt mm.find(str::bytes(str::to_lower(key))) { |
| 57 | + option::none { ret 0u; } |
| 58 | + option::some(num) { ret num; } |
| 59 | + } |
| 60 | +} |
| 61 | + |
| 62 | +// given a map, increment the counter for a key |
| 63 | +fn update_freq(mm: hashmap<[u8], uint>, key: [u8]) { |
| 64 | + alt mm.find(key) { |
| 65 | + option::none { mm.insert(key, 1u ); } |
| 66 | + option::some(val) { mm.insert(key, 1u + val); } |
| 67 | + } |
| 68 | +} |
| 69 | + |
| 70 | +// given a [u8], for each window call a function |
| 71 | +// i.e., for "hello" and windows of size four, |
| 72 | +// run it("hell") and it("ello"), then return "llo" |
| 73 | +fn windows_with_carry(bb: [const u8], nn: uint, it: fn(window: [u8])) -> [u8] { |
| 74 | + let mut ii = 0u; |
| 75 | + |
| 76 | + let len = vec::len(bb); |
| 77 | + while ii < len - (nn - 1u) { |
| 78 | + it(vec::slice(bb, ii, ii+nn)); |
| 79 | + ii += 1u; |
| 80 | + } |
| 81 | + |
| 82 | + ret vec::slice(bb, len - (nn - 1u), len); |
| 83 | +} |
| 84 | + |
| 85 | +fn make_sequence_processor(sz: uint, from_parent: comm::port<[u8]>, to_parent: comm::chan<str>) { |
| 86 | + |
| 87 | + let freqs: hashmap<[u8], uint> = map::bytes_hash(); |
| 88 | + let mut carry: [u8] = []; |
| 89 | + let mut total: uint = 0u; |
| 90 | + |
| 91 | + let mut line: [u8]; |
| 92 | + |
| 93 | + loop { |
| 94 | + |
| 95 | + line = comm::recv(from_parent); |
| 96 | + if line == [] { break; } |
| 97 | + |
| 98 | + carry = windows_with_carry(carry + line, sz, { |window| |
| 99 | + update_freq(freqs, window); |
| 100 | + total += 1u; |
| 101 | + }); |
| 102 | + } |
| 103 | + |
| 104 | + let buffer = alt sz { |
| 105 | + 1u { sort_and_fmt(freqs, total) } |
| 106 | + 2u { sort_and_fmt(freqs, total) } |
| 107 | + 3u { #fmt["%u\t%s", find(freqs, "GGT"), "GGT"] } |
| 108 | + 4u { #fmt["%u\t%s", find(freqs, "GGTA"), "GGTA"] } |
| 109 | + 6u { #fmt["%u\t%s", find(freqs, "GGTATT"), "GGTATT"] } |
| 110 | + 12u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATT"), "GGTATTTTAATT"] } |
| 111 | + 18u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATTTATAGT"), "GGTATTTTAATTTATAGT"] } |
| 112 | + _ { "" } |
| 113 | + }; |
| 114 | + |
| 115 | + //comm::send(to_parent, #fmt["yay{%u}", sz]); |
| 116 | + comm::send(to_parent, buffer); |
| 117 | +} |
| 118 | + |
| 119 | +// given a FASTA file on stdin, process sequence THREE |
| 120 | +fn main(args: [str]) { |
| 121 | + let rdr = if os::getenv("RUST_BENCH").is_some() { |
| 122 | + result::get(io::file_reader("./shootout-fasta.data")) |
| 123 | + } else { |
| 124 | + io::stdin() |
| 125 | + }; |
| 126 | + |
| 127 | + |
| 128 | + |
| 129 | + // initialize each sequence sorter |
| 130 | + let sizes = [1u,2u,3u,4u,6u,12u,18u]; |
| 131 | + let from_child = vec::map (sizes, { |_sz| comm::port() }); |
| 132 | + let to_parent = vec::mapi(sizes, { |ii, _sz| comm::chan(from_child[ii]) }); |
| 133 | + let to_child = vec::mapi(sizes, fn@(ii: uint, sz: uint) -> comm::chan<[u8]> { |
| 134 | + ret task::spawn_listener { |from_parent| |
| 135 | + make_sequence_processor(sz, from_parent, to_parent[ii]); |
| 136 | + }; |
| 137 | + }); |
| 138 | + |
| 139 | + |
| 140 | + // latch stores true after we've started |
| 141 | + // reading the sequence of interest |
| 142 | + let mut proc_mode = false; |
| 143 | + |
| 144 | + while !rdr.eof() { |
| 145 | + let line: str = rdr.read_line(); |
| 146 | + |
| 147 | + if str::len(line) == 0u { cont; } |
| 148 | + |
| 149 | + alt (line[0], proc_mode) { |
| 150 | + |
| 151 | + // start processing if this is the one |
| 152 | + ('>' as u8, false) { |
| 153 | + alt str::find_str_from(line, "THREE", 1u) { |
| 154 | + option::some(_) { proc_mode = true; } |
| 155 | + option::none { } |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + // break our processing |
| 160 | + ('>' as u8, true) { break; } |
| 161 | + |
| 162 | + // process the sequence for k-mers |
| 163 | + (_, true) { |
| 164 | + let line_bytes = str::bytes(line); |
| 165 | + |
| 166 | + for sizes.eachi { |ii, _sz| |
| 167 | + let mut lb = line_bytes; |
| 168 | + comm::send(to_child[ii], lb); |
| 169 | + } |
| 170 | + } |
| 171 | + |
| 172 | + // whatever |
| 173 | + _ { } |
| 174 | + } |
| 175 | + } |
| 176 | + |
| 177 | + // finish... |
| 178 | + for sizes.eachi { |ii, _sz| |
| 179 | + comm::send(to_child[ii], []); |
| 180 | + } |
| 181 | + |
| 182 | + // now fetch and print result messages |
| 183 | + for sizes.eachi { |ii, _sz| |
| 184 | + io::println(comm::recv(from_child[ii])); |
| 185 | + } |
| 186 | +} |
| 187 | + |
0 commit comments