Skip to content

Commit 1d32313

Browse files
committed
test: Add k-nucleotide
1 parent 10aa1c3 commit 1d32313

File tree

1 file changed

+314
-0
lines changed

1 file changed

+314
-0
lines changed
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
extern mod std;
2+
3+
use core::cast::transmute;
4+
use core::i32::range;
5+
use core::libc::{STDIN_FILENO, c_int, fdopen, fgets, fileno, fopen, fstat};
6+
use core::libc::{fclose, stat, strlen};
7+
use core::ptr::null;
8+
use core::unstable::intrinsics::init;
9+
use core::vec::{reverse, slice};
10+
use std::sort::quick_sort3;
11+
12+
// Size limit passed to each `fgets` call in `read_stdin`.
static LINE_LEN: uint = 80;
13+
// Nucleotide letter for each 2-bit symbol code; indexed by `unpack_symbol`.
// The order mirrors the codes assigned by `pack_symbol` (A=0, C=1, G=2, T=3).
static TABLE: [u8, ..4] = [ 'A' as u8, 'C' as u8, 'G' as u8, 'T' as u8 ];
14+
// Number of buckets in `Table`, and the modulus used by `Table::lookup`.
// Note that `2 << 16` evaluates to 131072 (2^17), not 2^16.
static TABLE_SIZE: uint = 2 << 16;
15+
16+
// Sequences whose occurrence counts `main` prints, one per table pass.
static OCCURRENCES: [&'static str, ..5] = [
17+
"GGT",
18+
"GGTA",
19+
"GGTATT",
20+
"GGTATTTTAATT",
21+
"GGTATTTTAATTTATAGT",
22+
];
23+
24+
// Code implementation
25+
26+
// A nucleotide sequence packed into a `u64`, two bits per symbol
// (see `Code::push_char`). Comparison and ordering are derived from the
// wrapped integer.
#[deriving(Eq, Ord)]
27+
struct Code(u64);
28+
29+
impl Code {
30+
fn hash(&self) -> u64 {
31+
**self
32+
}
33+
34+
#[inline(always)]
35+
fn push_char(&self, c: u8) -> Code {
36+
Code((**self << 2) + (pack_symbol(c) as u64))
37+
}
38+
39+
fn rotate(&self, c: u8, frame: i32) -> Code {
40+
Code(*self.push_char(c) & ((1u64 << (2 * (frame as u64))) - 1))
41+
}
42+
43+
fn pack(string: &str) -> Code {
44+
let mut code = Code(0u64);
45+
for uint::range(0, string.len()) |i| {
46+
code = code.push_char(string[i]);
47+
}
48+
code
49+
}
50+
51+
// XXX: Inefficient.
52+
fn unpack(&self, frame: i32) -> ~str {
53+
let mut key = **self;
54+
let mut result = ~[];
55+
for (frame as uint).times {
56+
result.push(unpack_symbol((key as u8) & 3));
57+
key >>= 2;
58+
}
59+
60+
reverse(result);
61+
str::from_bytes(result)
62+
}
63+
}
64+
65+
// Hash table implementation
66+
67+
// Action that `Table::lookup` applies to the entry matching a key, whether
// that entry already existed or was just created.
trait TableCallback {
68+
// Called with mutable access to the matching entry.
fn f(&self, entry: &mut Entry);
69+
}
70+
71+
// Callback that increments the count of the entry it is handed; used by
// `generate_frequencies` to tally each window.
struct BumpCallback;
72+
73+
impl TableCallback for BumpCallback {
74+
fn f(&self, entry: &mut Entry) {
75+
entry.count += 1;
76+
}
77+
}
78+
79+
// Callback that prints the entry's count, a tab, and the wrapped label.
// It does not modify the entry.
struct PrintCallback(&'static str);
80+
81+
impl TableCallback for PrintCallback {
82+
fn f(&self, entry: &mut Entry) {
83+
println(fmt!("%d\t%s", entry.count as int, **self));
84+
}
85+
}
86+
87+
// One node of a bucket's singly linked chain in `Table`.
struct Entry {
88+
// Packed sequence this entry counts.
code: Code,
89+
// Value updated by callbacks; starts at 0 when the entry is created.
count: i32,
90+
// Next entry in the same bucket, if any.
next: Option<~Entry>,
91+
}
92+
93+
// Fixed-size chained hash table keyed by `Code`.
struct Table {
94+
// Set to 0 by `Table::new`; no code visible in this file updates it.
count: i32,
95+
// Bucket heads; `None` marks an empty bucket.
items: [Option<~Entry>, ..TABLE_SIZE]
96+
}
97+
98+
impl Table {
99+
// Creates a table with every bucket empty.
fn new() -> Table {
100+
Table {
101+
count: 0,
102+
items: [ None, ..TABLE_SIZE ],
103+
}
104+
}
105+
106+
// Searches the chain *after* `item` for `key` and applies `c` to the match.
// When the end of the chain is reached, a new entry with count 0 is created,
// passed to `c`, and linked in as `item.next`. Recurses once per chain node.
fn search_remainder<C:TableCallback>(item: &mut Entry, key: Code, c: C) {
107+
match item.next {
108+
None => {
109+
let mut entry = ~Entry {
110+
code: key,
111+
count: 0,
112+
next: None,
113+
};
114+
c.f(entry);
115+
item.next = Some(entry);
116+
}
117+
Some(ref mut entry) => {
118+
if entry.code == key {
119+
c.f(*entry);
120+
return;
121+
}
122+
123+
Table::search_remainder(*entry, key, c)
124+
}
125+
}
126+
}
127+
128+
// Finds or creates the entry for `key` and applies `c` to it.
// Note that a miss always inserts a new entry, even when `c` only reads.
fn lookup<C:TableCallback>(&mut self, key: Code, c: C) {
129+
// Bucket index is the packed value modulo the bucket count.
let index = *key % (TABLE_SIZE as u64);
130+
131+
// Empty bucket: the new entry becomes the bucket head.
{
132+
if self.items[index].is_none() {
133+
let mut entry = ~Entry {
134+
code: key,
135+
count: 0,
136+
next: None,
137+
};
138+
c.f(entry);
139+
self.items[index] = Some(entry);
140+
return;
141+
}
142+
}
143+
144+
// Occupied bucket: check the head, then walk the rest of the chain.
{
145+
let mut entry = &mut *self.items[index].get_mut_ref();
146+
if entry.code == key {
147+
c.f(*entry);
148+
return;
149+
}
150+
151+
Table::search_remainder(*entry, key, c)
152+
}
153+
}
154+
155+
// Calls `f` on every entry, bucket by bucket and along each chain.
// Iteration stops as soon as `f` returns false.
fn each(&self, f: &fn(entry: &Entry) -> bool) {
156+
for self.items.each |item| {
157+
match *item {
158+
None => {}
159+
Some(ref item) => {
160+
let mut item: &Entry = *item;
161+
loop {
162+
if !f(item) {
163+
return;
164+
}
165+
166+
match item.next {
167+
None => break,
168+
Some(ref next_item) => item = &**next_item,
169+
}
170+
}
171+
}
172+
};
173+
}
174+
}
175+
}
176+
177+
// Main program
178+
179+
fn pack_symbol(c: u8) -> u8 {
180+
match c {
181+
'a' as u8 | 'A' as u8 => 0,
182+
'c' as u8 | 'C' as u8 => 1,
183+
'g' as u8 | 'G' as u8 => 2,
184+
't' as u8 | 'T' as u8 => 3,
185+
_ => fail!(c.to_str())
186+
}
187+
}
188+
189+
// Maps a 2-bit symbol code back to its nucleotide letter via `TABLE`.
// `Code::unpack` masks its argument with `& 3`, which keeps the index inside
// the four-entry table.
fn unpack_symbol(c: u8) -> u8 {
190+
TABLE[c]
191+
}
192+
193+
fn next_char<'a>(mut buf: &'a [u8]) -> &'a [u8] {
194+
loop {
195+
buf = slice(buf, 1, buf.len());
196+
if buf.len() == 0 {
197+
break;
198+
}
199+
if buf[0] != (' ' as u8) && buf[0] != ('\t' as u8) &&
200+
buf[0] != ('\n' as u8) && buf[0] != 0 {
201+
break;
202+
}
203+
}
204+
buf
205+
}
206+
207+
#[inline(never)]
208+
fn read_stdin() -> ~[u8] {
209+
unsafe {
210+
let mode = "r";
211+
//let stdin = fdopen(STDIN_FILENO as c_int, transmute(&mode[0]));
212+
let path = "knucleotide-input.txt";
213+
let stdin = fopen(transmute(&path[0]), transmute(&mode[0]));
214+
215+
let mut st: stat = init();
216+
fstat(fileno(stdin), &mut st);
217+
let mut buf = vec::from_elem(st.st_size as uint, 0);
218+
219+
let header = str::byte_slice_no_callback(">THREE");
220+
let header = vec::slice(header, 0, 6);
221+
222+
{
223+
let mut window: &mut [u8] = buf;
224+
loop {
225+
fgets(transmute(&mut window[0]), LINE_LEN as c_int, stdin);
226+
227+
{
228+
if vec::slice(window, 0, 6) == header {
229+
break;
230+
}
231+
}
232+
}
233+
234+
while fgets(transmute(&mut window[0]),
235+
LINE_LEN as c_int,
236+
stdin) != null() {
237+
window = vec::mut_slice(window,
238+
strlen(transmute(&window[0])) as uint,
239+
window.len());
240+
}
241+
}
242+
243+
buf
244+
}
245+
}
246+
247+
#[inline(never)]
248+
#[fixed_stack_segment]
249+
fn generate_frequencies(frequencies: &mut Table,
250+
mut input: &[u8],
251+
frame: i32) {
252+
let mut code = Code(0);
253+
254+
// Pull first frame.
255+
for (frame as uint).times {
256+
code = code.push_char(input[0]);
257+
input = next_char(input);
258+
}
259+
frequencies.lookup(code, BumpCallback);
260+
261+
while input.len() != 0 && input[0] != ('>' as u8) {
262+
code = code.rotate(input[0], frame);
263+
frequencies.lookup(code, BumpCallback);
264+
input = next_char(input);
265+
}
266+
}
267+
268+
#[inline(never)]
269+
#[fixed_stack_segment]
270+
fn print_frequencies(frequencies: &Table, frame: i32) {
271+
let mut vector = ~[];
272+
for frequencies.each |entry| {
273+
vector.push((entry.code, entry.count));
274+
}
275+
quick_sort3(vector);
276+
277+
let mut total_count = 0;
278+
for vector.each |&(_, count)| {
279+
total_count += count;
280+
}
281+
282+
for vector.each |&(key, count)| {
283+
println(fmt!("%s %.3f",
284+
key.unpack(frame),
285+
(count as float * 100.0) / (total_count as float)));
286+
}
287+
}
288+
289+
fn print_occurrences(frequencies: &mut Table, occurrence: &'static str) {
290+
frequencies.lookup(Code::pack(occurrence), PrintCallback(occurrence))
291+
}
292+
293+
#[fixed_stack_segment]
294+
fn main() {
295+
let input = read_stdin();
296+
297+
let mut frequencies = ~Table::new();
298+
generate_frequencies(frequencies, input, 1);
299+
print_frequencies(frequencies, 1);
300+
301+
*frequencies = Table::new();
302+
generate_frequencies(frequencies, input, 2);
303+
print_frequencies(frequencies, 2);
304+
305+
for range(0, 5) |i| {
306+
let occurrence = OCCURRENCES[i];
307+
*frequencies = Table::new();
308+
generate_frequencies(frequencies,
309+
input,
310+
occurrence.len() as i32);
311+
print_occurrences(frequencies, occurrence);
312+
}
313+
}
314+

0 commit comments

Comments
 (0)