|
| 1 | +# Interacting with foreign code |
| 2 | + |
| 3 | +One of Rust's aims, as a systems programming language, is to |
| 4 | +interoperate well with C code. |
| 5 | + |
| 6 | +We'll start with an example. It's a bit bigger than usual, and |
| 7 | +contains a number of new concepts. We'll go over it one piece at a |
| 8 | +time. |
| 9 | + |
| 10 | +This is a program that uses OpenSSL's `SHA1` function to compute the |
| 11 | +hash of its first command-line argument, which it then converts to a |
| 12 | +hexadecimal string and prints to standard output. If you have the |
| 13 | +OpenSSL libraries installed, it should 'just work'. |
| 14 | + |
| 15 | +~~~~ {.xfail-test} |
| 16 | +use std; |
| 17 | +import libc::c_uint; |
| 18 | +
|
| 19 | +extern mod crypto { |
| 20 | + fn SHA1(src: *u8, sz: c_uint, out: *u8) -> *u8; |
| 21 | +} |
| 22 | +
|
| 23 | +fn as_hex(data: ~[u8]) -> ~str { |
| 24 | + let mut acc = ~""; |
| 25 | + for data.each |byte| { acc += fmt!("%02x", byte as uint); } |
| 26 | + return acc; |
| 27 | +} |
| 28 | +
|
| 29 | +fn sha1(data: ~str) -> ~str unsafe { |
| 30 | + let bytes = str::to_bytes(data); |
| 31 | + let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), |
| 32 | + vec::len(bytes) as c_uint, ptr::null()); |
| 33 | + return as_hex(vec::unsafe::from_buf(hash, 20u)); |
| 34 | +} |
| 35 | +
|
| 36 | +fn main(args: ~[~str]) { |
| 37 | + io::println(sha1(args[1])); |
| 38 | +} |
| 39 | +~~~~ |
| 40 | + |
| 41 | +## Foreign modules |
| 42 | + |
| 43 | +Before we can call `SHA1`, we have to declare it. That is what this |
| 44 | +part of the program is responsible for: |
| 45 | + |
| 46 | +~~~~ {.xfail-test} |
| 47 | +extern mod crypto { |
| 48 | + fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; |
| 49 | +} |
| 50 | +~~~~ |
| 51 | + |
| 52 | +An `extern` module declaration containing function signatures introduces |
| 53 | +the functions listed as _foreign functions_, which are implemented in some |
| 54 | +other language (usually C) and accessed through Rust's foreign function |
| 55 | +interface (FFI). An extern module like this is called a foreign module, and |
| 56 | +implicitly tells the compiler to link with a library with the same name as |
| 57 | +the module, and that it will find the foreign functions in that library. |
| 58 | + |
| 59 | +In this case, it'll change the name `crypto` to a shared library name |
| 60 | +in a platform-specific way (`libcrypto.so` on Linux, for example), and |
| 61 | +link that in. If you want the module to have a different name from the |
| 62 | +actual library, you can use the `"link_name"` attribute, like: |
| 63 | + |
| 64 | +~~~~ {.xfail-test} |
| 65 | +#[link_name = "crypto"] |
| 66 | +extern mod something { |
| 67 | + fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; |
| 68 | +} |
| 69 | +~~~~ |
| 70 | + |
| 71 | +## Foreign calling conventions |
| 72 | + |
| 73 | +Most foreign code will be C code, which usually uses the `cdecl` calling |
| 74 | +convention, so that is what Rust uses by default when calling foreign |
| 75 | +functions. Some foreign functions, most notably the Windows API, use other |
| 76 | +calling conventions, so Rust provides a way to hint to the compiler which |
| 77 | +is expected by using the `"abi"` attribute: |
| 78 | + |
| 79 | +~~~~ |
| 80 | +#[cfg(target_os = "win32")] |
| 81 | +#[abi = "stdcall"] |
| 82 | +extern mod kernel32 { |
| 83 | + fn SetEnvironmentVariableA(n: *u8, v: *u8) -> int; |
| 84 | +} |
| 85 | +~~~~ |
| 86 | + |
| 87 | +The `"abi"` attribute applies to a foreign module (it cannot be applied |
| 88 | +to a single function within a module), and must be either `"cdecl"` |
| 89 | +or `"stdcall"`. Other conventions may be defined in the future. |
| 90 | + |
| 91 | +## Unsafe pointers |
| 92 | + |
| 93 | +The foreign `SHA1` function is declared to take three arguments, and |
| 94 | +return a pointer. |
| 95 | + |
| 96 | +~~~~ {.xfail-test} |
| 97 | +# extern mod crypto { |
| 98 | +fn SHA1(src: *u8, sz: libc::c_uint, out: *u8) -> *u8; |
| 99 | +# } |
| 100 | +~~~~ |
| 101 | + |
| 102 | +When declaring the argument types to a foreign function, the Rust |
| 103 | +compiler has no way to check whether your declaration is correct, so |
| 104 | +you have to be careful. If you get the number or types of the |
| 105 | +arguments wrong, you're likely to get a segmentation fault. Or, |
| 106 | +probably even worse, your code will work on one platform, but break on |
| 107 | +another. |
| 108 | + |
| 109 | +In this case, `SHA1` is defined as taking two `unsigned char*` |
| 110 | +arguments and one `unsigned long`. The Rust equivalents are `*u8` |
| 111 | +unsafe pointers and a `uint` (which, like `unsigned long`, is a |
| 112 | +machine-word-sized type). |
| 113 | + |
| 114 | +Unsafe pointers can be created through various functions in the |
| 115 | +standard lib, usually with `unsafe` somewhere in their name. You can |
| 116 | +dereference an unsafe pointer with the `*` operator, but use |
| 117 | +caution—unlike Rust's other pointer types, unsafe pointers are |
| 118 | +completely unmanaged, so they might point at invalid memory, or be |
| 119 | +null pointers. |
| 120 | + |
| 121 | +## Unsafe blocks |
| 122 | + |
| 123 | +The `sha1` function is the most obscure part of the program. |
| 124 | + |
| 125 | +~~~~ |
| 126 | +# mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } |
| 127 | +# fn as_hex(data: ~[u8]) -> ~str { ~"hi" } |
| 128 | +fn sha1(data: ~str) -> ~str { |
| 129 | + unsafe { |
| 130 | + let bytes = str::to_bytes(data); |
| 131 | + let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), |
| 132 | + vec::len(bytes), ptr::null()); |
| 133 | + return as_hex(vec::unsafe::from_buf(hash, 20u)); |
| 134 | + } |
| 135 | +} |
| 136 | +~~~~ |
| 137 | + |
| 138 | +Firstly, what does the `unsafe` keyword at the top of the function |
| 139 | +mean? `unsafe` is a block modifier—it declares the block following it |
| 140 | +to be known to be unsafe. |
| 141 | + |
| 142 | +Some operations, like dereferencing unsafe pointers or calling |
| 143 | +functions that have been marked unsafe, are only allowed inside unsafe |
| 144 | +blocks. With the `unsafe` keyword, you're telling the compiler 'I know |
| 145 | +what I'm doing'. The main motivation for such an annotation is that |
| 146 | +when you have a memory error (and you will, if you're using unsafe |
| 147 | +constructs), you have some idea where to look—it will most likely be |
| 148 | +caused by some unsafe code. |
| 149 | + |
| 150 | +Unsafe blocks isolate unsafety. Unsafe functions, on the other hand, |
| 151 | +advertise it to the world. An unsafe function is written like this: |
| 152 | + |
| 153 | +~~~~ |
| 154 | +unsafe fn kaboom() { ~"I'm harmless!"; } |
| 155 | +~~~~ |
| 156 | + |
| 157 | +This function can only be called from an unsafe block or another |
| 158 | +unsafe function. |
| 159 | + |
| 160 | +## Pointer fiddling |
| 161 | + |
| 162 | +The standard library defines a number of helper functions for dealing |
| 163 | +with unsafe data, casting between types, and generally subverting |
| 164 | +Rust's safety mechanisms. |
| 165 | + |
| 166 | +Let's look at our `sha1` function again. |
| 167 | + |
| 168 | +~~~~ |
| 169 | +# mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } |
| 170 | +# fn as_hex(data: ~[u8]) -> ~str { ~"hi" } |
| 171 | +# fn x(data: ~str) -> ~str { |
| 172 | +# unsafe { |
| 173 | +let bytes = str::to_bytes(data); |
| 174 | +let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), |
| 175 | + vec::len(bytes), ptr::null()); |
| 176 | +return as_hex(vec::unsafe::from_buf(hash, 20u)); |
| 177 | +# } |
| 178 | +# } |
| 179 | +~~~~ |
| 180 | + |
| 181 | +The `str::to_bytes` function is perfectly safe: it converts a string to |
| 182 | +a `[u8]`. This byte array is then fed to `vec::unsafe::to_ptr`, which |
| 183 | +returns an unsafe pointer to its contents. |
| 184 | + |
| 185 | +This pointer will become invalid as soon as the vector it points into |
| 186 | +is cleaned up, so you should be very careful how you use it. In this |
| 187 | +case, the local variable `bytes` outlives the pointer, so we're good. |
| 188 | + |
| 189 | +Passing a null pointer as the third argument to `SHA1` makes it use a |
| 190 | +static buffer, and thus saves us the effort of allocating memory |
| 191 | +ourselves. `ptr::null` is a generic function that will return an |
| 192 | +unsafe null pointer of the correct type (Rust generics are awesome |
| 193 | +like that—they can take the right form depending on the type that they |
| 194 | +are expected to return). |
| 195 | + |
| 196 | +Finally, `vec::unsafe::from_buf` builds up a new `[u8]` from the |
| 197 | +unsafe pointer that was returned by `SHA1`. SHA1 digests are always |
| 198 | +twenty bytes long, so we can pass `20u` for the length of the new |
| 199 | +vector. |
| 200 | + |
| 201 | +## Passing structures |
| 202 | + |
| 203 | +C functions often take pointers to structs as arguments. Since Rust |
| 204 | +records are binary-compatible with C structs, Rust programs can call |
| 205 | +such functions directly. |
| 206 | + |
| 207 | +This program uses the POSIX function `gettimeofday` to get a |
| 208 | +microsecond-resolution timer. |
| 209 | + |
| 210 | +~~~~ |
| 211 | +use std; |
| 212 | +import libc::c_ulonglong; |
| 213 | +
|
| 214 | +type timeval = {mut tv_sec: c_ulonglong, |
| 215 | + mut tv_usec: c_ulonglong}; |
| 216 | +#[nolink] |
| 217 | +extern mod lib_c { |
| 218 | + fn gettimeofday(tv: *timeval, tz: *()) -> i32; |
| 219 | +} |
| 220 | +fn unix_time_in_microseconds() -> u64 unsafe { |
| 221 | + let x = {mut tv_sec: 0 as c_ulonglong, mut tv_usec: 0 as c_ulonglong}; |
| 222 | + lib_c::gettimeofday(ptr::addr_of(x), ptr::null()); |
| 223 | + return (x.tv_sec as u64) * 1000_000_u64 + (x.tv_usec as u64); |
| 224 | +} |
| 225 | +
|
| 226 | +# fn main() { assert fmt!("%?", unix_time_in_microseconds()) != ~""; } |
| 227 | +~~~~ |
| 228 | + |
| 229 | +The `#[nolink]` attribute indicates that there's no foreign library to |
| 230 | +link in. The standard C library is already linked with Rust programs. |
| 231 | + |
| 232 | +A `timeval`, in C, is a struct with two integer fields. Thus, we |
| 233 | +define a record type with matching fields (`c_ulonglong` here), and |
| 234 | +declare `gettimeofday` to take a pointer to such a record. |
| 235 | + |
| 236 | +The second argument to `gettimeofday` (the time zone) is not used by |
| 237 | +this program, so it simply declares it to be a pointer to the nil |
| 238 | +type. Since all null pointers have the same representation regardless of |
| 239 | +their referent type, this is safe. |
| 240 | + |
0 commit comments