From aca268f2725af61fea6f50cc81ca06ab2e670ac8 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Sat, 20 Jun 2015 23:15:48 -0700 Subject: [PATCH] vec exmaple maybe --- conversions.md | 43 +++++++++++---- intro.md | 16 +----- vec.md | 141 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 26 deletions(-) create mode 100644 vec.md diff --git a/conversions.md b/conversions.md index ae3ce9e..7467dec 100644 --- a/conversions.md +++ b/conversions.md @@ -168,6 +168,8 @@ applied. +TODO: receiver coercions? + # Casts @@ -212,11 +214,10 @@ For numeric casts, there are quite a few cases to consider: * casting from a larger integer to a smaller integer (e.g. u8 -> u32) will * zero-extend if the target is unsigned * sign-extend if the target is signed -* casting from a float to an integer will: - * round the float towards zero if finite +* casting from a float to an integer will round the float towards zero * **NOTE: currently this will cause Undefined Behaviour if the rounded value cannot be represented by the target integer type**. This is a bug - and will be fixed. + and will be fixed. (TODO: figure out what Inf and NaN do) * casting from an integer to float will produce the floating point representation of the integer, rounded if necessary (rounding strategy unspecified). * casting from an f32 to an f64 is perfect and lossless. @@ -226,21 +227,41 @@ For numeric casts, there are quite a few cases to consider: is finite but larger or smaller than the largest or smallest finite value representable by f32**. This is a bug and will be fixed. -The casts involving rawptrs also allow us to completely bypass type-safety -by re-interpretting a pointer of T to a pointer of U for arbitrary types, as -well as interpret integers as addresses. However it is impossible to actually -*capitalize* on this violation in Safe Rust, because derefencing a raw ptr is -`unsafe`. - - # Conversion Traits -TODO +TODO? 
# Transmuting Types +Get out of our way type system! We're going to reinterpret these bits or die +trying! Even though this book is all about doing things that are unsafe, I really +can't emphasize enough that you should deeply think about finding Another Way than the +operations covered in this section. This is really, truly, the most horribly +unsafe thing you can do in Rust. The guardrails here are dental floss. + +`mem::transmute<T, U>` takes a value of type `T` and reinterprets it to have +type `U`. The only restriction is that the `T` and `U` are verified to have the +same size. The ways to cause Undefined Behaviour with this are mind boggling. + +* First and foremost, creating an instance of *any* type with an invalid state + is going to cause arbitrary chaos that can't really be predicted. +* Transmute has an overloaded return type. If you do not specify the return type + it may produce a surprising type to satisfy inference. +* Making a primitive with an invalid value is UB +* Transmuting between non-repr(C) types is UB +* Transmuting an & to &mut is UB +* Transmuting to a reference without an explicitly provided lifetime + produces an [unbounded lifetime](lifetimes.html#unbounded-lifetimes) + +`mem::transmute_copy<T, U>` somehow manages to be *even more* wildly unsafe than +this. It copies `size_of<U>` bytes out of an `&T` and interprets them as a `U`. +The size check that `mem::transmute` has is gone (as it may be valid to copy +out a prefix), though it is Undefined Behaviour for `U` to be larger than `T`. + +Also of course you can get most of the functionality of these functions using +pointer casts. diff --git a/intro.md b/intro.md index b955d78..f380f98 100644 --- a/intro.md +++ b/intro.md @@ -23,6 +23,7 @@ stack or heap, we will not explain the syntax. 
* [Uninitialized Memory](uninitialized.html) * [Ownership-oriented resource management (RAII)](raii.html) * [Concurrency](concurrency.html) +* [Example: Implementing Vec](vec.html) @@ -232,10 +233,6 @@ struct Vec<T> { // We currently live in a nice imaginary world of only postive fixed-size // types. impl<T> Vec<T> { - fn new() -> Self { - Vec { ptr: heap::EMPTY, len: 0, cap: 0 } - } - fn push(&mut self, elem: T) { if self.len == self.cap { // not important for this example @@ -246,17 +243,6 @@ impl<T> Vec<T> { self.len += 1; } } - - fn pop(&mut self) -> Option<T> { - if self.len > 0 { - self.len -= 1; - unsafe { - Some(ptr::read(self.ptr.offset(self.len as isize))) - } - } else { - None - } - } } ``` diff --git a/vec.md b/vec.md new file mode 100644 index 0000000..116626f --- /dev/null +++ b/vec.md @@ -0,0 +1,141 @@ +% Example: Implementing Vec + +To bring everything together, we're going to write `std::Vec` from scratch. +Because all the best tools for writing unsafe code are unstable, this +project will only work on nightly (as of Rust 1.2.0). + +First off, we need to come up with the struct layout. Naively we want this +design: + +``` +struct Vec<T> { + ptr: *mut T, + cap: usize, + len: usize, +} +``` + +And indeed this would compile. Unfortunately, it would be incorrect. The compiler +will give us too strict variance, so e.g. an `&Vec<&'static str>` couldn't be used +where an `&Vec<&'a str>` was expected. More importantly, it will give incorrect +ownership information to dropck, as it will conservatively assume we don't own +any values of type `T`. See [the chapter on ownership and lifetimes] +(lifetimes.html) for details. 
+ +As we saw in the lifetimes chapter, we should use `Unique<T>` in place of `*mut T` +when we have a raw pointer to an allocation we own: + + +``` +#![feature(unique)] + +use std::ptr::Unique; + +pub struct Vec<T> { + ptr: Unique<T>, + cap: usize, + len: usize, +} +``` + +As a recap, Unique is a wrapper around a raw pointer that declares that: + +* We own at least one value of type `T` +* We are Send/Sync iff `T` is Send/Sync +* Our pointer is never null (and therefore `Option<Vec<T>>` is null-pointer-optimized) + +That last point is subtle. First, it makes `Unique::new` unsafe to call, because +putting `null` inside of it is Undefined Behaviour. It also throws a +wrench in an important feature of Vec (and indeed all of the std collections): +an empty Vec doesn't actually allocate at all. So if we can't allocate, +but also can't put a null pointer in `ptr`, what do we do in +`Vec::new`? Well, we just put some other garbage in there! + +This is perfectly fine because we already have `cap == 0` as our sentinel for no +allocation. We don't even need to handle it specially in almost any code because +we usually need to check if `cap > len` or `len > 0` anyway. The traditional +Rust value to put here is `0x01`. The standard library actually exposes this +as `std::rt::heap::EMPTY`. There are quite a few places where we'll want to use +`heap::EMPTY` because there's no real allocation to talk about but `null` would +make the compiler angry. + +All of the `heap` API is totally unstable under the `alloc` feature, though. +We could trivially define `heap::EMPTY` ourselves, but we'll want the rest of +the `heap` API anyway, so let's just get that dependency over with. + +So: + +```rust +#![feature(alloc)] + +use std::rt::heap::EMPTY; +use std::mem; + +impl<T> Vec<T> { + fn new() -> Self { + assert!(mem::size_of::<T>() != 0, "We're not ready to handle ZSTs"); + unsafe { + // need to cast EMPTY to the actual ptr type we want, let + // inference handle it. 
+ Vec { ptr: Unique::new(heap::EMPTY as *mut _), len: 0, cap: 0 } + } + } +} +``` + +I slipped in that assert there because zero-sized types will require some +special handling throughout our code, and I want to defer the issue for now. +Without this assert, some of our early drafts will do some Very Bad Things. + +Next we need to figure out what to actually do when we *do* want space. For that, +we'll need to use the rest of the heap APIs. These basically allow us to +talk directly to Rust's instance of jemalloc. + +We'll also need a way to handle out-of-memory conditions. The standard library +calls the `abort` intrinsic, but calling intrinsics from normal Rust code is a +pretty bad idea. Unfortunately, the `abort` exposed by the standard library +allocates. Not something we want to do during `oom`! Instead, we'll call +`std::process::exit`. + +```rust +fn oom() { + ::std::process::exit(-9999); +} +``` + +Okay, now we can write growing: + +```rust +fn grow(&mut self) { + unsafe { + let align = mem::min_align_of::<T>(); + let elem_size = mem::size_of::<T>(); + + let (new_cap, ptr) = if self.cap == 0 { + let ptr = heap::allocate(elem_size, align); + (1, ptr) + } else { + let new_cap = 2 * self.cap; + let ptr = heap::reallocate(*self.ptr as *mut _, + self.cap * elem_size, + new_cap * elem_size, + align); + (new_cap, ptr) + }; + + // If allocate or reallocate fail, we'll get `null` back + if ptr.is_null() { oom() } + + self.ptr = Unique::new(ptr as *mut _); + self.cap = new_cap; + } +} +``` + +There's nothing particularly tricky in here: if we're totally empty, we need +to do a fresh allocation. Otherwise, we need to reallocate the current pointer. +However, we do have a subtle bug here: the multiplications (`2 * self.cap` and `new_cap * elem_size`) can overflow. + +TODO: rest of this + +