Keyboard shortcuts

Press or to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Collections

Count Word Frequency in a Document

std-badge cat-data-structures-badge

Splits a string into words with split_whitespace and counts each word’s occurrences. HashMap::entry with or_insert initializes a missing key to zero and returns a mutable reference, so the increment requires no separate lookup. sort_by_key with Reverse sorts the results descending by count; wrapping the count in Reverse flips the natural ascending order without a manual comparator.

use std::cmp::Reverse;
use std::collections::HashMap;

fn word_count(text: &str) -> HashMap<&str, usize> {
    let mut counts = HashMap::new();
    for word in text.split_whitespace() {
        *counts.entry(word).or_insert(0) += 1;
    }
    counts
}

fn main() {
    let text = "the quick brown fox jumps over the lazy dog the fox";
    let counts = word_count(text);

    let mut pairs: Vec<(&str, usize)> = counts.iter().map(|(&w, &c)| (w, c)).collect();
    pairs.sort_by_key(|&(word, count)| (Reverse(count), word));
    for (word, count) in &pairs {
        println!("{word:>12}  {count}");
    }

    assert_eq!(counts["the"], 3);
    assert_eq!(counts["fox"], 2);
    assert_eq!(counts["quick"], 1);
}

Group Records by Key

std-badge cat-data-structures-badge

Groups a flat list of records into a HashMap of vectors using entry with or_default, which inserts an empty Vec the first time a key is seen and returns a mutable reference to it on every call. keys with copied produces an owned Vec<&str> for sorting — copied turns the &&str references that keys yields into &str values.

use std::collections::HashMap;

fn main() {
    let log_lines = vec![
        ("ERROR", "disk full"),
        ("WARN",  "high memory usage"),
        ("INFO",  "server started"),
        ("ERROR", "connection refused"),
        ("INFO",  "request received"),
    ];

    let mut by_level: HashMap<&str, Vec<&str>> = HashMap::new();
    for (level, message) in log_lines {
        by_level.entry(level).or_default().push(message);
    }

    let mut levels: Vec<&str> = by_level.keys().copied().collect();
    levels.sort();
    for level in levels {
        println!("[{level}]");
        for msg in &by_level[level] {
            println!("  {msg}");
        }
    }

    assert_eq!(by_level["ERROR"].len(), 2);
    assert_eq!(by_level["INFO"].len(), 2);
    assert_eq!(by_level["WARN"].len(), 1);
}

Look Up Records Within a Time Range

std-badge cat-data-structures-badge

Stores timestamped sensor readings in a BTreeMap, which keeps keys in sorted order using a B-tree structure. BTreeMap::range locates the start of the range with binary search in O(log n) time and then iterates forward — so the cost scales with the result size, not the total number of entries. A HashMap cannot do this at all; it has no ordering to exploit.

use std::collections::BTreeMap;

fn main() {
    let mut readings: BTreeMap<u32, f64> = BTreeMap::new();
    readings.insert(1_000, 20.1);
    readings.insert(2_000, 21.3);
    readings.insert(3_000, 19.8);
    readings.insert(4_000, 22.5);
    readings.insert(5_000, 23.1);

    let window: Vec<(&u32, &f64)> = readings.range(2_000..=4_000).collect();
    println!("Readings from t=2000 to t=4000:");
    for (ts, temp) in &window {
        println!("  t={ts:>5}  {temp:.1}°C");
    }

    assert_eq!(window.len(), 3);
    assert_eq!(*window[0].0, 2_000);
    assert_eq!(*window[2].0, 4_000);
}

Set Operations with HashSet

std-badge cat-data-structures-badge

Computes the difference, intersection, and union of two HashSets. Each operation returns an iterator of references; copied converts &&str to &str for easier collection.

use std::collections::HashSet;

fn main() {
    let deployed: HashSet<&str> = ["web", "api", "worker", "scheduler"]
        .iter()
        .copied()
        .collect();
    let monitored: HashSet<&str> = ["web", "api", "database"]
        .iter()
        .copied()
        .collect();

    // Services running but not being monitored
    let mut unmonitored: Vec<&str> = deployed.difference(&monitored).copied().collect();
    unmonitored.sort();
    println!("Deployed but not monitored: {unmonitored:?}");
    assert_eq!(unmonitored, ["scheduler", "worker"]);

    // Services present in both sets
    let mut common: Vec<&str> = deployed.intersection(&monitored).copied().collect();
    common.sort();
    println!("Monitored and deployed:     {common:?}");
    assert_eq!(common, ["api", "web"]);

    // All services mentioned across both sets
    let mut all: Vec<&str> = deployed.union(&monitored).copied().collect();
    all.sort();
    println!("All services:               {all:?}");
    assert_eq!(all.len(), 5);
}

Process Tasks in Priority Order

std-badge cat-data-structures-badge

Uses BinaryHeap as a max-priority queue: push inserts an item and pop always removes the highest-priority one first. Wrapping items in Reverse turns it into a min-priority queue without a custom comparator.

use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn main() {
    // Max-heap: highest priority number is processed first
    let mut queue: BinaryHeap<(u32, &str)> = BinaryHeap::new();
    queue.push((3, "send report"));
    queue.push((1, "check email"));
    queue.push((5, "fix production bug"));
    queue.push((2, "review PR"));

    println!("Max-heap — processing order:");
    while let Some((priority, task)) = queue.pop() {
        println!("  [{priority}] {task}");
    }

    // Min-heap: wrap with Reverse so lowest number is processed first
    let mut min_queue: BinaryHeap<Reverse<(u32, &str)>> = BinaryHeap::new();
    min_queue.push(Reverse((3, "send report")));
    min_queue.push(Reverse((1, "check email")));
    min_queue.push(Reverse((5, "fix production bug")));

    println!("Min-heap — processing order:");
    while let Some(Reverse((priority, task))) = min_queue.pop() {
        println!("  [{priority}] {task}");
    }
}

Sliding-Window Buffer with VecDeque

std-badge cat-data-structures-badge

Maintains a fixed-size window over a stream of values using VecDeque. push_back appends new values and pop_front discards the oldest, keeping memory bounded regardless of input length. with_capacity is optional — VecDeque::new() works fine — but pre-allocating the window size avoids reallocations as the buffer fills up for the first time. zip pairs each original reading with its computed average for display.

use std::collections::VecDeque;

fn sliding_average(values: &[f64], window: usize) -> Vec<f64> {
    let mut buf: VecDeque<f64> = VecDeque::with_capacity(window);
    let mut averages = Vec::with_capacity(values.len());

    for &v in values {
        if buf.len() == window {
            buf.pop_front();
        }
        buf.push_back(v);
        averages.push(buf.iter().sum::<f64>() / buf.len() as f64);
    }
    averages
}

fn main() {
    let readings = [1.0, 2.0, 3.0, 4.0, 5.0];
    let averages = sliding_average(&readings, 3);

    println!("reading  average");
    for (r, a) in readings.iter().zip(&averages) {
        println!("  {r:.1}      {a:.2}");
    }

    assert_eq!(averages[0], 1.0);  // window: [1.0]
    assert_eq!(averages[1], 1.5);  // window: [1.0, 2.0]
    assert_eq!(averages[2], 2.0);  // window: [1.0, 2.0, 3.0]
    assert_eq!(averages[3], 3.0);  // window: [2.0, 3.0, 4.0]
    assert_eq!(averages[4], 4.0);  // window: [3.0, 4.0, 5.0]
}