Column Selection

In the context of select, you can use the col() expression to select certain columns. col("*") or all() indicates selecting all columns. exclude(["logged_at", "index"]) indicates excluding specified columns. * can be used as a wildcard. cols(["date", "logged_at"]) selects specified column names.

#![allow(unused)]
fn main() {
// Single column selection
let out = df.clone().lazy().select([col("place")]).collect()?;
// Wildcard selection, selecting all columns with names starting with 'a'
let out = df.clone().lazy().select([col("a*")]).collect()?;
// Regular expression
let out = df.clone().lazy().select([col("^.*(as|sa).*$")]).collect()?;
// Multiple column names selection
let out = df.clone().lazy().select([cols(["date", "logged_at"])]).collect()?;
// Data type selection, selecting all columns that satisfy the data type.
let out = df.clone().lazy()
    .select([dtype_cols([DataType::Int64, DataType::UInt32, DataType::Boolean]).n_unique()])
.collect()?;
}

Dataset generation for this section

#![allow(unused)]
fn main() {
use chrono::prelude::*;
use polars::time::*;
let date_df = df!(
    "id" => &[9, 4, 2],
    "place" => &["Mars", "Earth", "Saturn"],
    "date" => date_range("date",
NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(),
NaiveDate::from_ymd_opt(2022, 1, 3).unwrap().and_hms_opt(0, 0, 0).unwrap(),
Duration::parse("1d"),ClosedWindow::Both, TimeUnit::Milliseconds, None)?,
    "sales" => &[33.4, 2142134.1, 44.7],
    "has_people" => &[false, true, false],
    "logged_at" => date_range("logged_at",
    NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(), NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 2).unwrap(), Duration::parse("1s"),ClosedWindow::Both, TimeUnit::Milliseconds, None)?,
    )?
    .with_row_index("index", None)?;
println!("{}", &df);
}

alt text