Column Selection
In the context of select, you can use the col() expression to select certain columns.
col("*") or all() indicates selecting all columns. exclude(["logged_at", "index"]) indicates excluding specified columns. * can be used as a wildcard.
cols(["date", "logged_at"]) selects specified column names.
#![allow(unused)] fn main() { // Single column selection let out = df.clone().lazy().select([col("place")]).collect()?; // Wildcard selection, selecting all columns with names starting with 'a' let out = df.clone().lazy().select([col("a*")]).collect()?; // Regular expression let out = df.clone().lazy().select([col("^.*(as|sa).*$")]).collect()?; // Multiple column names selection let out = df.clone().lazy().select([cols(["date", "logged_at"])]).collect()?; // Data type selection, selecting all columns that satisfy the data type. let out = df.clone().lazy() .select([dtype_cols([DataType::Int64, DataType::UInt32, DataType::Boolean]).n_unique()]) .collect()?; }
Dataset generation for this section
#![allow(unused)] fn main() { use chrono::prelude::*; use polars::time::*; let date_df = df!( "id" => &[9, 4, 2], "place" => &["Mars", "Earth", "Saturn"], "date" => date_range("date", NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(), NaiveDate::from_ymd_opt(2022, 1, 3).unwrap().and_hms_opt(0, 0, 0).unwrap(), Duration::parse("1d"),ClosedWindow::Both, TimeUnit::Milliseconds, None)?, "sales" => &[33.4, 2142134.1, 44.7], "has_people" => &[false, true, false], "logged_at" => date_range("logged_at", NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(), NaiveDate::from_ymd_opt(2022, 1, 1).unwrap().and_hms_opt(0, 0, 2).unwrap(), Duration::parse("1s"),ClosedWindow::Both, TimeUnit::Milliseconds, None)?, )? .with_row_index("index", None)?; println!("{}", &df); }
