Utilities
Base.dump
Base.unique
Base.unique!
DataTables.completecases
DataTables.eltypes
DataTables.head
DataTables.names!
DataTables.nonunique
DataTables.rename
DataTables.rename!
DataTables.tail
NullableArrays.dropnull
NullableArrays.dropnull!
StatsBase.describe
DataTables.eltypes
— Function.
Return element types of columns
eltypes(dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
Result
::Vector{Type}
: the element type of each column
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
eltypes(dt)
DataTables.head
— Function.
Show the first or last part of an AbstractDataTable
head(dt::AbstractDataTable, r::Int = 6)
tail(dt::AbstractDataTable, r::Int = 6)
Arguments
dt
: the AbstractDataTable
r
: the number of rows to show
Result
::AbstractDataTable
: the first or last part of dt
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
head(dt)
tail(dt)
DataTables.completecases
— Function.
Indexes of complete cases (rows without null values)
completecases(dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
Result
::Vector{Bool}
: indexes of complete cases
See also dropnull and dropnull!.
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dt[[1,4,5], :x] = Nullable()
dt[[9,10], :y] = Nullable()
completecases(dt)
StatsBase.describe
— Function.
Summarize the columns of an AbstractDataTable
describe(dt::AbstractDataTable)
describe(io, dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
io
: optional output descriptor
Result
nothing
Details
If the column's base type derives from Number, compute the minimum, first quartile, median, mean, third quartile, and maximum. Nulls are filtered and reported separately.
For boolean columns, report trues, falses, and nulls.
For other types, show column characteristics and number of nulls.
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
describe(dt)
NullableArrays.dropnull
— Function.
dropnull(X::AbstractVector)
Return a vector containing only the non-null entries of X, unwrapping Nullable entries. A copy is always returned, even when X does not contain any null values.
Remove rows with null values.
dropnull(dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
Result
::AbstractDataTable
: the updated copy
See also completecases and dropnull!.
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dt[[1,4,5], :x] = Nullable()
dt[[9,10], :y] = Nullable()
dropnull(dt)
NullableArrays.dropnull!
— Function.
dropnull!(X::AbstractVector)
Remove null entries of X in-place and return a Vector view of the unwrapped Nullable entries. If no nulls are present, this is a no-op and X is returned.
dropnull!(X::NullableVector)
Remove null entries of X in-place and return a Vector view of the unwrapped Nullable entries.
Remove rows with null values in-place.
dropnull!(dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
Result
::AbstractDataTable
: the updated version
See also dropnull and completecases.
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dt[[1,4,5], :x] = Nullable()
dt[[9,10], :y] = Nullable()
dropnull!(dt)
Base.dump
— Function.
Show the structure of an AbstractDataTable, in a tree-like format
dump(dt::AbstractDataTable, n::Int = 5)
dump(io::IO, dt::AbstractDataTable, n::Int = 5)
Arguments
dt
: the AbstractDataTable
n
: the number of levels to show
io
: optional output descriptor
Result
nothing
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dump(dt)
DataTables.names!
— Function.
Set column names
names!(dt::AbstractDataTable, vals)
Arguments
dt
: the AbstractDataTable
vals
: column names, normally a Vector{Symbol} the same length as the number of columns in dt
allow_duplicates
: if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).
Result
::AbstractDataTable
: the updated result
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
names!(dt, [:a, :b, :c])
names!(dt, [:a, :b, :a]) # throws ArgumentError
names!(dt, [:a, :b, :a], allow_duplicates=true) # renames second :a to :a_1
DataTables.nonunique
— Function.
Indexes of duplicate rows (a row that is a duplicate of a prior row)
nonunique(dt::AbstractDataTable)
nonunique(dt::AbstractDataTable, cols)
Arguments
dt
: the AbstractDataTable
cols
: a column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare
Result
::Vector{Bool}
: indicates whether the row is a duplicate of some prior row
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dt = vcat(dt, dt)
nonunique(dt)
nonunique(dt, 1)
DataTables.rename
— Function.
Rename columns
rename!(dt::AbstractDataTable, from::Symbol, to::Symbol)
rename!(dt::AbstractDataTable, d::Associative)
rename!(f::Function, dt::AbstractDataTable)
rename(dt::AbstractDataTable, from::Symbol, to::Symbol)
rename(f::Function, dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
d
: an Associative type that maps the original name to a new name
f
: a function that has the old column name (a symbol) as input and new column name (a symbol) as output
Result
::AbstractDataTable
: the updated result
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> @compat(Symbol)(uppercase(string(x))), dt)
rename(dt, @compat(Dict(:i=>:A, :x=>:X)))
rename(dt, :y, :Y)
rename!(dt, @compat(Dict(:i=>:A, :x=>:X)))
DataTables.rename!
— Function.
Rename columns
rename!(dt::AbstractDataTable, from::Symbol, to::Symbol)
rename!(dt::AbstractDataTable, d::Associative)
rename!(f::Function, dt::AbstractDataTable)
rename(dt::AbstractDataTable, from::Symbol, to::Symbol)
rename(f::Function, dt::AbstractDataTable)
Arguments
dt
: the AbstractDataTable
d
: an Associative type that maps the original name to a new name
f
: a function that has the old column name (a symbol) as input and new column name (a symbol) as output
Result
::AbstractDataTable
: the updated result
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> @compat(Symbol)(uppercase(string(x))), dt)
rename(dt, @compat(Dict(:i=>:A, :x=>:X)))
rename(dt, :y, :Y)
rename!(dt, @compat(Dict(:i=>:A, :x=>:X)))
DataTables.tail
— Function.
Show the first or last part of an AbstractDataTable
head(dt::AbstractDataTable, r::Int = 6)
tail(dt::AbstractDataTable, r::Int = 6)
Arguments
dt
: the AbstractDataTable
r
: the number of rows to show
Result
::AbstractDataTable
: the first or last part of dt
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
head(dt)
tail(dt)
Base.unique
— Function.
unique(A::CategoricalArray)
unique(A::NullableCategoricalArray)
Return levels which appear in A, in the same order as levels (and not in their order of appearance). This function is significantly slower than levels since it needs to check whether levels are used or not.
Delete duplicate rows
unique(dt::AbstractDataTable)
unique(dt::AbstractDataTable, cols)
unique!(dt::AbstractDataTable)
unique!(dt::AbstractDataTable, cols)
Arguments
dt
: the AbstractDataTable
cols
: column indicator (Symbol, Int, Vector{Symbol}, etc.)
specifying the column(s) to compare.
Result
::AbstractDataTable
: the updated version of dt with unique rows.
When cols is specified, the return DataTable contains complete rows, retaining in each case the first instance for which dt[cols] is unique.
See also nonunique.
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dt = vcat(dt, dt)
unique(dt) # doesn't modify dt
unique(dt, 1)
unique!(dt) # modifies dt
Base.unique!
— Function.
Delete duplicate rows
unique(dt::AbstractDataTable)
unique(dt::AbstractDataTable, cols)
unique!(dt::AbstractDataTable)
unique!(dt::AbstractDataTable, cols)
Arguments
dt
: the AbstractDataTable
cols
: column indicator (Symbol, Int, Vector{Symbol}, etc.)
specifying the column(s) to compare.
Result
::AbstractDataTable
: the updated version of dt with unique rows.
When cols is specified, the return DataTable contains complete rows, retaining in each case the first instance for which dt[cols] is unique.
See also nonunique.
Examples
dt = DataTable(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dt = vcat(dt, dt)
unique(dt) # doesn't modify dt
unique(dt, 1)
unique!(dt) # modifies dt