Skip to content

Commit

Permalink
non docs related changes to get rid of the warnings when loading pkg
Browse files Browse the repository at this point in the history
  • Loading branch information
drizk1 committed Apr 7, 2024
1 parent f4affe9 commit 8df920e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 106 deletions.
124 changes: 42 additions & 82 deletions docs/examples/UserGuide/key_differences.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,109 +5,69 @@

## group_by -> mutate
# In TidierDB, when performing `@group_by` then `@mutate`, after applying all of the mutations in the clause to the grouped data, the table is ungrouped. To perform subsequent grouped mutations/slices/summarizations, the user would have to regroup the data. This is something we will work to resolve, but as of version 0.1.0, this is the behavior. This is demonstrated below.
#using TidierDB
#df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9],
# groups = [i % 2 == 0 ? "aa" : "bb" for i in 1:10],
# value = repeat(1:5, 2),
# percent = 0.1:0.1:1.0);

# mem = duckdb_open(":memory:");
# db = duckdb_connect(mem);
using TidierDB
df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9],
               groups = [i % 2 == 0 ? "aa" : "bb" for i in 1:10],
               value = repeat(1:5, 2),
               percent = 0.1:0.1:1.0);

mem = duckdb_open(":memory:");
db = duckdb_connect(mem);
# For these examples we will use DuckDB, the default backend, although SQLite, Postgres, MySQL, MSSQL, and ClickHouse are possible.
copy_to(db, df, "df_mem"); # copying over the df to memory

# @chain db_table(db, :df_mem) begin
# @group_by(groups)
# @summarise(mean = mean(percent))
# @slice_max(percent)
# @collect
# end

# @chain db_table(db, :df_mem) begin
# @group_by(groups)
# @mutate(max = maximum(percent), min = minimum(percent))
# @group_by(groups)
# @summarise(mean = mean(percent))
# @collect
# end
@chain db_table(db, :df_mem) begin
@group_by(groups)
@summarise(mean = mean(percent))
@slice_max(percent)
@collect
end

@chain db_table(db, :df_mem) begin
@group_by(groups)
@mutate(max = maximum(percent), min = minimum(percent))
@group_by(groups)
@summarise(mean = mean(percent))
@collect
end

## Joining
# There are 2 key differences for joining:
# 1. When joining 2 tables, the new table you are choosing to join must be prefixed with a colon.
# 2. The column on both the new and old table must be specified. They do not need to be the same, and given SQL behavior where both columns are kept when joining two tables, it is preferable if they have different names. This avoids "ambiguous reference" errors that would otherwise come up and complicate the use of tidy selection for columns.

# df2 = DataFrame(id2 = ["AA", "AC", "AE", "AG", "AI", "AK", "AM"],
# category = ["X", "Y", "X", "Y", "X", "Y", "X"],
# score = [88, 92, 77, 83, 95, 68, 74]);
df2 = DataFrame(id2 = ["AA", "AC", "AE", "AG", "AI", "AK", "AM"],
category = ["X", "Y", "X", "Y", "X", "Y", "X"],
score = [88, 92, 77, 83, 95, 68, 74]);

# copy_to(db, df2, "df_join");
copy_to(db, df2, "df_join");

# @chain db_table(db, :df_mem) begin
# @left_join(:df_join, id2, id)
# @collect
#end
@chain db_table(db, :df_mem) begin
@left_join(:df_join, id2, id)
@collect
end

## `case_when`
# In TidierDB, after the clause is completed, the result for the new column is separated by a comma ( , )
# this is in contrast to TidierData.jl, where the result for the new column is separated by a =>
# @chain db_table(db, :df_mem) begin
# @mutate(new_col = case_when(percent > .5, "Pass", # in TidierData, percent > .5 => "Pass",
# percent <= .5, "Try Again", # percent <= .5 => "Try Again"
# true, "middle"))
# @collect
# end
@chain db_table(db, :df_mem) begin
@mutate(new_col = case_when(percent > .5, "Pass", # in TidierData, percent > .5 => "Pass",
percent <= .5, "Try Again", # percent <= .5 => "Try Again"
true, "middle"))
@collect
end

## Interpolation
# To use !! Interpolation, instead of being able to define the alternate names/value in the global context, the user has to `add_interp_parameter!`. This will hopefully be fixed in future versions. Otherwise behavior is the same.
# Also, when using interpolation with exponents, the interpolated value must go inside of parentheses.
add_interp_parameter!(:test, :percent) # this still supports strings, vectors of names, and values

# @chain db_table(db, :df_mem) begin
# @mutate(new_col = case_when((!!test)^2 > .5, "Pass",
# (!!test)^2 < .5, "Try Again",
# "middle"))
# @collect
# end
@chain db_table(db, :df_mem) begin
@mutate(new_col = case_when((!!test)^2 > .5, "Pass",
(!!test)^2 < .5, "Try Again",
"middle"))
@collect
end

## Slicing Ties
# Slice will always return ties due to SQL behavior
## Joining
# There are 2 key differences for joining:
# 1. When joining 2 tables, the new table you are choosing to join must be prefixed with a colon.
# 2. The column on both the new and old table must be specified. They do not need to be the same, and given SQL behavior where both columns are kept when joining two tables, it is preferable if they have different names. This avoids "ambiguous reference" errors that would otherwise come up and complicate the use of tidy selection for columns.

# df2 = DataFrame(id2 = ["AA", "AC", "AE", "AG", "AI", "AK", "AM"],
# category = ["X", "Y", "X", "Y", "X", "Y", "X"],
# score = [88, 92, 77, 83, 95, 68, 74]);

#copy_to(db, df2, "df_join");

# @chain db_table(db, :df_mem) begin
# @left_join(:df_join, id2, id)
# @collect
# end

## `case_when`
# In TidierDB, after the clause is completed, the result for the new column is separated by a comma ( , )
# this is in contrast to TidierData.jl, where the result for the new column is separated by a =>
# @chain db_table(db, :df_mem) begin
# @mutate(new_col = case_when(percent > .5, "Pass", # in TidierData, percent > .5 => "Pass",
# percent <= .5, "Try Again", # percent <= .5 => "Try Again"
# true, "middle"))
# @collect
# end

## Interpolation
# To use !! Interpolation, instead of being able to define the alternate names/value in the global context, the user has to `add_interp_parameter!`. This will hopefully be fixed in future versions. Otherwise behavior is the same.
# Also, when using interpolation with exponents, the interpolated value must go inside of parentheses.
# add_interp_parameter!(:test, :percent) # this still supports strings, vectors of names, and values

# @chain db_table(db, :df_mem) begin
# @mutate(new_col = case_when((!!test)^2 > .5, "Pass",
# (!!test)^2 < .5, "Try Again",
# "middle"))
# @collect
# end

## Slicing Ties
# Slice will always return ties due to SQL behavior
22 changes: 0 additions & 22 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,6 @@ function get_table_metadata(db::SQLite.DB, table_name::String)
return select(result, 2 => :name, 3 => :type, :current_selxn)
end

"""
    db_table(db::SQLite.DB, table::Symbol)

Return a `SQLQuery` whose `from` clause is the name of `table`, carrying the
column metadata looked up via `get_table_metadata` and the connection `db`.
"""
function db_table(db::SQLite.DB, table::Symbol)
    table_name = string(table)
    # The connection is stored on the query object alongside the metadata.
    return SQLQuery(; from=table_name, metadata=get_table_metadata(db, table_name), db=db)
end

function finalize_ctes(ctes::Vector{CTE})
if isempty(ctes)
Expand Down Expand Up @@ -155,24 +151,6 @@ function get_table_metadata(conn::LibPQ.Connection, table_name::String)
return select(result, 1 => :name, 2 => :type, :current_selxn)
end


# Database-agnostic db_table function
"""
    db_table(db, table::Symbol)

Return a `SQLQuery` for `table` on connection `db`, for any backend.

The current value of `current_sql_mode[]` must be one of `:lite`, `:postgres`,
`:duckdb` or `:mssql`; any other mode raises an error. For every accepted mode
the metadata is obtained with the same `get_table_metadata(db, table_name)`
call, so the choice of backend-specific method is left to dispatch on `db`.
"""
function db_table(db, table::Symbol)
    mode = current_sql_mode[]
    # Reject unknown modes before touching the database.
    if !(mode in (:lite, :postgres, :duckdb, :mssql))
        error("Unsupported SQL mode: $(mode)")
    end
    table_name = string(table)
    return SQLQuery(; from=table_name, metadata=get_table_metadata(db, table_name), db=db)
end

# DuckDB
function get_table_metadata(conn::DuckDB.Connection, table_name::String)
query = """
Expand Down
4 changes: 2 additions & 2 deletions src/structs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mutable struct CTE
# Additional fields as necessary

# Default constructor
CTE() = new("", "", "", "", "", "")
#CTE() = new("", "", "", "", "", "")

# Custom constructor accepting keyword arguments
function CTE(;name::String="", select::String="", from::String="", where::String="", groupBy::String="", having::String="")
Expand All @@ -33,7 +33,7 @@ mutable struct SQLQuery
ctes::Vector{CTE}
cte_count::Int

SQLQuery() = new("", "", "", "", "", "", "", "", false, false, DataFrame(), false, nothing, Vector{CTE}(), 0)
#SQLQuery() = new("", "", "", "", "", "", "", "", false, false, DataFrame(), false, nothing, Vector{CTE}(), 0)

function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="", window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(), distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0)
new(select, from, where, groupBy, orderBy, having, window_order, windowFrame, is_aggregated, post_aggregation, metadata, distinct, db, ctes, cte_count)
Expand Down

0 comments on commit 8df920e

Please sign in to comment.