non docs related changes to get rid of the warnings when loading pkg

TidierOrg · Apr 7, 2024 · 8df920e · 8df920e
1 parent f4affe9
commit 8df920e
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 106 deletions.
diff --git a/docs/examples/UserGuide/key_differences.jl b/docs/examples/UserGuide/key_differences.jl
@@ -5,109 +5,69 @@
 
 ## group_by -> mutate
 # In TidierDB, when performing `@group_by` then `@mutate`, after applying all of the mutations in the clause to the grouped data, the table is ungrouped. To perform subsequent grouped mutations/slices/summarizations, the user would have to regroup the data. This is something we will work to resolve, but as of version 0.1.0, this is the behavior. This is demonstrated below.
-#using TidierDB
-#df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9], 
-#                        groups = [i % 2 == 0 ? "aa" : "bb" for i in 1:10], 
-#                        value = repeat(1:5, 2), 
-#                        percent = 0.1:0.1:1.0);
-
-# mem = duckdb_open(":memory:");
-# db = duckdb_connect(mem);
+using TidierDB
+df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9], 
+                        groups = [i % 2 == 0 ? "aa" : "bb" for i in 1:10], 
+                        value = repeat(1:5, 2), 
+                        percent = 0.1:0.1:1.0);
+
+ mem = duckdb_open(":memory:");
+ db = duckdb_connect(mem);
 # For these examples we will use DuckDB, the default backend, although SQLite, Postgres, MySQL, MSSQL, and ClickHouse are possible.
 # copy_to(db, df, "df_mem"); # copying over the df to memory
 
-# @chain db_table(db, :df_mem) begin
-#    @group_by(groups)
-#    @summarise(mean = mean(percent))
-#    @slice_max(percent)
-#    @collect
-# end     
-
-# @chain db_table(db, :df_mem) begin
-#    @group_by(groups)
-#    @mutate(max = maximum(percent), min = minimum(percent))
-#    @group_by(groups)
-#    @summarise(mean = mean(percent))
-#    @collect
-# end     
+@chain db_table(db, :df_mem) begin
+    @group_by(groups)
+    @summarise(mean = mean(percent))
+    @slice_max(percent)
+    @collect
+ end     
+
+ @chain db_table(db, :df_mem) begin
+    @group_by(groups)
+    @mutate(max = maximum(percent), min = minimum(percent))
+    @group_by(groups)
+    @summarise(mean = mean(percent))
+    @collect
+end     
 
 ## Joining
 # There are 2 key differences for joining:
 # 1. When joining 2 tables, the new table you are choosing to join must be prefixed with a colon. 
 # 2. The column on both the new and old table must be specified. They do not need to be the same, and given SQL behavior where both columns are kept when joining two tables, it is preferable if they have different names. This avoids "ambiguous reference" errors that would otherwise come up and complicate the use of tidy selection for columns.
 
-# df2 = DataFrame(id2 = ["AA", "AC", "AE", "AG", "AI", "AK", "AM"],
-#                category = ["X", "Y", "X", "Y", "X", "Y", "X"],
-#                score = [88, 92, 77, 83, 95, 68, 74]);
+df2 = DataFrame(id2 = ["AA", "AC", "AE", "AG", "AI", "AK", "AM"],
+                category = ["X", "Y", "X", "Y", "X", "Y", "X"],
+                score = [88, 92, 77, 83, 95, 68, 74]);
 
-# copy_to(db, df2, "df_join");
+ copy_to(db, df2, "df_join");
 
-# @chain db_table(db, :df_mem) begin
-#    @left_join(:df_join, id2, id)
-#    @collect
-#end
+ @chain db_table(db, :df_mem) begin
+    @left_join(:df_join, id2, id)
+    @collect
+end
 
 ## `case_when`
 # In TidierDB, after the clause is completed, the result for the new column should be separated by a comma ( , )
 # this is in contrast to TidierData.jl, where the result for the new column is separated by a => 
-# @chain db_table(db, :df_mem) begin
-#    @mutate(new_col = case_when(percent > .5, "Pass",  # in TidierData, percent > .5 => "Pass", 
-#                                percent <= .5, "Try Again", # percent <= .5 => "Try Again"
-#                                true, "middle"))
-#    @collect
-# end
+@chain db_table(db, :df_mem) begin
+    @mutate(new_col = case_when(percent > .5, "Pass",  # in TidierData, percent > .5 => "Pass", 
+                                percent <= .5, "Try Again", # percent <= .5 => "Try Again"
+                                true, "middle"))
+    @collect
+ end
 
 ## Interpolation
 # To use !! interpolation, instead of being able to define the alternate names/values in the global context, the user has to call `add_interp_parameter!`. This will hopefully be fixed in future versions. Otherwise behavior is the same.
 # Also, when using interpolation with exponents, the interpolated value must go inside parentheses.
 # add_interp_parameter!(:test, :percent) # this still supports strings, vectors of names, and values
 
-# @chain db_table(db, :df_mem) begin
-#    @mutate(new_col = case_when((!!test)^2 > .5, "Pass",
-#                                (!!test)^2 < .5, "Try Again",
-#                                "middle"))
-#    @collect
-# end
+@chain db_table(db, :df_mem) begin
+    @mutate(new_col = case_when((!!test)^2 > .5, "Pass",
+                                (!!test)^2 < .5, "Try Again",
+                                "middle"))
+    @collect
+end
 
 ## Slicing Ties
 # Slice will always return ties due to SQL behavior
-## Joining
-# There are 2 key differences for joining:
-# 1. When joining 2 tables, the new table you are choosing to join must be prefixed with a colon. 
-# 2. The column on both the new and old table must be specified. They do not need to be the same, and given SQL behavior where both columns are kept when joining two tables, it is preferrable if they have different names. This avoids "ambiguous reference" errors that would otherwise come up and complicate the use of tidy selection for columns. 
-
-# df2 = DataFrame(id2 = ["AA", "AC", "AE", "AG", "AI", "AK", "AM"],
-#                category = ["X", "Y", "X", "Y", "X", "Y", "X"],
-#                score = [88, 92, 77, 83, 95, 68, 74]);
-
-#copy_to(db, df2, "df_join");
-
-# @chain db_table(db, :df_mem) begin
-#    @left_join(:df_join, id2, id)
-#    @collect
-# end
-
-## `case_when`
-# In TidierDB, after the clause is completed, the result for the new column should is separated by comma ( , )
-# this is in contrast to TidierData.jl, where the result for the new column is separated by a => 
-# @chain db_table(db, :df_mem) begin
-#    @mutate(new_col = case_when(percent > .5, "Pass",  # in TidierData, percent > .5 => "Pass", 
-#                                percent <= .5, "Try Again", # percent <= .5 => "Try Again"
-#                                true, "middle"))
-#    @collect
-# end
-
-## Interpolation
-# To use !! Interpolation, instead of being able to define the alternate names/value in the global context, the user has to `add_interp_parameter!`. This will hopefully be fixed in future versions. Otherwise behavior is the same.
-# Also, when using interpolation with exponenents, the interpolated value must go inside of parenthesis. 
-# add_interp_parameter!(:test, :percent) # this still supports strings, vectors of names, and values
-
-# @chain db_table(db, :df_mem) begin
-#    @mutate(new_col = case_when((!!test)^2 > .5, "Pass",
-#                                (!!test)^2 < .5, "Try Again",
-#                                "middle"))
-#    @collect
-# end
-
-## Slicing Ties
-# Slice will always return ties due to SQL behavior
diff --git a/src/TidierDB.jl b/src/TidierDB.jl
@@ -74,10 +74,6 @@ function get_table_metadata(db::SQLite.DB, table_name::String)
     return select(result, 2 => :name, 3 => :type, :current_selxn)
 end
 
-function db_table(db::SQLite.DB, table::Symbol)
-    metadata = get_table_metadata(db, string(table))
-    return SQLQuery(from=string(table), metadata=metadata, db=db)  # Pass db to the constructor
-end
 
 function finalize_ctes(ctes::Vector{CTE})
     if isempty(ctes)
@@ -155,24 +151,6 @@ function get_table_metadata(conn::LibPQ.Connection, table_name::String)
     return select(result, 1 => :name, 2 => :type, :current_selxn)
 end
 
-
-# Database-agnostic db_table function
-function db_table(db, table::Symbol)
-    table_name = string(table)
-    metadata = if current_sql_mode[] == :lite
-        get_table_metadata(db, table_name)
-    elseif current_sql_mode[] == :postgres 
-        get_table_metadata(db, table_name)
-    elseif current_sql_mode[] == :duckdb 
-        get_table_metadata(db, table_name)
-    elseif current_sql_mode[] == :mssql 
-        get_table_metadata(db, table_name)
-    else
-        error("Unsupported SQL mode: $(current_sql_mode[])")
-    end
-    return SQLQuery(from=table_name, metadata=metadata, db=db)
-end
-
 # DuckDB
 function get_table_metadata(conn::DuckDB.Connection, table_name::String)
     query = """

diff --git a/src/structs.jl b/src/structs.jl
@@ -8,7 +8,7 @@ mutable struct CTE
     # Additional fields as necessary
 
     # Default constructor
-    CTE() = new("", "", "", "", "", "")
+    #CTE() = new("", "", "", "", "", "")
 
     # Custom constructor accepting keyword arguments
     function CTE(;name::String="", select::String="", from::String="", where::String="", groupBy::String="", having::String="")
@@ -33,7 +33,7 @@ mutable struct SQLQuery
     ctes::Vector{CTE}
     cte_count::Int
 
-    SQLQuery() = new("", "", "", "", "", "", "", "", false, false, DataFrame(), false, nothing, Vector{CTE}(), 0)
+    #SQLQuery() = new("", "", "", "", "", "", "", "", false, false, DataFrame(), false, nothing, Vector{CTE}(), 0)
 
     function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="", window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(), distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0)
         new(select, from, where, groupBy, orderBy, having, window_order, windowFrame, is_aggregated, post_aggregation, metadata, distinct, db, ctes, cte_count)