diff options
author | Sébastien Dailly <sebastien@dailly.me> | 2025-03-01 08:39:02 +0100 |
---|---|---|
committer | Sébastien Dailly <sebastien@dailly.me> | 2025-03-06 20:57:10 +0100 |
commit | 81db1bfd580791910646525e30bc45af34533987 (patch) | |
tree | c610f53c284d3707a3d6fe49486b5c09e66dc41f /tests | |
parent | 67320d8f04e1f302306b9aafdaaf4bafcf443839 (diff) |
Rewrite the way to handle filters
Diffstat (limited to 'tests')
-rw-r--r-- | tests/analyser_filters.ml | 187 | ||||
-rw-r--r-- | tests/analyser_query_test.ml | 64 | ||||
-rw-r--r-- | tests/confLoader.ml | 6 | ||||
-rw-r--r-- | tests/configuration_toml.ml | 24 | ||||
-rw-r--r-- | tests/importer_test.ml | 1 | ||||
-rw-r--r-- | tests/sql_db.ml | 119 | ||||
-rw-r--r-- | tests/test_migration.ml | 27 |
7 files changed, 392 insertions, 36 deletions
diff --git a/tests/analyser_filters.ml b/tests/analyser_filters.ml new file mode 100644 index 0000000..c329af8 --- /dev/null +++ b/tests/analyser_filters.ml @@ -0,0 +1,187 @@ +module CTE = ImportConf.CTE +module Filters = ImportAnalyser.Filters +module Chunk = ImportAnalyser.Chunk + +(* Remove the externals links in the coonfiguration because the queries are + already complicated enough without *) +let conf = { ConfLoader.conf with externals = [] } + +let path' = + Expression_builder.path ImportDataTypes.Path.{ alias = None; column = 1 } + +(** Build a simple expression expressing a group. This expression will trigger + the creation of a CTE in the query. *) +let group_expression = + Expression_builder.(equal (max path' [ path' ] [ path' ]) integer_one) + +(** Basic test ensuring we create nothing when we have nothing to create :) *) +let empty_predicates () = + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf [] chunk_links in + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "No predicates" + (Chunk.create (), Chunk.create ()) + (chunk_links, chunk_predicates) + +(** Simple numeric filter + + The path is identified as pointing to a numeric column, and the associated + query will replace null with 0. *) +let simple_filter () = + let filter = CTE.of_filters Expression_builder.[ equal integer_one path' ] in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates " WHERE 1=COALESCE('source'.'col_1',0)"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "Simple predicate" + (expected_predicates, Chunk.create ()) + (chunk_links, chunk_predicates) + +(** Combined filter + + The path is identified as pointing to a literal column, andn the associated + query will replace null with empty string. *) +let multiple_filters () = + let filter = + CTE.of_filters Expression_builder.[ integer_one; equal path' literal_zero ] + in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + (* The predicates can be executed in reverse order, but it’s not an issue + because they all are applied at the same time in the projection *) + Chunk.add_string expected_predicates + " WHERE COALESCE('source'.'col_1','')=?\nAND 1"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "Combined predicate" + (expected_predicates, Chunk.create ()) + (chunk_links, chunk_predicates) + +(** Create a simple configuration with a group expression and no other filters +*) +let group_filter () = + let filter = CTE.of_filters [ group_expression ] in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE filter0.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source')\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "Predicate with a group function" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +let expression_with_group () = + let filter = + CTE.of_filters + Expression_builder.[ equal integer_one path'; group_expression ] + in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE filter0.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source' WHERE 1=COALESCE('source'.'col_1',0))\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "The predicate expression is inside of the CTE" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +(** The last filter is given after the group. The predicate shall not be + included inside the CTE but in the main query. *) +let group_with_expression () = + let filter = + CTE.of_filters + Expression_builder.[ group_expression; equal integer_one path' ] + in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE 1=COALESCE('source'.'col_1',0) AND filter0.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source')\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "The predicate expression is out of the CTE" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +(** Test the configuration with two group, each one generating it’s own CTE in + the query *) +let group_with_group () = + let filter = CTE.of_filters [ group_expression; group_expression ] in + let chunk_links = Chunk.create () in + let chunk_predicates = Filters.generate_sql ~conf filter chunk_links in + + let expected_predicates = Chunk.create () in + Chunk.add_string expected_predicates + "\n\ + INNER JOIN 'filter1' ON filter1.id = source.id\n\ + WHERE filter1.group_function"; + + let expected_links = Chunk.create () in + Chunk.add_string expected_links + "WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source')\n\ + , filter1 AS (SELECT source.id, LAST_VALUE('source'.'col_1') OVER \ + (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN \ + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)=1 AS group_function\n\ + FROM 'source' AS 'source'\n\ + INNER JOIN 'filter0' ON filter0.id = source.id\n\ + WHERE filter0.group_function)\n"; + + Alcotest.(check @@ pair Test_migration.chunk Test_migration.chunk) + "The two CTE shall be generated" + (expected_predicates, expected_links) + (chunk_links, chunk_predicates) + +let tests = + Alcotest. + ( "filters", + [ + test_case "No filters" `Quick empty_predicates; + test_case "Simple filter" `Quick simple_filter; + test_case "Combined filter" `Quick multiple_filters; + test_case "Group filter" `Quick group_filter; + test_case "Expression and Group filter" `Quick expression_with_group; + test_case "Group filter and Expression" `Quick group_with_expression; + test_case "Group and Group" `Quick group_with_group; + ] ) diff --git a/tests/analyser_query_test.ml b/tests/analyser_query_test.ml index 0e23f11..c4ab6d8 100644 --- a/tests/analyser_query_test.ml +++ b/tests/analyser_query_test.ml @@ -32,12 +32,12 @@ let select = "Select" >:: fun _ -> let query, _ = Q.select conf in let expected_query = - {|SELECT COALESCE('source'.col_1,'') || ? || '' AS result_0, -'source'.col_2 AS result_1, -'last_file'.col_5 AS result_2 + {|SELECT COALESCE('source'.'col_1','') || ? || '' AS result_0, +'source'.'col_2' AS result_1, +'last_file'.'col_5' AS result_2 FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file'|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file'|} in Alcotest.check Alcotest.string "" expected_query query.q @@ -47,11 +47,11 @@ let check_externals = let query = Q.check_external conf (List.hd conf.externals) in let expected_query = - "SELECT 'source'.'id', 'source'.col_1 FROM\n\ - 'source' AS 'source'\n\ - LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = \ - 'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND 'source'.col_1 \ - IS NOT NULL AND 'source'.col_1 <> ''" + "SELECT 'source'.'id', 'source'.'col_1'\n\ + FROM 'source' AS 'source'\n\ + LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = \ + 'other'.'key_other' WHERE 'other'.'key_other' IS NULL AND \ + 'source'.'col_1' IS NOT NULL AND 'source'.'col_1' <> ''" in Alcotest.check Alcotest.string "" expected_query query.q @@ -81,8 +81,8 @@ let previous = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT LAG('previous'.col_5) OVER (PARTITION BY 'previous'.col_1 ORDER BY \ - 'previous'.col_3) AS result_0\n\ + "SELECT LAG('previous'.'col_5') OVER (PARTITION BY 'previous'.'col_1' \ + ORDER BY 'previous'.'col_3') AS result_0\n\ FROM 'source' AS 'previous'" in Alcotest.check Alcotest.string "" expected_query query.q @@ -112,7 +112,7 @@ let sum = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT SUM('previous'.col_5) OVER (PARTITION BY 'previous'.col_1) AS \ + "SELECT SUM('previous'.'col_5') OVER (PARTITION BY 'previous'.'col_1') AS \ result_0\n\ FROM 'source' AS 'previous'" in @@ -137,7 +137,7 @@ let sum_total = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'" + "SELECT SUM('previous'.'col_5') AS result_0\nFROM 'source' AS 'previous'" in Alcotest.check Alcotest.string "" expected_query query.q @@ -166,7 +166,7 @@ let sum_unfiltered = let query, _ = ImportAnalyser.Query.select conf in let expected_query = - "SELECT SUM('previous'.col_5) AS result_0\nFROM 'source' AS 'previous'" + "SELECT SUM('previous'.'col_5') AS result_0\nFROM 'source' AS 'previous'" in Alcotest.check Alcotest.string "" expected_query query.q @@ -206,16 +206,16 @@ let filter_group = let contents, _ = ImportAnalyser.Query.select conf in let expected = - {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0 + {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file') -SELECT 'source'.col_1 AS result_0 +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') +SELECT 'source'.'col_1' AS result_0 FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -INNER JOIN 'cte' ON cte.id = source.id -WHERE (cte.group0)|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' +INNER JOIN 'filter0' ON filter0.id = source.id +WHERE filter0.group_function|} in Alcotest.check Alcotest.string "" expected contents.q @@ -238,18 +238,16 @@ let filter_group2 = let contents, _ = ImportAnalyser.Query.select conf in let expected = - {|WITH cte AS (SELECT source.id, LAST_VALUE('source'.col_3) OVER (PARTITION BY 'source'.col_1 ORDER BY 'source'.col_1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group0 + {|WITH filter0 AS (SELECT source.id, LAST_VALUE('source'.'col_3') OVER (PARTITION BY 'source'.'col_1' ORDER BY 'source'.'col_1' RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS group_function FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -WHERE COALESCE('source'.col_3,0)=0) -SELECT 'source'.col_1 AS result_0 +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file') +SELECT 'source'.'col_1' AS result_0 FROM 'source' AS 'source' -LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.col_1)) = 'other'.'key_other' -LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.col_1)) = 'last_file'.'key_last_file' -INNER JOIN 'cte' ON cte.id = source.id -WHERE COALESCE('source'.col_3,0)=0 -AND (cte.group0)|} +LEFT JOIN 'other' AS 'other' ON rtrim(upper('source'.'col_1')) = 'other'.'key_other' +LEFT JOIN 'last' AS 'last_file' ON rtrim(upper('other'.'col_1')) = 'last_file'.'key_last_file' +INNER JOIN 'filter0' ON filter0.id = source.id +WHERE COALESCE('source'.'col_3',0)=0 AND filter0.group_function|} in Alcotest.check Alcotest.string "" expected contents.q diff --git a/tests/confLoader.ml b/tests/confLoader.ml index 4adedd6..692cda9 100644 --- a/tests/confLoader.ml +++ b/tests/confLoader.ml @@ -1,7 +1,9 @@ +let load' : string -> (ImportConf.Syntax.t, string) Result.t = + fun content -> Otoml.Parser.from_string content |> ImportConf.t_of_toml + (** Read the configuration in toml and return the internal representation *) let load : string -> ImportConf.Syntax.t = - fun content -> - Otoml.Parser.from_string content |> ImportConf.t_of_toml |> Result.get_ok + fun content -> Result.get_ok (load' content) let conf = load diff --git a/tests/configuration_toml.ml b/tests/configuration_toml.ml index dc1f769..e51c727 100644 --- a/tests/configuration_toml.ml +++ b/tests/configuration_toml.ml @@ -2,6 +2,29 @@ module Expression = ImportExpression.T module Path = ImportDataTypes.Path open Test_migration +let nested_group () = + let expected = + Error + "in field \"sheet\":\n\ + \ in field \"columns\":\n\ + \ while decoding a list:\n\ + \ element 0:\n\ + \ A group function cannot contains another group function, but got\n\ + \ \"max(:A, [counter([:A], [:A])], [])\" \n" + and result = + ConfLoader.load' + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + "max(:A, [counter([:A], [:A])], [])", +]|} + in + Alcotest.(check (result Test_migration.syntax string)) + "duplicate" expected result + let test_suit = [ ( "parse_extern" >:: fun _ -> @@ -63,6 +86,7 @@ let test_suit = let toml = Otoml.Parser.from_file "configuration/example_csv.toml" in let toml = ImportConf.t_of_toml toml in ignore toml ); + ("nested group", `Quick, nested_group); ] let tests = "configuration_toml" >::: test_suit diff --git a/tests/importer_test.ml b/tests/importer_test.ml index a7b7f0d..78b7f59 100644 --- a/tests/importer_test.ml +++ b/tests/importer_test.ml @@ -11,6 +11,7 @@ let _ = Expression_type_of.tests; Expression_query.tests; Analyser_dependency.tests; + Analyser_filters.tests; Analyser_query_test.tests; Sql_db.tests; ] diff --git a/tests/sql_db.ml b/tests/sql_db.ml index 75b8293..65a93ce 100644 --- a/tests/sql_db.ml +++ b/tests/sql_db.ml @@ -200,6 +200,121 @@ columns = [ ImportCSV.DataType. [ [| Integer 1; Integer 200 |]; [| Integer 2; Integer 100 |] ]) +let filter_group = + run_test "filter_group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + "max(:B, [:C], [:B]) = :B", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok ImportCSV.DataType.[ [| Integer 3 |] ]) + +(** The first filter will prevent the max value to pop, and only the second one + will be reported *) +let filter_expression_and_group = + run_test "filter expression then group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + ":B <> 200", + "max(:B, [:C], [:B]) = :B", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok ImportCSV.DataType.[ [| Integer 2 |] ]) + +(** In this case, we first filter the line and keep only the max value, but the + second filter will match the result and will produce an empty list *) +let filter_group_and_expression = + run_test "filter group then expression" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + "max(:B, [:C], [:B]) = :B", + ":B <> 200", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok []) + +(** In this case, each filter remove the line with the higher value in B. + + After the application of the two filters, the only remaining result is the + first line. *) +let filter_group_and_group = + run_test "filter group then group" + ~configuration: + {|[source] +name = "source_name" +file = "source_file" + +[sheet] +columns = [ + ":A", +] + +filters = [ + "max(:B, [:C], [:B]) <> :B", + "max(:B, [:C], [:B]) <> :B", +] + +|} + ~input: + ImportCSV.DataType. + [ + [ (0, Integer 1); (1, Integer 100); (2, Integer 0) ]; + [ (0, Integer 2); (1, Integer 150); (2, Integer 0) ]; + [ (0, Integer 3); (1, Integer 200); (2, Integer 0) ]; + ] + ~expected:(Ok [ [| ImportCSV.DataType.Integer 1 |] ]) + let test_suit = [ simple_extraction; @@ -208,6 +323,10 @@ let test_suit = sum_unfiltered; sum_group; sum_group_uniq; + filter_group; + filter_expression_and_group; + filter_group_and_expression; + filter_group_and_group; ] let tests = "sql_db" >::: test_suit diff --git a/tests/test_migration.ml b/tests/test_migration.ml index 35480d1..e26f354 100644 --- a/tests/test_migration.ml +++ b/tests/test_migration.ml @@ -45,7 +45,7 @@ let int_container_testable = make_test (module ImportContainers.IntSet) let expression_testable = make_test (module struct - type t = ImportConf.Path.t ImportExpression.T.t [@@deriving show, eq] + type t = ImportDataTypes.Path.t ImportExpression.T.t [@@deriving show, eq] end) let dep_key_testable = @@ -53,3 +53,28 @@ let dep_key_testable = (module struct type t = ImportAnalyser.Dependency.key [@@deriving show, eq] end) + +let chunk = + make_test + (module struct + type t = ImportAnalyser.Chunk.t + + let pp formater t = + Format.fprintf formater "%s" (Buffer.contents t.ImportAnalyser.Chunk.b) + + let equal t1 t2 = + let to_string t = Buffer.contents t.ImportAnalyser.Chunk.b in + String.equal (to_string t1) (to_string t2) + end) + +let syntax = + make_test + (module struct + type t = ImportConf.Syntax.t + + let pp format t = + Format.fprintf format "%s" + (Otoml.Printer.to_string (ImportConf.Syntax.repr t)) + + let equal t1 t2 = t1 = t2 + end) |