From 72802f11c14bb72fd06a6e93bc2e3ef8e21424e0 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Mon, 28 Oct 2024 21:56:37 +1100 Subject: [PATCH 1/8] faster? --- ImportGraph/Imports.lean | 6 ++---- ImportGraph/RequiredModules.lean | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/ImportGraph/Imports.lean b/ImportGraph/Imports.lean index 8025b60..d061ebd 100644 --- a/ImportGraph/Imports.lean +++ b/ImportGraph/Imports.lean @@ -176,15 +176,13 @@ end Lean.NameMap Returns a `List (Name × List Name)` with a key for each module `n` in `amongst`, whose corresponding value is the list of modules `m` in `amongst` which are transitively imported by `n`, but no declaration in `n` makes use of a declaration in `m`. - -The current implementation is too slow to run on the entirety of Mathlib, -although it should be fine for any sequential chain of imports in Mathlib. -/ def unusedTransitiveImports (amongst : List Name) : CoreM (List (Name × List Name)) := do let env ← getEnv let transitiveImports := env.importGraph.transitiveClosure + let transitivelyRequired ← env.transitivelyRequiredModules' amongst amongst.mapM fun n => do return (n, - let unused := (transitiveImports.find? n).getD {} \ (← env.transitivelyRequiredModules n) + let unused := (transitiveImports.find? n).getD {} \ (transitivelyRequired.find? n |>.getD {}) amongst.filter (fun m => unused.contains m)) /-- diff --git a/ImportGraph/RequiredModules.lean b/ImportGraph/RequiredModules.lean index 7ca2fd5..89c4922 100644 --- a/ImportGraph/RequiredModules.lean +++ b/ImportGraph/RequiredModules.lean @@ -115,6 +115,32 @@ def Environment.transitivelyRequiredModules (env : Environment) (module : Name) |>.filter (env.getModuleFor? · = some module) (NameSet.ofList constants).transitivelyRequiredModules env +/-- +Computes all the modules transitively required by the specified modules. +Should be equivalent to calling `transitivelyRequiredModules` on each module, but shares more of the work. +-/ +partial def Environment.transitivelyRequiredModules' (env : Environment) (modules : List Name) : + CoreM (NameMap NameSet) := do + let mut c2m : NameMap NameSet := {} + let mut result : NameMap NameSet := {} + for m in modules do + let mut r : NameSet := {} + for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do + c2m ← process c2m n + r := r.union ((c2m.find? n).getD {}) + result := result.insert m r + return result +where process (constantsToModules : NameMap NameSet) (const : Name) : CoreM (NameMap NameSet) := do + if constantsToModules.contains const then + return constantsToModules + let mut c2m := constantsToModules + let ci ← getConstInfo const + let mut r : NameSet := {} + for n in ci.getUsedConstantsAsSet do + c2m ← process c2m n + r := r.union ((c2m.find? n).getD {}) + return c2m.insert const r + /-- Return the names of the modules in which constants used in the current file were defined. From 8738783bbd9ab73df0cf4e12549bbbe3b7e4adef Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 00:26:41 +1100 Subject: [PATCH 2/8] stackoverflow --- ImportGraph/RequiredModules.lean | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/ImportGraph/RequiredModules.lean b/ImportGraph/RequiredModules.lean index 89c4922..0443a42 100644 --- a/ImportGraph/RequiredModules.lean +++ b/ImportGraph/RequiredModules.lean @@ -126,20 +126,25 @@ partial def Environment.transitivelyRequiredModules' (env : Environment) (module for m in modules do let mut r : NameSet := {} for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do - c2m ← process c2m n + let mut stack : Array (ConstantInfo × Option NameSet) := #[⟨← getConstInfo n, none⟩] + while !stack.isEmpty do + let (ci, used?) := stack.back + match used? with + | none => + stack := stack.pop + if !c2m.contains ci.name then + let used := ci.getUsedConstantsAsSet + stack := stack.pop.push ⟨ci, some used⟩ + for u in used do + if !c2m.contains u then + stack := stack.push ⟨← getConstInfo u, none⟩ + | some used => + let transitivelyUsed : NameSet := used.fold (init := used) (fun s u => s.union ((c2m.find? u).getD {})) + c2m := c2m.insert ci.name transitivelyUsed + stack := stack.pop r := r.union ((c2m.find? n).getD {}) result := result.insert m r return result -where process (constantsToModules : NameMap NameSet) (const : Name) : CoreM (NameMap NameSet) := do - if constantsToModules.contains const then - return constantsToModules - let mut c2m := constantsToModules - let ci ← getConstInfo const - let mut r : NameSet := {} - for n in ci.getUsedConstantsAsSet do - c2m ← process c2m n - r := r.union ((c2m.find? n).getD {}) - return c2m.insert const r /-- Return the names of the modules in which constants used in the current file were defined. From 0c61e26bf68ab56ec37578ec82f7abb519ee6da9 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 00:40:26 +1100 Subject: [PATCH 3/8] . --- ImportGraph/RequiredModules.lean | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ImportGraph/RequiredModules.lean b/ImportGraph/RequiredModules.lean index 0443a42..294d745 100644 --- a/ImportGraph/RequiredModules.lean +++ b/ImportGraph/RequiredModules.lean @@ -126,22 +126,25 @@ partial def Environment.transitivelyRequiredModules' (env : Environment) (module for m in modules do let mut r : NameSet := {} for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do + -- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow. + -- So we use an explicit stack instead. We visit each constant twice: + -- once to record the constants transitively used by it, + -- and again to record the modules which defined those constants. let mut stack : Array (ConstantInfo × Option NameSet) := #[⟨← getConstInfo n, none⟩] while !stack.isEmpty do let (ci, used?) := stack.back + stack := stack.pop match used? with | none => - stack := stack.pop if !c2m.contains ci.name then let used := ci.getUsedConstantsAsSet - stack := stack.pop.push ⟨ci, some used⟩ + stack := stack.push ⟨ci, some used⟩ for u in used do if !c2m.contains u then stack := stack.push ⟨← getConstInfo u, none⟩ | some used => let transitivelyUsed : NameSet := used.fold (init := used) (fun s u => s.union ((c2m.find? u).getD {})) c2m := c2m.insert ci.name transitivelyUsed - stack := stack.pop r := r.union ((c2m.find? n).getD {}) result := result.insert m r return result From e61efb3adb638795204358a92e5454c863359b12 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 11:52:35 +1100 Subject: [PATCH 4/8] use BitVec --- ImportGraph/RequiredModules.lean | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ImportGraph/RequiredModules.lean b/ImportGraph/RequiredModules.lean index 294d745..2eb5dfd 100644 --- a/ImportGraph/RequiredModules.lean +++ b/ImportGraph/RequiredModules.lean @@ -121,10 +121,11 @@ Should be equivalent to calling `transitivelyRequiredModules` on each module, bu -/ partial def Environment.transitivelyRequiredModules' (env : Environment) (modules : List Name) : CoreM (NameMap NameSet) := do - let mut c2m : NameMap NameSet := {} + let N := env.header.moduleNames.size + let mut c2m : NameMap (BitVec N) := {} let mut result : NameMap NameSet := {} for m in modules do - let mut r : NameSet := {} + let mut r : BitVec N := 0 for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do -- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow. -- So we use an explicit stack instead. We visit each constant twice: @@ -143,11 +144,16 @@ partial def Environment.transitivelyRequiredModules' (env : Environment) (module if !c2m.contains u then stack := stack.push ⟨← getConstInfo u, none⟩ | some used => - let transitivelyUsed : NameSet := used.fold (init := used) (fun s u => s.union ((c2m.find? u).getD {})) + let transitivelyUsed : BitVec N := used.fold (init := toBitVec used) (fun s u => s ||| ((c2m.find? u).getD 0)) c2m := c2m.insert ci.name transitivelyUsed - r := r.union ((c2m.find? n).getD {}) - result := result.insert m r + r := r ||| ((c2m.find? n).getD 0) + result := result.insert m (toNameSet r) return result +where + toBitVec {N : Nat} (s : NameSet) : BitVec N := + s.fold (init := 0) (fun b n => b ||| BitVec.twoPow _ ((env.header.moduleNames.getIdx? n).getD 0)) + toNameSet {N : Nat} (b : BitVec N) : NameSet := + env.header.moduleNames.zipWithIndex.foldl (init := {}) (fun s (n, i) => if b.getLsbD i then s.insert n else s) /-- Return the names of the modules in which constants used in the current file were defined. From b6a8d266e1beddd354e4f02185a80a15ac6763a8 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 11:53:08 +1100 Subject: [PATCH 5/8] verbose --- ImportGraph/RequiredModules.lean | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ImportGraph/RequiredModules.lean b/ImportGraph/RequiredModules.lean index 2eb5dfd..41a21c0 100644 --- a/ImportGraph/RequiredModules.lean +++ b/ImportGraph/RequiredModules.lean @@ -119,12 +119,14 @@ def Environment.transitivelyRequiredModules (env : Environment) (module : Name) Computes all the modules transitively required by the specified modules. Should be equivalent to calling `transitivelyRequiredModules` on each module, but shares more of the work. -/ -partial def Environment.transitivelyRequiredModules' (env : Environment) (modules : List Name) : +partial def Environment.transitivelyRequiredModules' (env : Environment) (modules : List Name) (verbose : Bool := false) : CoreM (NameMap NameSet) := do let N := env.header.moduleNames.size let mut c2m : NameMap (BitVec N) := {} let mut result : NameMap NameSet := {} for m in modules do + if verbose then + IO.println s!"Processing module {m}" let mut r : BitVec N := 0 for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do -- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow. From d1c7eb7034012a9a7e6d793cc6a0c9e74b83f27b Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 11:56:22 +1100 Subject: [PATCH 6/8] verbose --- ImportGraph/Imports.lean | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ImportGraph/Imports.lean b/ImportGraph/Imports.lean index d061ebd..8184999 100644 --- a/ImportGraph/Imports.lean +++ b/ImportGraph/Imports.lean @@ -177,10 +177,10 @@ Returns a `List (Name × List Name)` with a key for each module `n` in `amongst` whose corresponding value is the list of modules `m` in `amongst` which are transitively imported by `n`, but no declaration in `n` makes use of a declaration in `m`. -/ -def unusedTransitiveImports (amongst : List Name) : CoreM (List (Name × List Name)) := do +def unusedTransitiveImports (amongst : List Name) (verbose : Bool := false) : CoreM (List (Name × List Name)) := do let env ← getEnv let transitiveImports := env.importGraph.transitiveClosure - let transitivelyRequired ← env.transitivelyRequiredModules' amongst + let transitivelyRequired ← env.transitivelyRequiredModules' amongst verbose amongst.mapM fun n => do return (n, let unused := (transitiveImports.find? n).getD {} \ (transitivelyRequired.find? n |>.getD {}) amongst.filter (fun m => unused.contains m)) From 481c60bee93f4a3ce60371a5b2a590de11ba645c Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 12:37:27 +1100 Subject: [PATCH 7/8] fix --- ImportGraph/RequiredModules.lean | 37 +++++++++++++++--------- ImportGraph/UnusedTransitiveImports.lean | 23 +++++++++++++++ ImportGraphTest/Imports.lean | 9 ++++++ lakefile.toml | 7 +++++ 4 files changed, 62 insertions(+), 14 deletions(-) create mode 100644 ImportGraph/UnusedTransitiveImports.lean diff --git a/ImportGraph/RequiredModules.lean b/ImportGraph/RequiredModules.lean index 41a21c0..5e90a90 100644 --- a/ImportGraph/RequiredModules.lean +++ b/ImportGraph/RequiredModules.lean @@ -123,31 +123,40 @@ partial def Environment.transitivelyRequiredModules' (env : Environment) (module CoreM (NameMap NameSet) := do let N := env.header.moduleNames.size let mut c2m : NameMap (BitVec N) := {} + let mut pushed : NameSet := {} let mut result : NameMap NameSet := {} for m in modules do if verbose then IO.println s!"Processing module {m}" let mut r : BitVec N := 0 for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do + if ! n.isInternal then -- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow. -- So we use an explicit stack instead. We visit each constant twice: -- once to record the constants transitively used by it, -- and again to record the modules which defined those constants. - let mut stack : Array (ConstantInfo × Option NameSet) := #[⟨← getConstInfo n, none⟩] + let mut stack : List (Name × Option NameSet) := [⟨n, none⟩] + pushed := pushed.insert n while !stack.isEmpty do - let (ci, used?) := stack.back - stack := stack.pop - match used? with - | none => - if !c2m.contains ci.name then - let used := ci.getUsedConstantsAsSet - stack := stack.push ⟨ci, some used⟩ - for u in used do - if !c2m.contains u then - stack := stack.push ⟨← getConstInfo u, none⟩ - | some used => - let transitivelyUsed : BitVec N := used.fold (init := toBitVec used) (fun s u => s ||| ((c2m.find? u).getD 0)) - c2m := c2m.insert ci.name transitivelyUsed + match stack with + | [] => panic! "Stack is empty" + | (c, used?) :: tail => + stack := tail + match used? with + | none => + if !c2m.contains c then + let used := (← getConstInfo c).getUsedConstantsAsSet + stack := ⟨c, some used⟩ :: stack + for u in used do + if !pushed.contains u then + stack := ⟨u, none⟩ :: stack + pushed := pushed.insert u + | some used => + let usedModules : NameSet := + used.fold (init := {}) (fun s u => if let some m := env.getModuleFor? u then s.insert m else s) + let transitivelyUsed : BitVec N := + used.fold (init := toBitVec usedModules) (fun s u => s ||| ((c2m.find? u).getD 0)) + c2m := c2m.insert c transitivelyUsed r := r ||| ((c2m.find? n).getD 0) result := result.insert m (toNameSet r) return result diff --git a/ImportGraph/UnusedTransitiveImports.lean b/ImportGraph/UnusedTransitiveImports.lean new file mode 100644 index 0000000..4743757 --- /dev/null +++ b/ImportGraph/UnusedTransitiveImports.lean @@ -0,0 +1,23 @@ +import ImportGraph.Imports + +open Lean + +def Core.withImportModules (modules : Array Name) {α} (f : CoreM α) : IO α := do + searchPathRef.set compile_time_search_path% + unsafe Lean.withImportModules (modules.map (fun m => {module := m})) {} (trustLevel := 1024) + fun env => Prod.fst <$> Core.CoreM.toIO + (ctx := { fileName := "", fileMap := default }) (s := { env := env }) do f + +/-- +`lake exe unused_transitive_imports m1 m2 ...` + +For each specified module `m`, prints those `n` from the argument list which are imported, but transitively unused by `m`. +-/ +def main (args : List String) : IO UInt32 := do + let (flags, args) := args.partition (fun s => s.startsWith "-") + let mut modules := args.map (fun s => s.toName) + Core.withImportModules modules.toArray do + let r ← unusedTransitiveImports modules (verbose := flags.contains "-v" || flags.contains "--verbose") + for (n, u) in r do + IO.println s!"{n}: {u}" + return 0 diff --git a/ImportGraphTest/Imports.lean b/ImportGraphTest/Imports.lean index c2e1047..0abc260 100644 --- a/ImportGraphTest/Imports.lean +++ b/ImportGraphTest/Imports.lean @@ -1,5 +1,6 @@ import ImportGraph.Imports import ImportGraph.RequiredModules +import ImportGraphTest.Used open Lean @@ -36,6 +37,7 @@ elab "#unused_transitive_imports" names:ident* : command => do logInfo <| s!"Transitively unused imports of {n}:\n{"\n".intercalate (u.map (fun i => s!" {i}"))}" -- This test case can be removed after nightly-2024-10-24, because these imports have been cleaned up. +-- It should be replaced with another test case! /-- info: Transitively unused imports of Init.Control.StateRef: Init.System.IO @@ -46,6 +48,13 @@ info: Transitively unused imports of Init.System.IO: #guard_msgs in #unused_transitive_imports Init.Control.StateRef Init.System.IO Init.Control.Reader Init.Control.Basic +/-- +info: Transitively unused imports of ImportGraphTest.Used: + ImportGraphTest.Unused +-/ +#guard_msgs in +#unused_transitive_imports ImportGraphTest.Used ImportGraphTest.Unused Init.Control.Reader + -- This is a spurious unused transitive import, because it relies on notation from `Init.Core`. /-- info: Transitively unused imports of Init.Control.Basic: diff --git a/lakefile.toml b/lakefile.toml index 5d47ad9..f613939 100644 --- a/lakefile.toml +++ b/lakefile.toml @@ -28,5 +28,12 @@ root = "Main" # Remove this line if you do not need such functionality. supportInterpreter = true +# `lake exe unused_transitive_imports` prints unused transitive imports from amongst a given list of modules. +[[lean_exe]] +name = "unused_transitive_imports" +root = "ImportGraph.UnusedTransitiveImports" +supportInterpreter = true + [[lean_lib]] name = "ImportGraphTest" + From 793740f63564dfc84ff6beca7e7c83c4d2bafbf5 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 29 Oct 2024 14:37:59 +1100 Subject: [PATCH 8/8] readme --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 397a134..a931d52 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,12 @@ There are a few commands implemented, which help you analysing the imports of a (Must be run at the end of the file. Tactics and macros may result in incorrect output.) * `#find_home decl`: suggests files higher up the import hierarchy to which `decl` could be moved. +## Other executables + +`lake exe unused_transitive_imports m1 m2 ...` + +For each specified module `m`, prints those `n` from the argument list which are imported, but transitively unused by `m`. + ## Installation The installation works exactly like for any [Lake package](https://reservoir.lean-lang.org/),