Skip to content

Commit

Permalink
WIP: "Hoard Factor" calculation using BitVectors
Browse files Browse the repository at this point in the history
  • Loading branch information
Vierkantor committed Oct 30, 2024
1 parent c1970be commit 5bd7ed7
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 33 deletions.
131 changes: 131 additions & 0 deletions ImportGraph/HoardFactor.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/-
Copyright (c) 2024 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Anne Baanen, Johan Commelin
-/
import Cli.Basic
import ImportGraph.Imports

/-!
# `lake exe hoard_factor`
Computes a measure of unused declarations per module.
-/

open Lean Core System

-- Copied from `UnusedTransitiveImports` with modifications to allow running from another Lean package.
def Core.withImportModules (modules : Array Name) {α} (f : CoreM α) : IO α := do
searchPathRef.set (← addSearchPathFromEnv compile_time_search_path%)
unsafe Lean.withImportModules (modules.map (fun m => {module := m})) {} (trustLevel := 1024)
fun env => Prod.fst <$> Core.CoreM.toIO
(ctx := { fileName := "<CoreM>", fileMap := default }) (s := { env := env }) do f

def Lean.Environment.getModuleInfo? (env : Environment) (module : Name) : Option ModuleData := do
let idx ← env.getModuleIdx? module
env.header.moduleData.get? idx

def Lean.Name.isBlackListed {m} [Monad m] [MonadEnv m] (declName : Name) : m Bool := do
if declName == ``sorryAx then return true
if declName matches .str _ "inj" then return true
if declName matches .str _ "noConfusionType" then return true
let env ← getEnv
pure <| declName.isInternalDetail
|| isAuxRecursor env declName
|| isNoConfusion env declName
<||> isRec declName -- <||> isMatcher declName

/-- Compute the local size and transitive size of each module's declarations. -/
def Lean.Environment.importSizes (env : Environment) (verbose : Bool := false) :
CoreM (NameMap NameSet × NameMap Nat × NameMap Nat) := do
let modules := (env.header.moduleNames.zip env.header.moduleData)
let mut importSizes : NameMap Nat := .empty
let mut localSizes : NameMap Nat := .empty
let importMap := env.importGraph.transitiveClosure
let mut iteration := 0

for (name, module) in modules do
let names := module.constNames
let localSize := names.size
localSizes := localSizes.insert name localSize

-- Note that the `getD` assigns a size of `0` to all `Init` files.
let importSize : Nat := ((importMap.find? name).getD ∅).fold (init := 0)
(fun size i => size + localSizes.find! i)
importSizes := importSizes.insert name importSize

iteration := iteration + 1
if verbose && iteration % 100 == 0 then
println!"Processed imports for {name}"

pure (importMap, localSizes, importSizes)

def Lean.Environment.hoardFactor (env : Environment) (verbose : Bool := false) : CoreM Float := do
-- TODO: improve the standard library so the next line can be `env.constants.keys`
let decls := (env.constants.map₁.toArray.append env.constants.map₂.toArray).map (·.fst)
if verbose then
println!"Preprocessing approximately {decls.size} declarations."
let ⟨N, c2m⟩ ← env.transitivelyRequiredModulesForDecls' decls verbose
if verbose then
println!"Declarations preprocessed. Preprocessing modules."
let (transitiveImports, localSizes, importSizes) ← env.importSizes verbose

let mut sum : Nat := 0
let mut tot : Nat := 0
let mut iteration : Nat := 0

if verbose then
println!"Entering main loop."
for (decl, requiredModules) in c2m do
if ← decl.isBlackListed then continue

iteration := iteration + 1
if verbose && iteration % 1000 == 0 then
println!"{decl}: {sum.toFloat / tot.toFloat}"
match env.getModuleFor? decl with
| none => panic!"Could not determine module for {decl}"
| some module => do
tot := tot + importSizes.find! module
let transImports := transitiveImports.findD module ∅
for (n, i) in env.header.moduleNames.zipWithIndex do
if !requiredModules.getLsbD i
&& transImports.contains n then do
sum := sum + localSizes.find! n
if verbose then
println!"Processed {iteration} declarations."

return sum.toFloat / tot.toFloat

open IO.FS IO.Process Name in
/-- Implementation of the hoard factor command line program. -/
def hoardFactorCLI (args : Cli.Parsed) : IO UInt32 := do
-- Default target is `Mathlib`
let targets : Array Name := match args.variableArgsAs! Cli.ModuleName with
| #[] => #[`Mathlib]
| targets => targets

let factor ← Core.withImportModules targets do
let env ← getEnv
env.hoardFactor (verbose := args.hasFlag "verbose")

println!"Hoard factor: {factor}"
return 0

open Cli

/-- Setting up command line options and help text for `lake exe hoard_factor`. -/
def hoard_factor : Cmd := `[Cli|
hoard_factor VIA hoardFactorCLI; ["0.0.1"]
"Compute a measure of unused declarations per module."

FLAGS:
"verbose"; "Output more information during computations."

ARGS:
...targets : String; "Module name(s) to analyze. \
Default value is `Mathlib`."
]

/-- `lake exe hoard_factor` -/
def main (args : List String) : IO UInt32 :=
hoard_factor.validate args
93 changes: 61 additions & 32 deletions ImportGraph/RequiredModules.lean
Original file line number Diff line number Diff line change
Expand Up @@ -115,54 +115,83 @@ def Environment.transitivelyRequiredModules (env : Environment) (module : Name)
|>.filter (env.getModuleFor? · = some module)
(NameSet.ofList constants).transitivelyRequiredModules env

/--
Computes all the modules transitively required by each declaration inside of the specified modules.
-/
partial def Environment.transitivelyRequiredModulesForDecls' (env : Environment) (decls : Array Name) (verbose : Bool := false) :
CoreM ((N : Nat) × NameMap (BitVec N)) := do
let N := env.header.moduleNames.size
let mut c2m : NameMap (BitVec N) := {}
let mut pushed : NameSet := {}
for n in decls do
if ! n.isInternal then
-- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow.
-- So we use an explicit stack instead. We visit each constant twice:
-- once to record the constants transitively used by it,
-- and again to record the modules which defined those constants.
let mut stack : List (Name × Option NameSet) := [⟨n, none⟩]
pushed := pushed.insert n
while !stack.isEmpty do
match stack with
| [] => panic! "Stack is empty"
| (c, used?) :: tail =>
stack := tail
match used? with
| none =>
if !c2m.contains c then
let used := (← getConstInfo c).getUsedConstantsAsSet
stack := ⟨c, some used⟩ :: stack
for u in used do
if !pushed.contains u then
stack := ⟨u, none⟩ :: stack
pushed := pushed.insert u
| some used =>
let usedModules : NameSet :=
used.fold (init := {}) (fun s u => if let some m := env.getModuleFor? u then s.insert m else s)
let transitivelyUsed : BitVec N :=
used.fold (init := toBitVec usedModules) (fun s u => s ||| ((c2m.find? u).getD 0))
c2m := c2m.insert c transitivelyUsed
return ⟨N, c2m⟩
where
toBitVec {N : Nat} (s : NameSet) : BitVec N :=
s.fold (init := 0) (fun b n => b ||| BitVec.twoPow _ ((env.header.moduleNames.getIdx? n).getD 0))

/--
Computes all the modules transitively required by each declaration inside of the specified modules.
-/
partial def Environment.transitivelyRequiredModulesForDecls (env : Environment) (decls : Array Name) (verbose : Bool := false) :
CoreM (NameMap NameSet) := do
let ⟨N, c2m⟩ ← env.transitivelyRequiredModulesForDecls' decls verbose
if verbose then println!"Converting to NameSet..."
-- TODO: is there really no `Lean.RBMap.map`?
return ⟨c2m.1.map (fun _ => toNameSet), sorry
where
toBitVec {N : Nat} (s : NameSet) : BitVec N :=
s.fold (init := 0) (fun b n => b ||| BitVec.twoPow _ ((env.header.moduleNames.getIdx? n).getD 0))
toNameSet {N : Nat} (b : BitVec N) : NameSet :=
env.header.moduleNames.zipWithIndex.foldl (init := {}) (fun s (n, i) => if b.getLsbD i then s.insert n else s)

/--
Computes all the modules transitively required by the specified modules.
Should be equivalent to calling `transitivelyRequiredModules` on each module, but shares more of the work.
-/
partial def Environment.transitivelyRequiredModules' (env : Environment) (modules : List Name) (verbose : Bool := false) :
CoreM (NameMap NameSet) := do
let N := env.header.moduleNames.size
let mut c2m : NameMap (BitVec N) := {}
let mut pushed : NameSet := {}
let mut decls : Array Name := #[]
for m in modules do
let names := env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames
decls := decls.append names
let ⟨N, c2m⟩ ← env.transitivelyRequiredModulesForDecls' decls
let mut result : NameMap NameSet := {}
for m in modules do
if verbose then
IO.println s!"Processing module {m}"
let mut r : BitVec N := 0
for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do
if ! n.isInternal then
-- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow.
-- So we use an explicit stack instead. We visit each constant twice:
-- once to record the constants transitively used by it,
-- and again to record the modules which defined those constants.
let mut stack : List (Name × Option NameSet) := [⟨n, none⟩]
pushed := pushed.insert n
while !stack.isEmpty do
match stack with
| [] => panic! "Stack is empty"
| (c, used?) :: tail =>
stack := tail
match used? with
| none =>
if !c2m.contains c then
let used := (← getConstInfo c).getUsedConstantsAsSet
stack := ⟨c, some used⟩ :: stack
for u in used do
if !pushed.contains u then
stack := ⟨u, none⟩ :: stack
pushed := pushed.insert u
| some used =>
let usedModules : NameSet :=
used.fold (init := {}) (fun s u => if let some m := env.getModuleFor? u then s.insert m else s)
let transitivelyUsed : BitVec N :=
used.fold (init := toBitVec usedModules) (fun s u => s ||| ((c2m.find? u).getD 0))
c2m := c2m.insert c transitivelyUsed
r := r ||| ((c2m.find? n).getD 0)
result := result.insert m (toNameSet r)
return result
where
toBitVec {N : Nat} (s : NameSet) : BitVec N :=
s.fold (init := 0) (fun b n => b ||| BitVec.twoPow _ ((env.header.moduleNames.getIdx? n).getD 0))
toNameSet {N : Nat} (b : BitVec N) : NameSet :=
env.header.moduleNames.zipWithIndex.foldl (init := {}) (fun s (n, i) => if b.getLsbD i then s.insert n else s)

Expand Down
6 changes: 6 additions & 0 deletions lakefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ name = "unused_transitive_imports"
root = "ImportGraph.UnusedTransitiveImports"
supportInterpreter = true

# `lake exe hoard_factor` computes a measure of unused declarations for each module.
[[lean_exe]]
name = "hoard_factor"
root = "ImportGraph.HoardFactor"
supportInterpreter = true

[[lean_lib]]
name = "ImportGraphTest"

2 changes: 1 addition & 1 deletion lean-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
leanprover/lean4:v4.13.0-rc1
leanprover/lean4:v4.13.0-rc3

0 comments on commit 5bd7ed7

Please sign in to comment.