From 21fd6e82f6d9eab48b4197a56a5c9b9f06b51b0e Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Tue, 30 Jan 2024 12:47:37 +0100 Subject: [PATCH 1/3] improve reader speed by adjusting sharedStringTable usage --- FsSpreadsheet.sln | 15 +++++++ build/ReleaseTasks.fs | 8 ++-- src/FsSpreadsheet.ExcelIO/Cell.fs | 17 ++++---- src/FsSpreadsheet.ExcelIO/FsExtensions.fs | 8 ++-- .../FsSpreadsheet.ExcelIO.fsproj | 2 +- src/FsSpreadsheet.ExcelIO/Row.fs | 12 +++--- .../SharedStringTable.fs | 7 +++- src/FsSpreadsheet.ExcelIO/SheetData.fs | 14 +++---- src/FsSpreadsheet.ExcelIO/Spreadsheet.fs | 8 +++- src/FsSpreadsheet.ExcelIO/Table.fs | 10 ++--- .../FsSpreadsheet.ExcelIO.Tests/FsWorkbook.fs | 3 +- .../OpenXml/Cell.fs | 4 +- .../OpenXml/FsExtensions.fs | 3 +- tests/Speedtest/Program.fs | 39 +++++++++++++++++++ tests/Speedtest/Speedtest.fsproj | 24 ++++++++++++ 15 files changed, 130 insertions(+), 44 deletions(-) create mode 100644 tests/Speedtest/Program.fs create mode 100644 tests/Speedtest/Speedtest.fsproj diff --git a/FsSpreadsheet.sln b/FsSpreadsheet.sln index 556dfc60..385997eb 100644 --- a/FsSpreadsheet.sln +++ b/FsSpreadsheet.sln @@ -49,6 +49,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "JS", "JS", "{ADCF7D08-F2EE- EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TestUtils", "tests\TestUtils\TestUtils.fsproj", "{60678E53-EDC4-4ADE-A9EE-B194BDC76B37}" EndProject +Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Speedtest", "tests\Speedtest\Speedtest.fsproj", "{6DAD5C65-64CA-4ED4-B609-2D068F021024}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -191,6 +193,18 @@ Global {60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x64.Build.0 = Release|Any CPU {60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.ActiveCfg = Release|Any CPU {60678E53-EDC4-4ADE-A9EE-B194BDC76B37}.Release|x86.Build.0 = Release|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.ActiveCfg = Debug|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x64.Build.0 = Debug|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.ActiveCfg = Debug|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Debug|x86.Build.0 = Debug|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|Any CPU.Build.0 = Release|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.ActiveCfg = Release|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x64.Build.0 = Release|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.ActiveCfg = Release|Any CPU + {6DAD5C65-64CA-4ED4-B609-2D068F021024}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -208,6 +222,7 @@ Global {96E12F19-B25A-415E-B965-F9DE8D713C67} = {F77AD108-C6B4-46BB-B7BC-13573F45F876} {ADCF7D08-F2EE-4DFD-A96A-7E0134A1546F} = {F77AD108-C6B4-46BB-B7BC-13573F45F876} {60678E53-EDC4-4ADE-A9EE-B194BDC76B37} = {F77AD108-C6B4-46BB-B7BC-13573F45F876} + {6DAD5C65-64CA-4ED4-B609-2D068F021024} = {F77AD108-C6B4-46BB-B7BC-13573F45F876} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {0EDE6697-0F13-4DB1-AC56-12C15A72D395} diff --git a/build/ReleaseTasks.fs b/build/ReleaseTasks.fs index 10bb74ff..4f267dc2 100644 --- a/build/ReleaseTasks.fs +++ b/build/ReleaseTasks.fs @@ -15,7 +15,7 @@ open Fake.Tools open Fake.IO open Fake.IO.Globbing.Operators -let createTag = BuildTask.create "CreateTag" [clean; build; runTests; pack] { +let createTag = BuildTask.create "CreateTag" [clean; build; runTests] { if promptYesNo (sprintf "tagging branch with %s OK?" stableVersionTag ) then Git.Branches.tag "" stableVersionTag Git.Branches.pushTag "" projectRepo stableVersionTag @@ -23,7 +23,7 @@ let createTag = BuildTask.create "CreateTag" [clean; build; runTests; pack] { failwith "aborted" } -let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests; packPrerelease] { +let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseTag; clean; build; runTests] { if promptYesNo (sprintf "tagging branch with %s OK?" prereleaseTag ) then Git.Branches.tag "" prereleaseTag Git.Branches.pushTag "" projectRepo prereleaseTag @@ -32,7 +32,7 @@ let createPrereleaseTag = BuildTask.create "CreatePrereleaseTag" [setPrereleaseT } -let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack] { +let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; packDotNet] { let targets = (!! (sprintf "%s/*.*pkg" pkgDir )) for target in targets do printfn "%A" target let msg = sprintf "[.NET] release package with version %s?" stableVersionTag @@ -45,7 +45,7 @@ let publishNuget = BuildTask.create "PublishNuget" [clean; build; runTests; pack else failwith "aborted" } -let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packPrerelease] { +let publishNugetPrerelease = BuildTask.create "PublishNugetPrerelease" [clean; build; runTests; packDotNetPrerelease] { let targets = (!! (sprintf "%s/*.*pkg" pkgDir )) for target in targets do printfn "%A" target let msg = sprintf "[.NET] release package with version %s?" prereleaseTag diff --git a/src/FsSpreadsheet.ExcelIO/Cell.fs b/src/FsSpreadsheet.ExcelIO/Cell.fs index 06870a6c..e0f3aad2 100644 --- a/src/FsSpreadsheet.ExcelIO/Cell.fs +++ b/src/FsSpreadsheet.ExcelIO/Cell.fs @@ -242,7 +242,7 @@ module Cell = /// /// Maps a Cell to the value string using a shared string table. /// - let tryGetValue (sharedStringTable:SharedStringTable Option) (cell:Cell) = + let tryGetValue (sharedStringTable:SST Option) (cell:Cell) = match cell |> tryGetType with | Some (CellValues.SharedString) when sharedStringTable.IsSome-> let sharedStringTable = sharedStringTable.Value @@ -251,8 +251,7 @@ module Cell = |> Option.map ( CellValue.getValue >> int - >> fun i -> SharedStringTable.getText i sharedStringTable - >> SharedStringTable.SharedStringItem.getText + >> fun i -> sharedStringTable.[i] ) | _ -> @@ -263,7 +262,7 @@ module Cell = /// /// Maps a Cell to the value string using a sharedStringTable. /// - let getValue (sharedStringTable : SharedStringTable Option) (cell : Cell) = + let getValue (sharedStringTable : SST Option) (cell : Cell) = match cell |> tryGetType with | Some (CellValues.SharedString) when sharedStringTable.IsSome-> let sharedStringTable = sharedStringTable.Value @@ -273,9 +272,7 @@ module Cell = |> CellValue.getValue |> int - sharedStringTable - |> SharedStringTable.getText sharedStringTableIndex - |> SharedStringTable.SharedStringItem.getText + sharedStringTable.[sharedStringTableIndex] | _ -> cell |> getCellValue @@ -291,15 +288,15 @@ module Cell = /// /// Includes a value from the sharedStringTable in Cell.CellValue.Text. /// - let includeSharedStringValue (sharedStringTable:SharedStringTable) (cell:Cell) = + let includeSharedStringValue (sharedStringTable:SST) (cell:Cell) = if not (isNull cell.DataType) then match cell |> tryGetType with | Some (CellValues.SharedString) -> let index = int cell.InnerText - match sharedStringTable |> Seq.tryItem index with + match sharedStringTable |> Array.tryItem index with | Some value -> cell.DataType <- EnumValue(CellValues.String) - cell.CellValue.Text <- value.InnerText + cell.CellValue.Text <- value | None -> cell.CellValue.Text <- cell.InnerText cell diff --git a/src/FsSpreadsheet.ExcelIO/FsExtensions.fs b/src/FsSpreadsheet.ExcelIO/FsExtensions.fs index d2ee0bcd..4af8e316 100644 --- a/src/FsSpreadsheet.ExcelIO/FsExtensions.fs +++ b/src/FsSpreadsheet.ExcelIO/FsExtensions.fs @@ -58,8 +58,7 @@ module FsExtensions = /// /// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value. /// - static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (xlsxCell : Cell) = - let sst = Spreadsheet.tryGetSharedStringTable doc + static member ofXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) = let cellValueString = Cell.getValue sst xlsxCell let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices let dt = @@ -201,7 +200,7 @@ module FsExtensions = /// Creates an FsWorkbook from a given SpreadsheetDocument. /// static member fromSpreadsheetDocument (doc : Packaging.SpreadsheetDocument) = - let sst = Spreadsheet.tryGetSharedStringTable doc + let sst = Spreadsheet.tryGetSharedStringTable doc |> Option.map SharedStringTable.toSST let xlsxWorkbookPart = Spreadsheet.getWorkbookPart doc let xlsxSheets = try @@ -228,7 +227,8 @@ module FsExtensions = let sheetId = Sheet.getID xlsxSheet let xlsxCells = Spreadsheet.getCellsBySheetID sheetId doc - |> Seq.map (FsCell.ofXlsxCell doc) + |> Seq.toArray + |> Array.map (FsCell.ofXlsxCell doc sst) let assocXlsxTables = xlsxTables |> Seq.tryPick (fun (sid,ts) -> if sid = sheetId then Some ts else None) diff --git a/src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj b/src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj index aaf316d2..d4f73109 100644 --- a/src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj +++ b/src/FsSpreadsheet.ExcelIO/FsSpreadsheet.ExcelIO.fsproj @@ -33,7 +33,7 @@ - + diff --git a/src/FsSpreadsheet.ExcelIO/Row.fs b/src/FsSpreadsheet.ExcelIO/Row.fs index 4f9d38f4..9af080dc 100644 --- a/src/FsSpreadsheet.ExcelIO/Row.fs +++ b/src/FsSpreadsheet.ExcelIO/Row.fs @@ -192,7 +192,7 @@ module Row = row) /// If the row contains a value at the given index, returns it. Returns none if not. - let tryGetValueAt (sst : SharedStringTable Option) index (row : Row) = + let tryGetValueAt (sst : SST Option) index (row : Row) = row |> tryGetCellAt index |> Option.bind (Cell.tryGetValue sst) @@ -257,7 +257,7 @@ module Row = |> extendSpanRight offset /// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable. - let getIndexedValues (sst : SharedStringTable Option) (row : Row) = + let getIndexedValues (sst : SST Option) (row : Row) = row |> toCellSeq |> Seq.choose (fun cell -> @@ -273,18 +273,18 @@ module Row = /// Maps the cells of the given row to the value strings. - let getRowValues (sst : SharedStringTable Option) (row : Row) = + let getRowValues (sst : SST Option) (row : Row) = row |> toCellSeq |> Seq.choose (Cell.tryGetValue sst) /// Maps each cell of the given row to each respective value strings if it exists, else returns None. - let tryGetRowValues (sst : SharedStringTable option) (row : Row) = + let tryGetRowValues (sst : SST option) (row : Row) = toCellSeq row |> Seq.map (Cell.tryGetValue sst) /// Maps the cells of the given row to the value strings for all existing cells. - let getPresentRowValues (sst : SharedStringTable option) (row : Row) = + let getPresentRowValues (sst : SST option) (row : Row) = toCellSeq row |> Seq.choose (Cell.tryGetValue sst) @@ -369,6 +369,6 @@ module Row = |> appendCell cell /// Includes a value from a sharedStringTable in the cells of the row. - let includeSharedStringValue (sst : SharedStringTable) (row : Row) = + let includeSharedStringValue (sst : SST) (row : Row) = row |> mapCells (Cell.includeSharedStringValue sst) diff --git a/src/FsSpreadsheet.ExcelIO/SharedStringTable.fs b/src/FsSpreadsheet.ExcelIO/SharedStringTable.fs index 1359fe7a..4a1557c0 100644 --- a/src/FsSpreadsheet.ExcelIO/SharedStringTable.fs +++ b/src/FsSpreadsheet.ExcelIO/SharedStringTable.fs @@ -5,6 +5,8 @@ open DocumentFormat.OpenXml.Packaging open DocumentFormat.OpenXml +type SST = string [] + /// Functions for working with SharedStringTables. module SharedStringTable = @@ -99,6 +101,9 @@ module SharedStringTable = try spreadsheetDocument.WorkbookPart.SharedStringTablePart.SharedStringTable |> Some with | _ -> None - + let toSST (sst : SharedStringTable) = + sst.Elements() + |> Seq.toArray + |> Array.map SharedStringItem.getText diff --git a/src/FsSpreadsheet.ExcelIO/SheetData.fs b/src/FsSpreadsheet.ExcelIO/SheetData.fs index 6be3f2e6..1e2939e9 100644 --- a/src/FsSpreadsheet.ExcelIO/SheetData.fs +++ b/src/FsSpreadsheet.ExcelIO/SheetData.fs @@ -120,19 +120,19 @@ module SheetData = /// Gets the string value of the cell at the given 1-based column and rowIndex, if it exists, else returns None. - let tryGetRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) = + let tryGetRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) = sheet |> tryGetRowAt rowIndex |> Option.map (Row.getRowValues sst) /// Gets the string values of the row at the given 1-based rowIndex. - let getRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) = + let getRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) = sheet |> getRowAt rowIndex |> Row.getRowValues sst /// Maps the cells of the given row to tuples of 1-based column indices and the value strings using a sharedStringTable, if it exists, else returns None. - let tryGetIndexedRowValuesAt (sst : SharedStringTable Option) rowIndex (sheet : SheetData) = + let tryGetIndexedRowValuesAt (sst : SST Option) rowIndex (sheet : SheetData) = sheet |> tryGetRowAt rowIndex |> Option.map (Row.getIndexedValues sst) @@ -214,13 +214,13 @@ module SheetData = |> Row.getCellAt columnIndex /// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable. - let getCellValueAt (sst : SharedStringTable Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) = + let getCellValueAt (sst : SST Option) (rowIndex : uint32) (columnIndex : uint32) (sheetData : SheetData) = sheetData |> getCellAt rowIndex columnIndex |> Cell.getValue sst /// Gets the string value of the cell at the given 1-based column- and rowIndex using a sharedStringTable if it exists. Else returns None. - let tryGetCellValueAt (sst : SharedStringTable Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) = + let tryGetCellValueAt (sst : SST Option) (rowIndex: uint32) (columnIndex : uint32) (sheetData:SheetData) = sheetData |> tryGetCellAt rowIndex columnIndex |> Option.bind (Cell.tryGetValue sst) @@ -279,7 +279,7 @@ module SheetData = sheet /// Includes a value from sharedStringTable in the cells of the rows of the sheetData - let includeSharedStringValue (sharedStringTable : SharedStringTable) (sheetData : SheetData) = + let includeSharedStringValue (sharedStringTable : SST) (sheetData : SheetData) = sheetData |> mapRows (Row.includeSharedStringValue sharedStringTable) @@ -288,7 +288,7 @@ module SheetData = //---------------------------------------------------------------------------------------------------------------------- /// Reads the values of all cells from a sheetData and a sharedStringTable and converts them into a sparse matrix. Values are stored sparsely in a dictionary, with the key being a row index and column index tuple. - let toSparseValueMatrix (sst : SharedStringTable) sheetData = + let toSparseValueMatrix (sst : SST) sheetData = let rows = getRows sheetData let noOfRows = countRows sheetData let noOfCols = diff --git a/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs b/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs index a5d8750f..8ef6f396 100644 --- a/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs +++ b/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs @@ -174,6 +174,7 @@ module Spreadsheet = let workbookPart = spreadsheetDocument.WorkbookPart let worksheetPart = Worksheet.WorksheetPart.getByID sheet.Id.Value workbookPart let stringTablePart = getOrInitSharedStringTablePart spreadsheetDocument + let sst = SharedStringTable.toSST stringTablePart.SharedStringTable seq { use reader = OpenXmlReader.Create(worksheetPart) @@ -183,7 +184,7 @@ module Spreadsheet = row.Elements() |> Seq.iter (fun item -> let cell = item :?> Cell - Cell.includeSharedStringValue stringTablePart.SharedStringTable cell |> ignore + Cell.includeSharedStringValue sst cell |> ignore ) yield row } @@ -195,9 +196,12 @@ module Spreadsheet = let getCellsBySheet (sheet : Sheet) (spreadsheetDocument : SpreadsheetDocument) = let workbookPart = spreadsheetDocument.WorkbookPart let worksheetPart = Worksheet.WorksheetPart.getByID sheet.Id.Value workbookPart + let includeSSV = match tryGetSharedStringTable spreadsheetDocument with - | Some sst -> Cell.includeSharedStringValue sst + | Some sst -> + let sstArray = sst |> SharedStringTable.toSST + Cell.includeSharedStringValue sstArray | None -> id seq { use reader = OpenXmlReader.Create(worksheetPart) diff --git a/src/FsSpreadsheet.ExcelIO/Table.fs b/src/FsSpreadsheet.ExcelIO/Table.fs index 662c94ca..b332f370 100644 --- a/src/FsSpreadsheet.ExcelIO/Table.fs +++ b/src/FsSpreadsheet.ExcelIO/Table.fs @@ -263,7 +263,7 @@ module Table = worksheetPart /// Create a table object by an area. If the first row of this area contains values in the given sheet, these are chosen as headers for the table and a table is returned. - let tryCreateWithExistingHeaders (sst : SharedStringTable Option) sheetData name area = + let tryCreateWithExistingHeaders (sst : SST Option) sheetData name area = if Area.isCorrect area then try let columns = @@ -301,7 +301,7 @@ module Table = |> Seq.tryFind (TableColumn.getName >> (=) name) /// If a column with the given header exists in the table, returns its values. Else returns None. - let tryGetColumnValuesByColumnHeader (sst : SharedStringTable Option) sheetData columnHeader (table : Table) = + let tryGetColumnValuesByColumnHeader (sst : SST Option) sheetData columnHeader (table : Table) = let area = getArea table table.TableColumns |> TableColumns.getTableColumns @@ -313,7 +313,7 @@ module Table = ) /// If a column with the given header exists in the table, returns its indexed values. Else returns None. - let tryGetIndexedColumnValuesByColumnHeader (sst : SharedStringTable Option) sheetData columnHeader (table : Table) = + let tryGetIndexedColumnValuesByColumnHeader (sst : SST Option) sheetData columnHeader (table : Table) = let area = getArea table table.TableColumns |> TableColumns.getTableColumns @@ -330,7 +330,7 @@ module Table = ) /// If a key column and a value with the given header exist in the table, returns a tuple list of keys and values (else returns None). Missing values get replaced with the given default value. - let tryGetKeyValuesByColumnHeaders (sst : SharedStringTable Option) sheetData keyColHeader valColHeader defaultValue (table : Table) = + let tryGetKeyValuesByColumnHeaders (sst : SST Option) sheetData keyColHeader valColHeader defaultValue (table : Table) = let area = getArea table let tableCols = table.TableColumns @@ -349,7 +349,7 @@ module Table = | _ -> None /// Reads a complete table. Values are stored sparsely in a dictionary, with the key being a row index and column header tuple. - let toSparseValueMatrix (sst : SharedStringTable Option) sheetData (table : Table) = + let toSparseValueMatrix (sst : SST Option) sheetData (table : Table) = let area = getArea table let dictionary = System.Collections.Generic.Dictionary() [Area.leftBoundary area .. Area.rightBoundary area] diff --git a/tests/FsSpreadsheet.ExcelIO.Tests/FsWorkbook.fs b/tests/FsSpreadsheet.ExcelIO.Tests/FsWorkbook.fs index c320968b..1a4fac25 100644 --- a/tests/FsSpreadsheet.ExcelIO.Tests/FsWorkbook.fs +++ b/tests/FsSpreadsheet.ExcelIO.Tests/FsWorkbook.fs @@ -73,7 +73,8 @@ let performance = sw.Start() let wb = FsWorkbook.fromXlsxFile(p) sw.Stop() - Expect.isLessThan sw.Elapsed.Milliseconds 2000 "Elapsed time should be less than 2000ms" + let elapsed = sw.Elapsed.Milliseconds + Expect.isLessThan elapsed 2000 $"Elapsed time should be less than 2000ms, but was {elapsed}ms" Expect.equal (wb.GetWorksheetAt(1).Rows.Count) 153991 "Row count should be 153991" ) diff --git a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Cell.fs b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Cell.fs index 22ae6f3c..3987747a 100644 --- a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Cell.fs +++ b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Cell.fs @@ -10,8 +10,8 @@ let testFilePath = System.IO.Path.Combine(__SOURCE_DIRECTORY__, "../data", "test let ssdFox = Packaging.SpreadsheetDocument.Open(testFilePath, false) let wbpFox = ssdFox.WorkbookPart let sstpFox = wbpFox.SharedStringTablePart -let sstFox = sstpFox.SharedStringTable -let sstFoxInnerText = sstFox.InnerText +let sstFox = SharedStringTable.toSST sstpFox.SharedStringTable +let sstFoxInnerText = sstpFox.SharedStringTable.InnerText let wsp1Fox = (wbpFox.WorksheetParts |> Array.ofSeq)[0] let cbsi1Fox = wsp1Fox.Worksheet.Descendants() |> Array.ofSeq let nullCell = Cell.create (Some Spreadsheet.CellValues.Error) "A1" (Cell.CellValue.create "") diff --git a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs index 5ffc6d5c..de63d6a1 100644 --- a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs +++ b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs @@ -84,7 +84,8 @@ let fsExtensionTests = testList "ofXlsxCell" [ let stream = new MemoryStream() let doc = Spreadsheet.initEmptyOnStream stream - let testCell = FsCell.ofXlsxCell doc dummyXlsxCell + let sst = SharedStringTable.tryGet doc |> Option.map SharedStringTable.toSST + let testCell = FsCell.ofXlsxCell doc sst dummyXlsxCell testCase "is equal in value" <| fun _ -> Expect.equal (testCell.ValueAsString()) dummyXlsxCell.CellValue.Text "values are not equal" testCase "is equal in address/reference" <| fun _ -> diff --git a/tests/Speedtest/Program.fs b/tests/Speedtest/Program.fs new file mode 100644 index 00000000..c01019f0 --- /dev/null +++ b/tests/Speedtest/Program.fs @@ -0,0 +1,39 @@ +open FsSpreadsheet +open FsSpreadsheet.ExcelIO + +open ClosedXML.Excel + +[] +let main argv = + + let assayPath = @"C:\Users\HLWei\Downloads\ipk-gabi-wheat\assays\alpha_lattice\isa.assay.xlsx" + let studyPath = @"C:\Users\HLWei\Downloads\ipk-gabi-wheat\studies\alpha_lattice_trial_data\isa.study.xlsx" + let investigationPath = @"C:\Users\HLWei\Downloads\ipk-gabi-wheat\isa.investigation.xlsx" + + let fsSpreadsheet() = + + + let readAssay() = FsWorkbook.fromXlsxFile assayPath + let readStudy() = FsWorkbook.fromXlsxFile studyPath + let readInvestigation() = FsWorkbook.fromXlsxFile investigationPath + + readAssay() |> ignore + readStudy() |> ignore + readInvestigation() |> ignore + + let closedXML() = + + // Read xlsx file using closedxml + let readAssay() = new ClosedXML.Excel.XLWorkbook(assayPath) + let readStudy() = new ClosedXML.Excel.XLWorkbook(studyPath) + let readInvestigation() = new ClosedXML.Excel.XLWorkbook(investigationPath) + + readAssay() |> ignore + readStudy() |> ignore + readInvestigation() |> ignore + + + fsSpreadsheet() + closedXML() + + 1 \ No newline at end of file diff --git a/tests/Speedtest/Speedtest.fsproj b/tests/Speedtest/Speedtest.fsproj new file mode 100644 index 00000000..bf38f3f6 --- /dev/null +++ b/tests/Speedtest/Speedtest.fsproj @@ -0,0 +1,24 @@ + + + + Exe + net6.0 + + + + + + + + + + + + + + + + + + + From 235372355450ddb0414fc0168c01ff21a134f3fd Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Tue, 30 Jan 2024 12:54:54 +0100 Subject: [PATCH 2/3] bump to 5.1.2 --- .gitignore | 1 + RELEASE_NOTES.md | 6 ++++++ package.json | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0d7f3130..31a62efa 100644 --- a/.gitignore +++ b/.gitignore @@ -352,6 +352,7 @@ MigrationBackup/ pkg/ tmp/ +output/ .fsdocs/ /tests/FsSpreadsheet.JsNativeTests/fable/**/*.js /tests/FsSpreadsheet.ExcelIO.Tests/TestFiles/WRITE_*.xlsx diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 1a52d90a..0aa79480 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,3 +1,9 @@ +### 5.1.2+21fd6e8 (Released 2024-1-30) +* Additions: + * [[#21fd6e8](https://github.com/CSBiology/FsSpreadsheet/commit/21fd6e82f6d9eab48b4197a56a5c9b9f06b51b0e)] improve reader speed by adjusting sharedStringTable usage +* Bugfixes: + * [[#66b6713](https://github.com/CSBiology/FsSpreadsheet/commit/66b6713381be3f133c58db7ca78fd36bd63c0ad1)] fix npm publish target + ### 5.1.1+e7cc638 (Released 2024-1-26) * Additions: * [[#e7cc638](https://github.com/CSBiology/FsSpreadsheet/commit/e7cc638b170c570005b6cd8378d1e0ac31075be7)] improve rowWithRange SkipSearch performance diff --git a/package.json b/package.json index 808e057e..2e65b64c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@fslab/fsspreadsheet", - "version": "5.1.1", + "version": "5.1.2", "description": "Minimal spreadsheet creation and manipulation using exceljs io.", "type": "module", "main": "Xlsx.js", From 2b882586b12aae0500eaad98d01603521dfee26f Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Wed, 31 Jan 2024 16:34:59 +0100 Subject: [PATCH 3/3] small speed improvement of reader by skipping sst include on opendocument cell --- src/FsSpreadsheet.ExcelIO/Cell.fs | 2 +- src/FsSpreadsheet.ExcelIO/FsExtensions.fs | 40 ++++++++++++++++++- src/FsSpreadsheet.ExcelIO/Spreadsheet.fs | 12 +++--- src/FsSpreadsheet/FsAddress.fs | 10 ++++- .../OpenXml/FsExtensions.fs | 15 +++---- .../OpenXml/Spreadsheet.fs | 2 +- 6 files changed, 61 insertions(+), 20 deletions(-) diff --git a/src/FsSpreadsheet.ExcelIO/Cell.fs b/src/FsSpreadsheet.ExcelIO/Cell.fs index e0f3aad2..6ba80fde 100644 --- a/src/FsSpreadsheet.ExcelIO/Cell.fs +++ b/src/FsSpreadsheet.ExcelIO/Cell.fs @@ -264,7 +264,7 @@ module Cell = /// let getValue (sharedStringTable : SST Option) (cell : Cell) = match cell |> tryGetType with - | Some (CellValues.SharedString) when sharedStringTable.IsSome-> + | Some (CellValues.SharedString) when sharedStringTable.IsSome -> let sharedStringTable = sharedStringTable.Value let sharedStringTableIndex = cell diff --git a/src/FsSpreadsheet.ExcelIO/FsExtensions.fs b/src/FsSpreadsheet.ExcelIO/FsExtensions.fs index 4af8e316..70d3eaca 100644 --- a/src/FsSpreadsheet.ExcelIO/FsExtensions.fs +++ b/src/FsSpreadsheet.ExcelIO/FsExtensions.fs @@ -89,6 +89,42 @@ module FsExtensions = //let dt, v = DataType.InferCellValue v FsCell.createWithDataType dt (int row) (int col) (cellValue) + /// + /// Creates an FsCell on the basis of an XlsxCell. Uses a SharedStringTable if present to get the XlsxCell's value. + /// + static member tryOfXlsxCell (doc : Packaging.SpreadsheetDocument) (sst : SST option) (xlsxCell : Cell) = + Cell.tryGetValue sst xlsxCell + |> Option.map (fun cellValueString -> + let col, row = xlsxCell.CellReference.Value |> CellReference.toIndices + let dt = + try DataType.ofXlsXCell doc xlsxCell + with _ -> DataType.Number // default is number + let mutable cellValue : obj = cellValueString + match dt with + | Date -> + try + // datetime is written as float counting days since 1900. + // We use the .NET helper because we really do not want to deal with datetime issues. + cellValue <- System.DateTime.FromOADate(float cellValueString) + with + | _ -> () + | Boolean -> + // boolean is written as int/float either 0 or null + match cellValueString.ToLower() with + | "1" | "true" -> cellValue <- true + | "0" | "false" -> cellValue <- false + | _ -> () + | Number -> + try + cellValue <- float cellValueString + with + | _ -> + () + | Empty | String -> () + //let dt, v = DataType.InferCellValue v + FsCell.createWithDataType dt (int row) (int col) (cellValue) + ) + static member toXlsxCell (doc : Packaging.SpreadsheetDocument) (cell : FsCell) = Cell.fromValueWithDataType doc (uint32 cell.ColumnNumber) (uint32 cell.RowNumber) (cell.ValueAsString()) cell.DataType @@ -226,9 +262,9 @@ module FsExtensions = fun xlsxSheet -> let sheetId = Sheet.getID xlsxSheet let xlsxCells = - Spreadsheet.getCellsBySheetID sheetId doc + Spreadsheet.getCellsBySheetID sheetId doc true |> Seq.toArray - |> Array.map (FsCell.ofXlsxCell doc sst) + |> Array.choose (FsCell.tryOfXlsxCell doc sst) let assocXlsxTables = xlsxTables |> Seq.tryPick (fun (sid,ts) -> if sid = sheetId then Some ts else None) diff --git a/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs b/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs index 8ef6f396..a0c0370f 100644 --- a/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs +++ b/src/FsSpreadsheet.ExcelIO/Spreadsheet.fs @@ -193,11 +193,13 @@ module Spreadsheet = /// /// Returns a 1D-sequence of Cells for the given Sheet of the given SpreadsheetDocument. /// - let getCellsBySheet (sheet : Sheet) (spreadsheetDocument : SpreadsheetDocument) = + let getCellsBySheet (sheet : Sheet) (spreadsheetDocument : SpreadsheetDocument) (skipSST : bool) = let workbookPart = spreadsheetDocument.WorkbookPart let worksheetPart = Worksheet.WorksheetPart.getByID sheet.Id.Value workbookPart let includeSSV = + if skipSST then id + else match tryGetSharedStringTable spreadsheetDocument with | Some sst -> let sstArray = sst |> SharedStringTable.toSST @@ -217,18 +219,18 @@ module Spreadsheet = /// Returns a 1D-sequence of Cells for the given sheetIndex of the given SpreadsheetDocument. /// /// SheetIndices are 1-based. - let getCellsBySheetIndex (sheetIndex : uint) (spreadsheetDocument : SpreadsheetDocument) = + let getCellsBySheetIndex (sheetIndex : uint) (spreadsheetDocument : SpreadsheetDocument) (skipSST : bool) = match Sheet.tryItem sheetIndex spreadsheetDocument with - | Some sheet -> getCellsBySheet sheet spreadsheetDocument + | Some sheet -> getCellsBySheet sheet spreadsheetDocument skipSST | None -> seq {()} /// /// Returns a 1D-sequence of Cells for the given sheetIndex of the given SpreadsheetDocument. /// /// SheetIndices are 1-based. - let getCellsBySheetID (sheetID : string) (spreadsheetDocument : SpreadsheetDocument) = + let getCellsBySheetID (sheetID : string) (spreadsheetDocument : SpreadsheetDocument) (skipSST : bool) = match Sheet.tryGetById sheetID spreadsheetDocument with - | Some sheet -> getCellsBySheet sheet spreadsheetDocument + | Some sheet -> getCellsBySheet sheet spreadsheetDocument skipSST | None -> seq {()} //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/FsSpreadsheet/FsAddress.fs b/src/FsSpreadsheet/FsAddress.fs index c0f0a204..26e2348e 100644 --- a/src/FsSpreadsheet/FsAddress.fs +++ b/src/FsSpreadsheet/FsAddress.fs @@ -4,6 +4,13 @@ /// Module containing functions to work with "A1" style excel cell references. module CellReference = + [] + let indexPattern = + "([A-Z]*)(\d*)" + + let indexRegex = + System.Text.RegularExpressions.Regex(indexPattern) + /// Transforms excel column string indices (e.g. A, B, Z, AA, CD) to index number (starting with A = 1). let colAdressToIndex (columnAdress : string) = let length = columnAdress.Length @@ -33,8 +40,7 @@ module CellReference = /// Maps a "A1" style excel cell reference to a column * row index tuple (1 Based indices). let toIndices (reference : string) = let inp = reference.ToUpper() - let pattern = "([A-Z]*)(\d*)" - let regex = System.Text.RegularExpressions.Regex.Match(inp,pattern) + let regex = indexRegex.Match(inp) if regex.Success then regex.Groups diff --git a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs index de63d6a1..01d77aa4 100644 --- a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs +++ b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/FsExtensions.fs @@ -81,19 +81,16 @@ let fsExtensionTests = // ] //] testList "FsCell" [ - testList "ofXlsxCell" [ + testCase "ofXlsxCell" (fun () -> let stream = new MemoryStream() let doc = Spreadsheet.initEmptyOnStream stream let sst = SharedStringTable.tryGet doc |> Option.map SharedStringTable.toSST let testCell = FsCell.ofXlsxCell doc sst dummyXlsxCell - testCase "is equal in value" <| fun _ -> - Expect.equal (testCell.ValueAsString()) dummyXlsxCell.CellValue.Text "values are not equal" - testCase "is equal in address/reference" <| fun _ -> - Expect.equal testCell.Address.Address dummyXlsxCell.CellReference.Value "addresses/references are not equal" - testCase "is equal in DataType/CellValues" <| fun _ -> - let dtOfCvs = DataType.ofXlsXCell doc dummyXlsxCell - Expect.equal testCell.DataType dtOfCvs "addresses/references are not equal" - ] + Expect.equal (testCell.ValueAsString()) dummyXlsxCell.CellValue.Text "values are not equal" + Expect.equal testCell.Address.Address dummyXlsxCell.CellReference.Value "addresses/references are not equal" + let dtOfCvs = DataType.ofXlsXCell doc dummyXlsxCell + Expect.equal testCell.DataType dtOfCvs "addresses/references are not equal" + ) ] testList "FsTable" [ testList "GetWorksheetOfTable" [ diff --git a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Spreadsheet.fs b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Spreadsheet.fs index cebdccfb..1e9a3ea3 100644 --- a/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Spreadsheet.fs +++ b/tests/FsSpreadsheet.ExcelIO.Tests/OpenXml/Spreadsheet.fs @@ -63,7 +63,7 @@ let spreadsheetTests = Expect.equal wbp wbpFox "Differs" ] testList "getCellsBySheetIndex" [ - let cbsi1 = Spreadsheet.getCellsBySheetIndex 1u ssd |> Array.ofSeq + let cbsi1 = Spreadsheet.getCellsBySheetIndex 1u ssd false |> Array.ofSeq // not applicable since Cell arrays and Cells always differ from each other (even if you compare the same cell, e.g. `cell1 = cell1`) //testCase "is equal to cbsi1Fox" <| fun _ -> // Expect.equal cbsi1 cbsi1Fox "Differs"