Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix and improve detection and import of ods, xlsx and csv documents #1446

Merged
merged 11 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/DatetimeDateBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DatetimeDateBusiness extends SuperBusiness implements IColumnTypeBusiness
* @return string
*/
public function parseValue($value, ?Column $column = null): string {
return json_encode($this->isValidDate($value, 'Y-m-d') ? $value : '');
return json_encode($this->isValidDate((string)$value, 'Y-m-d') ? (string)$value : '');
}

/**
Expand All @@ -26,7 +26,7 @@ public function parseValue($value, ?Column $column = null): string {
* @return bool
*/
public function canBeParsed($value, ?Column $column = null): bool {
return $this->isValidDate($value, 'Y-m-d');
return $this->isValidDate((string)$value, 'Y-m-d');
}

}
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/DatetimeTimeBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DatetimeTimeBusiness extends SuperBusiness implements IColumnTypeBusiness
* @return string
*/
public function parseValue($value, ?Column $column = null): string {
return json_encode($this->isValidDate($value, 'H:i') ? $value : '');
return json_encode($this->isValidDate((string)$value, 'H:i') ? $value : '');
}

/**
Expand All @@ -26,7 +26,7 @@ public function parseValue($value, ?Column $column = null): string {
* @return bool
*/
public function canBeParsed($value, ?Column $column = null): bool {
return $this->isValidDate($value, 'H:i');
return $this->isValidDate((string)$value, 'H:i');
}

}
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/SelectionCheckBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
use OCA\Tables\Db\Column;

class SelectionCheckBusiness extends SuperBusiness implements IColumnTypeBusiness {
public const PATTERN_POSITIVE = ['yes', '1', true, 1, 'true'];
public const PATTERN_NEGATIVE = ['no', '0', false, 0, 'false'];
public const PATTERN_POSITIVE = ['yes', '1', true, 1, 'true', 'TRUE'];
public const PATTERN_NEGATIVE = ['no', '0', false, 0, 'false', 'FALSE'];

/**
* @param mixed $value
Expand Down
94 changes: 85 additions & 9 deletions lib/Service/ImportService.php
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ private function getPreviewData(Worksheet $worksheet): array {
$columns[] = [
'title' => $title,
'type' => $this->rawColumnDataTypes[$colIndex]['type'],
'subtype' => $this->rawColumnDataTypes[$colIndex]['subtype'],
'subtype' => $this->rawColumnDataTypes[$colIndex]['subtype'] ?? null,
'numberDecimals' => $this->rawColumnDataTypes[$colIndex]['number_decimals'] ?? 0,
'numberPrefix' => $this->rawColumnDataTypes[$colIndex]['number_prefix'] ?? '',
'numberSuffix' => $this->rawColumnDataTypes[$colIndex]['number_suffix'] ?? '',
Expand All @@ -154,13 +154,26 @@ private function getPreviewData(Worksheet $worksheet): array {
$cellIterator = $row->getCellIterator();
$cellIterator->setIterateOnlyExistingCells(false);

foreach ($cellIterator as $cellIndex => $cell) {
foreach ($cellIterator as $cell) {
$value = $cell->getValue();
$colIndex = (int) $cellIndex;
// $cellIterator`s index is based on 1, not 0.
$colIndex = $cellIterator->getCurrentColumnIndex() - 1;
$column = $this->columns[$colIndex];

if (($column && $column->getType() === 'datetime') || (is_array($columns[$colIndex]) && $columns[$colIndex]['type'] === 'datetime')) {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d H:i');
if (isset($columns[$colIndex]['subtype']) && $columns[$colIndex]['subtype'] === 'date') {
$format = 'Y-m-d';
} elseif (isset($columns[$colIndex]['subtype']) && $columns[$colIndex]['subtype'] === 'time') {
$format = 'H:i';
} else {
$format = 'Y-m-d H:i';
}

try {
$value = Date::excelToDateTimeObject($value)->format($format);
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format($format);
}
} elseif (($column && $column->getType() === 'number' && $column->getNumberSuffix() === '%')
|| (is_array($columns[$colIndex]) && $columns[$colIndex]['type'] === 'number' && $columns[$colIndex]['numberSuffix'] === '%')) {
$value = $value * 100;
Expand Down Expand Up @@ -285,8 +298,14 @@ public function import(?int $tableId, ?int $viewId, string $path, bool $createMi
* @throws PermissionError
*/
private function loop(Worksheet $worksheet): void {
$firstRow = $worksheet->getRowIterator()->current();
$secondRow = $worksheet->getRowIterator()->seek(2)->current();
$rowIterator = $worksheet->getRowIterator();
$firstRow = $rowIterator->current();
$rowIterator->next();
if (!$rowIterator->valid()) {
return;
}
$secondRow = $rowIterator->current();
unset($rowIterator);
$this->getColumns($firstRow, $secondRow);

if (empty(array_filter($this->columns))) {
Expand Down Expand Up @@ -361,8 +380,32 @@ private function createRow(Row $row): void {

$value = $cell->getValue();
$hasData = $hasData || !empty($value);

if ($column->getType() === 'datetime') {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d H:i');
if ($column->getType() === 'datetime' && $column->getSubtype() === 'date') {
$format = 'Y-m-d';
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'time') {
$format = 'H:i';
} else {
$format = 'Y-m-d H:i';
}
try {
$value = Date::excelToDateTimeObject($value)->format($format);
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format($format);
}
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'date') {
try {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d');
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format('Y-m-d');
}
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'time') {
try {
$value = Date::excelToDateTimeObject($value)->format('H:i');
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format('H:i');
}
} elseif ($column->getType() === 'number' && $column->getNumberSuffix() === '%') {
$value = $value * 100;
} elseif ($column->getType() === 'selection' && $column->getSubtype() === 'check') {
Expand Down Expand Up @@ -439,6 +482,12 @@ private function getColumns(Row $firstRow, Row $secondRow): void {
$dataTypes[] = $this->parseColumnDataType($secondRowCellIterator->current());
} else {
$this->logger->debug('No cell given or cellValue is empty while loading columns for importing');
if ($cell->getDataType() === 'null') {
// LibreOffice generated XLSX doc may have more empty columns in the first row.
// Continue without increasing error count.
// Question: What about tables where a column does not have a heading?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this question left as a TODO? or addressed below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha. Good finding. Forgot about this one. Let me check me one thing, to ensure that this does not change from the previous behavior. Might indeed need a touch.

There is also a documentation component in that headers were expected on all columns.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, import works as before. But there is one difference:

Screenshot_20241126_123404

Previously it was one.

It was testing with a three-column table where the middle-one was not having a caption. But the values were set (always strings for simplicity). Before the change and now, only the columns with a caption were being added, the middle one ignored.

On import, however, the first two columns were added and the third column ignored.

To visualize, the original

Caption 1 Caption 3
Aaa A Zzz
Bbb B Yyy
Ccc C Xxx

turns (elements taken over in italic) into

Caption 1 Caption 3
Aaa A
Bbb B
Ccc C

Our columns require a caption. Something to document, but I look into increasing the error again, if it is not a trailing column.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Solved in 50815ae

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also add that as a note to the import dialog, most users will probably never get to the wiki

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also add that as a note to the import dialog, most users will probably never get to the wiki

👍 Added a brief supplement in 36179dc

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks 👍

continue;
}
$this->countErrors++;
}
$secondRowCellIterator->next();
Expand Down Expand Up @@ -468,9 +517,33 @@ private function parseColumnDataType(Cell $cell): array {
'subtype' => 'line',
];

if (Date::isDateTime($cell) || $originDataType === DataType::TYPE_ISO_DATE) {
try {
if ($value === false) {
throw new \Exception('We do not accept `false` here');
}
$dateValue = new \DateTimeImmutable($value);
} catch (\Exception) {
}

if (isset($dateValue)
|| Date::isDateTime($cell)
|| $originDataType === DataType::TYPE_ISO_DATE) {
// the formatted value stems from the office document and shows the original user intent
$dateAnalysis = date_parse($formattedValue);
$containsDate = $dateAnalysis['year'] !== false || $dateAnalysis['month'] !== false || $dateAnalysis['day'] !== false;
$containsTime = $dateAnalysis['hour'] !== false || $dateAnalysis['minute'] !== false || $dateAnalysis['second'] !== false;

if ($containsDate && !$containsTime) {
$subType = 'date';
} elseif (!$containsDate && $containsTime) {
$subType = 'time';
} else {
$subType = '';
}

$dataType = [
'type' => 'datetime',
'subtype' => $subType,
];
} elseif ($originDataType === DataType::TYPE_NUMERIC) {
if (str_contains($formattedValue, '%')) {
Expand Down Expand Up @@ -514,7 +587,10 @@ private function parseColumnDataType(Cell $cell): array {
'type' => 'number',
];
}
} elseif ($originDataType === DataType::TYPE_BOOL) {
} elseif ($originDataType === DataType::TYPE_BOOL
|| ($originDataType === DataType::TYPE_FORMULA
&& in_array($formattedValue, ['FALSE', 'TRUE'], true))
) {
$dataType = [
'type' => 'selection',
'subtype' => 'check',
Expand Down
51 changes: 28 additions & 23 deletions tests/integration/features/APIv1.feature
Original file line number Diff line number Diff line change
Expand Up @@ -187,31 +187,36 @@ Feature: APIv1
Then user deletes last created row
Then user "participant1" deletes table with keyword "Rows check"


@api1
Scenario: Import csv table
Given file "/import.csv" exists for user "participant1" with following data
| Col1 | Col2 | Col3 | num | emoji | special |
| Val1 | Val2 | Val3 | 1 | 💙 | Ä |
| great | news | here | 99 | ⚠️ | Ö |
Given table "Import test" with emoji "👨🏻‍💻" exists for user "participant1" as "base1"
When user imports file "/import.csv" into last created table
@api1 @import
Scenario Outline: Import a document file
Given user "participant1" uploads file "<importfile>"
And table "Import test" with emoji "👨🏻‍💻" exists for user "participant1" as "base1"
When user imports file "/<importfile>" into last created table
Then import results have the following data
| found_columns_count | 6 |
| created_columns_count | 6 |
| inserted_rows_count | 2 |
| errors_count | 0 |
Then table has at least following columns
| Col1 |
| Col2 |
| Col3 |
| num |
| emoji |
| special |
| found_columns_count | 10 |
| created_columns_count | 10 |
| inserted_rows_count | 2 |
| errors_count | 0 |
Then table has at least following typed columns
| Col1 | text |
| Col2 | text |
| Col3 | text |
| num | number |
| emoji | text |
| special | text |
| date | datetime |
| truth | selection |
Then table contains at least following rows
| Col1 | Col2 | Col3 | num | emoji | special |
| Val1 | Val2 | Val3 | 1 | 💙 | Ä |
| great | news | here | 99 | ⚠️ | Ö |
| Date and Time | Col1 | Col2 | Col3 | num | emoji | special | date | truth | time |
| 2022-02-20 08:42 | Val1 | Val2 | Val3 | 1 | 💙 | Ä | 2024-02-24 | false | 18:48 |
| 2016-06-01 13:37 | great | news | here | 99 | ⚠ | Ö | 2016-06-01 | true | 01:23 |

Examples:
| importfile |
| import-from-libreoffice.ods |
| import-from-libreoffice.xlsx |
| import-from-ms365.xlsx |
| import-from-libreoffice.csv |

@api1
Scenario: Create, edit and delete views
Expand Down
49 changes: 48 additions & 1 deletion tests/integration/features/bootstrap/FeatureContext.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Psr7\Utils;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\ExpectationFailedException;
use Psr\Http\Message\ResponseInterface;
Expand Down Expand Up @@ -64,6 +65,8 @@ class FeatureContext implements Context {
private array $tableData = [];
private array $viewData = [];

private $importColumnData = null;

// use CommandLineTrait;
private CollectionManager $collectionManager;

Expand All @@ -89,6 +92,7 @@ public function setUp() {
* @AfterScenario
*/
public function cleanupUsers() {
$this->importColumnData = null;
$this->collectionManager->cleanUp();
foreach ($this->createdUsers as $user) {
$this->deleteUser($user);
Expand Down Expand Up @@ -467,8 +471,21 @@ public function columnsForNodeV2(string $nodeType, string $nodeName, ?TableNode
// (((((((((((((((((((((((((((( END API v2 )))))))))))))))))))))))))))))))))))


/**
* @Given user :user uploads file :file
*/
public function uploadFile(string $user, string $file): void {
$this->setCurrentUser($user);

$localFilePath = __DIR__ . '/../../resources/' . $file;

$url = sprintf('%sremote.php/dav/files/%s/%s', $this->baseUrl, $user, $file);
$body = Utils::streamFor(fopen($localFilePath, 'rb'));

$this->sendRequestFullUrl('PUT', $url, $body);

Assert::assertEquals(201, $this->response->getStatusCode());
}

// IMPORT --------------------------

Expand Down Expand Up @@ -574,7 +591,7 @@ public function checkRowsExists(TableNode $table): void {
$allValuesForColumn[] = $row[$indexForCol];
}
foreach ($table->getColumn($key) as $item) {
Assert::assertTrue(in_array($item, $allValuesForColumn));
Assert::assertTrue(in_array($item, $allValuesForColumn), sprintf('%s not in %s', $item, implode(', ', $allValuesForColumn)));
}
}
}
Expand Down Expand Up @@ -1190,6 +1207,36 @@ public function tableColumns(?TableNode $body = null): void {
}
}

/**
* @Then table has at least following typed columns
*
* @param TableNode|null $body
*/
public function tableTypedColumns(?TableNode $body = null): void {
$this->sendRequest(
'GET',
'/apps/tables/api/1/tables/'.$this->tableId.'/columns'
);

$data = $this->getDataFromResponse($this->response);
Assert::assertEquals(200, $this->response->getStatusCode());

// check if no columns exists
if ($body === null) {
Assert::assertCount(0, $data);
return;
}

$colByTitle = [];
foreach ($data as $d) {
$colByTitle[$d['title']] = $d['type'];
}
foreach ($body->getRows() as $columnData) {
Assert::assertArrayHasKey($columnData[0], $colByTitle);
Assert::assertSame($columnData[1], $colByTitle[$columnData[0]], sprintf('Column "%s" has unexpected type "%s"', $columnData[0], $colByTitle[$columnData[0]]));
}
}

/**
* @Then user deletes last created column
*/
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/resources/import-from-libreoffice.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date and Time,Col1,Col2,Col3,num,emoji,special,date,truth,time
2022-02-20 08:42,Val1,Val2,Val3,1,💙,Ä,2024-02-24,false,18:48
2016-06-01 13:37,great,news,here,99,⚠,Ö,2016-06-01,true,01:23
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
3 changes: 3 additions & 0 deletions tests/integration/resources/import-from-ms365.xlsx.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Loading