-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MINOR - PII Scanner tests and log levels (#17686)
* MINOR - PII Scanner tests and log levels * MINOR - PII Scanner tests and log levels
- Loading branch information
Showing
6 changed files
with
75 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,54 +11,70 @@ | |
""" | ||
Test NER Scanner | ||
""" | ||
from unittest import TestCase | ||
|
||
from metadata.pii.scanners.ner_scanner import NERScanner | ||
import pytest | ||
|
||
from metadata.pii.scanners.ner_scanner import NERScanner, StringAnalysis | ||
|
||
class NERScannerTest(TestCase): | ||
""" | ||
Validate various typical column names | ||
""" | ||
|
||
ner_scanner = NERScanner() | ||
@pytest.fixture
def scanner() -> NERScanner:
    """Build the NERScanner instance handed to the tests that request it."""
    ner_scanner = NERScanner()
    return ner_scanner
|
||
def test_scanner_none(self): | ||
self.assertIsNone(self.ner_scanner.scan(list(range(100)))) | ||
self.assertIsNone( | ||
self.ner_scanner.scan( | ||
" ".split( | ||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam consequat quam sagittis convallis cursus." | ||
) | ||
|
||
def test_scanner_none(scanner):
    """scan() must return None for a list of integers and for filler text."""
    assert scanner.scan(list(range(100))) is None

    sentence = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
        "Nam consequat quam sagittis convallis cursus."
    )
    # Order matters: `sentence.split(" ")` yields the individual words,
    # whereas `" ".split(sentence)` evaluates to [" "] and would leave the
    # sentence itself unscanned.
    words = sentence.split(" ")
    assert scanner.scan(words) is None
|
||
def test_scanner_sensitive(self): | ||
self.assertEqual( | ||
self.ner_scanner.scan( | ||
[ | ||
"[email protected]", | ||
"[email protected]", | ||
"[email protected]", | ||
] | ||
).tag_fqn, | ||
"PII.Sensitive", | ||
) | ||
self.assertEqual( | ||
self.ner_scanner.scan( | ||
["im ok", "[email protected]", "not sensitive"] | ||
).tag_fqn, | ||
"PII.Sensitive", | ||
) | ||
|
||
def test_scanner_nonsensitive(self): | ||
self.assertEqual( | ||
self.ner_scanner.scan( | ||
[ | ||
"Washington", | ||
"Alaska", | ||
"Netherfield Lea Street", | ||
] | ||
).tag_fqn, | ||
"PII.NonSensitive", | ||
) | ||
def test_scanner_sensitive(scanner):
    """scan() must report PII.Sensitive for both sample lists below.

    The first list holds three protected values; the second places one such
    value between two plain strings.
    """
    expected_tag = "PII.Sensitive"

    protected_only = [
        "[email protected]",
        "[email protected]",
        "[email protected]",
    ]
    assert scanner.scan(protected_only).tag_fqn == expected_tag

    mixed_values = ["im ok", "[email protected]", "not sensitive"]
    assert scanner.scan(mixed_values).tag_fqn == expected_tag
|
||
|
||
def test_scanner_nonsensitive(scanner):
    """scan() must report PII.NonSensitive for the sample place/street names."""
    sample_values = ["Washington", "Alaska", "Netherfield Lea Street"]
    result = scanner.scan(sample_values)
    assert result.tag_fqn == "PII.NonSensitive"
|
||
|
||
def test_get_highest_score_label(scanner):
    """get_highest_score_label must return a single (label, score) pair.

    Checked once with distinct scores and once with both labels tied.
    """
    distinct_scores = {
        "PII.Sensitive": StringAnalysis(score=0.9, appearances=1),
        "PII.NonSensitive": StringAnalysis(score=0.8, appearances=1),
    }
    best = scanner.get_highest_score_label(distinct_scores)
    assert best == ("PII.Sensitive", 0.9)

    tied_scores = {
        "PII.Sensitive": StringAnalysis(score=1.0, appearances=1),
        "PII.NonSensitive": StringAnalysis(score=1.0, appearances=1),
    }
    best = scanner.get_highest_score_label(tied_scores)
    assert best == ("PII.Sensitive", 1.0)