Skip to content

Commit

Permalink
Fix for issue #48
Browse files Browse the repository at this point in the history
  • Loading branch information
bjornpalmqvist authored and mfriedenhagen committed Mar 15, 2020
1 parent dea12f4 commit 52017dd
Show file tree
Hide file tree
Showing 7 changed files with 599 additions and 29 deletions.
6 changes: 0 additions & 6 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,6 @@
<artifactId>maven-common-artifact-filters</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>56.1</version>
</dependency>

</dependencies>

<build>
Expand Down
1 change: 1 addition & 0 deletions src/it/require-encoding-iso88591/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<encoding>ISO-8859-1</encoding>
<includes>src/main/resources/ascii.txt,src/main/resources/iso88591.txt</includes>
<excludes>pom.xml</excludes>
<acceptAsciiSubset>true</acceptAsciiSubset>
</requireEncoding>
</rules>
</configuration>
Expand Down
2 changes: 1 addition & 1 deletion src/it/require-encoding-iso88591/verify.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ String text = file.getText("utf-8");
assert !text.contains( "Encoding US-ASCII is hard to detect." )
assert !text.contains( "Files not encoded in US-ASCII:" )
assert !text.contains( "src/main/resources/utf8.txt==>".replace('/', File.separator) )
assert text.contains( "src/main/resources/ascii.txt==>ISO-8859-1".replace('/', File.separator) )
assert text.contains( "src/main/resources/ascii.txt==>US-ASCII".replace('/', File.separator) )
assert text.contains( "src/main/resources/iso88591.txt==>ISO-8859-1".replace('/', File.separator) )

return true;
2 changes: 1 addition & 1 deletion src/it/require-encoding-usascii/verify.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ String text = file.getText("utf-8");
assert text.contains( "Encoding US-ASCII is hard to detect." )
assert text.contains( "Files not encoded in US-ASCII:" )
assert text.contains( "src/main/resources/utf8.txt==>UTF-8".replace('/', File.separator) )
assert text.contains( "src/main/resources/ascii.txt==>ISO-8859-1".replace('/', File.separator) )
assert text.contains( "src/main/resources/ascii.txt==>US-ASCII".replace('/', File.separator) )
assert text.contains( "src/main/resources/iso88591.txt==>ISO-8859-1".replace('/', File.separator) )

return true;
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.maven.enforcer.rule.api.EnforcerRule;
import org.apache.maven.enforcer.rule.api.EnforcerRuleException;
Expand All @@ -14,9 +19,7 @@
import org.codehaus.plexus.util.DirectoryScanner;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.freebsd.file.FileEncoding;

/**
* Checks file encodings to see if they match the project.build.sourceEncoding. If file encoding cannot be determined it
Expand Down Expand Up @@ -53,6 +56,11 @@ public class RequireEncoding
*/
private boolean failFast = true;

/**
* Should the rule accept US-ASCII as a subset of UTF-8 and ISO-8859-1.
*/
private boolean acceptAsciiSubset = false;

public void execute( EnforcerRuleHelper helper )
throws EnforcerRuleException
{
Expand All @@ -63,10 +71,18 @@ public void execute( EnforcerRuleHelper helper )
encoding = (String) helper.evaluate( "${project.build.sourceEncoding}" );
}
Log log = helper.getLog();

Set< String > acceptedEncodings = new HashSet< String >( Arrays.asList( encoding ) );
if ( encoding.equals( StandardCharsets.US_ASCII.name() ) )
{
log.warn( "Encoding US-ASCII is hard to detect. Use UTF-8 or ISO-8859-1" );
}

if ( acceptAsciiSubset && ( encoding.equals( StandardCharsets.ISO_8859_1.name() ) || encoding.equals( StandardCharsets.UTF_8.name() ) ) )
{
acceptedEncodings.add( StandardCharsets.US_ASCII.name() );
}

String basedir = (String) helper.evaluate( "${basedir}" );
DirectoryScanner ds = new DirectoryScanner();
ds.setBasedir( basedir );
Expand All @@ -91,7 +107,7 @@ public void execute( EnforcerRuleHelper helper )
{
log.debug( file + "==>" + fileEncoding );
}
if ( fileEncoding != null && !fileEncoding.equals( encoding ) )
if ( fileEncoding != null && !acceptedEncodings.contains( fileEncoding ) )
{
filesInMsg.append( file );
filesInMsg.append( "==>" );
Expand Down Expand Up @@ -121,27 +137,18 @@ public void execute( EnforcerRuleHelper helper )
protected String getEncoding( String requiredEncoding, File file, Log log )
throws IOException
{
FileInputStream fis = null;
try
FileEncoding fileEncoding = new FileEncoding();
if ( !fileEncoding.guessFileEncoding( Files.readAllBytes( file.toPath() ) ) )
{
fis = new FileInputStream( file );
CharsetDetector detector = new CharsetDetector();
detector.setDeclaredEncoding( requiredEncoding );
detector.setText( new BufferedInputStream( fis ) );
CharsetMatch[] charsets = detector.detectAll();
if ( charsets == null )
{
return null;
}
else
{
return charsets[0].getName();
}
return null;
}
finally

if ( log.isDebugEnabled() )
{
IOUtil.close( fis );
log.debug( String.format( "%s: (%s) %s; charset=%s", file, fileEncoding.getCode(), fileEncoding.getType(), fileEncoding.getCodeMime() ) );
}

return fileEncoding.getCodeMime().toUpperCase();
}

/**
Expand Down
Loading

0 comments on commit 52017dd

Please sign in to comment.