Skip to content

Commit

Permalink
TODO's
Browse files Browse the repository at this point in the history
  • Loading branch information
goulven authored and goulven committed Sep 10, 2024
1 parent 98a9420 commit a917d5d
Showing 1 changed file with 23 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,34 +51,33 @@
public class ResourceCompletionService extends AbstractCompletionService{


// TODO(p3, conf) : From yaml
private static final String NO_IMAGE_PATH = "/icons/no-image.png";

/**
* If true, a new file name is regenerated for resources on every run. Use with caution in production, because it will trigger 301!
*/
// TODO(p3, conf) : From yaml
private static boolean forceEraseFileName = false;

private static final double SIMILARITY_SCORE = 0.20;
private static final int PERCEPTIV_HASH_SIZE = 32;

// TODO : Dedicated logger
protected static final Logger logger = LoggerFactory.getLogger(ResourceCompletionService.class);

private final ApiProperties apiProperties;
private final ImageMagickService imageService;
private final ResourceService resourceService;

// TODO : move to Tika V2
private static final Tika tika = new Tika();
private static final TikaConfig config = TikaConfig.getDefaultConfig();

// TODO : Warning : Probably not thread safe
// TODO(p2,safety) : Warning : Probably not thread safe
private static final HashingAlgorithm hasher = new PerceptiveHash(PERCEPTIV_HASH_SIZE);

// private final ElasticsearchRestTemplate esTemplate;

public ResourceCompletionService(ImageMagickService imageService, VerticalsConfigService verticalConfigService, ResourceService resourceService, ProductRepository dataRepository, ApiProperties apiProperties) {

// TODO : Should set a specific log level here (not "agg(regation)" one)
// TODO(p3,conf) : Should set a specific log level here (not "agg(regation)" one)
super(dataRepository, verticalConfigService, apiProperties.logsFolder(), apiProperties.aggLogLevel());

this.apiProperties = apiProperties;
Expand Down Expand Up @@ -139,8 +138,7 @@ public void processProduct(VerticalConfig vertical, Product data ) {
if (forceEraseFileName || StringUtils.isEmpty(r.getFileName())) {
String name;

// TODO : OfferNAme size from conf
List<String> offerNames = data.getNames().getOfferNames().stream().filter(e->e.length() < 60).toList();
List<String> offerNames = data.getNames().getOfferNames().stream().toList();

// No offer names, we generate a title by our own
if (offerNames.size() == 0) {
Expand Down Expand Up @@ -264,7 +262,6 @@ public void processProduct(VerticalConfig vertical, Product data ) {


// Extracting the cover image first
// TODO : cover as const
Resource cover = resultingImages.stream()
.filter(e->e.getHardTags().contains(ResourceTag.PRIMARY))
.max((o1,o2) -> o1.getImageInfo().pixels().compareTo(o2.getImageInfo().pixels()))
Expand All @@ -277,34 +274,32 @@ public void processProduct(VerticalConfig vertical, Product data ) {

if (null == cover) {
logger.warn("No cover image found for product : {}", data.gtin());
// TODO : from const
data.setCoverImagePath("/icons/no-image.png");
data.setCoverImagePath(NO_IMAGE_PATH);
} else {
data.setCoverImagePath(cover.path());
}

// Deleting useless files and unsetting attributes to preserve space
for (Resource r : data.getResources()) {
if (r.isEvicted()) {
// If an evicted resource, systematicaly delete file.
File evicted = resourceService.getCacheFile(r);
logger.info("Deleting evicted resource :{} -> {}",r, evicted);
// TODO : uncomment to effectivly rm files
// if (!evicted.delete()) {
// logger.error("Could not delete evicted resource : {}",resourceService.getCacheFile(r));
// }
// data.getResources().remove(r);
}

}
// // NOTE : uncomment to effectively rm files. But we should not : effectively removing them would make resources be classified and downloaded again...
// for (Resource r : data.getResources()) {
// if (r.isEvicted()) {
// // If an evicted resource, systematicaly delete file.
// File evicted = resourceService.getCacheFile(r);
//// logger.info("Deleting evicted resource :{} -> {}",r, evicted);
// // if (!evicted.delete()) {
// // logger.error("Could not delete evicted resource : {}",resourceService.getCacheFile(r));
// // }
// // data.getResources().remove(r);
// }
//
// }
}

private Resource processUrlTemplate(ResourceCompletionUrlTemplate ut, String gtin) {

Resource r = new Resource();
r.getHardTags().addAll(ut.getHardTags());

// TODO : add resource language
// TODO(p3,i18n) : add resource language
r.setUrl(ut.getUrl().replace("{GTIN}", gtin));

return r;
Expand All @@ -317,8 +312,6 @@ private Resource processUrlTemplate(ResourceCompletionUrlTemplate ut, String gti
* @return
*/
private ArrayList<List<Resource>> classify(List<Resource> list) {
// TODO Auto-generated method stub

logger.info("Starting image perceptive clusterisation");

Map<Resource, Set<Resource>> cluster = new HashMap<>();
Expand All @@ -337,7 +330,6 @@ private ArrayList<List<Resource>> classify(List<Resource> list) {

double similarityScore = hash0.normalizedHammingDistanceFast(hash1);
logger.info("image similarityScore : {} ", similarityScore);
// TODO : From conf
if (similarityScore < SIMILARITY_SCORE) {
// Considered a duplicate in this particular case
cluster.get(r1).add(r2);
Expand All @@ -362,23 +354,11 @@ private ArrayList<List<Resource>> classify(List<Resource> list) {
Collections.sort(tmpList, (o1, o2) -> o2.getImageInfo().pixels().compareTo(o1.getImageInfo().pixels()));
sortedCluster.add(tmpList);

// We priorize on amazon primary image
// TODO : Share const
// boolean primary = resourceGroups.stream().map(e->e.getTags()).anyMatch(e -> e.contains("cover"));
// if (primary) {
// forcedFirst = tmpList;
// }
}

// Sorting buckets by number of occurrences
Collections.sort(sortedCluster, (o1, o2) -> Integer.compare(o2.size(), o1.size()));

// if (null != forcedFirst) {
// sortedCluster.remove(forcedFirst);
// sortedCluster.addFirst(forcedFirst);
// }
// But


// Adding the group number
for (int i = 0; i < sortedCluster.size(); i++) {
Expand Down Expand Up @@ -423,7 +403,6 @@ public Resource fetchResource(Resource resource, VerticalConfig vertical) {
File target = resourceService.getCacheFile(resource);

// Downloading the file if not cached
// TODO : A specific config property to force re-download
if (target.exists()) {
logger.info("resource in file cache: {}", target);
} else {
Expand All @@ -432,7 +411,7 @@ public Resource fetchResource(Resource resource, VerticalConfig vertical) {

try {
Request.Get(resource.getUrl())
// TODO from conf
// TODO(p2,conf) from conf
.userAgent("Mozilla/5.0 (Windows NT 5.1; rv:5.0.1) Gecko/20100101 Firefox/5.0.1")
.connectTimeout(1000).socketTimeout(1000).execute().saveContent(target);

Expand Down Expand Up @@ -524,8 +503,7 @@ private void processVideo(Resource resource, File target) {
}

private void processPdf(final Resource indexed, final File target) {
// TODO(1,p3,feature) : Generate default PNG version, generate thumnails from
// config, html version, so on...
// TODO(p3,feature) : Generate default PNG version, generate thumbnails from PDF
// handle metadatas
indexed.setResourceType(ResourceType.PDF);

Expand Down

0 comments on commit a917d5d

Please sign in to comment.