Skip to content

Commit

Permalink
Handling icecat PDF's bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
goulven authored and goulven committed Nov 1, 2024
1 parent ede0204 commit 031d4de
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public void amazonCompletionAll() throws InvalidParameterException, IOException
// Icecat completion
///////////////////////////////////
/**
 * Launches the icecat completion by delegating to
 * {@code icecatCompletionService.completeAll(true)}, after emitting a warn-level log.
 *
 * NOTE(review): this listing is a diff rendering without +/- markers; the two
 * consecutive warn lines are presumably the replaced and the replacing message
 * rather than two statements of a single revision - confirm against the repository.
 *
 * @throws InvalidParameterException declared by the signature; the raising condition is not visible here
 * @throws IOException declared by the signature; the raising condition is not visible here
 */
public void icecatCompletionAll() throws InvalidParameterException, IOException {
logger.warn("Completing verticals with amazon");
logger.warn("Completing verticals with icecat");
// NOTE(review): the meaning of the 'true' flag is not visible from here - TODO confirm.
icecatCompletionService.completeAll(true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ public Map<String, Object> onDataFragment(final DataFragment input, final Produ
output.getResources().add(r);
}
}

onProduct(output, vConf);

return null;
}

Expand All @@ -83,6 +86,17 @@ public void close() throws IOException {

/**
 * Removes access-protected icecat resources from the product.
 *
 * A resource is dropped when its url contains both "icecat.biz" and "?access";
 * every removal is logged at error level. Resources without a url are left
 * untouched instead of failing the whole cleanup with a NullPointerException.
 *
 * @param data the product whose resource collection is cleaned in place
 * @param vConf the vertical configuration; not used by this implementation
 * @throws AggregationSkipException declared by the overridden contract; never thrown here
 */
@Override
public void onProduct(Product data, VerticalConfig vConf) throws AggregationSkipException {

    // TODO(p1, perf) : Remove when sure there are no more protected urls
    // We clean icecat protected items
    data.getResources().removeIf(resource -> {
        final String resourceUrl = resource.getUrl();
        if (resourceUrl == null) {
            // Nothing to match against: keep the resource rather than crash the removeIf pass
            return false;
        }

        final boolean accessProtected = resourceUrl.contains("icecat.biz") && resourceUrl.contains("?access");
        if (accessProtected) {
            logger.error("Removing icecat protected url : {}", resourceUrl);
        }
        return accessProtected;
    });
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ public void processProduct(VerticalConfig vertical, Product data) {
logger.error("Error occurs during icecat aggregation",e);
}
}






try {
Expand Down Expand Up @@ -161,10 +165,10 @@ private Set<DataFragment> completeSearch(VerticalConfig vertical, Product data)
private DataFragment convert(IceDataItem iceItem, Product data) {
DataFragment df = initDataFragment(data);

completeGeneralInfos(iceItem.generalInfo, df);
completeImage(iceItem.image, df);
completeMultimedia(iceItem.multimedia,df);
completeGallery(iceItem.gallery,df);
completeGeneralInfos(iceItem.generalInfo, df,data);
completeImage(iceItem.image, df, data);
completeMultimedia(iceItem.multimedia,df,data);
completeGallery(iceItem.gallery,df,data);
completeFeaturesGroup(iceItem.featuresGroups,df);


Expand Down Expand Up @@ -207,63 +211,88 @@ private void completeFeaturesGroup(List<FeaturesGroups> featuresGroups, DataFrag

}

/**
 * Registers every gallery picture of an icecat item on the data fragment.
 *
 * NOTE(review): this listing is a diff rendering without +/- markers; the two
 * signature lines and the two resource-adding calls below are presumably the
 * pre- and post-commit variants of the same statement - confirm against the repository.
 *
 * A ValidationException on one picture is caught and logged, then the loop
 * moves on to the next gallery entry.
 */
private void completeGallery(List<Gallery> gallery, DataFragment df) {
private void completeGallery(List<Gallery> gallery, DataFragment df, Product p) {

for (Gallery g : gallery) {
try {
// TODO : mutualize tag
// Direct registration, tagged with the gallery type and the constant "gallery"
df.addResource(g.pic , Sets.newHashSet(g.type,"gallery"));
// Registration through addResourceIfAbsent, passing the product and the gallery type as tag
addResourceIfAbsent(df, p, g.pic, g.type);
} catch (ValidationException e) {
// TODO Auto-generated catch block
// NOTE(review): stack trace goes to stderr and the exception is not passed to the logger
e.printStackTrace();
logger.warn("Error while adding resource {}",g.pic);
}
}

}

private void completeMultimedia(List<Multimedia> multimedia, DataFragment df) {
/**
 * Adds an icecat resource to the data fragment, unless it is an access-protected
 * url whose unprotected counterpart has already been processed for the product.
 *
 * When the url contains the "?access" marker, the part before the marker is
 * compared against the urls of the product's existing resources. If one of them
 * starts with that prefix, is flagged as processed and has a positive file size,
 * nothing is added.
 *
 * @param df the data fragment receiving the resource
 * @param p the product whose existing resources are inspected
 * @param url the url of the resource to add, possibly carrying a "?access" suffix
 * @param tag the tag stored with the resource, alongside the constant "gallery" tag
 * @throws ValidationException propagated from {@code df.addResource}
 */
private void addResourceIfAbsent(DataFragment df, Product p, String url, String tag) throws ValidationException {

    // A null url is handed over untouched so that its handling stays with df.addResource,
    // instead of failing here with a NullPointerException the callers do not catch.
    final int marker = (url == null) ? -1 : url.indexOf("?access");

    if (marker != -1) {
        // TODO(P1,design) : Remove when tested
        logger.error("Got an access protected resource from icecat : {} - {}", url, p);

        final String shortened = url.substring(0, marker);
        for (Resource r : p.getResources()) {
            final String known = r.getUrl();
            if (known != null && known.startsWith(shortened) && r.isProcessed() && r.getFileSize() > 0) {
                // The url is now passed as argument: the placeholder used to be logged verbatim
                logger.info("Resource have already been processed, skipping {}", url);
                return;
            }
        }
    }

    // NOTE(review): "gallery" is attached to every resource going through this helper,
    // whatever the caller's tag is - confirm this is intended for non-gallery resources.
    df.addResource(url, Sets.newHashSet(tag, "gallery"));
}

/**
 * Registers every multimedia entry of an icecat item on the data fragment.
 *
 * NOTE(review): this listing is a diff rendering without +/- markers; the two
 * resource-adding calls below are presumably the pre- and post-commit variants
 * of the same statement - confirm against the repository.
 *
 * A ValidationException on one entry is caught and logged, then the loop
 * moves on to the next multimedia entry.
 */
private void completeMultimedia(List<Multimedia> multimedia, DataFragment df, Product p) {

for (Multimedia m : multimedia) {
try {
// TODO : handle i18
// Direct registration, tagged with the multimedia type and the constant "fr"
df.addResource(m.url , Sets.newHashSet(m.type,"fr"));
// Registration through addResourceIfAbsent; only "fr" is passed as tag, m.type is not
addResourceIfAbsent(df, p, m.url, "fr");
} catch (ValidationException e) {
// TODO Auto-generated catch block
// NOTE(review): stack trace goes to stderr and the exception is not passed to the logger
e.printStackTrace();
logger.info("Cannot validate multimedia resource : {}",m.url);
}
}


}

/**
 * Registers the item's {@code highPic} image on the data fragment, unless its
 * url contains "brand".
 *
 * NOTE(review): this listing is a diff rendering without +/- markers; the two
 * signature lines, and the Resource-based registration versus the
 * addResourceIfAbsent call, are presumably the pre- and post-commit variants of
 * the same logic - confirm against the repository.
 *
 * A ValidationException is caught and logged; it is not rethrown.
 */
private void completeImage(Image image, DataFragment df) {
private void completeImage(Image image, DataFragment df, Product p) {
try {

// Tweak to exclude "brand" images sometimes used as logo
// NOTE(review): image.highPic is dereferenced without a null check
if (!image.highPic.contains("brand")) {
// Registration as a Resource carrying the PRIMARY hard tag
Resource r = new Resource(image.highPic);
r.getHardTags().add(ResourceTag.PRIMARY);
df.addResource(r);

// Registration through addResourceIfAbsent, with PRIMARY passed as a string tag
addResourceIfAbsent(df, p, image.highPic, ResourceTag.PRIMARY.toString());
}

} catch (ValidationException e) {
// TODO Auto-generated catch block
// NOTE(review): stack trace goes to stderr and the exception is not passed to the logger
e.printStackTrace();
logger.info("Cannot validate image resource : {}",image.highPic);
}

}

private void completeGeneralInfos(GeneralInfo e, DataFragment df) {
private void completeGeneralInfos(GeneralInfo e, DataFragment df, Product p) {

// TODO : HAndle end of year / end of year
// TODO(p3, feature) : HAndle end of year / end of year
if (null != e.releaseDate) {
// TODO : i18n
try {
df.addAttribute("YEAR", e.releaseDate.substring(e.releaseDate.lastIndexOf("-")+1) , "fr", false, null);
} catch (Exception e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
logger.error("Parsing year failed ! ",e);
}
}

Expand All @@ -283,9 +312,7 @@ private void completeGeneralInfos(GeneralInfo e, DataFragment df) {
try {

if (e.description != null && e.description.leafletPDFURL != null) {
Resource r = new Resource(e.description.leafletPDFURL);
r.getHardTags().add(ResourceTag.LEAFLET);
df.addResource(r);
addResourceIfAbsent(df, p, e.description.leafletPDFURL, ResourceTag.LEAFLET.toString());
}

} catch (ValidationException e1) {
Expand All @@ -295,9 +322,8 @@ private void completeGeneralInfos(GeneralInfo e, DataFragment df) {

try {
if (e.description != null && e.description.manualPDFURL != null) {
Resource r = new Resource(e.description.manualPDFURL);
r.getHardTags().add(ResourceTag.MANUAL);
df.addResource(r);
addResourceIfAbsent(df, p, e.description.manualPDFURL, ResourceTag.MANUAL.toString());

}
} catch (ValidationException e1) {
logger.error("Error while adding manual pdf {}", e.description.leafletPDFURL, e);
Expand All @@ -318,7 +344,7 @@ private void completeGeneralInfos(GeneralInfo e, DataFragment df) {
*/
private DataFragment initDataFragment( Product data) {
DataFragment df = new DataFragment();
// TODO : Constants
// TODO(p3,conf) : Constants
df.setDatasourceName("icecat.biz");
df.setDatasourceConfigName("icecat.biz.yml");
df.setLastIndexationDate(System.currentTimeMillis());
Expand Down

0 comments on commit 031d4de

Please sign in to comment.