-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implemented Remote, Search within State or Province, and Search within Country #113
Changes from all commits
e9087db
e35b0f8
8495377
45ba318
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -229,11 +229,42 @@ def _get_search_url(self, method: Optional[str] = 'get') -> str: | |
TODO: use Enum for method instead of str. | ||
""" | ||
if method == 'get': | ||
return ( | ||
"https://www.indeed.{}/jobs?q={}&l={}%2C+{}&radius={}&" | ||
"limit={}&filter={}{}".format( | ||
search_url = "https://www.indeed.{}/jobs?q={}".format( | ||
self.config.search_config.domain, | ||
self.query, | ||
self.query | ||
) | ||
# search countrywide supposing one is open to relocating within | ||
# the country (includes remote) | ||
if self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state is None and \ | ||
self.config.search_config.remote_within_country is False: | ||
return search_url | ||
# search remote within the country | ||
if self.config.search_config.remote_within_country is True: | ||
return ( | ||
search_url + "&l=remote&" | ||
"limit={}&filter={}{}".format( | ||
self.config.search_config.province_or_state.upper(), | ||
self.max_results_per_page, | ||
int(self.config.search_config.return_similar_results), | ||
REMOTENESS_TO_QUERY[self.config.search_config.remoteness] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would make more sense for us to construct the URL in an additive way vs this if/else and return method. This way we can easily add or remove portions and update to using a URL-construction library (Which helps with constructing the URL string from tags). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i.e.
|
||
) | ||
) | ||
# search by state or province with no city | ||
elif self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state: | ||
return ( | ||
search_url + "&l={}&" | ||
"limit={}&filter={}{}".format( | ||
self.config.search_config.province_or_state.upper(), | ||
self.max_results_per_page, | ||
int(self.config.search_config.return_similar_results), | ||
REMOTENESS_TO_QUERY[self.config.search_config.remoteness] | ||
) | ||
) | ||
return ( | ||
search_url + "&l={}%2C+{}&radius={}&" | ||
"limit={}&filter={}{}".format( | ||
self.config.search_config.city.replace(' ', '+',), | ||
self.config.search_config.province_or_state.upper(), | ||
self._quantize_radius(self.config.search_config.radius), | ||
|
@@ -338,11 +369,42 @@ def _get_search_url(self, method: Optional[str] = 'get') -> str: | |
TODO: use Enum for method instead of str. | ||
""" | ||
if method == 'get': | ||
return ( | ||
"https://www.indeed.{}/jobs?q={}&l={}&radius={}&" | ||
"limit={}&filter={}{}".format( | ||
search_url = "https://www.indeed.{}/jobs?q={}".format( | ||
self.config.search_config.domain, | ||
self.query, | ||
self.query | ||
) | ||
# search countrywide supposing one is open to relocating within | ||
# the country (includes remote) | ||
if self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state is None and \ | ||
self.config.search_config.remote_within_country is False: | ||
return search_url | ||
# search remote within the country | ||
elif self.config.search_config.remote_within_country is True: | ||
return ( | ||
search_url + "&l=remote&" | ||
"limit={}&filter={}{}".format( | ||
self.max_results_per_page, | ||
int(self.config.search_config.return_similar_results), | ||
REMOTENESS_TO_QUERY[self.config.search_config.remoteness] | ||
) | ||
) | ||
# search by state or province, does not factor in city or radius | ||
elif self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state and \ | ||
self.config.search_config.remote_within_country is False: | ||
return ( | ||
search_url + "&l={}&" | ||
"limit={}&filter={}{}".format( | ||
self.config.search_config.province_or_state.upper(), | ||
self.max_results_per_page, | ||
int(self.config.search_config.return_similar_results), | ||
REMOTENESS_TO_QUERY[self.config.search_config.remoteness], | ||
) | ||
) | ||
return ( | ||
search_url + "&l={}&radius={}&" | ||
"limit={}&filter={}{}".format( | ||
self.config.search_config.city.replace(' ', '+',), | ||
self._quantize_radius(self.config.search_config.radius), | ||
self.max_results_per_page, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -260,15 +260,32 @@ def _get_search_url(self, method: Optional[str] = 'get', | |
all previous jobs as we go. | ||
""" | ||
if method == 'get': | ||
return ( | ||
'https://www.monster.{}/jobs/search/?{}q={}&where={}__2C-{}' | ||
'&rad={}'.format( | ||
search_url = 'https://www.monster.{}/jobs/search/?{}q={}'.format( | ||
self.config.search_config.domain, | ||
f'page={page}&' if page > 1 else '', | ||
self.query, | ||
self.config.search_config.city.replace(' ', '-'), | ||
) | ||
# search countrywide supposing one is open to relocating within | ||
# the country (includes remote) | ||
if self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state is None and \ | ||
self.config.search_config.remote_within_country is False: | ||
return search_url | ||
# search remote within the country | ||
elif self.config.search_config.remote_within_country is True: | ||
return search_url + '-remote' | ||
# search by state or province, does not factor in city or radius | ||
elif self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state and \ | ||
self.config.search_config.remote_within_country is False: | ||
return search_url + '&where={}'.format( | ||
self.config.search_config.province_or_state, | ||
self._convert_radius(self.config.search_config.radius) | ||
) | ||
return ( | ||
search_url + '&where={}__2C-{}&rad={}'.format( | ||
self.config.search_config.city.replace(' ', '-'), | ||
self.config.search_config.province_or_state, | ||
self._convert_radius(self.config.search_config.radius) | ||
Comment on lines
+263
to
+288
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this might overcomplicate things at this stage. The config builder should process all user provided input from the CLI and YAML and create a valid configuration. This configuration should then be directly processed by the scrapers. The potential issue I see with complicating the logic for setting up the URL is that it is a lot harder to extend the scrapers with different LOCALES. Right now, we try to optimise the scraper classes such that it is easy to support more LOCALES. |
||
) | ||
) | ||
elif method == 'post': | ||
|
@@ -360,14 +377,31 @@ def _get_search_url(self, method: Optional[str] = 'get', | |
all previous jobs as we go. | ||
""" | ||
if method == 'get': | ||
return ( | ||
'https://www.monster.{}/jobs/search/?{}q={}&where={}' | ||
'&rad={}'.format( | ||
search_url = 'https://www.monster.{}/jobs/search/?{}q={}'.format( | ||
self.config.search_config.domain, | ||
f'page={page}&' if page > 1 else '', | ||
self.query, | ||
self.config.search_config.city.replace(' ', '-'), | ||
self._convert_radius(self.config.search_config.radius) | ||
) | ||
# search countrywide supposing one is open to relocating within | ||
# the country (includes remote) | ||
if self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state is None and \ | ||
self.config.search_config.remote_within_country is False: | ||
return search_url | ||
# search remote within the country | ||
elif self.config.search_config.remote_within_country is True: | ||
return search_url + '-remote' | ||
# search by state or province, does not factor in city or radius | ||
elif self.config.search_config.city is None and \ | ||
self.config.search_config.province_or_state and \ | ||
self.config.search_config.remote_within_country is False: | ||
return search_url + '&where={}'.format( | ||
self.config.search_config.province_or_state, | ||
) | ||
return ( | ||
search_url + '&where={}&rad={}'.format( | ||
self.config.search_config.city.replace(' ', '-'), | ||
self._convert_radius(self.config.search_config.radius) | ||
Comment on lines
+384
to
+404
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See previous comment |
||
) | ||
) | ||
elif method == 'post': | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,18 +151,28 @@ def parse_cli(args: List[str]) -> Dict[str, Any]: | |
search_group.add_argument( | ||
'-ps', | ||
dest='search.province_or_state', | ||
type=str, | ||
type=None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to make this an |
||
default=None, | ||
help='Province/state value for your job-search area of interest. ' | ||
'(i.e. Ontario).', | ||
required=True, | ||
required=False, | ||
) | ||
|
||
search_group.add_argument( | ||
'-c', | ||
dest='search.city', | ||
type=str, | ||
type=None, | ||
default=None, | ||
help='City/town value for job-search region (i.e. Waterloo).', | ||
required=True, | ||
required=False, | ||
) | ||
|
||
search_group.add_argument( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit confused by the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just tried to make this part correspond 1:1 with the config.yaml. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess it doesn't make sense to me why we need this, since if a user provides a country but no city with --remote it would have the intended effect no? |
||
'-remote_in_ctry', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe use |
||
dest='search.remote_within_country', | ||
action='store_false', | ||
help='Remote within locale', | ||
required=False, | ||
) | ||
|
||
search_group.add_argument( | ||
|
@@ -358,6 +368,7 @@ def get_config_manager(config: Dict[str, Any]) -> JobFunnelConfigManager: | |
blocked_company_names=config['search']['company_block_list'], | ||
locale=Locale[config['search']['locale']], | ||
providers=[Provider[p] for p in config['search']['providers']], | ||
remote_within_country=config['search']['remote_within_country'], | ||
remoteness=Remoteness[config['search']['remoteness']], | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The way the schema has been implemented, we can simply not provide
province_or_state
/city
entirely and they will be set to the default value in the schema, which should be None.i.e. user shouldn't be setting this to a
Null
string