diff --git a/vlmeval/evaluate/multiple_choice.py b/vlmeval/evaluate/multiple_choice.py
index 9a5d221a3..227f0a7a6 100644
--- a/vlmeval/evaluate/multiple_choice.py
+++ b/vlmeval/evaluate/multiple_choice.py
@@ -241,8 +241,11 @@ def multiple_choice_eval(eval_file, dataset=None, model='chatgpt-0613', nproc=4,
     model_name = 'gpt-3.5-turbo-0613'
     if INTERNAL:
         model = OpenAIWrapperInternal(model_name, verbose=verbose, retry=10)
-    else:
+    elif gpt_key_set():
         model = OpenAIWrapper(model_name, verbose=verbose, retry=10)
+    else:
+        logger.error('OPENAI_API_KEY is not set properly, will use exact matching for evaluation')
+        model = None
 
     logger.info(f'Evaluating {eval_file}')
     result_file = eval_file.replace(f'.{suffix}', f'_{name_str}_result.pkl')
diff --git a/vlmeval/evaluate/yes_or_no.py b/vlmeval/evaluate/yes_or_no.py
index 1dd7f7ef3..2611df7c1 100644
--- a/vlmeval/evaluate/yes_or_no.py
+++ b/vlmeval/evaluate/yes_or_no.py
@@ -175,18 +175,21 @@ def YOrN_eval(eval_file, model='chatgpt-0613', nproc=4, verbose=False, dataset=N
 
         if INTERNAL:
             model = OpenAIWrapperInternal(model_name, verbose=verbose, retry=10)
-        else:
+        elif gpt_key_set():
             model = OpenAIWrapper(model_name, verbose=verbose, retry=10)
-
-        lt = len(unknown)
-        lines = [unknown.iloc[i] for i in range(lt)]
-        tups = [(model, line) for line in lines]
-        indices = list(unknown['index'])
-
-        if len(tups):
-            res = track_progress_rich(YOrN_auxeval, tups, nproc=nproc, chunksize=nproc, keys=indices, save=tmp_file)
-            for k, v in zip(indices, res):
-                ans_map[k] = v
+        else:
+            logger.error('OPENAI_API_KEY is not set properly, will use exact matching for evaluation')
+            model = None
+
+        if model is not None:
+            lt = len(unknown)
+            lines = [unknown.iloc[i] for i in range(lt)]
+            tups = [(model, line) for line in lines]
+            indices = list(unknown['index'])
+            if len(tups):
+                res = track_progress_rich(YOrN_auxeval, tups, nproc=nproc, chunksize=nproc, keys=indices, save=tmp_file)
+                for k, v in zip(indices, res):
+                    ans_map[k] = v
 
     data['extracted'] = [ans_map[x] for x in data['index']]
     dump(data, storage)
diff --git a/vlmeval/smp.py b/vlmeval/smp.py
index c05d70c35..38ba333cf 100644
--- a/vlmeval/smp.py
+++ b/vlmeval/smp.py
@@ -1,4 +1,4 @@
- # flake8: noqa: F401, F403
+# flake8: noqa: F401, F403
 import abc
 import argparse
 import csv
@@ -29,6 +29,14 @@ from huggingface_hub import scan_cache_dir
 
 import logging
 
+def gpt_key_set():
+    openai_key = os.environ.get('OPENAI_API_KEY', None)
+    return isinstance(openai_key, str) and openai_key.startswith('sk-')
+
+def apiok(wrapper):
+    s = wrapper.generate("Hello!")
+    return wrapper.fail_msg not in s
+
 def isimg(s):
     return osp.exists(s) or s.startswith('http')
 
@@ -219,24 +227,17 @@ def last_modified(pth):
 def mmqa_display(question):
     question = {k.lower(): v for k, v in question.items()}
     keys = list(question.keys())
-    if 'index' in keys:
-        keys.remove('index')
-    keys.remove('image')
+    keys = [k for k in keys if k not in ['index', 'image']]
 
     images = question['image']
     if isinstance(images, str):
         images = [images]
 
-    idx = 'XXX'
-    if 'index' in question:
-        idx = question.pop('index')
+    idx = question.pop('index', 'XXX')
     print(f'INDEX: {idx}')
 
     for im in images:
-        image = decode_base64_to_image(im)
-        w, h = image.size
-        ratio = 500 / h
-        image = image.resize((int(ratio * w), int(ratio * h)))
+        image = decode_base64_to_image(im, target_size=512)
         display(image)
 
     for k in keys:
@@ -289,13 +290,6 @@ def mwlines(lines, fname):
     with open(fname, 'w') as fout:
         fout.write('\n'.join(lines))
 
-def default_set(self, args, name, default):
-    if hasattr(args, name):
-        val = getattr(args, name)
-        setattr(self, name, val)
-    else:
-        setattr(self, name, default)
-
 def dict_merge(dct, merge_dct):
     for k, _ in merge_dct.items():
         if (k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], dict)): #noqa