Source code for cerebras.modelzoo.common.run_cstorch_eval_harness

# Copyright 2022 Cerebras Systems.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Eval Harness run script"""
import argparse
import inspect
import logging
import os
import sys

# isort: off
sys.path.append(os.path.join(os.path.dirname(__file__), "../../.."))
# isort: on
from cerebras.modelzoo.common.utils.run.cli_parser import get_params_from_args
from cerebras.modelzoo.common.utils.run.utils import DeviceType
from cerebras.modelzoo.config_manager.config_loader import (
    validate_config_params,
)


def setup_hf_env_vars(hf_cache_dir=None):
    from cerebras.appliance.environment import appliance_environ

    # Removes annoying logs relating to process forking
    appliance_environ["TOKENIZERS_PARALLELISM"] = "false"

    if hf_cache_dir is not None:
        appliance_environ["TRANSFORMERS_CACHE"] = hf_cache_dir
        appliance_environ["HF_HOME"] = hf_cache_dir
        appliance_environ["HF_DATASETS_CACHE"] = hf_cache_dir


def eeh_parser():
    parser = argparse.ArgumentParser(
        "Script for running Eleuther Eval Harness for GPT style models",
        add_help=False,
    )
    optional_arguments = parser.add_argument_group(
        "Eleuther Eval Harness Arguments"
    )

    # EEH-SPECIFIC ARGS
    # Ref: https://github.com/EleutherAI/lm-evaluation-harness/blob/c9bbec6e7de418b9082379da82797522eb173054/lm_eval/__main__.py#L26
    optional_arguments.add_argument(
        "--tasks",
        default=None,
        help="To get the full list of tasks, use the command lm-eval --tasks list",
    )
    optional_arguments.add_argument(
        "--num_fewshot",
        type=int,
        default=None,
        help="Number of examples in few-shot context",
    )
    optional_arguments.add_argument(
        "--output_path",
        default=None,
        type=str,
        metavar="= [dir/file.jsonl] [DIR]",
        help=(
            "The path to the output file where the result metrics will be saved. "
            "If the path is a directory and log_samples is true, the results will "
            "be saved in the directory. Else the parent directory will be used."
        ),
    )
    optional_arguments.add_argument(
        "--limit",
        type=float,
        default=None,
        help="Limit the number of examples per task. "
        "If <1, limit is a percentage of the total number of examples.",
    )
    optional_arguments.add_argument(
        "--use_cache",
        type=str,
        default=None,
        help="A path to a sqlite db file for caching model responses. `None` if not caching.",
    )
    optional_arguments.add_argument(
        "--check_integrity",
        action="store_true",
        help="Whether to run the relevant part of the test suite for the tasks",
    )
    optional_arguments.add_argument(
        "--write_out",
        action="store_true",
        default=False,
        help="Prints the prompt for the first few documents",
    )
    optional_arguments.add_argument(
        "--log_samples",
        action="store_true",
        default=False,
        help=(
            "If True, write out all model outputs and documents for per-sample "
            "measurement and post-hoc analysis"
        ),
    )
    optional_arguments.add_argument(
        "--show_config",
        action="store_true",
        default=False,
        help="If True, shows the full config of all tasks at the end of the evaluation.",
    )
    optional_arguments.add_argument(
        "--include_path",
        type=str,
        default=None,
        help="Additional path to include if there are external tasks to include.",
    )

    # CEREBRAS-SPECIFIC ARGS
    optional_arguments.add_argument(
        "--hf_cache_dir",
        default=None,
        help="Path to directory for caching Hugging Face downloaded data.",
    )
    optional_arguments.add_argument(
        "--keep_data_dir",
        action="store_true",
        default=False,
        help=(
            "Specifies whether dumped data samples should be kept for reuse. "
            "Defaults to False, i.e. data samples are deleted after the run."
        ),
    )
    return parser

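
# NOTE (editor sketch): the options above can also be provided via the params
# YAML, which run_eval_harness() reads back from the merged `runconfig` section.
# A minimal, hypothetical example; keys and values below are placeholders, not
# defaults or an exhaustive schema:
#
#   runconfig:
#     model_dir: ./model_dir
#     checkpoint_path: /path/to/checkpoint
#     tasks: "hellaswag,winogrande"
#     num_fewshot: 0
#     hf_cache_dir: /path/to/hf_cache
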
def run_eval_harness():
    """Main run script."""
    parent = inspect.getouterframes(inspect.currentframe())[1]
    run_dir = os.path.dirname(os.path.abspath(parent.filename))
    parser_fn = lambda: [eeh_parser()]

    parser_args = {
        "parser_epilog": (
            "Please run 'python run_cstorch_eval_harness.py CSX -h'. \n \n"
            "Here is an example command for running on CSX: \n \n"
            "    python run_cstorch_eval_harness.py CSX --params /path/to/params --checkpoint_path "
            "/path/to/checkpoint --tasks 'hellaswag,winogrande' --num_fewshot 0 \n \n"
            "Note that Eval Harness is currently only supported for device CSX"
        ),
        "csx_parser_epilog": (
            "To see a complete list of all available arguments, \n"
            "please run 'python run_cstorch_eval_harness.py CSX -h'. \n\n"
            "Here is an example command for running with CSX: \n \n"
            "    python run_cstorch_eval_harness.py CSX --params /path/to/params "
            "--checkpoint_path /path/to/checkpoint --tasks 'hellaswag,winogrande' --num_fewshot 0 "
            "\n \nEval Harness resides in the Cerebras Model Zoo. Please specify --python_paths and "
            "\n --mount_dirs here or in your params.yaml under the 'runconfig' section with \n"
            "the path to the directory in which the Cerebras Model Zoo resides. \n"
        ),
        "modes": ["eval"],
    }

    # Parse args
    params = get_params_from_args(
        run_dir,
        argv=sys.argv[1:],
        extra_args_parser_fn=parser_fn,
        device_type=DeviceType.CSX,
        **parser_args,
    )
    runconfig_params = params["runconfig"]

    from lm_eval.api.registry import get_model

    from cerebras.modelzoo.common.eval_harness_impl import CS_LLM
    from cerebras.modelzoo.common.pytorch_utils import (
        RunConfigParamsValidator,
        setup_artifact_dir,
        setup_logging,
    )
    from cerebras.modelzoo.data.nlp.gpt.InferenceDataProcessor import (
        RequestType,
    )

    # Set default model parameters
    from cerebras.modelzoo.models.nlp.gpt2.utils import set_defaults

    set_defaults(params)

    # Validate runconfig
    RunConfigParamsValidator(parser_fn).validate(runconfig_params)

    # Validate input params
    if params.get("eval_input") is not None:
        num_pt_workers = params["eval_input"].get("num_workers")
        if num_pt_workers is not None and num_pt_workers > 1:
            raise ValueError(
                "Eval harness does not support multiple process data "
                "loading for `eval_input.num_workers` > 1, but specified "
                f"{num_pt_workers} worker processes.\nPlease ensure that "
                "`eval_input.num_workers` is either 0 (default) or 1."
            )
    else:
        raise RuntimeError(
            "No `eval_input` section specified in the .yaml config."
        )

    # Set up logging level and env vars
    artifact_dir = setup_artifact_dir(runconfig_params["model_dir"], "eval")
    setup_logging(
        runconfig_params.get("logging"),
        runconfig_params.get("streamer_logging"),
        logging_dir=artifact_dir,
        model_dir=runconfig_params.get("model_dir"),
    )
    setup_hf_env_vars(hf_cache_dir=runconfig_params.get("hf_cache_dir"))

    # Debug logs
    logging.debug(f"CMD: {sys.argv}")
    logging.debug(f"Runconfig: {runconfig_params}")

    # Construct args namespace object for EEH's main script
    parser = parser_fn()[0]
    args = {}
    for arg in parser._action_groups[0]._actions:
        arg_name = arg.dest
        # Exclude Cerebras-specific args
        if arg_name in {"hf_cache_dir", "keep_data_dir"}:
            continue
        else:
            arg_val = runconfig_params.get(arg_name)
            args[arg_name] = arg_val

    from cerebras.modelzoo.data.nlp.gpt.InferenceDataProcessor import (
        InferenceDataProcessor,
    )
    from cerebras.modelzoo.models.nlp.gpt2.model import (
        Gpt2Model,
        GptInferenceModel,
    )

    def config_validation(params, model_key):
        # EEH-specific params added to the runconfig are not supported by our
        # config class validation. We remove EEH args from the runconfig,
        # perform config validation, and then re-add the args.
        extra_parser_param_keys = []
        if parser and isinstance(parser, argparse.ArgumentParser):
            extra_parser_param_keys.extend(
                [
                    action.dest
                    for action in parser._actions
                    if not isinstance(action, argparse._HelpAction)
                ]
            )

        run_params = params["runconfig"]
        extra_parser_params = {}
        for eeh_arg in extra_parser_param_keys:
            if eeh_arg in run_params:
                extra_parser_params[eeh_arg] = run_params.pop(eeh_arg, None)

        # Validate the params with the config class
        validate_config_params(params, model_key)

        # Re-add extra EEH args to the runconfig after config validation
        run_params.update(extra_parser_params)

    def model_fn(request_type, params):
        if request_type == RequestType.loglikelihood:
            # TODO: params here contain start_token etc., which are only part
            # of the inference model. If we use the gpt2 model, validation will
            # fail. We need to clean up params so they contain start_token etc.
            # only when inference is used.
            config_validation(params, "gpt_inference")
            return Gpt2Model(params)
        elif request_type == RequestType.generate_until:
            config_validation(params, "gpt_inference")
            return GptInferenceModel(params)
        else:
            raise TypeError(
                f"Invalid request type: {request_type}. At present, only "
                "`RequestType.loglikelihood` and `RequestType.generate_until` "
                "request types are supported."
            )

    def eval_input_fn(params, samples_file_list, dataset_size, request_type):
        return InferenceDataProcessor.from_request_type(
            request_type,
            params["eval_input"],
            samples_file_list,
            dataset_size,
        ).create_dataloader()

    lm = get_model(CS_LLM).create_from_arg_string(
        arg_string="",
        additional_config={
            "params": params,
            "model_fn": model_fn,
            "input_fn": eval_input_fn,
            "data_fn": InferenceDataProcessor.gen_data_samples,
            "artifact_dir": artifact_dir,
        },
    )

    # These are additional EEH args that we don't expose in our parser
    additional_args = {
        "model": lm,
        "verbosity": "INFO",  # EEH logging level
        "model_args": None,
        "batch_size": None,
        "max_batch_size": None,
        "device": None,
        "decontamination_ngrams_path": None,
        "gen_kwargs": None,
    }
    final_args = {**args, **additional_args}
    args_namespace = argparse.Namespace(**final_args)

    # Invoke EEH script
    from lm_eval.__main__ import cli_evaluate

    cli_evaluate(args=args_namespace)

if __name__ == "__main__":
    run_eval_harness()
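
# Example invocation, mirroring the parser epilog above; the params path,
# checkpoint path, and task list are placeholders to be replaced for a real run:
#
#   python run_cstorch_eval_harness.py CSX \
#       --params /path/to/params \
#       --checkpoint_path /path/to/checkpoint \
#       --tasks 'hellaswag,winogrande' \
#       --num_fewshot 0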