Release: v0.10.0 (#1573)
Besides updating the version numbers, this release removes two deprecated items: the `prepare_model_for_int8_training` helper and the `is_paralle_a` property.
BenjaminBossan authored Mar 21, 2024
1 parent 8e979fc commit 8221246
Showing 15 changed files with 30 additions and 50 deletions.
@@ -41,7 +41,7 @@
" PeftConfig,\n",
" PeftModel,\n",
" get_peft_model,\n",
" prepare_model_for_int8_training,\n",
" prepare_model_for_kbit_training,\n",
")\n",
"from transformers import (\n",
" AutoModelForCausalLM,\n",
6 changes: 3 additions & 3 deletions examples/int8_training/Finetune_flan_t5_large_bnb_peft.ipynb
@@ -327,7 +327,7 @@
"id": "4o3ePxrjEDzv"
},
"source": [
"Some pre-processing needs to be done before training such an int8 model using `peft`, therefore let's import an utiliy function `prepare_model_for_int8_training` that will: \n",
"Some pre-processing needs to be done before training such an int8 model using `peft`, therefore let's import an utiliy function `prepare_model_for_kbit_training` that will: \n",
"- Casts all the non `int8` modules to full precision (`fp32`) for stability\n",
"- Add a `forward_hook` to the input embedding layer to enable gradient computation of the input hidden states\n",
"- Enable gradient checkpointing for more memory-efficient training"
@@ -342,9 +342,9 @@
},
"outputs": [],
"source": [
"from peft import prepare_model_for_int8_training\n",
"from peft import prepare_model_for_kbit_training\n",
"\n",
"model = prepare_model_for_int8_training(model)"
"model = prepare_model_for_kbit_training(model)"
]
},
{
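For readers updating their own notebooks, a minimal self-contained sketch of the renamed helper in context. It assumes an 8-bit bitsandbytes load through `transformers`; the LoRA hyperparameters are illustrative and not part of this commit.

```python
import torch
from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

# The notebook above fine-tunes flan-t5-large; the quantization and LoRA
# settings here are only a sketch.
model = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-large",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# Casts the non-int8 modules to fp32, makes the input embeddings produce
# gradients, and enables gradient checkpointing, as listed in the cell above.
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16, lora_alpha=32, target_modules=["q", "v"], lora_dropout=0.05, task_type=TaskType.SEQ_2_SEQ_LM
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```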
6 changes: 3 additions & 3 deletions examples/int8_training/Finetune_opt_bnb_peft.ipynb
@@ -235,7 +235,7 @@
"source": [
"### Prepare model for training\n",
"\n",
"Some pre-processing needs to be done before training such an int8 model using `peft`, therefore let's import an utiliy function `prepare_model_for_int8_training` that will: \n",
"Some pre-processing needs to be done before training such an int8 model using `peft`, therefore let's import an utiliy function `prepare_model_for_kbit_training` that will: \n",
"- Casts all the non `int8` modules to full precision (`fp32`) for stability\n",
"- Add a `forward_hook` to the input embedding layer to enable gradient computation of the input hidden states\n",
"- Enable gradient checkpointing for more memory-efficient training"
@@ -249,9 +249,9 @@
},
"outputs": [],
"source": [
"from peft import prepare_model_for_int8_training\n",
"from peft import prepare_model_for_kbit_training\n",
"\n",
"model = prepare_model_for_int8_training(model)"
"model = prepare_model_for_kbit_training(model)"
]
},
{
4 changes: 2 additions & 2 deletions examples/int8_training/peft_adalora_whisper_large_training.py
@@ -555,9 +555,9 @@ def main():

# preparing peft model
if args.use_peft:
-from peft import prepare_model_for_int8_training
+from peft import prepare_model_for_kbit_training

-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

# as Whisper model uses Conv layer in encoder, checkpointing disables grad computation
# to avoid this, make the inputs trainable
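The comment in the hunk above about Whisper's Conv layers refers to the usual workaround of registering a forward hook so that, with gradient checkpointing enabled, the encoder inputs still require gradients. A sketch of that pattern, assuming a `WhisperForConditionalGeneration` model whose first encoder convolution is reachable as `model.model.encoder.conv1`:

```python
def make_inputs_require_grad(module, input, output):
    # Force the conv output to track gradients so that gradient checkpointing
    # can still backpropagate through the otherwise frozen encoder.
    output.requires_grad_(True)


model.model.encoder.conv1.register_forward_hook(make_inputs_require_grad)
```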
@@ -1154,9 +1154,9 @@
},
"outputs": [],
"source": [
"from peft import prepare_model_for_int8_training\n",
"from peft import prepare_model_for_kbit_training\n",
"\n",
"model = prepare_model_for_int8_training(model)"
"model = prepare_model_for_kbit_training(model)"
]
},
{
2 changes: 1 addition & 1 deletion setup.py
@@ -15,7 +15,7 @@
from setuptools import find_packages, setup


-VERSION = "0.9.1.dev0"
+VERSION = "0.10.0"

extras = {}
extras["quality"] = [
3 changes: 1 addition & 2 deletions src/peft/__init__.py
@@ -17,7 +17,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-__version__ = "0.9.1.dev0"
+__version__ = "0.10.0"

from .auto import (
AutoPeftModel,
@@ -80,7 +80,6 @@
TaskType,
bloom_model_postprocess_past_key_value,
get_peft_model_state_dict,
-prepare_model_for_int8_training,
prepare_model_for_kbit_training,
replace_lora_weights_loftq,
set_peft_model_state_dict,
4 changes: 2 additions & 2 deletions src/peft/tuners/lora/model.py
@@ -94,7 +94,7 @@ class LoraModel(BaseTuner):
```py
>>> import torch
>>> import transformers
->>> from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_int8_training
+>>> from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
>>> rank = ...
>>> target_modules = ["q_proj", "k_proj", "v_proj", "out_proj", "fc_in", "fc_out", "wte"]
@@ -121,7 +121,7 @@ class LoraModel(BaseTuner):
... torch_dtype=torch.float16,
... quantization_config=quantization_config,
... )
->>> model = prepare_model_for_int8_training(model)
+>>> model = prepare_model_for_kbit_training(model)
>>> lora_model = get_peft_model(model, config)
```
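The diff shows only part of the `LoraModel` docstring example; a self-contained sketch of the same flow with the renamed helper, where the checkpoint name is an illustrative stand-in chosen to match the GPT-J-style `target_modules` list:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "out_proj", "fc_in", "fc_out", "wte"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-j-6b",  # illustrative checkpoint with matching module names
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
)
model = prepare_model_for_kbit_training(model)
lora_model = get_peft_model(model, config)
```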
9 changes: 0 additions & 9 deletions src/peft/tuners/lora/tp_layer.py
@@ -85,15 +85,6 @@ def __init__(

self.is_target_conv_1d_layer = False

-    @property
-    def is_paralle_a(self):
-        # TODO: remove it in PEFT 0.10.0
-        # See https://github.com/huggingface/peft/pull/1439 for more details
-        warnings.warn(
-            "`is_paralle_a` is going to be deprecated in a future release. Please use `is_parallel_a`", FutureWarning
-        )
-        return self.is_parallel_a
-
def update_layer(
self,
adapter_name,
2 changes: 1 addition & 1 deletion src/peft/utils/__init__.py
@@ -32,7 +32,7 @@
INCLUDE_LINEAR_LAYERS_SHORTHAND,
_set_trainable,
bloom_model_postprocess_past_key_value,
-prepare_model_for_int8_training,
+prepare_model_for_kbit_training,
prepare_model_for_kbit_training,
shift_tokens_right,
transpose,
9 changes: 0 additions & 9 deletions src/peft/utils/other.py
@@ -142,15 +142,6 @@ def make_inputs_require_grad(module, input, output):
return model


-# For backward compatibility
-def prepare_model_for_int8_training(*args, **kwargs):
-    warnings.warn(
-        "prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.",
-        FutureWarning,
-    )
-    return prepare_model_for_kbit_training(*args, **kwargs)
-
-
# copied from transformers.models.bart.modeling_bart
def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int):
"""
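Because the shim above is deleted outright, downstream code that still imports the old name from `peft` now fails with an `ImportError` instead of a `FutureWarning`. The migration is a pure rename:

```python
# Before (deprecated with a FutureWarning, removed in v0.10.0):
# from peft import prepare_model_for_int8_training
# model = prepare_model_for_int8_training(model)

# After: same behavior, and the helper also covers 4-bit quantized models.
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)
```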
8 changes: 4 additions & 4 deletions tests/test_adaption_prompt.py
@@ -25,7 +25,7 @@
from peft.mapping import get_peft_model
from peft.peft_model import PeftModel
from peft.tuners.adaption_prompt import AdaptionPromptConfig
-from peft.utils.other import prepare_model_for_int8_training
+from peft.utils.other import prepare_model_for_kbit_training
from peft.utils.save_and_load import get_peft_model_state_dict
from tests.testing_common import PeftCommonTester

@@ -143,7 +143,7 @@ def test_prepare_for_training_mistral(self) -> None:

def test_prepare_for_int8_training(self) -> None:
model = LlamaForCausalLM(self._create_test_llama_config())
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)
model = model.to(self.torch_device)

for param in model.parameters():
@@ -168,9 +168,9 @@ def make_inputs_require_grad(module, input, output):
assert dummy_output.requires_grad

@unittest.skipIf(not is_mistral_available(), "Mistral is not available")
-def test_prepare_model_for_int8_training_mistral(self) -> None:
+def test_prepare_model_for_kbit_training_mistral(self) -> None:
model_mistral = MistralForCausalLM(self._create_test_mistral_config())
-model_mistral = prepare_model_for_int8_training(model_mistral)
+model_mistral = prepare_model_for_kbit_training(model_mistral)
model_mistral = model_mistral.to(self.torch_device)

for param in model_mistral.parameters():
11 changes: 5 additions & 6 deletions tests/test_gpu_examples.py
@@ -50,7 +50,6 @@
PeftModel,
TaskType,
get_peft_model,
-prepare_model_for_int8_training,
prepare_model_for_kbit_training,
replace_lora_weights_loftq,
)
@@ -162,7 +161,7 @@ def test_causal_lm_training(self):
)

tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

config = LoraConfig(
r=16,
@@ -473,7 +472,7 @@ def test_causal_lm_training_multi_gpu(self):
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

setattr(model, "model_parallel", True)
setattr(model, "is_parallelizable", True)
@@ -536,7 +535,7 @@ def test_seq2seq_lm_training_single_gpu(self):
assert set(model.hf_device_map.values()) == {0}

tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

config = LoraConfig(
r=16,
@@ -597,7 +596,7 @@ def test_seq2seq_lm_training_multi_gpu(self):
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

config = LoraConfig(
r=16,
@@ -688,7 +687,7 @@ def prepare_dataset(batch):
model.config.forced_decoder_ids = None
model.config.suppress_tokens = []

-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

# as Whisper model uses Conv layer in encoder, checkpointing disables grad computation
# to avoid this, make the inputs trainable
4 changes: 2 additions & 2 deletions tests/test_multitask_prompt_tuning.py
@@ -25,7 +25,7 @@
from peft.mapping import get_peft_model
from peft.peft_model import PeftModel
from peft.tuners.multitask_prompt_tuning import MultitaskPromptTuningConfig, MultitaskPromptTuningInit
-from peft.utils.other import WEIGHTS_NAME, prepare_model_for_int8_training
+from peft.utils.other import WEIGHTS_NAME, prepare_model_for_kbit_training
from peft.utils.save_and_load import get_peft_model_state_dict
from tests.testing_common import PeftCommonTester

@@ -92,7 +92,7 @@ def test_prepare_for_training(self) -> None:

def test_prepare_for_int8_training(self) -> None:
model = LlamaForCausalLM(self._create_test_llama_config())
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)
model = model.to(self.torch_device)

for param in model.parameters():
6 changes: 3 additions & 3 deletions tests/testing_common.py
@@ -39,7 +39,7 @@
PromptTuningConfig,
get_peft_model,
get_peft_model_state_dict,
-prepare_model_for_int8_training,
+prepare_model_for_kbit_training,
)
from peft.tuners.lora import LoraLayer
from peft.utils import _get_submodules, infer_device
@@ -237,9 +237,9 @@ def _test_prepare_for_training(self, model_id, config_cls, config_kwargs):

assert not dummy_output.requires_grad

-# load with `prepare_model_for_int8_training`
+# load with `prepare_model_for_kbit_training`
model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
-model = prepare_model_for_int8_training(model)
+model = prepare_model_for_kbit_training(model)

for param in model.parameters():
assert not param.requires_grad
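The test above relies on `prepare_model_for_kbit_training` freezing every base-model parameter before adapters are attached. A minimal sketch of that check, using an illustrative small checkpoint:

```python
from transformers import AutoModelForCausalLM
from peft import prepare_model_for_kbit_training

model = AutoModelForCausalLM.from_pretrained("gpt2")  # illustrative small model
model = prepare_model_for_kbit_training(model)

# The helper disables gradients on all base parameters; only PEFT adapters
# added afterwards end up trainable.
assert all(not p.requires_grad for p in model.parameters())
```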
