
Commit 26fbacb

Remove packing and default batch size from FT cli (#60)
1 parent 0e21703 commit 26fbacb

File tree (6 files changed: +11 -58 lines changed)

examples/embeddings/Code_search.ipynb
examples/finetuning/finetuning-classification.ipynb
examples/finetuning/olympics-3-train-qa.ipynb
openai/cli.py
openai/validators.py
openai/version.py

6 files changed

+11
-58
lines changed

examples/embeddings/Code_search.ipynb

Lines changed: 1 addition & 1 deletion
@@ -260,7 +260,7 @@
 "def format_inferrer_validator(df):\n",
 " \"\"\"\n",
 " This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification.\n",
-" It will also suggest to use ada, --no_packing and explain train/validation split benefits.\n",
+" It will also suggest to use ada and explain train/validation split benefits.\n",
 " \"\"\"\n",
 " ft_type = infer_task_type(df)\n",
 " immediate_msg = None\n",

examples/finetuning/finetuning-classification.ipynb

Lines changed: 5 additions & 5 deletions
@@ -257,7 +257,7 @@
 "\n",
 "- Your file contains 1197 prompt-completion pairs\n",
 "- Based on your data it seems like you're trying to fine-tune a model for classification\n",
-"- For classification, we recommend you try one of the faster and cheaper models, such as `ada`. You should also set the `--no_packing` parameter when fine-tuning\n",
+"- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n",
 "- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training\n",
 "- There are 11 examples that are very long. These are rows: [134, 200, 281, 320, 404, 595, 704, 838, 1113, 1139, 1174]\n",
 "For conditional generation, and for classification the examples shouldn't be longer than 2048 tokens.\n",
@@ -277,7 +277,7 @@
 "Feel free to take a look!\n",
 "\n",
 "Now use that file when fine-tuning:\n",
-"> openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --no_packing --compute_classification_metrics --classification_positive_class \" baseball\"\n",
+"> openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --compute_classification_metrics --classification_positive_class \" baseball\"\n",
 "\n",
 "After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `\\n\\n###\\n\\n` for the model to start generating completions, rather than continuing with the prompt.\n",
 "Once your model starts training, it'll approximately take 30.8 minutes to train a `curie` model, and less for `ada` and `babbage`. Queue will approximately take half an hour per job ahead of you.\n"
@@ -301,7 +301,7 @@
 "cell_type": "markdown",
 "source": [
 "## Fine-tuning\n",
-"The tool suggests we run the following command to train the dataset. Since this is a classification task, we would like to know what the generalization performance on the provided validation set is for our classification use case. The tool suggests to add `--compute_classification_metrics --classification_positive_class \" baseball\"` in order to compute the classification metrics. Classification performs better with a hyperparameter `--no_packing`.\n",
+"The tool suggests we run the following command to train the dataset. Since this is a classification task, we would like to know what the generalization performance on the provided validation set is for our classification use case. The tool suggests to add `--compute_classification_metrics --classification_positive_class \" baseball\"` in order to compute the classification metrics.\n",
 "\n",
 "We can simply copy the suggested command from the CLI tool. We specifically add `-m ada` to fine-tune a cheaper and faster ada model, which is usually comperable in performance to slower and more expensive models on classification use cases. "
 ],
@@ -311,7 +311,7 @@
 "cell_type": "code",
 "execution_count": 9,
 "source": [
-"!openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --no_packing --compute_classification_metrics --classification_positive_class \" baseball\" -m ada"
+"!openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --compute_classification_metrics --classification_positive_class \" baseball\" -m ada"
 ],
 "outputs": [
 {
@@ -737,4 +737,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}

examples/finetuning/olympics-3-train-qa.ipynb

Lines changed: 2 additions & 2 deletions
@@ -373,7 +373,7 @@
 }
 ],
 "source": [
-"!openai api fine_tunes.create -t \"olympics-data/discriminator_train.jsonl\" -v \"olympics-data/discriminator_test.jsonl\" --no_packing --batch_size 16 --compute_classification_metrics --classification_positive_class \" yes\" --model ada"
+"!openai api fine_tunes.create -t \"olympics-data/discriminator_train.jsonl\" -v \"olympics-data/discriminator_test.jsonl\" --batch_size 16 --compute_classification_metrics --classification_positive_class \" yes\" --model ada"
 ]
 },
 {
@@ -391,7 +391,7 @@
 }
 ],
 "source": [
-"!openai api fine_tunes.create -t \"olympics-data/qa_train.jsonl\" -v \"olympics-data/qa_test.jsonl\" --no_packing --batch_size 16"
+"!openai api fine_tunes.create -t \"olympics-data/qa_train.jsonl\" -v \"olympics-data/qa_test.jsonl\" --batch_size 16"
 ]
 },
 {

openai/cli.py

Lines changed: 0 additions & 18 deletions
@@ -397,7 +397,6 @@ def create(cls, args):
 "batch_size",
 "learning_rate_multiplier",
 "prompt_loss_weight",
-"use_packing",
 "compute_classification_metrics",
 "classification_n_classes",
 "classification_positive_class",
@@ -891,23 +890,6 @@ def help(args):
     "learning rate is determined by the original learning rate used for "
     "pretraining multiplied by this value.",
 )
-sub.add_argument(
-    "--use_packing",
-    action="store_true",
-    dest="use_packing",
-    help="On classification tasks, we recommend not setting this flag. "
-    "On all other tasks, we recommend setting it. "
-    "When set, we pack as many prompt-completion pairs as possible into each "
-    "training example. This greatly increases the speed of a fine-tuning job, "
-    "often without negatively affecting model performance.",
-)
-sub.add_argument(
-    "--no_packing",
-    action="store_false",
-    dest="use_packing",
-    help="Disables the packing flag (see --use_packing for description).",
-)
-sub.set_defaults(use_packing=None)
 sub.add_argument(
     "--prompt_loss_weight",
     type=float,
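With the packing flags and their default gone, the create path appears to forward only the hyperparameters that remain in the list shown in the first hunk. A minimal sketch of that pass-through pattern, assuming a generic argparse namespace rather than the literal openai/cli.py code:

import argparse  # only to illustrate the kind of namespace the CLI hands over

# Hyperparameter names still forwarded after this commit; the surrounding
# helper function is an illustrative assumption, not code from the repo.
FORWARDED_PARAMS = [
    "batch_size",
    "learning_rate_multiplier",
    "prompt_loss_weight",
    "compute_classification_metrics",
    "classification_n_classes",
    "classification_positive_class",
]

def collect_create_kwargs(args: argparse.Namespace) -> dict:
    # Forward only the values the user explicitly set; "use_packing" is no
    # longer in the list, so no packing setting is ever sent to the API.
    return {
        name: getattr(args, name)
        for name in FORWARDED_PARAMS
        if getattr(args, name) is not None
    }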

openai/validators.py

Lines changed: 2 additions & 31 deletions
@@ -2,7 +2,6 @@
 import sys
 from typing import Any, Callable, NamedTuple, Optional
 
-import numpy as np
 import pandas as pd
@@ -535,12 +534,12 @@ def read_any_format(fname, fields=["prompt", "completion"]):
 def format_inferrer_validator(df):
     """
     This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification.
-    It will also suggest to use ada, --no_packing and explain train/validation split benefits.
+    It will also suggest to use ada and explain train/validation split benefits.
     """
     ft_type = infer_task_type(df)
     immediate_msg = None
     if ft_type == "classification":
-        immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`. You should also set the `--no_packing` parameter when fine-tuning\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training"
+        immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training"
     return Remediation(name="num_examples", immediate_msg=immediate_msg)
@@ -634,27 +633,6 @@ def get_classification_hyperparams(df):
     return n_classes, pos_class
 
 
-def get_batch_size_suggestion(df, no_packing):
-    """
-    Suggest the batch size based on the number of examples after packing optionally is applied.
-    """
-    n_examples, n_characters = (
-        len(df),
-        df.completion.str.len().sum() + df.prompt.str.len().sum(),
-    )
-    BATCH_SIZE_TO_N_EXAMPLES_RATIO = 0.002
-    BATCH_SIZE_TO_N_CHARACTERS_RATIO = BATCH_SIZE_TO_N_EXAMPLES_RATIO / 10_000
-
-    if no_packing:
-        batch_size = BATCH_SIZE_TO_N_EXAMPLES_RATIO * n_examples
-    else:
-        batch_size = BATCH_SIZE_TO_N_CHARACTERS_RATIO * n_characters
-
-    batch_size = max(1, int(2 ** np.ceil(np.log2(batch_size))))
-    batch_size_suggestion = f" --batch_size {batch_size}"
-    return batch_size_suggestion
-
-
 def write_out_file(df, fname, any_remediations, auto_accept):
     """
     This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file.
@@ -670,14 +648,7 @@ def write_out_file(df, fname, any_remediations, auto_accept):
     if accept_suggestion(input_text, auto_accept):
         split = True
 
-    no_packing = ft_format == "classification" or (
-        ft_format == "conditional generation" and len(df) < 1000
-    )
     additional_params = ""
-    if no_packing:
-        additional_params = " --no_packing"
-    additional_params += get_batch_size_suggestion(df, no_packing)
-
     common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n")
     common_completion_suffix_new_line_handled = common_completion_suffix.replace(
         "\n", "\\n"

openai/version.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-VERSION = "0.11.4"
+VERSION = "0.11.5"
