Module Values.RFTHyperParameters

Hyperparameters for controlling the reinforcement fine-tuning training process, including learning settings and evaluation intervals.

type nonrec t = {
  1. epochCount : EpochCount.t option;
    (*

    Number of training epochs to run during reinforcement fine-tuning. Higher values may improve performance but increase training time.

    *)
  2. batchSize : RFTBatchSize.t option;
    (*

    Number of training samples processed in each batch during reinforcement fine-tuning (RFT) training. Larger batches may improve training stability.

    *)
  3. learningRate : RFTLearningRate.t option;
    (*

    Learning rate for reinforcement fine-tuning. Controls how quickly the model adapts to reward signals.

    *)
  4. maxPromptLength : RFTMaxPromptLength.t option;
    (*

    Maximum length of input prompts during RFT training, measured in tokens. Longer prompts allow more context but increase memory usage and training time.

    *)
  5. trainingSamplePerPrompt : RFTTrainingSamplePerPrompt.t option;
    (*

    Number of response samples generated per prompt during RFT training. More samples provide better reward signal estimation.

    *)
  6. inferenceMaxTokens : RFTInferenceMaxTokens.t option;
    (*

    Maximum number of tokens the model can generate in response to each prompt during RFT training.

    *)
  7. reasoningEffort : ReasoningEffort.t option;
    (*

    Level of reasoning effort applied during RFT training. Higher values may improve response quality but increase training time.

    *)
  8. evalInterval : RFTEvalInterval.t option;
    (*

    Interval between evaluation runs during RFT training, measured in training steps. More frequent evaluation provides better monitoring.

    *)
}
val make : ?epochCount:??? -> ?batchSize:??? -> ?learningRate:??? -> ?maxPromptLength:??? -> ?trainingSamplePerPrompt:??? -> ?inferenceMaxTokens:??? -> ?reasoningEffort:??? -> ?evalInterval:??? -> unit -> t
val to_value : t -> [> `Structure of (string * [> `Enum of string | `Float of RFTLearningRate.t | `Integer of EpochCount.t ]) list ]
val to_query : t -> Awso.Client.Query.t
val of_xml : Awso.Xml.t -> t
val of_string : string -> t
val of_json : Yojson.Safe.t -> t
val to_json : t -> Yojson.Safe.t