(** [Values.RFTConfig]: configuration settings for reinforcement fine-tuning (RFT), including grader configuration and training hyperparameters. *)
type nonrec t = {
  graderConfig : GraderConfig.t option;
      (** Configuration for the grader that evaluates model responses and
          provides reward signals during RFT training. *)
  hyperParameters : RFTHyperParameters.t option;
      (** Hyperparameters that control the reinforcement fine-tuning training
          process, including learning rate, batch size, and epoch count. *)
}
(** Record with two optional fields: the grader configuration and the
    training hyperparameters. *)

val to_value :
  t ->
  [> `Structure of
     (string
     * [> `Structure of
          (string
          * [> `Enum of string
            | `Float of RFTLearningRate.t
            | `Integer of EpochCount.t
            | `Structure of (string * [> `String of LambdaArn.t ]) list ])
          list ])
     list ]
(** [to_value t] maps a value of type {!t} to a [`Structure] node: a list of
    [(name, value)] pairs whose values are themselves [`Structure] nodes with
    [`Enum], [`Float], [`Integer] or nested [`Structure] leaves.

    NOTE(review): presumably this is the generic value tree used for
    serialization, and fields set to [None] are presumably omitted; neither is
    visible from the signature, so confirm against the implementation. *)