# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

# Public API exported by this module.
__all__ = ['RDropLoss']

class RDropLoss(nn.Layer):
    """
    R-Drop Loss implementation.
    Original implementation please refer to this code: https://github.com/dropreg/R-Drop

    Args:
        reduction(str, optional):
            Indicate how to average the loss, the candidates are ``'none'``,``'batchmean'``,``'mean'``,``'sum'``.
            If `reduction` is ``'mean'``, the reduced mean loss is returned;
            If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
            If `reduction` is ``'sum'``, the reduced sum loss is returned;
            If `reduction` is ``'none'``, no reduction will be applied.
            Defaults to ``'none'``.
    """

    def __init__(self, reduction='none'):
        super(RDropLoss, self).__init__()
        # Validate eagerly so a typo fails at construction time, not mid-training.
        if reduction not in ['sum', 'mean', 'none', 'batchmean']:
            raise ValueError(
                "'reduction' in 'RDropLoss' should be 'sum', 'mean' 'batchmean', or 'none', "
                "but received {}.".format(reduction))
        self.reduction = reduction

    def forward(self, p, q, pad_mask=None):
        """
        Compute the symmetric KL-divergence between two forward passes.

        Args:
            p(Tensor): the first forward logits of training examples.
            q(Tensor): the second forward logits of training examples.
            pad_mask(Tensor, optional): boolean mask selecting the loss entries
                to keep (e.g. non-padding token positions in sequence tasks).
                Defaults to ``None``, meaning no masking is applied.

        Returns:
            Tensor: Returns tensor `loss`, the rdrop loss of p and q.
        """
        # KL(p || q): p's log-probabilities against q's probabilities.
        p_loss = F.kl_div(
            F.log_softmax(p, axis=-1),
            F.softmax(q, axis=-1),
            reduction=self.reduction)
        # Symmetric term, KL(q || p).
        q_loss = F.kl_div(
            F.log_softmax(q, axis=-1),
            F.softmax(p, axis=-1),
            reduction=self.reduction)

        # Optionally drop masked-out (e.g. padding) positions before combining.
        if pad_mask is not None:
            p_loss = paddle.masked_select(p_loss, pad_mask)
            q_loss = paddle.masked_select(q_loss, pad_mask)

        # Average of the two KL directions is the symmetric R-Drop loss.
        loss = (p_loss + q_loss) / 2
        return loss