vqa_token_layoutlm_loss.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from paddle import nn
  18. from ppocr.losses.basic_loss import DMLLoss
  19. class VQASerTokenLayoutLMLoss(nn.Layer):
  20. def __init__(self, num_classes, key=None):
  21. super().__init__()
  22. self.loss_class = nn.CrossEntropyLoss()
  23. self.num_classes = num_classes
  24. self.ignore_index = self.loss_class.ignore_index
  25. self.key = key
  26. def forward(self, predicts, batch):
  27. if isinstance(predicts, dict) and self.key is not None:
  28. predicts = predicts[self.key]
  29. labels = batch[5]
  30. attention_mask = batch[2]
  31. if attention_mask is not None:
  32. active_loss = attention_mask.reshape([-1, ]) == 1
  33. active_output = predicts.reshape(
  34. [-1, self.num_classes])[active_loss]
  35. active_label = labels.reshape([-1, ])[active_loss]
  36. loss = self.loss_class(active_output, active_label)
  37. else:
  38. loss = self.loss_class(
  39. predicts.reshape([-1, self.num_classes]),
  40. labels.reshape([-1, ]))
  41. return {'loss': loss}