det_ct_loss.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. """
  15. This code is refer from:
  16. https://github.com/shengtao96/CentripetalText/tree/main/models/loss
  17. """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
import numpy as np

def ohem_single(score, gt_text, training_mask):
    # Online hard example mining: keep every positive pixel and at most
    # 3x as many of the highest-scoring negative pixels.
    pos_num = int(paddle.sum(gt_text > 0.5)) - int(
        paddle.sum((gt_text > 0.5) & (training_mask <= 0.5)))

    if pos_num == 0:
        # selected_mask = gt_text.copy() * 0  # may not be good
        selected_mask = training_mask
        selected_mask = paddle.cast(
            selected_mask.reshape(
                (1, selected_mask.shape[0], selected_mask.shape[1])),
            "float32")
        return selected_mask

    neg_num = int(paddle.sum((gt_text <= 0.5) & (training_mask > 0.5)))
    neg_num = int(min(pos_num * 3, neg_num))

    if neg_num == 0:
        selected_mask = training_mask
        selected_mask = paddle.cast(
            selected_mask.reshape(
                (1, selected_mask.shape[0], selected_mask.shape[1])),
            "float32")
        return selected_mask

    # Hard negatives: keep negatives scoring at or above the neg_num-th
    # highest negative score.
    neg_score = score[(gt_text <= 0.5) & (training_mask > 0.5)]
    neg_score_sorted = paddle.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]

    selected_mask = ((score >= threshold) |
                     (gt_text > 0.5)) & (training_mask > 0.5)
    selected_mask = paddle.cast(
        selected_mask.reshape(
            (1, selected_mask.shape[0], selected_mask.shape[1])),
        "float32")
    return selected_mask

def ohem_batch(scores, gt_texts, training_masks):
    # Run OHEM independently on each sample in the batch.
    selected_masks = []
    for i in range(scores.shape[0]):
        selected_masks.append(
            ohem_single(scores[i, :, :], gt_texts[i, :, :],
                        training_masks[i, :, :]))

    selected_masks = paddle.cast(paddle.concat(selected_masks, 0), "float32")
    return selected_masks
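
# Usage sketch (shapes are assumptions for illustration, not part of the
# original file): `scores` holds the predicted kernel map values and the
# ground-truth / mask tensors are binary maps of the same spatial size.
#
#     scores = paddle.rand((2, 640, 640))
#     gt_texts = paddle.cast(scores > 0.5, "float32")
#     training_masks = paddle.ones((2, 640, 640))
#     selected = ohem_batch(scores, gt_texts, training_masks)  # (2, 640, 640)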

def iou_single(a, b, mask, n_class):
    EPS = 1e-6
    valid = mask == 1
    a = a[valid]
    b = b[valid]
    miou = []

    # iou of each class
    for i in range(n_class):
        inter = paddle.cast(((a == i) & (b == i)), "float32")
        union = paddle.cast(((a == i) | (b == i)), "float32")
        miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
    miou = sum(miou) / len(miou)
    return miou

def iou(a, b, mask, n_class=2, reduce=True):
    batch_size = a.shape[0]

    a = a.reshape((batch_size, -1))
    b = b.reshape((batch_size, -1))
    mask = mask.reshape((batch_size, -1))

    iou = paddle.zeros((batch_size, ), dtype="float32")
    for i in range(batch_size):
        iou[i] = iou_single(a[i], b[i], mask[i], n_class)

    if reduce:
        iou = paddle.mean(iou)
    return iou

class DiceLoss(nn.Layer):
    """Dice loss over masked pixels:
    loss = 1 - 2 * sum(x * y) / (sum(x * x) + sum(y * y)),
    with a small smoothing constant (0.001) in the denominator terms.
    """

    def __init__(self, loss_weight=1.0):
        super(DiceLoss, self).__init__()
        self.loss_weight = loss_weight

    def forward(self, input, target, mask, reduce=True):
        batch_size = input.shape[0]
        input = F.sigmoid(input)  # scale to 0-1

        input = input.reshape((batch_size, -1))
        target = paddle.cast(target.reshape((batch_size, -1)), "float32")
        mask = paddle.cast(mask.reshape((batch_size, -1)), "float32")

        input = input * mask
        target = target * mask

        a = paddle.sum(input * target, axis=1)
        b = paddle.sum(input * input, axis=1) + 0.001
        c = paddle.sum(target * target, axis=1) + 0.001
        d = (2 * a) / (b + c)
        loss = 1 - d

        loss = self.loss_weight * loss

        if reduce:
            loss = paddle.mean(loss)
        return loss
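
# Usage sketch (shapes are assumptions for illustration): `input` holds raw
# logits; `target` and `mask` are binary maps of the same size.
#
#     dice = DiceLoss(loss_weight=1.0)
#     logits = paddle.randn((2, 640, 640))
#     target = paddle.cast(paddle.rand((2, 640, 640)) > 0.5, "float32")
#     mask = paddle.ones((2, 640, 640))
#     loss = dice(logits, target, mask)  # scalar when reduce=True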

class SmoothL1Loss(nn.Layer):
    def __init__(self, beta=1.0, loss_weight=1.0):
        super(SmoothL1Loss, self).__init__()
        self.beta = beta
        self.loss_weight = loss_weight

        # Precompute a flattened 640x640 grid of (x, y) pixel coordinates,
        # stored as a frozen parameter.
        np_coord = np.zeros(shape=[640, 640, 2], dtype=np.int64)
        for i in range(640):
            for j in range(640):
                np_coord[i, j, 0] = j
                np_coord[i, j, 1] = i
        np_coord = np_coord.reshape((-1, 2))

        self.coord = self.create_parameter(
            shape=[640 * 640, 2],
            dtype="int32",  # NOTE: "int64" is not supported before paddle 2.3.1
            default_initializer=nn.initializer.Assign(value=np_coord))
        self.coord.stop_gradient = True

    def forward_single(self, input, target, mask, beta=1.0, eps=1e-6):
        batch_size = input.shape[0]

        diff = paddle.abs(input - target) * mask.unsqueeze(1)
        loss = paddle.where(diff < beta, 0.5 * diff * diff / beta,
                            diff - 0.5 * beta)
        loss = paddle.cast(loss.reshape((batch_size, -1)), "float32")
        mask = paddle.cast(mask.reshape((batch_size, -1)), "float32")
        loss = paddle.sum(loss, axis=-1)
        loss = loss / (mask.sum(axis=-1) + eps)

        return loss

    def select_single(self, distance, gt_instance, gt_kernel_instance,
                      training_mask):
        with paddle.no_grad():
            # paddle 2.3.1: paddle.slice does not support
            # distance[:, self.coord[:, 1], self.coord[:, 0]]
            select_distance_list = []
            for i in range(2):
                tmp1 = distance[i, :]
                tmp2 = tmp1[self.coord[:, 1], self.coord[:, 0]]
                select_distance_list.append(tmp2.unsqueeze(0))
            select_distance = paddle.concat(select_distance_list, axis=0)

            # Follow the predicted centripetal offset from each pixel; a
            # pixel is selected for the loss when the offset does not land
            # in the kernel of its own text instance.
            off_points = paddle.cast(
                self.coord, "float32") + 10 * select_distance.transpose((1, 0))
            off_points = paddle.cast(off_points, "int64")
            off_points = paddle.clip(off_points, 0, distance.shape[-1] - 1)

            selected_mask = (
                gt_instance[self.coord[:, 1], self.coord[:, 0]] !=
                gt_kernel_instance[off_points[:, 1], off_points[:, 0]])
            selected_mask = paddle.cast(
                selected_mask.reshape((1, -1, distance.shape[-1])), "int64")
            selected_training_mask = selected_mask * training_mask

            return selected_training_mask

    def forward(self,
                distances,
                gt_instances,
                gt_kernel_instances,
                training_masks,
                gt_distances,
                reduce=True):
        selected_training_masks = []
        for i in range(distances.shape[0]):
            selected_training_masks.append(
                self.select_single(distances[i, :, :, :],
                                   gt_instances[i, :, :],
                                   gt_kernel_instances[i, :, :],
                                   training_masks[i, :, :]))
        selected_training_masks = paddle.cast(
            paddle.concat(selected_training_masks, 0), "float32")

        loss = self.forward_single(distances, gt_distances,
                                   selected_training_masks, self.beta)
        loss = self.loss_weight * loss

        with paddle.no_grad():
            # iou_text is the fraction of valid pixels whose predicted
            # offsets already point into the correct kernel instance.
            batch_size = distances.shape[0]
            false_num = selected_training_masks.reshape((batch_size, -1))
            false_num = false_num.sum(axis=-1)
            total_num = paddle.cast(
                training_masks.reshape((batch_size, -1)), "float32")
            total_num = total_num.sum(axis=-1)
            iou_text = (total_num - false_num) / (total_num + 1e-6)

        if reduce:
            loss = paddle.mean(loss)
        return loss, iou_text
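
# Usage sketch for SmoothL1Loss (shapes are assumptions, mirroring CTLoss
# below): `distances` holds (N, 2, 640, 640) predicted centripetal offsets,
# the instance maps are (N, 640, 640) integer label maps, and `gt_distances`
# has the same shape as `distances`.
#
#     loc_loss = SmoothL1Loss(beta=0.1, loss_weight=0.05)
#     loss_loc, iou_text = loc_loss(distances, gt_instances,
#                                   gt_kernel_instances,
#                                   training_mask_distances, gt_distances)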

class CTLoss(nn.Layer):
    def __init__(self):
        super(CTLoss, self).__init__()
        self.kernel_loss = DiceLoss()
        self.loc_loss = SmoothL1Loss(beta=0.1, loss_weight=0.05)

    def forward(self, preds, batch):
        imgs = batch[0]
        out = preds['maps']
        gt_kernels, training_masks, gt_instances, gt_kernel_instances, \
            training_mask_distances, gt_distances = batch[1:]

        kernels = out[:, 0, :, :]
        distances = out[:, 1:, :, :]

        # kernel loss
        selected_masks = ohem_batch(kernels, gt_kernels, training_masks)

        loss_kernel = self.kernel_loss(
            kernels, gt_kernels, selected_masks, reduce=False)

        iou_kernel = iou(paddle.cast((kernels > 0), "int64"),
                         gt_kernels,
                         training_masks,
                         reduce=False)
        losses = dict(loss_kernels=loss_kernel)

        # loc loss
        loss_loc, iou_text = self.loc_loss(
            distances,
            gt_instances,
            gt_kernel_instances,
            training_mask_distances,
            gt_distances,
            reduce=False)
        losses.update(dict(loss_loc=loss_loc))

        # Only the combined loss is returned; the per-term entries above and
        # the IoU statistics (iou_kernel, iou_text) are computed but unused.
        loss_all = loss_kernel + loss_loc
        losses = {'loss': loss_all}

        return losses
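
if __name__ == "__main__":
    # Minimal smoke test, a sketch under assumed shapes (not part of the
    # upstream file): the coordinate grid in SmoothL1Loss fixes the map size
    # at 640x640, and preds['maps'] is assumed to stack one kernel channel
    # with two offset channels.
    paddle.seed(0)
    b, h, w = 1, 640, 640
    preds = {"maps": paddle.randn((b, 3, h, w))}
    batch = [
        paddle.randn((b, 3, h, w)),  # images (unused by the loss)
        paddle.cast(paddle.rand((b, h, w)) > 0.5, "float32"),  # gt_kernels
        paddle.ones((b, h, w)),  # training_masks
        paddle.cast(paddle.rand((b, h, w)) * 3, "int64"),  # gt_instances
        paddle.cast(paddle.rand((b, h, w)) * 3, "int64"),  # gt_kernel_instances
        paddle.ones((b, h, w), dtype="int64"),  # training_mask_distances
        paddle.randn((b, 2, h, w)),  # gt_distances
    ]
    losses = CTLoss()(preds, batch)
    print({k: float(v) for k, v in losses.items()})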