# ocr_cpp_client.py (2.8 KB)
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # pylint: disable=doc-string-missing
  15. from paddle_serving_client import Client
  16. import sys
  17. import numpy as np
  18. import base64
  19. import os
  20. import cv2
  21. from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
  22. from paddle_serving_app.reader import Div, Normalize, Transpose
  23. from ocr_reader import OCRReader
  24. import codecs
  25. client = Client()
  26. # TODO:load_client need to load more than one client model.
  27. # this need to figure out some details.
  28. client.load_client_config(sys.argv[1:])
  29. client.connect(["127.0.0.1:8181"])
  30. import paddle
  31. test_img_dir = "../../doc/imgs/1.jpg"
  32. ocr_reader = OCRReader(char_dict_path="../../ppocr/utils/ppocr_keys_v1.txt")
  33. def cv2_to_base64(image):
  34. return base64.b64encode(image).decode(
  35. 'utf8') #data.tostring()).decode('utf8')
  36. def _check_image_file(path):
  37. img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif'}
  38. return any([path.lower().endswith(e) for e in img_end])
  39. test_img_list = []
  40. if os.path.isfile(test_img_dir) and _check_image_file(test_img_dir):
  41. test_img_list.append(test_img_dir)
  42. elif os.path.isdir(test_img_dir):
  43. for single_file in os.listdir(test_img_dir):
  44. file_path = os.path.join(test_img_dir, single_file)
  45. if os.path.isfile(file_path) and _check_image_file(file_path):
  46. test_img_list.append(file_path)
  47. if len(test_img_list) == 0:
  48. raise Exception("not found any img file in {}".format(test_img_dir))
  49. for img_file in test_img_list:
  50. with open(img_file, 'rb') as file:
  51. image_data = file.read()
  52. image = cv2_to_base64(image_data)
  53. res_list = []
  54. fetch_map = client.predict(feed={"x": image}, fetch=[], batch=True)
  55. if fetch_map is None:
  56. print('no results')
  57. else:
  58. if "text" in fetch_map:
  59. for x in fetch_map["text"]:
  60. x = codecs.encode(x)
  61. words = base64.b64decode(x).decode('utf-8')
  62. res_list.append(words)
  63. else:
  64. try:
  65. one_batch_res = ocr_reader.postprocess(
  66. fetch_map, with_score=True)
  67. for res in one_batch_res:
  68. res_list.append(res[0])
  69. except:
  70. print('no results')
  71. res = {"res": str(res_list)}
  72. print(res)