|
@@ -220,26 +220,26 @@ def ocr_text_extract() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
-def get_file_from_url(url_file: str, target_path: str):
|
|
|
- send_headers = {
|
|
|
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.8"
|
|
|
- }
|
|
|
-
|
|
|
- req = requests.get(url_file, headers=send_headers, timeout=10)
|
|
|
- bytes_io = io.BytesIO(req.content)
|
|
|
-
|
|
|
- (_, file_name) = os.path.split(url_file)
|
|
|
- if not os.path.exists(target_path):
|
|
|
- os.mkdir(target_path)
|
|
|
- target_path = target_path + file_name
|
|
|
- with open(target_path, 'wb') as file:
|
|
|
- file.write(bytes_io.getvalue())
|
|
|
-
|
|
|
- time.sleep(0.1)
|
|
|
- return target_path
|
|
|
+# def get_file_from_url(url_file: str, target_path: str):
|
|
|
+# send_headers = {
|
|
|
+# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
|
|
|
+# "Connection": "keep-alive",
|
|
|
+# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
|
|
+# "Accept-Language": "zh-CN,zh;q=0.8"
|
|
|
+# }
|
|
|
+
|
|
|
+# req = requests.get(url_file, headers=send_headers, timeout=10)
|
|
|
+# bytes_io = io.BytesIO(req.content)
|
|
|
+
|
|
|
+# (_, file_name) = os.path.split(url_file)
|
|
|
+# if not os.path.exists(target_path):
|
|
|
+# os.mkdir(target_path)
|
|
|
+# target_path = target_path + file_name
|
|
|
+# with open(target_path, 'wb') as file:
|
|
|
+# file.write(bytes_io.getvalue())
|
|
|
+
|
|
|
+# time.sleep(0.1)
|
|
|
+# return target_path
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
# port = os.environ.get('FLASK_PORT', '')
|