|
@@ -220,26 +220,26 @@ def ocr_text_extract() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
-def get_file_from_url(url_file: str, target_path: str):
|
|
|
- send_headers = {
|
|
|
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.8"
|
|
|
- }
|
|
|
-
|
|
|
- req = requests.get(url_file, headers=send_headers, timeout=10)
|
|
|
- bytes_io = io.BytesIO(req.content)
|
|
|
-
|
|
|
- (_, file_name) = os.path.split(url_file)
|
|
|
- if not os.path.exists(target_path):
|
|
|
- os.mkdir(target_path)
|
|
|
- target_path = target_path + file_name
|
|
|
- with open(target_path, 'wb') as file:
|
|
|
- file.write(bytes_io.getvalue())
|
|
|
-
|
|
|
- time.sleep(0.1)
|
|
|
- return target_path
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|