2 years ago · 207fd23d15
--- a/data_verification.py
+++ b/data_verification.py
@@ -1,7 +1,7 @@
 
				 import uvicorn

			
 
				 import warnings

			
 
				 import os

			
 
				-from fastapi import FastAPI, UploadFile, File

			
 
				+from fastapi import FastAPI, UploadFile, File, BackgroundTasks, routing

			
 
				 from openpyxl import load_workbook

			
 
				 from openpyxl.utils.cell import coordinate_from_string

			
 
				 from openpyxl.comments import Comment

			
@@ -10,6 +10,12 @@ from fastapi.middleware.cors import CORSMiddleware
 
				 from datetime import datetime

			
 
				 from fastapi.responses import FileResponse

			
 
				 from fastapi.staticfiles import StaticFiles

			
 
				+import asyncio

			
 
				+from concurrent.futures.process import ProcessPoolExecutor

			
 
				+from fastapi.responses import StreamingResponse

			
 
				+import shutil

			
 
				+import uuid

			
 
				+import time

			
 
				 

			
 
				 warnings.filterwarnings("ignore")

			
 
				 

			
@@ -24,18 +30,211 @@ app.add_middleware(
 
				     allow_headers=["*"],

			
 
				 )

			
 
				 

			
 
				-shared_dir = 'cache'

			
 
				-app.mount(f"/{shared_dir}", StaticFiles(directory="download_cache"), name={shared_dir})

			
 
				# Directory holding the analysed workbooks produced by the upload endpoint;
				# it is served statically under the same URL prefix.
				shared_dir = "download_cache"
				# The route name must be a plain string; `{shared_dir}` would build a set.
				app.mount(f"/{shared_dir}", StaticFiles(directory=shared_dir), name=shared_dir)

				# Directory holding the finished diff workbooks, also served statically.
				diff_dir = "diff_cache"
				app.mount(f"/{diff_dir}", StaticFiles(directory=diff_dir), name=diff_dir)

				# Scratch directory (with trailing slash) where uploads are stored while
				# the diff is being computed.
				cur_cache_path = "cur_cache/"

			
 
				 

			
 
				-@app.post("/uploadfile")

			
 
				-async def create_upload_file(file: UploadFile = File(...)):

			
 
				+

			
 
				def get_title_row(sheet):
				    """Find the header row among the rows returned by ``sheet[1:5]``.

				    A row is treated as the header when one of its cells holds exactly one
				    of the known column captions. Every scanned cell is echoed to stdout.
				    If several rows match, the last matching one wins.

				    Returns the ``row`` attribute of the matching cell, or -1 when no
				    caption is found.
				    """
				    header_captions = ("证件号码", "收入（元）", "务工月收入")
				    found_row = -1
				    for row_index, cells in enumerate(sheet[1:5], start=1):
				        for col_index, cell in enumerate(cells):
				            print(f"第{row_index}行，第{col_index}列，值：{cell.value}")
				            if cell.value in header_captions:
				                found_row = cell.row
				    return found_row

			
 
				+

			
 
				+

			
 
				def get_all_numbers(sheet, start_row, cow):
				    """Map each value in column ``cow`` to the row number it appears on.

				    Rows are read from ``start_row`` downwards and reading stops at the
				    first empty cell, or once ``sheet.max_row * 2`` is reached. When a value
				    occurs more than once, the later row overwrites the earlier one.

				    Args:
				        sheet: worksheet-like object indexable by coordinates such as "B7".
				        start_row: first row number to read.
				        cow: column letter of the column to read.

				    Returns:
				        dict mapping cell value -> row number.
				    """
				    row_by_value = {}
				    upper_bound = sheet.max_row * 2
				    row = start_row
				    while row < upper_bound:
				        value = sheet[f"{cow}{row}"].value
				        if value is None:
				            break
				        row_by_value[value] = row
				        row += 1
				    return row_by_value

			
 
				+

			
 
				+

			
 
				def deal_diff_data(file: UploadFile = File(...), target_name: str = None):
				    """Diff the "省办务工" and "国办务工" sheets of an uploaded workbook.

				    The upload is written into ``cur_cache_path`` and opened with openpyxl.
				    For each of the two sheets, a new sheet is added that contains the
				    header row plus every row whose "证件号码" value does not occur in the
				    other sheet. The workbook is then saved and moved to ``target_name``.

				    Args:
				        file: the uploaded workbook.
				        target_name: destination path the finished workbook is moved to.

				    Returns:
				        ``None`` on success. When a required sheet, header row or the
				        "证件号码" column is missing, a ``{"code": 202, "msg": ...}`` dict is
				        returned and no file is moved to ``target_name``.
				    """
				    print("开始处理")

				    id_title = "证件号码"

				    def read_title_dict(sheet, title_row_num):
				        """Map each header caption in the given row to its column letter."""
				        title_dict = {}
				        for title_cell in sheet[title_row_num]:
				            column_letter, _ = coordinate_from_string(title_cell.coordinate)
				            title_dict[title_cell.value] = column_letter
				        return title_dict

				    def generate_diff_data(start_row, max_row, sheet, title_dict, keys, need_copy_data):
				        """Copy the rows of ``sheet`` whose ID is not in ``keys`` to a new sheet."""
				        id_column = title_dict[id_title]
				        # max_row is the index of the last populated row, so it has to be
				        # included in the scan; a half-open range would skip the last record.
				        for i in range(start_row, max_row + 1):
				            id_number = sheet[f"{id_column}{i}"].value
				            if id_number is None:
				                continue
				            if id_number not in keys:
				                need_copy_data.append(i)

				        # Rows are copied one by one into a fresh sheet; copying the sheet
				        # and deleting the matching rows proved too slow.
				        new = workbook.create_sheet("仅" + sheet.title + "有的数据")
				        for i, row in enumerate(need_copy_data):
				            print(f"开始写入{i}行")
				            for j, c in enumerate(sheet[row]):
				                new.cell(i + 1, j + 1, c.value)

				        return need_copy_data

				    # The filename comes from the client: keep only its last path component
				    # so the upload cannot be written outside the scratch directory.
				    savename = cur_cache_path + os.path.basename(file.filename)

				    contents = file.file.read()
				    with open(savename, "wb") as f:
				        f.write(contents)

				    # Load the workbook and pick the two sheets to compare.
				    workbook = load_workbook(savename)

				    first = None
				    second = None
				    for name in workbook.sheetnames:
				        print(f"表名为：{name}")
				        if name == "省办务工":
				            first = workbook[name]
				        elif name == "国办务工":
				            second = workbook[name]

				    if first is None or second is None:
				        return {
				            "code": 202,
				            "msg": "没有找到待处理的 省办务工 和 国办务工 两张表格",
				        }

				    first_title_row_num = get_title_row(first)
				    if first_title_row_num == -1:
				        return {"code": 202, "msg": "省办务工没有找到数据"}

				    first_title_dict = read_title_dict(first, first_title_row_num)
				    # The header row can be detected through an income caption alone, so
				    # the ID column is not guaranteed to exist.
				    if id_title not in first_title_dict:
				        return {"code": 202, "msg": "省办务工没有找到证件号码列"}

				    first_keys = get_all_numbers(first, first_title_row_num + 1, first_title_dict[id_title])

				    second_title_row_num = get_title_row(second)
				    if second_title_row_num == -1:
				        return {"code": 202, "msg": "国办务工没有找到数据"}

				    second_title_dict = read_title_dict(second, second_title_row_num)
				    if id_title not in second_title_dict:
				        return {"code": 202, "msg": "国办务工没有找到证件号码列"}

				    second_keys = get_all_numbers(second, second_title_row_num + 1, second_title_dict[id_title])

				    # Each list is seeded with the header row so that the header becomes the
				    # first row of the generated sheet.
				    generate_diff_data(
				        first_title_row_num + 1, first.max_row, first, first_title_dict, second_keys, [first_title_row_num]
				    )

				    generate_diff_data(
				        second_title_row_num + 1,
				        second.max_row,
				        second,
				        second_title_dict,
				        first_keys,
				        [second_title_row_num],
				    )

				    workbook.save(savename)

				    move_file(savename, target_name)
				    print(f"处理完成，目标文件夹{diff_dir}, {target_name}")

			
 
				+

			
 
				+

			
 
				def clean_with_path(dir_path):
				    """Delete every entry directly inside ``dir_path`` whose name ends in ".xlsx"."""
				    xlsx_names = [name for name in os.listdir(dir_path) if name.endswith(".xlsx")]
				    for name in xlsx_names:
				        os.remove(f"{dir_path}/{name}")

			
 
				+

			
 
				+

			
 
				def move_dir(old_path, new_path):
				    """Move every entry found directly inside ``old_path`` into ``new_path``.

				    The source and destination of each move are printed before it happens.
				    """
				    # listdir yields bare names, so both full paths are rebuilt here.
				    entries = os.listdir(old_path)
				    print(f"old path is {old_path}, new path is {new_path}")
				    pairs = ((os.path.join(old_path, name), os.path.join(new_path, name)) for name in entries)
				    for src, dst in pairs:
				        print("src:", src)
				        print("dst:", dst)
				        shutil.move(src, dst)

			
 
				+

			
 
				+

			
 
				def move_file(old_path, new_path):
				    """Move the file at ``old_path`` to ``new_path``; nothing is returned."""
				    shutil.move(src=old_path, dst=new_path)

			
 
				+

			
 
				+

			
 
				@app.get("/python_api/test")
				def test():
				    """Diagnostic endpoint: block for five seconds, then report success."""
				    print("准备睡眠")
				    time.sleep(5)
				    print("执行完成")
				    response = {"code": 200, "message": "成功"}
				    return response

			
 
				+

			
 
				+

			
 
				@app.get("/python_api/is_exist")
				def is_exist(file_name: str):
				    """Report whether ``file_name`` exists in one of the served directories.

				    ``diff_dir`` is checked first, then ``shared_dir``.

				    Returns:
				        ``{"code": 200, "exists": True, "filePath": "<dir>/<file_name>"}``
				        for the first directory containing the file, otherwise
				        ``{"code": 200, "exists": False}``.
				    """
				    print(f"查询file{file_name}是否存在")
				    for directory in (f"{diff_dir}/", f"{shared_dir}/"):
				        if os.path.exists(os.path.join(directory, file_name)):
				            return {"code": 200, "exists": True, "filePath": f"{directory}" + file_name}
				    # Only report a miss after every directory has been checked; returning
				    # from inside the loop would never look past the first directory.
				    return {"code": 200, "exists": False}

			
 
				+

			
 
				+

			
 
				@app.post("/python_api/upload_diff_file")
				def diff_file(file: UploadFile, background_tasks: BackgroundTasks):
				    """Queue a workbook diff as a background task and reply immediately.

				    A random ``.xlsx`` name under ``diff_dir`` is reserved for the result
				    and returned as ``filePath``; the file only appears there once the
				    background task has finished.
				    """
				    target = diff_dir + "/" + uuid.uuid4().hex + ".xlsx"
				    # NOTE(review): the task reads `file` after this handler has returned;
				    # confirm the upload is still open at that point on the FastAPI
				    # version in use.
				    background_tasks.add_task(deal_diff_data, file, target)
				    print(f"开始处理{target}")
				    return {"code": 200, "msg": "开始处理", "filePath": target}

			
 
				+

			
 
				+

			
 
				+@app.post("/python_api/uploadfile")

			
 
				+def create_upload_file(file: UploadFile = File(...)):

			
 
				     print(f"开始处理{file.filename}")

			
 
				-    contents = await file.read()

			
 
				+    # clean_with_path(f"{shared_dir}/")

			
 
				+    contents = file.file.read()

			
 
				 

			
 
				-    savename = "download_cache/" + file.filename

			
 
				-    # savename = "uploadfile/" + file.filename

			
 
				+    savename = f"{shared_dir}/" + file.filename

			
 
				+    if file.filename.endswith("xlsx"):

			
 
				+        savename = f"{shared_dir}/" + uuid.uuid4().hex + ".xlsx"

			
 
				     with open(savename, "wb") as f:

			
 
				         f.write(contents)

			
 
				     # 读取excel表

			
@@ -69,20 +268,15 @@ async def create_upload_file(file: UploadFile = File(...)):
 
				         x, y = coordinate_from_string(title_cell.coordinate)

			
 
				         title_dict[title_cell.value] = x

			
 
				 

			
 
				-    # print(title_dict)

			
 
				-

			
 
				     # 开始读取表格内容

			
 
				     read_data(sheet, title_row_num + 1, sheet.max_row, title_dict)

			
 
				 

			
 
				     # 保存文档

			
 
				     workbook.save(savename)

			
 
				 

			
 
				-    # return FileResponse(savename, media_type="application/octet-stream", filename="deal.xlsx")

			
 
				-    # return FileResponse(savename)

			
 
				-    # return FileResponse(savename, media_type='application/xlsx', filename="deal.xlsx")

			
 
				-    # return savename

			
 
				     print(f"处理完了{file.filename}文件")

			
 
				-    return {"code": 200, "msg": "分析完成，请点击下载查看分析结果", "filePath": f"/{shared_dir}/" + file.filename}

			
 
				+

			
 
				+    return {"code": 200, "msg": "分析完成，请点击下载查看分析结果", "filePath": savename}

			
 
				 

			
 
				 

			
 
				 def calculate_age_from_id_number(id_number):

			
@@ -244,7 +438,6 @@ def check_risk_type(ws, row_num, title_dict):
 
				         if risk is not None and len(risk) > 0:

			
 
				             risks.append((risk, i))

			
 
				 

			
 
				-

			
 
				     # 定义：健康帮扶，"综合保障，社会帮扶，义务教育保障, 教育帮扶, 住房安全保障, 搬迁, 饮水安全保障, 产业帮扶, 就业帮扶, 金融帮扶, 公益岗位帮扶等常量

			
 
				     HEALTH_SUPPORT = "健康帮扶"

			
 
				     COMPREHENSIVE_GUARANTEE = "综合保障"

			
@@ -275,7 +468,7 @@ def check_risk_type(ws, row_num, title_dict):
 
				             forbinddens_option = [

			
 
				                 HOUSING_SECURITY_GUARANTEE,

			
 
				                 DRINKING_WATER_SECURITY_GUARANTEE,

			
 
				-                EDUCATION_SUPPORT,

			
 
				+                HEALTH_SUPPORT,

			
 
				             ]

			
 
				         elif risk == "因残":

			
 
				             must_selected_option = [COMPREHENSIVE_GUARANTEE, SOCIAL_SUPPORT]

			
@@ -374,30 +567,12 @@ def check_assistance(ws, row_num, title_dict):
 
				             target = ws[f"{title_dict[type]}{row_num}"].value

			
 
				             for key in ["其他", "技能培训"]:

			
 
				                 if key in target:

			
 
				-                    comment_and_fill_yellow_for(target, f"实施开发式帮扶填写状态下，{type} 不允许选择 {key}")

			
 
				+                    comment_and_fill_yellow_for(

			
 
				+                        ws[f"{title_dict[type]}{row_num}"], f"实施开发式帮扶填写状态下，{type} 不允许选择 {key}"

			
 
				+                    )

			
 
				                     return

			
 
				 

			
 
				 

			
 
				-# 填写了的话，剩下四项有一个是其他或者技能培训就不行

			
 
				-

			
 
				-# info_number = "户主证件号码"

			
 
				-# identitycard = ws[f"{title_dict[info_number]}{row_num}"].value

			
 
				-# if len(identitycard) not in [15, 18, 20, 22]:

			
 
				-#     target = ws[f"{title_dict[info_number]}{row_num}"]

			
 
				-#     comment_and_fill_yellow_for(target, "31.监测对象家庭成员证件号码位数异常（证件号码非15、18、20、22位）")

			
 
				-

			
 
				-

			
 
				-# def get_item_values_for(ws, row_num, title_dict, items):

			
 
				-#     result = []

			
 
				-#     for item in items:

			
 
				-#         if item not in title_dict:

			
 
				-#             continue

			
 
				-#         value = ws[f"{title_dict[item]}{row_num}"].value

			
 
				-#         if value is not None:

			
 
				-#             result.append(value)

			
 
				-#     return result

			
 
				-

			
 
				-

			
 
				 def comment_and_fill_yellow_for(target, comment):

			
 
				     target.comment = Comment(text=comment, author="system")

			
 
				     yellow_fill = PatternFill(patternType="solid", fgColor="FFFF00")

			
@@ -405,6 +580,4 @@ def comment_and_fill_yellow_for(target, comment):
 
				 

			
 
				 

			
 
				 if __name__ == "__main__":

			
 
				-    # result = calculate_age_from_id_number("532801200607144126")

			
 
				-    # print(result)

			
 
				     uvicorn.run("data_verification:app", host="0.0.0.0", port=8500, reload=True)