1+ # -*- coding: utf-8 -*-
2+ """
3+ Created on Wed Sep 11 12:08:08 2019
4+
5+ @author: autol
6+ """
7+
8+
9+ #%%
10+ import re
11+ from collections import Counter
12+ from util import split_list ,user_to_list ,save_adjust_xlsx
13+ from globalvar import *
14+
15+ #%%
16+
def copy_users_compare(jrow, df, errs=None):
    """Decide whether a judgement row refers to the same parties as a case row.

    The parties of the current case (``df['当事人']``, split by
    ``user_to_list``) are compared as multisets with the party names that key
    ``jrow['new_adr']``.  Rules are applied in this order:

    1. No overlap at all (including an empty side) -> ``errs[0]``.
    2. A name occurs more than once on either side -> ``errs[2]``
       (the duplicates are logged).
    3. Both sides are identical -> the case's party string.
    4. Otherwise the names that differ are compared character by character:
       a character overlap of at least 60% is treated as a typo
       (``errs[1]``); at least 20% is treated as undecidable and accepted.
    5. If more than 80% of the case parties appear in the judgement, the
       judgement lists extra people: it is a series case (``errs[3]``)
       unless the judgement source number equals the case's original
       first-instance number, in which case it is accepted.
    6. If more than 80% of the judgement parties appear in the case, the case
       merely lacks some addresses and is accepted.
    7. Anything else -> ``errs[0]``.

    Args:
        jrow: Mapping-like judgement row with keys ``'判决书源号'`` and
            ``'new_adr'`` (a dict keyed by party name).
        df: Mapping-like case row with keys ``'案号'``, ``'原一审案号'`` and
            ``'当事人'``.
        errs: Four tags, in the order [no record, typo, duplicated field,
            series case].  Defaults to the standard Chinese tags.

    Returns:
        ``df['当事人']`` when the judgement is accepted as matching the case,
        otherwise one of the ``errs`` tags.
    """
    if errs is None:
        # The previous default was a one-element list, so every tag except
        # errs[0] raised IndexError when the caller omitted ``errs``.
        errs = ['【OA无用户记录】', '【用户错别字】', '【字段重复】', '【系列案】']

    code0 = str(df['案号']).strip()
    code1 = str(df['原一审案号']).strip()
    jcode = str(jrow['判决书源号']).strip()

    # Parties named in the judgement.  Without any there is nothing to
    # compare, and every ratio below would divide by zero.
    judgement_adr = jrow['new_adr']
    if not isinstance(judgement_adr, dict) or not judgement_adr:
        return errs[0]
    y = Counter(judgement_adr.keys())

    # Parties of the current case; same reasoning for an empty list.
    x = Counter(user_to_list(df['当事人']))
    if not x:
        return errs[0]

    n_shared = sum((x & y).values())
    if n_shared == 0:  # disjoint: completely unrelated
        return errs[0]
    n_union = sum((x | y).values())
    rxy = n_shared / n_union            # overlap relative to both sides
    rxyx = n_shared / sum(x.values())   # share of case parties found in judgement
    rxyy = n_shared / sum(y.values())   # share of judgement parties found in case

    xdu = [k for k, v in x.items() if v > 1]  # duplicated names
    ydu = [k for k, v in y.items() if v > 1]
    if xdu or ydu:
        print_log('>>> %s 用户有字段重复【%s】-【案件:%s】 vs 【判决书:%s】'
                  % (code0, '{0:.0%}'.format(rxy), xdu, ydu))
        return errs[2]

    if n_shared == n_union:  # exact match
        return df['当事人']

    # Partial overlap: check whether the differing names look like typos of
    # each other by comparing the sets of characters they are made of.
    dx = list((x - y).elements())
    dy = list((y - x).elements())
    chars_x = set(''.join(dx))
    chars_y = set(''.join(dy))
    all_chars = chars_x | chars_y
    rxxyy = len(chars_x & chars_y) / len(all_chars) if all_chars else 0.0
    if rxxyy >= .6:
        print_log('>>> %s 认为【错别字率 %s】->【案件:%s vs 判决书:%s】'
                  % (code0, '{0:.0%}'.format(1 - rxxyy), dx, dy))
        return errs[1]
    if rxxyy >= .2:
        print_log('>>> %s 认为【不好判断当正常处理【差异率 %s】vs【相同范围:%s】->【差异范围:案件:%s vs 判决书:%s】 '
                  % (code0, '{0:.0%}'.format(1 - rxxyy),
                     list((x & y).elements()),
                     dx, dy))
        return df['当事人']

    if rxyx > .8:
        print_log('>>> %s 案件 %s人 < 判决书 %s人' % (code0, len(x), len(y)))
        if jcode != code1:  # series case
            print_log('>>> %s 认为【系列案,判决书人员 %s 多出地址】' % (code0, list((y - x).elements())))
            return errs[3]
        return df['当事人']
    if rxyy > .8:
        print_log('>>> %s 案件 %s人 > 判决书 %s人' % (code0, len(x), len(y)))
        print_log('>>> %s 认为【当前案件人员 %s 缺地址】' % (code0, list((x - y).elements())))
        return df['当事人']
    return errs[0]
77+
78+
def copy_rows_adr1(x, n_adr):
    """Merge addresses extracted from a judgement into a case row's address.

    Args:
        x: Mapping-like case row with keys ``'当事人'`` (parties),
            ``'诉讼代理人'`` (agents), ``'地址'`` (address text) and
            ``'案号'`` (case number).
        n_adr: Dict mapping party name -> address taken from the judgement.
            Any other type means nothing was extracted.

    Returns:
        ``x['地址']`` unchanged when ``n_adr`` is not a dict.  Otherwise the
        address entries re-joined with ``','`` after applying, per party in
        ``n_adr``:

        * the party is covered by an agent and already appears in the
          address text -> its entries are removed;
        * the party is not covered by an agent, is one of the case's parties
          and is missing from the address text -> ``name + adr_tag + address``
          is appended.

    ``split_list``, ``adr_tag``, ``flag_check_jdocs`` and ``print_log`` are
    module-level names that are not defined in this block.
    """
    user = x['当事人']
    agent = x['诉讼代理人']
    adr = x['地址']
    codes = x['案号']
    if not isinstance(n_adr, dict):
        return adr

    entries = split_list(r'[,,]', adr)
    before = Counter(entries)  # snapshot used to detect whether anything changed

    # Agent groups look like 'XX、XX/XX_123123'.  The scan depends only on
    # ``agent``, so it is done once instead of once per party; it is skipped
    # for an empty dict, exactly as the per-party scan was.
    agent_groups = re.findall(r'[\w+、]*\/[\w+]*', agent) if n_adr else []

    for name in n_adr:
        by_agent = any(name in group for group in agent_groups)
        if by_agent:
            if name in adr:
                # A represented party does not keep an address of its own.
                entries = [entry for entry in entries if name not in entry]
        elif name not in adr and name in user:
            # Append the judgement address in the output format.
            entries.append(name + adr_tag + n_adr[name])

    after = Counter(entries)
    adr = ','.join(entry for entry in entries if entry)
    if before != after and adr and flag_check_jdocs:
        print_log('>>> 【%s】成功复制判决书地址=>【%s】' % (codes, adr))
    return adr
101+
# File name that copy_rows_user_func passes to save_adjust_xlsx when it saves
# the temporarily extracted judgement information.
address_tmp_xlsx = 'address_tmp.xlsx'
103+
def copy_rows_user_func(dfj, dfo):
    """Copy judgement addresses into the case rows they belong to.

    For every row of ``dfo`` a judgement row of ``dfj`` is selected:

    1. the first judgement whose ``'判决书源号'`` equals the case's
       ``'原一审案号'`` (exact match), otherwise
    2. the first judgement accepted by ``copy_users_compare`` (its result is
       not one of the error tags).

    The exact match is looked up across all judgements before any party
    comparison is tried, so a look-alike judgement that happens to come first
    in ``dfj`` can no longer shadow the judgement with the matching number.
    Judgements whose ``'new_adr'`` is not a non-empty dict are ignored.

    Args:
        dfj: DataFrame of judgements with columns ``'判决书源号'`` and
            ``'new_adr'``.
        dfo: DataFrame of cases with columns ``'案号'``, ``'原一审案号'``,
            ``'当事人'``, ``'诉讼代理人'`` and ``'地址'``.

    Returns:
        ``dfo``, modified in place: a ``'判决书源号'`` column is added
        (empty string when no judgement was selected) and ``'地址'`` is
        rewritten by ``copy_rows_adr1`` for the matched rows.

    Side effects:
        ``dfj`` with missing values replaced by ``''`` is saved through
        ``save_adjust_xlsx`` to ``address_tmp_xlsx``.
    """
    errs = ['【OA无用户记录】', '【用户错别字】', '【字段重复】', '【系列案】']

    dfo['判决书源号'] = ''

    # Usable judgements, prepared once instead of re-iterating ``dfj`` for
    # every case row.  An empty or non-dict ``new_adr`` means extraction of
    # the judgement fields failed.
    candidates = []
    for _, jrow in dfj.iterrows():
        n_adr = jrow['new_adr']
        if isinstance(n_adr, dict) and n_adr:
            candidates.append((str(jrow['判决书源号']).strip(), jrow))

    for i, case_row in dfo.iterrows():
        code0 = str(case_row['案号']).strip()
        code1 = str(case_row['原一审案号']).strip()

        # Same case number: the judgement is found.
        match = next((pair for pair in candidates if pair[0] == code1), None)
        if match is None:
            # No judgement carries this number: fall back to comparing parties.
            match = next(
                (pair for pair in candidates
                 if copy_users_compare(pair[1], case_row, errs) not in errs),
                None,
            )
        if match is None:
            continue

        jcode, jrow = match
        print_log('\n >>> 判决书信息 | 案号=%s | 源号=%s | 判决书源号=%s' % (code0, code1, jcode))
        dfo.loc[i, '地址'] = copy_rows_adr1(case_row, jrow['new_adr'])
        dfo.loc[i, '判决书源号'] = jcode

    dfj = dfj.fillna('')
    # Save the temporarily extracted information.
    save_adjust_xlsx(dfj, address_tmp_xlsx, textfit=('判决书源号', 'new_adr'))
    return dfo
0 commit comments