feat:解析新文件
This commit is contained in:
@@ -102,20 +102,259 @@ def convert_to_yi(value):
|
||||
def find_row_by_name(df, row_name):
    """Find the rows of ``df`` whose first-column value equals ``row_name``.

    An exact comparison is tried first. If nothing matches, the comparison
    is repeated with surrounding whitespace removed from both the cell
    values and ``row_name``.

    Args:
        df: DataFrame whose first column holds the row labels.
        row_name: Label to look for.

    Returns:
        A DataFrame with the matching rows, or ``None`` when no row matches
        (or when ``df`` has no columns at all).
    """
    if df.shape[1] == 0:
        return None
    first_col = df.iloc[:, 0]

    # Pass 1: exact match.
    exact = first_col == row_name
    if exact.any():
        return df[exact]

    # Pass 2: whitespace-insensitive match. Only string cells are stripped,
    # so a numeric or mixed-type first column no longer raises the
    # AttributeError that the ``.str`` accessor throws on non-string data.
    wanted = row_name.strip() if isinstance(row_name, str) else row_name
    trimmed = first_col.map(
        lambda cell: cell.strip() if isinstance(cell, str) else cell
    )
    loose = trimmed == wanted
    if loose.any():
        return df[loose]

    return None
|
||||
|
||||
|
||||
def _parse_cell_number(cell):
    """Parse a spreadsheet cell into a float.

    Accepts plain numbers, numeric strings, and strings with a leading
    ``=`` (formula-style export such as ``=123``). Thousands separators
    (ASCII ``,`` and full-width U+FF0C) and spaces are ignored.

    Args:
        cell: Raw cell value (number, string, ``None`` or NaN).

    Returns:
        The parsed float, or ``None`` when the cell is empty, missing,
        not numeric, or parses to a non-finite value (NaN / infinity).
    """
    import math

    if cell is None or pd.isna(cell) or cell == '':
        return None

    text = str(cell).strip()
    if text.startswith('='):
        text = text[1:]
    # Drop grouping characters: ASCII comma, full-width comma, spaces.
    for separator in (',', '\uff0c', ' '):
        text = text.replace(separator, '')
    if not text:
        return None

    try:
        value = float(text)
    except (ValueError, TypeError):
        return None
    # float() happily parses 'nan', '-nan', 'inf', ...; none of these is a
    # usable amount, so treat them like an unparsable cell.
    return value if math.isfinite(value) else None
|
||||
|
||||
|
||||
def _parse_date_cell(cell):
    """Parse a date cell into a ``'YYYY-MM-DD'`` string.

    Supported inputs:
      * ``datetime.date`` / ``datetime.datetime`` objects (including pandas
        ``Timestamp``, which is what Excel date cells are usually read as);
      * strings such as ``2025-12-31`` or ``2025/12/31``, optionally written
        formula-style as ``="2025-12-31"``; month and day may be given
        without zero padding (``2025/3/31``).

    Args:
        cell: Raw cell value.

    Returns:
        The zero-padded ISO date string, or ``None`` when the cell is empty,
        missing, or not a valid calendar date.
    """
    import datetime

    # pd.isna also catches NaT, which is itself a datetime instance and
    # therefore has to be filtered out before the isinstance checks below.
    if cell is None or pd.isna(cell) or cell == '':
        return None

    if isinstance(cell, datetime.datetime):
        return cell.date().isoformat()
    if isinstance(cell, datetime.date):
        return cell.isoformat()

    text = str(cell).strip()
    if text.startswith('='):
        text = text[1:].strip().strip('"\'').strip()

    # Accept both 2025-12-31 and 2025/12/31.
    parts = text.replace('/', '-').split('-')
    if len(parts) != 3:
        return None
    year, month, day = parts
    if len(year) != 4 or not (1 <= len(month) <= 2 and 1 <= len(day) <= 2):
        return None
    if not all(part.isascii() and part.isdigit() for part in parts):
        return None

    try:
        # Constructing a date rejects impossible values such as month 13
        # or 02-30, and isoformat() restores zero padding.
        return datetime.date(int(year), int(month), int(day)).isoformat()
    except ValueError:
        return None
|
||||
|
||||
|
||||
def _date_to_quarter(date_str):
    """Render a ``YYYY-MM-DD`` date as a quarter label such as ``2024Q1``.

    The four standard period ends map directly (03-31 -> Q1, 06-30 -> Q2,
    09-30 -> Q3, 12-31 -> Q4). Any other date is assigned a quarter from
    its month number.

    Args:
        date_str: Date string; expected to be at least 10 characters long.

    Returns:
        The quarter label. Input that is empty or shorter than 10
        characters is returned unchanged (``''`` for falsy input), and a
        date whose month part is not an integer is returned unchanged.
    """
    if not date_str or len(date_str) < 10:
        return date_str or ''

    year_part = date_str[:4]
    month_day = date_str[5:]  # 'MM-DD' for a plain 10-character date

    standard_period_ends = (
        ('03-31', 'Q1'),
        ('06-30', 'Q2'),
        ('09-30', 'Q3'),
        ('12-31', 'Q4'),
    )
    for period_end, label in standard_period_ends:
        if month_day == period_end:
            return f'{year_part}{label}'

    # Non-standard period end: derive the quarter from the month.
    try:
        month = int(date_str[5:7])
    except (ValueError, TypeError):
        return date_str
    quarter = (month - 1) // 3 + 1
    return f'{year_part}Q{quarter}'
|
||||
|
||||
|
||||
def _diff_or_none(current, previous):
    """Return ``current - previous``, or ``None`` if either value is missing."""
    if current is None or previous is None:
        return None
    return current - previous


def _single_quarter_figures(row, cum_by_date):
    """Derive single-quarter figures for one cumulative (year-to-date) row.

    Rule: Q1 is the 03-31 cumulative figure itself; Q2/Q3/Q4 are the
    cumulative figure minus the cumulative figure of the immediately
    preceding quarter-end of the same year. When that preceding period is
    not present (or the date is not a standard quarter-end) the values are
    ``None`` rather than a multi-quarter delta.

    Args:
        row: Dict with keys ``date``, ``rev``, ``gross``, ``net_parent``.
        cum_by_date: Mapping of ``'YYYY-MM-DD'`` -> cumulative row dict.

    Returns:
        Dict with keys ``rev``, ``gross``, ``margin``, ``net_parent``.
    """
    previous_quarter_end = {
        '-06-30': '-03-31',
        '-09-30': '-06-30',
        '-12-31': '-09-30',
    }
    date = row['date']
    year, suffix = date[:4], date[4:]

    if suffix == '-03-31':
        rev, gross, net = row['rev'], row['gross'], row['net_parent']
    else:
        prev_suffix = previous_quarter_end.get(suffix)
        previous = cum_by_date.get(year + prev_suffix) if prev_suffix else None
        if previous is None:
            rev = gross = net = None
        else:
            # Each metric is handled on its own so that one missing cell
            # does not blank out the other metrics.
            rev = _diff_or_none(row['rev'], previous['rev'])
            gross = _diff_or_none(row['gross'], previous['gross'])
            net = _diff_or_none(row['net_parent'], previous['net_parent'])

    if rev is not None and rev != 0 and gross is not None:
        margin = gross / rev
    else:
        margin = None
    return {'rev': rev, 'gross': gross, 'margin': margin, 'net_parent': net}


def build_profitability_df(input_file):
    """Build the "profitability" tab data from a raw income-statement file.

    Extracts revenue, gross profit (revenue - cost of revenue), gross
    margin and net profit attributable to the parent. When the file
    contains quarterly / half-year period ends (03-31, 06-30, 09-30),
    single-quarter figures are added next to the cumulative ones:
    Q1 = 03-31, Q2 = 06-30 - 03-31, Q3 = 09-30 - 06-30, Q4 = 12-31 - 09-30.
    A single-quarter value is left empty when the immediately preceding
    quarter-end of the same year is not available.

    Layout: one row per period (newest first), amounts divided by 1e8 and
    rounded to 2 decimals, margins rounded to 4 decimals, and two blank
    columns between the cumulative and single-quarter groups.

    Args:
        input_file: Path to a ``.csv`` file (read as UTF-8) or to a file
            readable by ``pd.read_excel``.

    Returns:
        A ``pd.DataFrame`` whose first row(s) are the header (and, in
        quarterly mode, a sub-header), or ``None`` when the date row or a
        required metric row is missing or no column has a valid date.
    """
    # str() + lower() so that path-like objects and '.CSV' are handled too.
    if str(input_file).lower().endswith('.csv'):
        df = pd.read_csv(input_file, encoding='utf-8')
    else:
        df = pd.read_excel(input_file)

    # Data columns start at the 4th column; per the original note the
    # leading columns hold report type / date / stock code / company.
    data_start_col = 3
    row_names = df.iloc[:, 0].astype(str).str.strip().tolist()

    # The date row is the one labelled '日期' within the first five rows.
    date_row_idx = next(
        (i for i, name in enumerate(row_names[:5]) if name == '日期'),
        None,
    )
    if date_row_idx is None:
        print("盈利能力: 未找到日期行,跳过")
        return None

    def get_row_values(row_name):
        """Return the parsed numeric cells of the first row named ``row_name``."""
        try:
            # Positional lookup: safe even if df has a non-default index.
            position = row_names.index(row_name)
        except ValueError:
            return None
        return [
            _parse_cell_number(cell)
            for cell in df.iloc[position, data_start_col:]
        ]

    dates = [
        _parse_date_cell(cell)
        for cell in df.iloc[date_row_idx, data_start_col:]
    ]
    rev = get_row_values('营业收入')
    cost = get_row_values('营业成本')
    # Prefer the margin row reported in the sheet; otherwise it is computed
    # below as gross profit / revenue.
    # NOTE(review): the reported row may use a different unit (percent vs
    # ratio) than the computed single-quarter margin - confirm with data.
    gross_margin = get_row_values('毛利率(GM)')
    net_parent = get_row_values('归属于母公司股东及其他权益持有者的净利润')
    if net_parent is None:
        net_parent = get_row_values('归属于母公司普通股股东的净利润')

    if rev is None or cost is None or net_parent is None:
        print("盈利能力: 缺少营业收入/营业成本/归母净利润行,跳过")
        return None

    # Gross profit = revenue - cost of revenue.
    gross_profit = [_diff_or_none(r, c) for r, c in zip(rev, cost)]

    if gross_margin is None:
        gross_margin = [
            g / r if g is not None and r is not None and r != 0 else None
            for g, r in zip(gross_profit, rev)
        ]

    # Keep only columns with a valid date, newest period first.
    rows_cum = [
        {'date': d, 'rev': r, 'gross': g, 'margin': m, 'net_parent': n}
        for d, r, g, m, n in zip(
            dates, rev, gross_profit, gross_margin, net_parent
        )
        if d is not None
    ]
    if not rows_cum:
        return None
    rows_cum.sort(key=lambda row: row['date'], reverse=True)

    # Quarterly / half-year data is present when any period end is not 12-31.
    has_quarterly = any(
        row['date'].endswith(('-03-31', '-06-30', '-09-30'))
        for row in rows_cum
    )

    YI = 100000000  # 1e8, the unit "亿"

    def to_yi(value):
        """Convert a raw amount to units of 1e8, rounded to 2 decimals."""
        if value is None:
            return None
        return round(value / YI, 2)

    def rounded_margin(value):
        return round(value, 4) if value is not None else None

    def cumulative_cells(row):
        return [
            _date_to_quarter(row['date']),
            to_yi(row['rev']),
            to_yi(row['gross']),
            rounded_margin(row['margin']),
            to_yi(row['net_parent']),
        ]

    if has_quarterly:
        header = ['时间', '营业收入(亿)', '毛利(亿)', '毛利率', '归母净利润(亿)', '', '', '营业收入(亿)', '毛利(亿)', '毛利率', '归母净利润(亿)']
        sub_header = ['', '累计', '累计', '累计', '累计', '', '', '单季', '单季', '单季', '单季']
        result_rows = [header, sub_header]
    else:
        header = ['时间', '营业收入(亿)', '毛利(亿)', '毛利率', '归母净利润(亿)']
        result_rows = [header]

    if not has_quarterly:
        result_rows.extend(cumulative_cells(row) for row in rows_cum)
        return pd.DataFrame(result_rows)

    cum_by_date = {row['date']: row for row in rows_cum}
    for row in rows_cum:
        single = _single_quarter_figures(row, cum_by_date)
        result_rows.append(
            cumulative_cells(row)
            + ['', '']
            + [
                to_yi(single['rev']),
                to_yi(single['gross']),
                rounded_margin(single['margin']),
                to_yi(single['net_parent']),
            ]
        )

    return pd.DataFrame(result_rows)
|
||||
|
||||
|
||||
def process_income_statement_return_df(input_file, config_file):
|
||||
"""
|
||||
处理利润表文件并返回 DataFrame
|
||||
|
||||
Reference in New Issue
Block a user