You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

60 lines
1.9 KiB

# -*- coding: utf-8 -*-
import logging
from logging.handlers import TimedRotatingFileHandler
import re
import os
def init_loging_config():
    """Configure logging and return the ``"HuoSpider"`` logger.

    Ensures the ``./log`` directory exists, configures the root logger through
    ``logging.basicConfig`` at INFO level, and attaches a
    ``TimedRotatingFileHandler`` writing to ``./log/huo_spider.log``
    (``when='D'``, ``interval=1``, UTF-8 encoded) to the ``"HuoSpider"`` logger.

    Calling this function more than once is safe: the file handler is attached
    only if the logger does not already have one for the same file, so log
    lines are not duplicated.

    NOTE(review): no ``backupCount`` is passed to the handler, so rotated files
    are never deleted automatically. An earlier comment mentioned keeping seven
    days of logs; confirm the intended retention before adding ``backupCount``.

    :return: the configured ``logging.Logger`` named ``"HuoSpider"``
    """
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process creating the directory at the same moment.
    os.makedirs("./log", exist_ok=True)

    level = logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(name)s] %(levelname)s %(message)s ",
        datefmt='[%Y-%m-%d %H:%M:%S]'
    )

    _logger = logging.getLogger("HuoSpider")
    _logger.setLevel(level)

    # logging.getLogger returns the same object for the same name, so a second
    # call would otherwise stack another handler and write every record twice.
    log_path = os.path.abspath('./log/huo_spider.log')
    already_attached = any(
        isinstance(handler, TimedRotatingFileHandler)
        and getattr(handler, "baseFilename", None) == log_path
        for handler in _logger.handlers
    )
    if not already_attached:
        # Time-based rotation: when='D' with interval=1 rolls the file daily.
        file_handler = TimedRotatingFileHandler(
            './log/huo_spider.log', when='D', interval=1, encoding='utf-8'
        )
        # The handler accepts DEBUG, but the logger level above (INFO) still
        # filters records before they reach it.
        file_handler.setLevel(logging.DEBUG)
        # Output format used for records written to the file.
        formatter = logging.Formatter('%(asctime)s [%(name)s] %(levelname)s %(message)s ')
        file_handler.setFormatter(formatter)
        _logger.addHandler(file_handler)

    return _logger
# Module-level logger: logging is configured as a side effect of importing
# this module, and the resulting "HuoSpider" logger is exposed as `logger`.
logger = init_loging_config()
def is_blank(val: str) -> bool:
    """Return True when ``val`` is None, empty, or only whitespace.

    The previous implementation returned the opposite of what the name says
    (False for blank input, True for text with content); the result now
    matches the function name.

    :param val: string to check; ``None`` is accepted and counts as blank
    :return: True if there is no non-whitespace content, otherwise False
    """
    return val is None or not val.strip()
def count_characters(val):
    """Count Chinese and non-Chinese characters in a value.

    Input that is not a string is converted with ``str()`` first. A character
    counts as Chinese when it falls in the range U+4E00..U+9FA5; every other
    character (including whitespace and newlines) counts as non-Chinese.

    :param val: value to inspect
    :return: tuple ``(chinese_count, non_chinese_count)``
    """
    text = val if isinstance(val, str) else str(val)

    han_re = re.compile(r'[\u4e00-\u9fa5]')
    other_re = re.compile(r'[^\u4e00-\u9fa5]')

    # Count matches lazily instead of building intermediate lists.
    han_total = sum(1 for _ in han_re.finditer(text))
    other_total = sum(1 for _ in other_re.finditer(text))
    return han_total, other_total