You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
1.9 KiB
60 lines
1.9 KiB
# -*- coding: utf-8 -*-
|
|
|
|
import logging
|
|
from logging.handlers import TimedRotatingFileHandler
|
|
import re
|
|
import os
|
|
|
|
|
|
def init_loging_config():
    """Set up console and rotating-file logging and return the spider logger.

    Steps performed:

    * make sure the ``./log`` directory exists;
    * configure the root logger through ``logging.basicConfig`` at INFO level
      (a no-op if the root logger already has handlers);
    * attach a ``TimedRotatingFileHandler`` writing to
      ``./log/huo_spider.log`` to the ``"HuoSpider"`` logger.

    The function may be called repeatedly: the file handler is attached only
    once, so repeated calls do not duplicate log lines or leak open files.

    :return: the configured ``"HuoSpider"`` logger.
    """
    # exist_ok=True removes the check-then-create race of exists() + mkdir().
    os.makedirs("./log", exist_ok=True)

    level = logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(name)s] %(levelname)s %(message)s ",
        datefmt='[%Y-%m-%d %H:%M:%S]'
    )

    _logger = logging.getLogger("HuoSpider")
    _logger.setLevel(level)

    # getLogger() returns the same object for the same name, so a previous
    # call may already have attached the rotating file handler; reuse it.
    if any(isinstance(h, TimedRotatingFileHandler) for h in _logger.handlers):
        return _logger

    # Rotation settings for TimedRotatingFileHandler:
    #   when='W0'..'W6'  roll over weekly on the given weekday (0 = Monday)
    #   when='D' / 'H' / 'M'  roll over every `interval` days / hours / minutes
    #   when='midnight'  roll over at midnight
    #   backupCount      number of rotated files to keep; older ones are deleted
    # Here the file rolls over daily. backupCount is not passed, so rotated
    # files are never deleted; pass e.g. backupCount=7 to keep only a week.
    file_handler = TimedRotatingFileHandler('./log/huo_spider.log', when='D', interval=1, encoding='utf-8')
    # The handler accepts DEBUG, but the logger level above is INFO, so
    # records below INFO never reach it.
    file_handler.setLevel(logging.DEBUG)

    # Output format used for lines written to the log file.
    formatter = logging.Formatter('%(asctime)s [%(name)s] %(levelname)s %(message)s ')
    file_handler.setFormatter(formatter)

    _logger.addHandler(file_handler)
    return _logger
|
|
|
|
|
|
# Module-level logger for importers to share; created at import time by running the logging setup.
logger = init_loging_config()
|
|
|
|
|
|
def is_blank(val: str):
    """Report whether ``val`` contains any non-whitespace text.

    NOTE(review): despite the name, this returns ``True`` when the value is
    NOT blank. Confirm against callers before renaming or inverting it.

    :param val: string to inspect; ``None`` is accepted.
    :return: ``False`` if ``val`` is ``None``, empty or whitespace-only,
        ``True`` otherwise.
    """
    return val is not None and bool(val.strip())
|
|
|
|
|
|
def count_characters(val):
    """Count Chinese and non-Chinese characters in ``val``.

    A character counts as Chinese when it lies in the range U+4E00..U+9FA5;
    every other character (letters, digits, whitespace, punctuation, ...)
    counts as non-Chinese. Non-string input is converted with ``str()`` first.

    :param val: value whose characters are counted.
    :return: tuple ``(chinese_count, non_chinese_count)``.
    """
    text = val if isinstance(val, str) else str(val)
    han_total = sum(1 for _ in re.finditer(r'[\u4e00-\u9fa5]', text))
    other_total = sum(1 for _ in re.finditer(r'[^\u4e00-\u9fa5]', text))
    return han_total, other_total
|