# -*- coding: utf-8 -*-
"""Spider configuration file.

Defines the framework's default settings.  Connection parameters are read
from environment variables where noted.  At the end of the module, a
user-provided ``setting`` module (if importable) overrides these defaults.
"""
import os

# Redis table names
# Task table template
TAB_REQUSETS = "{redis_key}:z_requsets"
# Failed-task table template
TAB_FAILED_REQUSETS = "{redis_key}:z_failed_requsets"
# Template for items that failed to be saved
TAB_FAILED_ITEMS = "{redis_key}:s_failed_items"
# Spider status table template
TAB_SPIDER_STATUS = "{redis_key}:z_spider_status"
# Spider time record table
TAB_SPIDER_TIME = "{redis_key}:h_spider_time"
# User pool
TAB_USER_POOL = "{redis_key}:h_{user_type}_pool"

# MYSQL
MYSQL_IP = os.getenv("MYSQL_IP")
MYSQL_PORT = int(os.getenv("MYSQL_PORT", 3306))
MYSQL_DB = os.getenv("MYSQL_DB")
MYSQL_USER_NAME = os.getenv("MYSQL_USER_NAME")
MYSQL_USER_PASS = os.getenv("MYSQL_USER_PASS")

# MONGODB
MONGO_IP = os.getenv("MONGO_IP", "localhost")
MONGO_PORT = int(os.getenv("MONGO_PORT", 27017))
MONGO_DB = os.getenv("MONGO_DB")
MONGO_USER_NAME = os.getenv("MONGO_USER_NAME")
MONGO_USER_PASS = os.getenv("MONGO_USER_PASS")

# REDIS
# ip:port; several nodes may be given as a list or comma separated,
# e.g. ip1:port1,ip2:port2 or ["ip1:port1", "ip2:port2"]
REDISDB_IP_PORTS = os.getenv("REDISDB_IP_PORTS")
REDISDB_USER_PASS = os.getenv("REDISDB_USER_PASS")
REDISDB_DB = int(os.getenv("REDISDB_DB", 0))
# Used for redis sentinel mode
REDISDB_SERVICE_NAME = os.getenv("REDISDB_SERVICE_NAME")

# Pipelines used to store data; customizable, MysqlPipeline by default
ITEM_PIPELINES = [
    "feapder.pipelines.mysql_pipeline.MysqlPipeline",
    # "feapder.pipelines.mongo_pipeline.MongoPipeline",
]
# Max number of failures when exporting data (save and update included);
# a warning is raised once this count is exceeded
EXPORT_DATA_MAX_FAILED_TIMES = 10
# Max number of retries when exporting data (save and update included);
# retrying is abandoned once this count is exceeded
EXPORT_DATA_MAX_RETRY_TIMES = 10

# Spider related
# COLLECTOR
# Interval for moving tasks from the task queue into the in-memory queue
COLLECTOR_SLEEP_TIME = 1
# Number of tasks fetched each time
COLLECTOR_TASK_COUNT = 10

# SPIDER
# Spider concurrency
SPIDER_THREAD_COUNT = 1
# Download interval in seconds.  Random intervals are supported, e.g.
# SPIDER_SLEEP_TIME = [2, 5] gives a random interval between 2 and 5 seconds,
# both 2 and 5 included
SPIDER_SLEEP_TIME = 0
# Number of tasks each parser takes from the in-memory queue
SPIDER_TASK_COUNT = 1
# Max retry count for each request
SPIDER_MAX_RETRY_TIMES = 100
# Whether start requests are added automatically.  When set to False,
# start_monitor_task must be called manually; meant for multi-process use
SPIDER_AUTO_START_REQUESTS = True
# Whether the spider stays resident
KEEP_ALIVE = False

# Browser rendering
WEBDRIVER = dict(
    pool_size=1,  # number of browsers
    load_images=True,  # whether to load images
    user_agent=None,  # a string, or a no-arg function returning the user_agent
    proxy=None,  # xxx.xxx.xxx.xxx:xxxx, or a no-arg function returning the proxy address
    headless=False,  # whether to run a headless browser
    driver_type="CHROME",  # CHROME, PHANTOMJS, FIREFOX
    timeout=30,  # request timeout
    window_size=(1024, 800),  # window size
    executable_path=None,  # browser path; the default path is used when None
    render_time=0,  # render duration: wait this long after opening the page before reading the source
    custom_argument=["--ignore-certificate-errors"],  # custom browser rendering arguments
    xhr_url_regexes=None,  # xhr endpoints to intercept; regex supported, array type
    auto_install_driver=False,  # auto-download the browser driver; supports chrome and firefox
)

# Re-crawl failed requests when the spider starts
RETRY_FAILED_REQUESTS = False
# Save failed requests
SAVE_FAILED_REQUEST = True
# Request loss protection: a request not finished within REQUEST_LOST_TIMEOUT
# is dispatched again and redone
REQUEST_LOST_TIMEOUT = 600  # 10 minutes
# Network request timeout: time to wait for the server response; a float,
# or a (connect timeout, read timeout) tuple
REQUEST_TIMEOUT = 22

# Download cache, backed by redis.  Because of memory size limits it is
# recommended only while developing/debugging, to avoid a network request
# on every debug run
# Whether to enable the download cache; recommended True for data that is
# costly to fetch or whose requirements change often
RESPONSE_CACHED_ENABLE = False
# Cache time in seconds
RESPONSE_CACHED_EXPIRE_TIME = 3600
# Whether to use the cache; may be set to True when re-collecting data
RESPONSE_CACHED_USED = False

# Root redis key under which items and requests are stored
REDIS_KEY = ""
# Keys deleted when the spider starts; type: tuple/bool/string.  Regex is
# supported.  Commonly used to clear the task queue; otherwise a restart
# resumes from where the crawl stopped
DELETE_KEYS = []

# Proxy settings
# Proxy extraction API; the returned proxies are separated by \r\n
PROXY_EXTRACT_API = None
PROXY_ENABLE = True

# Random headers
RANDOM_HEADERS = True
# UserAgent type; supports 'chrome', 'opera', 'firefox', 'internetexplorer',
# 'safari', 'mobile'.  A random type is used when unspecified
USER_AGENT_TYPE = "chrome"
# Default browser user agent; has no effect when RANDOM_HEADERS=True
DEFAULT_USERAGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/73.0.3683.103 Safari/537.36"
)
# Whether requests uses a session
USE_SESSION = False

# Deduplication
# Filter types: permanent (BloomFilter) = 1, in-memory (MemoryFilter) = 2,
# temporary (ExpireFilter) = 3
ITEM_FILTER_ENABLE = False  # item deduplication
ITEM_FILTER_SETTING = dict(
    filter_type=1,  # see the filter types listed above
)
REQUEST_FILTER_ENABLE = False  # request deduplication
REQUEST_FILTER_SETTING = dict(
    filter_type=3,  # see the filter types listed above
    expire_time=2592000,  # expiry time: 1 month
)

# Alerting; supports DingTalk, WeCom (enterprise WeChat) and email
# DingTalk alerting
DINGDING_WARNING_URL = ""  # DingTalk robot api
DINGDING_WARNING_PHONE = ""  # person to alert; a list is supported to name several
DINGDING_WARNING_ALL = False  # whether to notify everyone; False by default
# Email alerting
EMAIL_SENDER = ""  # sender
EMAIL_PASSWORD = ""  # authorization code
EMAIL_RECEIVER = ""  # recipient; a list is supported to name several
EMAIL_SMTPSERVER = "smtp.163.com"  # mail server; the 163 mailbox by default
# WeCom (enterprise WeChat) alerting
WECHAT_WARNING_URL = ""  # WeCom robot api
WECHAT_WARNING_PHONE = ""  # person to alert, @-mentioned in the group; a list is supported
WECHAT_WARNING_ALL = False  # whether to notify everyone; False by default
# Interval
# Interval between identical alerts, to prevent flooding; 0 disables dedup
WARNING_INTERVAL = 3600
WARNING_LEVEL = "DEBUG"  # alert level: DEBUG / ERROR
# Number of failed tasks; an alert is raised above WARNING_FAILED_COUNT
WARNING_FAILED_COUNT = 1000

LOG_NAME = os.path.basename(os.getcwd())
LOG_PATH = "log/%s.log" % LOG_NAME  # log storage path
LOG_LEVEL = "DEBUG"
LOG_COLOR = True  # whether output is colored
LOG_IS_WRITE_TO_CONSOLE = True  # whether to print to the console
LOG_IS_WRITE_TO_FILE = False  # whether to write to a file
LOG_MODE = "w"  # file write mode
LOG_MAX_BYTES = 10 * 1024 * 1024  # max bytes per log file
LOG_BACKUP_COUNT = 20  # number of log files kept
LOG_ENCODING = "utf8"  # log file encoding
OTHERS_LOG_LEVAL = "ERROR"  # log level for third-party libraries

# Metrics monitoring: influxdb configuration
INFLUXDB_HOST = os.getenv("INFLUXDB_HOST", "localhost")
INFLUXDB_PORT = int(os.getenv("INFLUXDB_PORT", 8086))
INFLUXDB_UDP_PORT = int(os.getenv("INFLUXDB_UDP_PORT", 8089))
INFLUXDB_USER = os.getenv("INFLUXDB_USER")
INFLUXDB_PASSWORD = os.getenv("INFLUXDB_PASSWORD")
INFLUXDB_DATABASE = os.getenv("INFLUXDB_DB")
# Name of the table storing monitoring data; on the spider management
# system it is named after the task_id.  None when TASK_ID is unset or empty
INFLUXDB_MEASUREMENT = (
    ("task_" + os.getenv("TASK_ID")) if os.getenv("TASK_ID") else None
)
# Other metrics parameters; influxdb parameters configured here override
# the ones configured above
METRICS_OTHER_ARGS = dict(retention_policy_duration="180d", emit_interval=60)

############# Import the user's custom setting #############
try:
    from setting import *  # noqa: F401,F403
except Exception:
    # Best effort: a missing or broken user ``setting`` module must not
    # prevent the defaults above from loading.  Unlike a bare ``except``,
    # KeyboardInterrupt and SystemExit still propagate.
    pass
else:
    # Compatibility with old-style configuration, which used
    # AUTO_STOP_WHEN_SPIDER_DONE instead of KEEP_ALIVE.
    try:
        KEEP_ALIVE = not AUTO_STOP_WHEN_SPIDER_DONE  # noqa: F405
    except NameError:
        # The user's settings do not define the legacy option.
        pass