# -*- coding: utf-8 -*-
"""
Created on 2018-09-06 14:21
---------
@summary: utility helpers
---------
@author: Boris
@email: [email protected]
"""
import asyncio
import calendar
import codecs
import configparser  # used for reading configuration files
import datetime
import functools
import hashlib
import html
import json
import os
import pickle
import random
import re
import socket
import ssl
import string
import sys
import time
import traceback
import urllib
import urllib.parse
import uuid
import weakref
from functools import partial, wraps
from hashlib import md5
from pprint import pformat
from pprint import pprint
from urllib import request
from urllib.parse import urljoin
import execjs # pip install PyExecJS
import redis
import requests
import six
from requests.cookies import RequestsCookieJar
from w3lib.url import canonicalize_url as _canonicalize_url
import feapder.setting as setting
from feapder.db.redisdb import RedisDB
from feapder.utils.email_sender import EmailSender
from feapder.utils.log import log
os.environ["EXECJS_RUNTIME"] = "Node"  # make PyExecJS execute JavaScript with Node
# Globally disable SSL certificate verification
ssl._create_default_https_context = ssl._create_unverified_context
# Value handed to the `timeout=` argument of the requests calls in this module
TIME_OUT = 30
# NOTE(review): no consumer is visible in this part of the file — confirm where it is used
TIMER_TIME = 5
# Module-wide RedisDB handle; filled in on first call to get_redisdb()
redisdb = None
def get_redisdb():
    """
    Return the module-wide ``RedisDB`` handle.

    The handle is built with ``RedisDB()`` the first time it is needed (or
    whenever the stored value is falsy) and kept in the ``redisdb`` global.
    """
    global redisdb
    if redisdb:
        return redisdb
    redisdb = RedisDB()
    return redisdb
# Decorators
class Singleton(object):
    """
    Class decorator that hands out a single shared instance.

    The wrapped class is instantiated on the first call; every later call
    returns that same object and its arguments are ignored.
    """

    def __init__(self, cls):
        self._cls = cls
        self._instance = {}

    def __call__(self, *args, **kwargs):
        wrapped = self._cls
        store = self._instance
        if wrapped in store:
            return store[wrapped]
        store[wrapped] = wrapped(*args, **kwargs)
        return store[wrapped]
def log_function_time(func):
    """
    Decorator that logs how long each call of ``func`` takes.

    The elapsed wall-clock time (``time.time()`` difference) is written with
    ``log.debug`` after the call returns; the wrapped function's return value
    is passed through unchanged. Exceptions raised by ``func`` propagate and
    nothing is logged for that call.

    Args:
        func: the callable to time

    Returns:
        a wrapper carrying ``func``'s metadata (via ``functools.wraps``)
    """

    # The former bare ``try/except`` only surrounded the definition of the
    # wrapper, which cannot fail in practice, so it has been removed.
    @functools.wraps(func)  # copy the original function's attributes onto the wrapper
    def calculate_time(*args, **kw):
        began_time = time.time()
        result = func(*args, **kw)
        end_time = time.time()
        log.debug(func.__name__ + " run time = " + str(end_time - began_time))
        return result

    return calculate_time
def run_safe_model(module_name):
    """
    Decorator factory that keeps exceptions of the wrapped function from escaping.

    When the wrapped function raises an ``Exception`` the error is logged as
    ``"<module_name>: <function name> - <error>"``, the traceback is printed,
    and ``None`` is returned instead.

    Args:
        module_name: label put in front of the logged error message

    Returns:
        a decorator; the decorated function returns the original result, or
        ``None`` if the call raised
    """

    def inner_run_safe_model(func):
        # The former outer ``try/except`` only guarded the definition of the
        # wrapper, which cannot fail in practice, so it has been removed.
        @functools.wraps(func)  # copy the original function's attributes onto the wrapper
        def run_func(*args, **kw):
            try:
                return func(*args, **kw)
            except Exception as e:
                log.error(module_name + ": " + func.__name__ + " - " + str(e))
                traceback.print_exc()
                return None

        return run_func

    return inner_run_safe_model
def memoizemethod_noargs(method):
    """
    Cache the result of a method per instance.

    The first call on a given object runs ``method`` and stores the result in
    a ``weakref.WeakKeyDictionary`` keyed by that object; later calls on the
    same object return the stored value without running ``method`` again,
    whatever arguments are passed.
    """
    results = weakref.WeakKeyDictionary()

    @functools.wraps(method)
    def cached_method(self, *args, **kwargs):
        if self in results:
            return results[self]
        results[self] = method(self, *args, **kwargs)
        return results[self]

    return cached_method
######################## [Web page parsing] ###############################
# @log_function_time
def get_html_by_requests(
    url, headers=None, code="utf-8", data=None, proxies=None, with_response=False
):
    """
    Fetch a page with ``requests`` and return its decoded text.

    A POST is sent when ``data`` is truthy, otherwise a GET. Any exception is
    logged with ``log.error`` and swallowed; the text is then ``""``.

    Args:
        url: address to request
        headers: optional request headers
        code: encoding assigned to the response before decoding; a falsy
            value leaves the encoding chosen by requests untouched
        data: request body; when truthy the request is sent as POST
        proxies: proxies mapping handed to requests (``None`` for no explicit proxies)
        with_response: when true, also return the response object

    Returns:
        the page text, or ``(text, response)`` when ``with_response`` is true;
        ``response`` is ``None`` if the request itself failed
    """
    text = ""
    response = None
    try:
        if data:
            response = requests.post(
                url, headers=headers, timeout=TIME_OUT, data=data, proxies=proxies
            )
        else:
            response = requests.get(
                url, headers=headers, timeout=TIME_OUT, proxies=proxies
            )
        if code:
            response.encoding = code
        text = response.text
    except Exception as e:
        log.error(e)
    finally:
        # A Response is falsy for 4xx/5xx statuses, so compare against None
        # explicitly; otherwise error responses would never be closed.
        if response is not None:
            response.close()

    if with_response:
        return text, response
    return text
def get_json_by_requests(
    url,
    params=None,
    headers=None,
    data=None,
    proxies=None,
    with_response=False,
    cookies=None,
):
    """
    Request ``url`` with ``requests`` and return the body parsed as JSON.

    A POST is sent when ``data`` is truthy, otherwise a GET. The response is
    decoded as UTF-8. Any exception (network failure, invalid JSON, ...) is
    logged with ``log.error`` and swallowed; the result is then ``{}``.

    Args:
        url: address to request
        params: optional query-string parameters
        headers: optional request headers
        data: request body; when truthy the request is sent as POST
        proxies: proxies mapping handed to requests (``None`` for no explicit proxies)
        with_response: when true, also return the response object
        cookies: optional cookies sent with the request

    Returns:
        the parsed JSON, or ``(parsed, response)`` when ``with_response`` is
        true; ``response`` is ``None`` if the request itself failed
    """
    parsed = {}
    response = None
    try:
        if data:
            response = requests.post(
                url,
                headers=headers,
                data=data,
                params=params,
                timeout=TIME_OUT,
                proxies=proxies,
                cookies=cookies,
            )
        else:
            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=TIME_OUT,
                proxies=proxies,
                cookies=cookies,
            )
        response.encoding = "utf-8"
        parsed = response.json()
    except Exception as e:
        log.error(e)
    finally:
        # A Response is falsy for 4xx/5xx statuses, so compare against None
        # explicitly; otherwise error responses would never be closed.
        if response is not None:
            response.close()

    if with_response:
        return parsed, response
    return parsed
def get_cookies(response):
    """Return the cookies attached to a requests response as a plain dict."""
    return requests.utils.dict_from_cookiejar(response.cookies)
def get_cookies_from_str(cookie_str):
    """
    Parse a ``Cookie`` header style string into a dict.

    >>> get_cookies_from_str("key=value; key2=value2; key3=; key4=; ")
    {'key': 'value', 'key2': 'value2', 'key3': '', 'key4': ''}

    Segments are separated by ``;``. Each segment is split on its first
    ``=`` only, so values may themselves contain ``=``. Blank segments and
    segments without any ``=`` (for example a stray ``secure``) are skipped
    instead of raising ``ValueError``.

    Args:
        cookie_str: key=value; key2=value2; key3=; key4=

    Returns:
        dict mapping cookie names to values, both stripped of surrounding
        whitespace
    """
    cookies = {}
    for segment in cookie_str.split(";"):
        name, separator, value = segment.partition("=")
        if not separator:
            # No "=" at all: blank segment or a bare token, nothing to store.
            continue
        cookies[name.strip()] = value.strip()
    return cookies
def get_cookies_jar(cookies):
    """
    @summary: convert cookies produced by selenium into a requests cookie jar
    usage: requests.get(xxx, cookies=jar)
    reference: https://www.cnblogs.com/small-bud/p/9064674.html
    ---------
    @param cookies: [{},{}] - each item is read through its "name" and "value" keys
    ---------
    @result: cookie jar
    """
    jar = RequestsCookieJar()
    for entry in cookies:
        name = entry["name"]
        value = entry["value"]
        jar.set(name, value)
    return jar
def get_cookies_from_selenium_cookie(cookies):
    """
    @summary: convert cookies produced by selenium into a plain dict usable by requests
    usage: requests.get(xxx, cookies=cookie_dict)
    reference: https://www.cnblogs.com/small-bud/p/9064674.html
    ---------
    @param cookies: [{},{}] - items whose "name" is missing or falsy are ignored
    ---------
    @result: dict mapping each cookie name to its value
    """
    return {
        entry["name"]: entry["value"] for entry in cookies if entry.get("name")
    }
def cookiesjar2str(cookies):
    """
    Serialise a cookie jar into a ``"k=v; k2=v2; "`` string.

    The jar is first turned into a dict with
    ``requests.utils.dict_from_cookiejar``; every pair is followed by ``"; "``,
    including the last one.
    """
    pairs = requests.utils.dict_from_cookiejar(cookies).items()
    return "".join(name + "=" + value + "; " for name, value in pairs)
def cookies2str(cookies):
    """
    Serialise a cookie mapping into a ``"k=v; k2=v2; "`` string.

    Every pair is followed by ``"; "``, including the last one; an empty
    mapping gives ``""``.
    """
    return "".join(name + "=" + value + "; " for name, value in cookies.items())
def get_urls(
html,
stop_urls=(
"javascript",
"+",
".css",
".js",
".rar",
".xls",
".exe",
".apk",
".doc",
".jpg",
".png",
".flv",
".mp4",
),
):
# ä¸å¹é
javascriptã +ã # è¿æ ·çurl
regex = r'