/*
 * Decompiled with CFR 0.152.
 */
package cn.wanghaomiao.seimi.core;

import cn.wanghaomiao.seimi.annotation.Interceptor;
import cn.wanghaomiao.seimi.core.SeimiInterceptor;
import cn.wanghaomiao.seimi.core.SeimiQueue;
import cn.wanghaomiao.seimi.def.BaseSeimiCrawler;
import cn.wanghaomiao.seimi.http.HttpClientFactory;
import cn.wanghaomiao.seimi.http.HttpMethod;
import cn.wanghaomiao.seimi.struct.BodyType;
import cn.wanghaomiao.seimi.struct.CrawlerModel;
import cn.wanghaomiao.seimi.struct.Request;
import cn.wanghaomiao.seimi.struct.Response;
import cn.wanghaomiao.seimi.utils.StrFormatUtil;
import cn.wanghaomiao.seimi.utils.StructValidator;
import cn.wanghaomiao.xpath.exception.NoSuchAxisException;
import cn.wanghaomiao.xpath.exception.NoSuchFunctionException;
import cn.wanghaomiao.xpath.exception.XpathSyntaxErrorException;
import cn.wanghaomiao.xpath.model.JXDocument;
import com.alibaba.fastjson.JSON;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Worker that drains the request queue of a single crawler.
 *
 * <p>Each iteration pops a {@link Request} from the crawler's {@link SeimiQueue}, validates it,
 * downloads it through an {@link HttpClient}, follows HTML meta-refresh redirects, and finally
 * invokes the crawler callback named by the request, surrounded by the matching
 * {@link SeimiInterceptor}s. The loop ends when a request whose {@code isStop()} flag is set is
 * popped.
 */
public class SeimiProcessor implements Runnable {
    /** Maximum number of consecutive meta-refresh redirects followed for one request. */
    private static final int MAX_META_REFRESH_HOPS = 10;

    /** Matches a {@code <meta http-equiv="refresh" ... URL=...">} tag and captures the target URL. */
    private static final Pattern META_REFRESH = Pattern.compile("<(?:META|meta|Meta)\\s+(?:HTTP-EQUIV|http-equiv)\\s*=\\s*\"refresh\".*URL=(.*)\">");

    private final SeimiQueue queue;
    private final List<SeimiInterceptor> interceptors;
    private final CrawlerModel crawlerModel;
    private final BaseSeimiCrawler crawler;
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /**
     * @param interceptors interceptors to consider around every callback invocation
     * @param crawlerModel model of the crawler this processor serves; supplies the queue and the
     *                     crawler instance
     */
    public SeimiProcessor(List<SeimiInterceptor> interceptors, CrawlerModel crawlerModel) {
        this.queue = crawlerModel.getQueueInstance();
        this.interceptors = interceptors;
        this.crawlerModel = crawlerModel;
        this.crawler = crawlerModel.getInstance();
    }

    /**
     * Pops requests in an endless loop and processes them until a stop request arrives.
     *
     * <p>If processing throws, the request is pushed back onto the queue while its current
     * request count is below its maximum; once the maximum is reached (and is positive) the
     * request is handed to {@code handleErrorRequest} of the crawler instead.
     */
    @Override
    public void run() {
        while (true) {
            Request request = this.queue.bPop(this.crawlerModel.getCrawlerName());
            if (request == null) {
                continue;
            }
            try {
                if (request.isStop()) {
                    this.logger.info("SeimiProcessor[{}] will stop!", Thread.currentThread().getName());
                    break;
                }
                if (!this.shouldProcess(request)) {
                    continue;
                }
                this.process(request);
            } catch (Exception e) {
                // NOTE(review): an InterruptedException raised by the delay sleep also lands here and
                // is treated like any other failure; confirm how bPop reacts to interrupts before
                // restoring the interrupt flag.
                if (request.getCurrentReqCount() < request.getMaxReqCount()) {
                    request.incrReqCount();
                    this.queue.push(request);
                    this.logger.info("Request process error,req will go into queue again,url={},maxReqCount={},currentReqCount={}", request.getUrl(), request.getMaxReqCount(), request.getCurrentReqCount());
                } else if (request.getMaxReqCount() > 0) {
                    this.crawler.handleErrorRequest(request);
                }
                this.logger.error(e.getMessage(), e);
            }
        }
    }

    /**
     * Applies the structural, allow/deny-rule and de-duplication checks to a request.
     *
     * @param request the request popped from the queue
     * @return {@code true} if the request should be downloaded, {@code false} if it was dropped
     *         (the reason is logged)
     * @throws Exception anything thrown by the validators or the queue; handled by the retry
     *                   logic in {@link #run()}
     */
    private boolean shouldProcess(Request request) throws Exception {
        if (!StructValidator.validateAnno(request)) {
            this.logger.warn("Request={} is illegal", JSON.toJSONString(request));
            return false;
        }
        if (!StructValidator.validateAllowRules(this.crawler.allowRules(), request.getUrl())) {
            this.logger.warn("Request={} will be dropped by allowRules=[{}]", JSON.toJSONString(request), StringUtils.join((Object[])this.crawler.allowRules(), ","));
            return false;
        }
        if (StructValidator.validateDenyRules(this.crawler.denyRules(), request.getUrl())) {
            this.logger.warn("Request={} will be dropped by denyRules=[{}]", JSON.toJSONString(request), StringUtils.join((Object[])this.crawler.denyRules(), ","));
            return false;
        }
        // The processed-check only runs for a first attempt or once the retry budget is used up,
        // so that an in-flight retry is not rejected as a duplicate.
        if (this.crawlerModel.isUseUnrepeated()
                && (request.getCurrentReqCount() == 0 || request.getCurrentReqCount() >= request.getMaxReqCount())
                && this.queue.isProcessed(request)) {
            this.logger.info("This request has bean processed,so current request={} will be dropped!", JSON.toJSONString(request));
            return false;
        }
        return true;
    }

    /**
     * Downloads the request, follows meta-refresh redirects, marks the request as processed and
     * dispatches the response to the crawler callback and its interceptors.
     *
     * @param request a request that already passed {@link #shouldProcess(Request)}
     * @throws Exception any download, interceptor or callback failure; handled by the retry
     *                   logic in {@link #run()}
     */
    private void process(Request request) throws Exception {
        HttpClient hc = this.crawlerModel.isUseCookie()
                ? HttpClientFactory.getHttpClient(10000, this.crawler.getCookieStore())
                : HttpClientFactory.getHttpClient();
        RequestConfig config = RequestConfig.custom().setProxy(this.crawlerModel.getProxy()).build();
        RequestBuilder requestBuilder = HttpMethod.POST.equals(request.getHttpMethod())
                ? RequestBuilder.post().setUri(request.getUrl())
                : RequestBuilder.get().setUri(request.getUrl());
        if (request.getParams() != null) {
            for (Map.Entry<String, String> entry : request.getParams().entrySet()) {
                requestBuilder.addParameter(entry.getKey(), entry.getValue());
            }
        }
        requestBuilder.setConfig(config).setHeader("User-Agent", this.crawler.getUserAgent());

        HttpContext httpContext = new BasicHttpContext();
        HttpResponse httpResponse = hc.execute(requestBuilder.build(), httpContext);
        Response seimiResponse = this.renderResponse(httpResponse, request, httpContext);

        // Follow <meta http-equiv="refresh"> redirects, bounded so that a page refreshing to
        // itself cannot trap this worker forever.
        int hops = 0;
        String nextUrl;
        while ((nextUrl = this.findMetaRefreshUrl(seimiResponse, httpContext)) != null) {
            if (hops >= MAX_META_REFRESH_HOPS) {
                this.logger.warn("Seimi stop following meta refresh after {} hops,url={}", MAX_META_REFRESH_HOPS, request.getUrl());
                break;
            }
            hops++;
            this.logger.info("Seimi refresh url to={} from={}", nextUrl, requestBuilder.getUri());
            requestBuilder.setUri(nextUrl);
            httpResponse = hc.execute(requestBuilder.build(), httpContext);
            seimiResponse = this.renderResponse(httpResponse, request, httpContext);
        }

        this.queue.addProcessed(request);
        Method requestCallback = this.crawlerModel.getMemberMethods().get(request.getCallBack());
        if (requestCallback == null) {
            return;
        }
        for (SeimiInterceptor interceptor : this.interceptors) {
            if (this.appliesTo(interceptor, requestCallback)) {
                interceptor.before(requestCallback, seimiResponse);
            }
        }
        if (this.crawlerModel.getDelay() > 0) {
            TimeUnit.SECONDS.sleep(this.crawlerModel.getDelay());
        }
        requestCallback.invoke(this.crawlerModel.getInstance(), seimiResponse);
        for (SeimiInterceptor interceptor : this.interceptors) {
            if (this.appliesTo(interceptor, requestCallback)) {
                interceptor.after(requestCallback, seimiResponse);
            }
        }
        this.logger.debug("Crawler[{}] ,url={} ,responseStatus={}", this.crawlerModel.getCrawlerName(), request.getUrl(), httpResponse.getStatusLine().getStatusCode());
    }

    /**
     * Extracts the target of a meta-refresh tag from the response content, if there is one.
     *
     * @param seimiResponse the rendered response; its content may be {@code null} when the
     *                      response carried no entity or the body could not be read
     * @param httpContext   context of the exchange, used to prefix a target that does not start
     *                      with {@code http}
     * @return the URL to request next, or {@code null} when there is nothing to follow
     */
    private String findMetaRefreshUrl(Response seimiResponse, HttpContext httpContext) {
        String content = seimiResponse.getContent();
        if (content == null) {
            return null;
        }
        Matcher matcher = META_REFRESH.matcher(content);
        if (!matcher.find()) {
            return null;
        }
        String nextUrl = matcher.group(1).replaceAll("'", "");
        if (!nextUrl.startsWith("http")) {
            String prefix = this.getRealUrl(httpContext);
            if (prefix == null) {
                // Without a known base the target cannot be completed; do not build "null...".
                this.logger.warn("Seimi can not resolve refresh url={} ,real url is unknown", nextUrl);
                return null;
            }
            nextUrl = prefix + nextUrl;
        }
        return nextUrl;
    }

    /**
     * Decides whether an interceptor takes part in the invocation of a callback: either its
     * {@link Interceptor} annotation requests every method, or the callback method or the crawler
     * class carries the interceptor's target annotation.
     */
    private boolean appliesTo(SeimiInterceptor interceptor, Method requestCallback) {
        Interceptor interAnno = interceptor.getClass().getAnnotation(Interceptor.class);
        return (interAnno != null && interAnno.everyMethod())
                || requestCallback.isAnnotationPresent(interceptor.getTargetAnnotationClass())
                || this.crawlerModel.getClazz().isAnnotationPresent(interceptor.getTargetAnnotationClass());
    }

    /**
     * Converts an {@link HttpResponse} into a Seimi {@link Response}.
     *
     * <p>Bodies whose Content-Type contains {@code image} are stored as {@link BodyType#BINARY}
     * with the content set to the last path segment of the request URL; everything else
     * (including a missing Content-Type) is stored as {@link BodyType#TEXT} and decoded with the
     * charset of the Content-Type header or, failing that, the charset declared in the document.
     * Read or decode failures are logged and leave the response partially populated.
     *
     * @param httpResponse the raw response
     * @param request      the request that produced it
     * @param httpContext  context of the exchange, used to determine the real URL
     * @return the populated response, never {@code null}
     */
    private Response renderResponse(HttpResponse httpResponse, Request request, HttpContext httpContext) {
        Response seimiResponse = new Response();
        HttpEntity entity = httpResponse.getEntity();
        seimiResponse.setHttpResponse(httpResponse);
        seimiResponse.setReponseEntity(entity);
        seimiResponse.setRealUrl(this.getRealUrl(httpContext));
        seimiResponse.setUrl(request.getUrl());
        seimiResponse.setRequest(request);
        if (entity == null) {
            return seimiResponse;
        }

        Header referer = httpResponse.getFirstHeader("Referer");
        if (referer != null) {
            seimiResponse.setReferer(referer.getValue());
        }

        // The Content-Type header is optional, so both the header and its value may be absent.
        Header contentTypeHeader = entity.getContentType();
        String contentTypeValue = contentTypeHeader != null ? contentTypeHeader.getValue() : null;
        boolean isImage = contentTypeValue != null && contentTypeValue.contains("image");

        if (isImage) {
            seimiResponse.setBodyType(BodyType.BINARY);
            try {
                seimiResponse.setData(EntityUtils.toByteArray(entity));
                seimiResponse.setContent(StringUtils.substringAfterLast(request.getUrl(), "/"));
            } catch (Exception e) {
                this.logger.error("no data can be read from httpResponse", e);
            }
            return seimiResponse;
        }

        seimiResponse.setBodyType(BodyType.TEXT);
        try {
            seimiResponse.setData(EntityUtils.toByteArray(entity));
            ContentType contentType = ContentType.get(entity);
            Charset charset = contentType != null ? contentType.getCharset() : null;
            if (charset != null) {
                seimiResponse.setContent(new String(seimiResponse.getData(), charset));
            } else {
                // Decode as ISO-8859-1 first so the document can be inspected for its declared
                // charset, then decode the raw bytes again with that charset.
                seimiResponse.setContent(new String(seimiResponse.getData(), "ISO-8859-1"));
                String docCharset = this.renderRealCharset(seimiResponse);
                seimiResponse.setContent(new String(seimiResponse.getData(), docCharset));
            }
        } catch (Exception e) {
            this.logger.error("no content data", e);
        }
        return seimiResponse;
    }

    /**
     * Looks up the charset declared inside the document: first {@code <meta charset>}, then
     * {@code <meta http-equiv="charset">}, then the charset part of a
     * {@code <meta http-equiv="Content-Type">} tag.
     *
     * @param response response whose content has already been set
     * @return the declared charset name, or {@code "UTF-8"} when none is found
     */
    private String renderRealCharset(Response response) throws NoSuchFunctionException, XpathSyntaxErrorException, NoSuchAxisException {
        JXDocument doc = response.document();
        String charset = StrFormatUtil.getFirstEmStr(doc.sel("//meta[@charset]/@charset"), "").trim();
        if (StringUtils.isBlank(charset)) {
            charset = StrFormatUtil.getFirstEmStr(doc.sel("//meta[@http-equiv='charset']/@content"), "").trim();
        }
        if (StringUtils.isBlank(charset)) {
            String ct = StringUtils.join((Iterable<?>)doc.sel("//meta[@http-equiv='Content-Type']/@content|//meta[@http-equiv='content-type']/@content"), ";").trim();
            charset = StrFormatUtil.parseCharset(ct.toLowerCase());
        }
        return StringUtils.isNotBlank(charset) ? charset : "UTF-8";
    }

    /**
     * Reconstructs the URL of the last request recorded in the HTTP context from its
     * {@code http.target_host} and {@code http.request} attributes.
     *
     * @param httpContext context of a finished exchange
     * @return the request URI if it is absolute, otherwise the target host followed by the
     *         request URI; {@code null} if either attribute is missing or of an unexpected type
     */
    private String getRealUrl(HttpContext httpContext) {
        Object target = httpContext.getAttribute("http.target_host");
        Object reqUri = httpContext.getAttribute("http.request");
        if (!(target instanceof HttpHost) || !(reqUri instanceof HttpUriRequest)) {
            return null;
        }
        HttpHost host = (HttpHost)target;
        HttpUriRequest uriRequest = (HttpUriRequest)reqUri;
        return uriRequest.getURI().isAbsolute()
                ? uriRequest.getURI().toString()
                : host.toString() + uriRequest.getURI().toString();
    }
}

