package at.molindo.webtools.crawler;

import at.molindo.utils.io.StreamUtils;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.protocol.HTTP;
import org.w3c.tidy.Dict;
import org.w3c.tidy.Tidy;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:at/molindo/webtools/crawler/CrawlerTask.class */
/**
 * A single unit of crawl work: fetches one URL, records status, timing and
 * content in a {@link CrawlerResult}, queues any redirect target or same-host
 * links found in HTML content, and finally reports the result to the owning
 * {@link Crawler}.
 *
 * <p>Instances must be executed on a {@link CrawlerThread}, because the HTTP
 * client and the SAX parser are obtained from the current thread.
 */
public class CrawlerTask implements Runnable {
    /**
     * Copy-buffer size and default capacity for binary downloads. The original
     * code referenced this JTidy constant purely as an integer.
     * NOTE(review): probably a decompiler artifact for a plain numeric buffer
     * size - confirm before replacing it with a literal.
     */
    private static final int BUFFER_SIZE = Dict.CM_PARAM;

    /** Charset used for text content when the response does not name a usable one. */
    private static final String DEFAULT_CHARSET = "utf-8";

    /** System id of the XHTML transitional DTD that is served from the classpath. */
    private static final String XHTML_TRANSITIONAL_DTD = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";

    /** Classpath resource name of the bundled XHTML transitional DTD. */
    private static final String XHTML_TRANSITIONAL_RESOURCE = "xhtml1-transitional.dtd";

    private final Crawler _crawler;
    private final String _urlString;
    private final CrawlerReferrer _referrer;
    private final boolean _tidy;

    /**
     * @param crawler         crawler that receives queued links and the final report
     * @param str             URL to fetch
     * @param crawlerReferrer where the URL was found, or {@code null}
     * @param z               whether HTML is run through JTidy before being parsed
     */
    public CrawlerTask(Crawler crawler, String str, CrawlerReferrer crawlerReferrer, boolean z) {
        this._crawler = crawler;
        this._urlString = str;
        this._referrer = crawlerReferrer;
        this._tidy = z;
    }

    /** @return the URL this task fetches */
    public String getUrlString() {
        return this._urlString;
    }

    /** @return the referrer passed to the constructor, possibly {@code null} */
    public CrawlerReferrer getReferrer() {
        return this._referrer;
    }

    /**
     * Fetches the URL and reports the outcome exactly once.
     *
     * <p>I/O and SAX failures are recorded as the result's error message; any
     * other failure is printed and recorded as well, so that a broken page or
     * an invalid URL never prevents the result from being reported.
     *
     * @throws Error if the current thread is not a {@link CrawlerThread}
     */
    @Override // java.lang.Runnable
    public void run() {
        Thread current = Thread.currentThread();
        if (!(current instanceof CrawlerThread)) {
            throw new Error("not a cralwer thread");
        }
        CrawlerThread crawlerThread = (CrawlerThread) current;

        CrawlerResult result = new CrawlerResult();
        result.setUrl(this._urlString);
        if (this._referrer != null) {
            result.getReferrers().add(this._referrer);
        }

        try {
            // Built inside the try so that an invalid URL is still reported.
            HttpGet request = new HttpGet(this._urlString);

            long started = System.currentTimeMillis();
            HttpResponse response = crawlerThread.getClient().execute(request);
            result.setStatus(response.getStatusLine().getStatusCode());
            result.setTime((int) (System.currentTimeMillis() - started));

            Header[] contentTypes = response.getHeaders(HTTP.CONTENT_TYPE);
            result.setContentType((contentTypes == null || contentTypes.length == 0) ? null : contentTypes[0].getValue());

            // Always drain the body (even for redirects) so the stream gets closed.
            Object content = readEntity(response.getEntity(), result.getContentType());

            if (result.getStatus() / 100 == 3) {
                followRedirect(response);
            } else if (result.getStatus() == 200 && (content instanceof String)) {
                result.setText((String) content);
                String contentType = result.getContentType();
                if (contentType != null && contentType.startsWith("text/html")) {
                    parseResult(result.getText());
                }
            }
        } catch (IOException e) {
            result.setErrorMessage(e.getMessage());
            e.printStackTrace();
        } catch (SAXException e) {
            // Unparseable markup is an expected condition; record it without a stack trace.
            result.setErrorMessage(e.getMessage());
        } catch (Throwable th) {
            // Best effort: one failing page must not take the crawler thread down.
            th.printStackTrace();
            result.setErrorMessage(th.toString());
        } finally {
            this._crawler.report(result);
        }
    }

    /**
     * Reads the response body, if there is one.
     *
     * @return the body as returned by {@link #consumeContent}, or {@code null}
     *         when the response carries no entity or no content stream
     */
    private Object readEntity(HttpEntity entity, String contentType) throws IOException {
        if (entity == null) {
            return null;
        }
        InputStream body = entity.getContent();
        if (body == null) {
            return null;
        }
        long length = entity.getContentLength();
        Header encoding = entity.getContentEncoding();
        return consumeContent(body, contentType, length, encoding == null ? null : encoding.getValue());
    }

    /**
     * Queues the target of a 3xx response, or logs to stderr when the response
     * has no {@code location} header. A target starting with "/" is appended
     * to the crawler's host prefix; anything else is queued as received.
     */
    private void followRedirect(HttpResponse response) {
        Header[] locations = response.getHeaders("location");
        if (locations == null || locations.length <= 0) {
            System.err.println("redirect without location from " + this._urlString);
            return;
        }
        String target = locations[0].getValue();
        if (target.startsWith("/")) {
            target = this._crawler._host + target.substring(1);
        }
        String description = response.getStatusLine().getReasonPhrase() + ": " + this._referrer;
        this._crawler.queue(target, new CrawlerReferrer(this._urlString, description));
    }

    /**
     * Reads the stream to its end and closes it.
     *
     * @param inputStream body stream; always closed before returning
     * @param str         content type, may be {@code null}
     * @param j           declared content length, used only as a capacity hint
     * @param str2        value of the Content-Encoding header, may be {@code null}
     * @return a {@code String} for {@code text/*} content (lines joined with
     *         "\n", without a trailing newline), otherwise a {@code byte[]}
     */
    private Object consumeContent(InputStream inputStream, String str, long j, String str2) throws IOException {
        String contentType = str == null ? "" : str;
        try {
            if (!contentType.startsWith("text/")) {
                int capacity = (j <= 0 || j > 2147483647L) ? BUFFER_SIZE : (int) j;
                ByteArrayOutputStream buffer = new ByteArrayOutputStream(capacity);
                StreamUtils.copy(inputStream, buffer, BUFFER_SIZE);
                return buffer.toByteArray();
            }
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, resolveCharset(contentType, str2)));
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\n");
            }
            if (text.length() > 0) {
                text.setLength(text.length() - 1);
            }
            return text.toString();
        } finally {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the body stream fails.
            }
        }
    }

    /**
     * Chooses the charset for decoding text content. The {@code charset}
     * parameter of the content type wins. The Content-Encoding value is only
     * honoured when it happens to name a supported charset (kept for
     * compatibility with the previous behaviour); values such as "gzip" are
     * ignored instead of failing the request. Falls back to UTF-8.
     */
    private static Charset resolveCharset(String contentType, String contentEncoding) {
        Charset charset = charsetOrNull(charsetParameter(contentType));
        if (charset == null) {
            charset = charsetOrNull(contentEncoding);
        }
        return charset != null ? charset : Charset.forName(DEFAULT_CHARSET);
    }

    /** Extracts the (optionally quoted) {@code charset} parameter of a content type, or {@code null}. */
    private static String charsetParameter(String contentType) {
        String prefix = "charset=";
        String[] parts = contentType.split(";");
        for (int i = 1; i < parts.length; i++) {
            String parameter = parts[i].trim();
            if (parameter.regionMatches(true, 0, prefix, 0, prefix.length())) {
                String name = parameter.substring(prefix.length()).trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                return name.length() == 0 ? null : name;
            }
        }
        return null;
    }

    /** Looks up a charset by name; returns {@code null} for null, illegal or unsupported names. */
    private static Charset charsetOrNull(String name) {
        if (name == null) {
            return null;
        }
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
            return null;
        }
    }

    /**
     * Parses the page with the current thread's SAX parser and queues the
     * target of every {@code <a href>} that belongs to the crawled host.
     * When tidying is enabled the markup is first converted to XHTML by JTidy.
     *
     * @param str page markup
     * @throws SAXException if the (tidied) markup cannot be parsed
     * @throws IOException  if reading the markup or a DTD fails
     */
    protected void parseResult(String str) throws SAXException, IOException {
        InputSource inputSource;
        if (this._tidy) {
            Tidy tidy = new Tidy();
            tidy.setXHTML(true);
            tidy.setErrfile("/dev/null");
            // NOTE(review): getBytes() uses the platform charset; left unchanged because
            // Tidy's input encoding is not configured here - confirm they agree.
            ByteArrayInputStream rawHtml = new ByteArrayInputStream(str.getBytes());
            ByteArrayOutputStream tidied = new ByteArrayOutputStream();
            tidy.parse(rawHtml, tidied);
            inputSource = new InputSource(new ByteArrayInputStream(tidied.toByteArray()));
        } else {
            inputSource = new InputSource(new StringReader(str));
        }
        ((CrawlerThread) Thread.currentThread()).getParser().parse(inputSource, new DefaultHandler() {
            @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if (!"a".equals(qName)) {
                    return;
                }
                String href = attributes.getValue("href");
                if (href != null) {
                    queueLink(href);
                }
            }

            @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.EntityResolver
            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
                String resolvedId = systemId;
                if (XHTML_TRANSITIONAL_DTD.equals(systemId)) {
                    URL bundled = CrawlerTask.class.getClassLoader().getResource(XHTML_TRANSITIONAL_RESOURCE);
                    if (bundled != null) {
                        // Serve the DTD from the classpath; keep the original id if it is not bundled.
                        resolvedId = bundled.toString();
                    }
                }
                return CrawlerTask.this._crawler.getDtdMemoryCache().resolveEntity(publicId, resolvedId);
            }
        });
    }

    /**
     * Queues one link found on the page.
     * <ul>
     * <li>pure fragment links ("#...") are ignored, other fragments are stripped;</li>
     * <li>links with a scheme are queued only if they start with the crawler's
     *     host prefix (so javascript:, mailto:, ftp:, foreign hosts etc. are skipped);</li>
     * <li>links starting with "/" are appended to the host prefix;</li>
     * <li>everything else is resolved relative to this task's URL.</li>
     * </ul>
     */
    private void queueLink(String href) {
        String target = href;
        int fragmentStart = target.indexOf('#');
        if (fragmentStart == 0) {
            return;
        }
        if (fragmentStart > 0) {
            target = target.substring(0, fragmentStart);
        }
        CrawlerReferrer referrer = new CrawlerReferrer(this._urlString, target);
        if (hasScheme(target)) {
            if (target.startsWith(this._crawler._host)) {
                this._crawler.queue(target, referrer);
            }
            return;
        }
        if (target.startsWith("/")) {
            // presumably _host ends with "/", hence the leading slash is dropped
            this._crawler.queue(this._crawler._host + target.substring(1), referrer);
            return;
        }
        this._crawler.queue(resolveRelative(target), referrer);
    }

    /** @return whether the reference starts with a URI scheme ({@code ALPHA *(ALPHA / DIGIT / "+" / "-" / ".") ":"}) */
    private static boolean hasScheme(String reference) {
        int colon = reference.indexOf(':');
        if (colon <= 0 || !isAsciiLetter(reference.charAt(0))) {
            return false;
        }
        for (int i = 1; i < colon; i++) {
            char c = reference.charAt(i);
            boolean schemeChar = isAsciiLetter(c) || (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';
            if (!schemeChar) {
                return false;
            }
        }
        return true;
    }

    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Resolves a relative link against the directory of this task's URL,
     * consuming leading "./" and "../" segments. Climbing with "../" stops at
     * the host root (or, for URLs outside the crawled host, at the last
     * remaining "/") instead of failing.
     */
    private String resolveRelative(String href) {
        String host = this._crawler._host;
        String root = "";
        if (host != null) {
            root = host.endsWith("/") ? host.substring(0, host.length() - 1) : host;
        }

        // A "/" inside the query string is not a path separator.
        int queryStart = this._urlString.indexOf('?');
        String base = queryStart < 0 ? this._urlString : this._urlString.substring(0, queryStart);

        boolean underRoot = root.length() > 0 && base.startsWith(root)
                && (base.length() == root.length() || base.charAt(root.length()) == '/');
        int floor = underRoot ? root.length() : 0;

        base = parentOf(base, floor);
        String remainder = href;
        while (true) {
            if (remainder.startsWith("../")) {
                remainder = remainder.substring(3);
                base = parentOf(base, floor);
            } else if (remainder.startsWith("./")) {
                remainder = remainder.substring(2);
            } else {
                break;
            }
        }
        return base + "/" + remainder;
    }

    /**
     * Drops the last "/"-separated segment of {@code url}, never cutting
     * below the first {@code floor} characters.
     */
    private static String parentOf(String url, int floor) {
        int lastSlash = url.lastIndexOf('/');
        if (lastSlash < floor) {
            return floor > 0 ? url.substring(0, floor) : url;
        }
        return url.substring(0, lastSlash);
    }
}
