/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.sax.HTMLDocument;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Static helper that downloads the resource behind a {@link URL} and wraps its raw bytes,
 * together with the character set selected for them, in an {@link HTMLDocument}.
 *
 * <p>This class only has static members and cannot be instantiated.
 */
public class HTMLFetcher {
    /**
     * Locates the {@code charset} parameter inside a Content-Type header value.
     *
     * <p>The parameter name is matched case-insensitively, may appear anywhere in the header
     * (not only at its end), and its value may optionally be wrapped in quotes; group 1 holds
     * the bare charset name without quotes.
     */
    private static final Pattern PAT_CHARSET = Pattern.compile(
            "\\bcharset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    private HTMLFetcher() {
    }

    /**
     * Fetches the document at the given URL.
     *
     * <p>The character set is taken from the {@code charset} parameter of the response's
     * Content-Type; if that is missing, malformed or not supported by this JVM, Cp1252 is
     * used. A response whose Content-Encoding is {@code gzip} is decompressed; any other
     * Content-Encoding is reported on {@code System.err} and the body is read as-is.
     *
     * @param url the location to fetch
     * @return an {@link HTMLDocument} built from the (decoded) response bytes and the
     *         selected character set
     * @throws IOException if opening the connection or reading the response fails
     */
    public static HTMLDocument fetch(URL url) throws IOException {
        URLConnection conn = url.openConnection();
        Charset cs = detectCharset(conn.getContentType());

        InputStream in = conn.getInputStream();
        byte[] data;
        try {
            String encoding = conn.getContentEncoding();
            if (encoding != null) {
                if ("gzip".equalsIgnoreCase(encoding)) {
                    // If this constructor throws, "in" still refers to the raw stream,
                    // which the finally block below closes.
                    in = new GZIPInputStream(in);
                } else {
                    System.err.println("WARN: unsupported Content-Encoding: " + encoding);
                }
            }
            data = readAll(in);
        } finally {
            // Always release the connection's stream, including on read failures.
            in.close();
        }
        return new HTMLDocument(data, cs);
    }

    /**
     * Chooses the character set for a response from its Content-Type header value.
     * Returns Cp1252 when the header is {@code null}, has no charset parameter, or names
     * a charset that is illegal or unavailable.
     */
    private static Charset detectCharset(String contentType) {
        Charset fallback = Charset.forName("Cp1252");
        if (contentType == null) {
            return fallback;
        }
        Matcher m = PAT_CHARSET.matcher(contentType);
        if (!m.find()) {
            return fallback;
        }
        try {
            return Charset.forName(m.group(1));
        } catch (IllegalArgumentException ignored) {
            // Covers both UnsupportedCharsetException and IllegalCharsetNameException:
            // a bogus or unknown charset label must not abort the fetch, so the
            // default is used instead.
            return fallback;
        }
    }

    /** Reads the stream until end-of-file and returns everything read; does not close it. */
    private static byte[] readAll(InputStream in) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] buf = new byte[4096];
        int r;
        while ((r = in.read(buf)) != -1) {
            bos.write(buf, 0, r);
        }
        return bos.toByteArray();
    }
}

