/*
 * Decompiled with CFR 0.152.
 */
package keel.Algorithms.Decision_Trees.M5;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.util.Enumeration;
import java.util.Random;
import keel.Algorithms.Decision_Trees.M5.M5AttrStats;
import keel.Algorithms.Decision_Trees.M5.M5Attribute;
import keel.Algorithms.Decision_Trees.M5.M5Instance;
import keel.Algorithms.Decision_Trees.M5.M5SparseInstance;
import keel.Algorithms.Decision_Trees.M5.M5StaticUtils;
import keel.Algorithms.Decision_Trees.M5.M5Vector;
import keel.Algorithms.Decision_Trees.M5.SimpleStatistics;

public class M5Instances
implements Serializable {
    /** Relation name; taken from the {@code @relation} header line or supplied to a constructor. */
    protected String m_RelationName;
    /** Attribute definitions; elements are cast to {@code M5Attribute} when read. */
    protected M5Vector m_Attributes;
    /** Instances held by this dataset; elements are cast to {@code M5Instance} when read. */
    protected M5Vector m_Instances;
    /** Index of the class attribute; a negative value is treated as "not set". */
    protected int m_ClassIndex;
    /** Scratch buffer for attribute values, used while parsing sparse instances. */
    protected double[] m_ValueBuffer;
    /** Scratch buffer for attribute indices, used while parsing sparse instances. */
    protected int[] m_IndicesBuffer;
    /** Value exposed by {@code NameClassIndex()}; reset to the empty string when header parsing starts. */
    protected String m_NameClassIndex;

    public M5Instances(Reader reader) throws IOException {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);
        this.initTokenizer(tokenizer);
        this.readHeader(tokenizer);
        this.m_ClassIndex = -1;
        this.m_Instances = new M5Vector(1000);
        while (this.getInstance(tokenizer, true)) {
        }
        this.compactify();
    }

    public M5Instances(Reader reader, int capacity) throws IOException {
        if (capacity < 0) {
            throw new IllegalArgumentException("Capacity has to be positive!");
        }
        StreamTokenizer tokenizer = new StreamTokenizer(reader);
        this.initTokenizer(tokenizer);
        this.readHeader(tokenizer);
        this.m_ClassIndex = -1;
        this.m_Instances = new M5Vector(capacity);
    }

    /**
     * Copy constructor: takes over the header of {@code dataset} and then
     * copies all of its instances into the new object.
     *
     * @param dataset the dataset to copy
     */
    public M5Instances(M5Instances dataset) {
        this(dataset, dataset.numInstances());
        dataset.copyInstances(0, this, dataset.numInstances());
    }

    public M5Instances(M5Instances dataset, int capacity) {
        if (capacity < 0) {
            capacity = 0;
        }
        this.m_ClassIndex = dataset.m_ClassIndex;
        this.m_RelationName = dataset.m_RelationName;
        this.m_Attributes = dataset.m_Attributes;
        this.m_Instances = new M5Vector(capacity);
    }

    /**
     * Creates a dataset holding {@code toCopy} instances of {@code source},
     * starting at position {@code first}.
     *
     * @param source dataset to copy from
     * @param first  index of the first instance to copy
     * @param toCopy number of instances to copy
     * @throws IllegalArgumentException if {@code first} is negative or
     *         {@code first + toCopy} exceeds the number of instances in {@code source}
     */
    public M5Instances(M5Instances source, int first, int toCopy) {
        // The delegating call has to come first in Java, so the range check
        // runs only after the (empty) header copy has been set up.
        this(source, toCopy);
        if (first < 0 || first + toCopy > source.numInstances()) {
            throw new IllegalArgumentException("Parameters first and/or toCopy out of range");
        }
        source.copyInstances(first, this, toCopy);
    }

    public M5Instances(String name, M5Vector attInfo, int capacity) {
        this.m_RelationName = name;
        this.m_ClassIndex = -1;
        this.m_Attributes = attInfo;
        for (int i = 0; i < this.numAttributes(); ++i) {
            this.attribute(i).setIndex(i);
        }
        this.m_Instances = new M5Vector(capacity);
    }

    /**
     * Returns an empty dataset with the same relation name and class index,
     * in which every attribute of type 2 (the type reported as "Str" by
     * {@code toSummaryString}) is replaced by a freshly constructed attribute
     * of the same name.
     *
     * @return the new, instance-free dataset
     */
    public M5Instances stringFreeStructure() {
        M5Vector stripped = (M5Vector) m_Attributes.copy();
        for (int pos = 0; pos < stripped.size(); pos++) {
            M5Attribute current = (M5Attribute) stripped.elementAt(pos);
            if (current.type() == 2) {
                stripped.setElementAt(new M5Attribute(current.name(), null), pos);
            }
        }
        M5Instances header = new M5Instances(relationName(), stripped, 0);
        header.m_ClassIndex = m_ClassIndex;
        return header;
    }

    /**
     * Appends a copy of the given instance to this dataset. The copy, not the
     * argument, is attached to this dataset via {@code setDataset}.
     *
     * @param instance the instance to copy and append
     */
    public final void add(M5Instance instance) {
        M5Instance newInstance = (M5Instance)instance.copy();
        newInstance.setDataset(this);
        this.m_Instances.addElement(newInstance);
    }

    /**
     * Returns the attribute stored at the given position.
     *
     * @param index position in the attribute vector
     * @return the attribute at that position
     */
    public final M5Attribute attribute(int index) {
        return (M5Attribute)this.m_Attributes.elementAt(index);
    }

    /**
     * Looks up an attribute by exact (case-sensitive) name.
     *
     * @param name the attribute name to search for
     * @return the first attribute whose name equals {@code name}, or
     *         {@code null} if there is none
     */
    public final M5Attribute attribute(String name) {
        int pos = 0;
        while (pos < numAttributes()) {
            M5Attribute candidate = attribute(pos);
            if (candidate.name().equals(name)) {
                return candidate;
            }
            pos++;
        }
        return null;
    }

    /**
     * Tells whether at least one attribute reports itself as a string attribute.
     *
     * @return {@code true} if any attribute's {@code isString()} is true
     */
    public boolean checkForStringAttributes() {
        for (int pos = 0; pos < m_Attributes.size(); pos++) {
            if (attribute(pos).isString()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether an instance is compatible with this dataset's header.
     * <p>
     * The instance must have the same number of attributes. For every nominal
     * or string attribute whose value is not missing, the stored value must be
     * a whole number and a valid value index, i.e. lie in
     * {@code [0, numValues() - 1]}.
     * <p>
     * The previous implementation only rejected values strictly greater than
     * {@code numValues()}, so an index equal to {@code numValues()} (one past
     * the last valid value) was wrongly accepted.
     *
     * @param instance the instance to validate
     * @return {@code true} if the instance fits this dataset's header
     */
    public final boolean checkInstance(M5Instance instance) {
        if (instance.numAttributes() != this.numAttributes()) {
            return false;
        }
        for (int i = 0; i < this.numAttributes(); ++i) {
            if (instance.isMissing(i)) {
                continue;
            }
            M5Attribute att = this.attribute(i);
            if (!att.isNominal() && !att.isString()) {
                continue;
            }
            double value = instance.value(i);
            // Value indices must be whole numbers.
            if (!M5StaticUtils.eq(value, (int)value)) {
                return false;
            }
            // Reject anything below 0 or not strictly below numValues().
            if (M5StaticUtils.sm(value, 0.0) || !M5StaticUtils.sm(value, att.numValues())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the attribute currently designated as the class.
     *
     * @return the class attribute
     * @throws Exception if the class index is negative (not set)
     */
    public final M5Attribute classAttribute() throws Exception {
        if (m_ClassIndex >= 0) {
            return attribute(m_ClassIndex);
        }
        throw new Exception("Class index is negative (not set)!");
    }

    /**
     * Returns the class attribute's index; other methods of this class treat
     * a negative value as "not set".
     *
     * @return the stored class index
     */
    public final int classIndex() {
        return this.m_ClassIndex;
    }

    /**
     * Returns the stored {@code m_NameClassIndex} string.
     * NOTE(review): presumably the name of the class attribute as found
     * during header parsing -- confirm against the rest of {@code readHeader}.
     *
     * @return the stored string
     */
    public final String NameClassIndex() {
        return this.m_NameClassIndex;
    }

    /**
     * Asks the instance vector to trim itself to its current size.
     */
    public final void compactify() {
        this.m_Instances.trimToSize();
    }

    /**
     * Removes all instances by replacing the instance vector with a new,
     * empty one. The header (attributes, class index) is untouched.
     */
    public final void delete() {
        this.m_Instances = new M5Vector();
    }

    /**
     * Removes the instance at the given position.
     *
     * @param index position of the instance to remove
     */
    public final void delete(int index) {
        this.m_Instances.removeElementAt(index);
    }

    /**
     * Removes the attribute at {@code position} from the header and from every
     * instance. Attributes after the removed one have their index decremented,
     * and the class index is shifted down if it lay behind the removed slot.
     *
     * @param position index of the attribute to remove
     * @throws IllegalArgumentException if {@code position} is out of range or
     *         refers to the class attribute
     */
    public void deleteAttributeAt(int position) {
        final boolean inRange = position >= 0 && position < m_Attributes.size();
        if (!inRange) {
            throw new IllegalArgumentException("Index out of range");
        }
        if (m_ClassIndex == position) {
            throw new IllegalArgumentException("Can't delete class attribute");
        }
        freshAttributeInfo();
        if (position < m_ClassIndex) {
            m_ClassIndex--;
        }
        m_Attributes.removeElementAt(position);
        // Everything that slid down one slot needs its stored index adjusted.
        int slot = position;
        while (slot < m_Attributes.size()) {
            M5Attribute shifted = (M5Attribute) m_Attributes.elementAt(slot);
            shifted.setIndex(shifted.index() - 1);
            slot++;
        }
        int row = 0;
        while (row < numInstances()) {
            instance(row).forceDeleteAttributeAt(position);
            row++;
        }
    }

    /**
     * Removes every attribute that reports itself as a string attribute,
     * scanning from the front. The position only advances when nothing was
     * removed, because a removal shifts the following attributes down.
     */
    public void deleteStringAttributes() {
        int pos = 0;
        while (pos < m_Attributes.size()) {
            if (!attribute(pos).isString()) {
                pos++;
            } else {
                deleteAttributeAt(pos);
            }
        }
    }

    /**
     * Drops every instance whose value for the given attribute is missing.
     * The surviving instances are moved (not copied) into a new vector that
     * replaces the current one.
     *
     * @param attIndex index of the attribute to test
     */
    public final void deleteWithMissing(int attIndex) {
        M5Vector kept = new M5Vector(numInstances());
        int row = 0;
        while (row < numInstances()) {
            if (!instance(row).isMissing(attIndex)) {
                kept.addElement(instance(row));
            }
            row++;
        }
        m_Instances = kept;
    }

    /**
     * Drops every instance whose value for the given attribute is missing;
     * delegates to the index-based overload using {@code att.index()}.
     *
     * @param att the attribute to test
     */
    public final void deleteWithMissing(M5Attribute att) {
        this.deleteWithMissing(att.index());
    }

    /**
     * Drops every instance whose class value is missing.
     *
     * @throws Exception if the class index is negative (not set)
     */
    public final void deleteWithMissingClass() throws Exception {
        final int classPos = m_ClassIndex;
        if (classPos >= 0) {
            deleteWithMissing(classPos);
            return;
        }
        throw new Exception("Class index is negative (not set)!");
    }

    /**
     * Returns an enumeration over the attribute vector, passing the class
     * index to {@code M5Vector.elements(int)}.
     * NOTE(review): presumably that overload skips the element at the given
     * index, so the class attribute is omitted -- confirm in {@code M5Vector}.
     *
     * @return enumeration of attributes
     */
    public Enumeration enumerateAttributes() {
        return this.m_Attributes.elements(this.m_ClassIndex);
    }

    /**
     * Returns an enumeration over all instances in this dataset.
     *
     * @return enumeration of the instance vector's elements
     */
    public final Enumeration enumerateInstances() {
        return this.m_Instances.elements();
    }

    /**
     * Compares this dataset's header with another one. Headers are equal when
     * the class index, the number of attributes and each attribute (by
     * {@code equals}, position by position) match. The relation name is not
     * compared.
     *
     * @param dataset the dataset to compare against
     * @return {@code true} if both headers match
     */
    public final boolean equalHeaders(M5Instances dataset) {
        final boolean sameShape = m_ClassIndex == dataset.m_ClassIndex
                && m_Attributes.size() == dataset.m_Attributes.size();
        if (!sameShape) {
            return false;
        }
        for (int pos = 0; pos < m_Attributes.size(); pos++) {
            if (!attribute(pos).equals(dataset.attribute(pos))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the first element of the instance vector.
     *
     * @return the first instance
     */
    public final M5Instance firstInstance() {
        return (M5Instance)this.m_Instances.firstElement();
    }

    /**
     * Inserts a copy of {@code att} at {@code position} in the header and a
     * matching slot in every instance. Attributes behind the new one have
     * their index incremented, and the class index moves up by one when it was
     * at or behind the insertion point.
     *
     * @param att      the attribute to insert (a copy is stored)
     * @param position insertion index, from 0 to the current attribute count inclusive
     * @throws IllegalArgumentException if {@code position} is out of range
     */
    public void insertAttributeAt(M5Attribute att, int position) {
        final boolean inRange = position >= 0 && position <= m_Attributes.size();
        if (!inRange) {
            throw new IllegalArgumentException("Index out of range");
        }
        M5Attribute inserted = (M5Attribute) att.copy();
        freshAttributeInfo();
        inserted.setIndex(position);
        m_Attributes.insertElementAt(inserted, position);
        // Re-number everything that was pushed one slot to the right.
        int slot = position + 1;
        while (slot < m_Attributes.size()) {
            M5Attribute shifted = (M5Attribute) m_Attributes.elementAt(slot);
            shifted.setIndex(shifted.index() + 1);
            slot++;
        }
        int row = 0;
        while (row < numInstances()) {
            instance(row).forceInsertAttributeAt(position);
            row++;
        }
        if (position <= m_ClassIndex) {
            m_ClassIndex++;
        }
    }

    /**
     * Returns the instance stored at the given position.
     *
     * @param index position in the instance vector
     * @return the instance at that position
     */
    public final M5Instance instance(int index) {
        return (M5Instance)this.m_Instances.elementAt(index);
    }

    /**
     * Returns the last element of the instance vector.
     *
     * @return the last instance
     */
    public final M5Instance lastInstance() {
        return (M5Instance)this.m_Instances.lastElement();
    }

    /**
     * Returns the weighted mean (numeric attribute) or the weighted mode
     * (nominal attribute) of the given attribute, ignoring missing values.
     * <p>
     * For numeric attributes the result is 0 when the total weight of the
     * non-missing instances is (approximately) zero. For nominal attributes
     * the result is the index of the value with the largest accumulated
     * weight; on ties the lowest index wins. For any other attribute type the
     * result is 0.
     * <p>
     * Per-value weights are accumulated as {@code double}. The previous
     * implementation kept them in an {@code int[]} and truncated after every
     * addition, so fractional instance weights were lost (for example, a
     * weight of 0.5 never contributed at all).
     *
     * @param attIndex index of the attribute
     * @return the mean, or the index of the modal value as a double
     */
    public final double meanOrMode(int attIndex) {
        M5Attribute att = this.attribute(attIndex);
        if (att.isNumeric()) {
            double found = 0.0;
            double result = 0.0;
            for (int j = 0; j < this.numInstances(); ++j) {
                M5Instance inst = this.instance(j);
                if (inst.isMissing(attIndex)) {
                    continue;
                }
                found += inst.weight();
                result += inst.weight() * inst.value(attIndex);
            }
            if (M5StaticUtils.eq(found, 0.0)) {
                return 0.0;
            }
            return result / found;
        }
        if (att.isNominal()) {
            double[] counts = new double[att.numValues()];
            for (int j = 0; j < this.numInstances(); ++j) {
                M5Instance inst = this.instance(j);
                if (inst.isMissing(attIndex)) {
                    continue;
                }
                counts[(int)inst.value(attIndex)] += inst.weight();
            }
            // Arg-max with first-wins tie breaking; an empty array yields 0.
            int mode = 0;
            for (int v = 1; v < counts.length; ++v) {
                if (counts[v] > counts[mode]) {
                    mode = v;
                }
            }
            return mode;
        }
        return 0.0;
    }

    /**
     * Returns the mean or mode of the given attribute; delegates to the
     * index-based overload using {@code att.index()}.
     *
     * @param att the attribute
     * @return the mean (numeric) or modal value index (nominal)
     */
    public final double meanOrMode(M5Attribute att) {
        return this.meanOrMode(att.index());
    }

    /**
     * Returns the number of attributes, i.e. the size of the attribute vector.
     *
     * @return attribute count
     */
    public final int numAttributes() {
        return this.m_Attributes.size();
    }

    /**
     * Returns the number of class labels: the number of values of a nominal
     * class attribute, or 1 when the class attribute is not nominal.
     *
     * @return number of classes
     * @throws Exception if the class index is negative (not set)
     */
    public final int numClasses() throws Exception {
        if (m_ClassIndex < 0) {
            throw new Exception("Class index is negative (not set)!");
        }
        return classAttribute().isNominal() ? classAttribute().numValues() : 1;
    }

    /**
     * Counts the distinct values of an attribute. For numeric attributes the
     * instances are visited in the order produced by
     * {@code M5StaticUtils.sort}, counting a new value each time the current
     * one is greater than the previously counted one; the scan stops at the
     * first missing value. For all other attributes the declared number of
     * values is returned.
     *
     * @param attIndex index of the attribute
     * @return number of distinct values
     */
    public final int numDistinctValues(int attIndex) {
        if (!attribute(attIndex).isNumeric()) {
            return attribute(attIndex).numValues();
        }
        double[] column = attributeToDoubleArray(attIndex);
        int[] order = M5StaticUtils.sort(column);
        double lastCounted = 0.0;
        int distinct = 0;
        for (int rank = 0; rank < order.length; rank++) {
            M5Instance inst = instance(order[rank]);
            if (inst.isMissing(attIndex)) {
                break;
            }
            if (rank == 0 || M5StaticUtils.gr(inst.value(attIndex), lastCounted)) {
                lastCounted = inst.value(attIndex);
                distinct++;
            }
        }
        return distinct;
    }

    /**
     * Counts the distinct values of the given attribute; delegates to the
     * index-based overload using {@code att.index()}.
     *
     * @param att the attribute
     * @return number of distinct values
     */
    public final int numDistinctValues(M5Attribute att) {
        return this.numDistinctValues(att.index());
    }

    /**
     * Returns the number of instances, i.e. the size of the instance vector.
     *
     * @return instance count
     */
    public final int numInstances() {
        return this.m_Instances.size();
    }

    /**
     * Shuffles the instances in place. Walking from the last position down to
     * position 1, each instance is swapped with one at a random position
     * strictly below it.
     *
     * @param random source of randomness
     */
    public final void randomize(Random random) {
        int upper = numInstances() - 1;
        while (upper > 0) {
            int partner = (int) (random.nextDouble() * (double) upper);
            swap(upper, partner);
            upper--;
        }
    }

    /**
     * Reads a single instance from the reader and appends it to this dataset.
     * A fresh tokenizer is created and initialised for each call, and the
     * instance is parsed with the {@code flag} argument set to {@code false}.
     *
     * @param reader source of the instance data
     * @return {@code false} if end of input was reached before an instance
     *         was read, {@code true} otherwise
     * @throws IOException if reading or parsing fails
     */
    public final boolean readInstance(Reader reader) throws IOException {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);
        this.initTokenizer(tokenizer);
        return this.getInstance(tokenizer, false);
    }

    /**
     * Returns the relation name of this dataset.
     *
     * @return the relation name
     */
    public final String relationName() {
        return this.m_RelationName;
    }

    /**
     * Renames the attribute at the given index. The attribute is replaced by a
     * renamed copy inside a newly built attribute vector, so the previous
     * vector object is left unchanged.
     *
     * @param att  index of the attribute to rename
     * @param name the new attribute name
     */
    public final void renameAttribute(int att, String name) {
        M5Attribute renamed = attribute(att).copy(name);
        M5Vector replacement = new M5Vector(numAttributes());
        int pos = 0;
        while (pos < numAttributes()) {
            if (pos != att) {
                replacement.addElement(attribute(pos));
            } else {
                replacement.addElement(renamed);
            }
            pos++;
        }
        m_Attributes = replacement;
    }

    /**
     * Renames the given attribute; delegates to the index-based overload
     * using {@code att.index()}.
     *
     * @param att  the attribute to rename
     * @param name the new attribute name
     */
    public final void renameAttribute(M5Attribute att, String name) {
        this.renameAttribute(att.index(), name);
    }

    /**
     * Renames one value of an attribute. The change is applied to a copy of
     * the attribute, which then takes the original's place in a newly built
     * attribute vector.
     *
     * @param att  index of the attribute
     * @param val  index of the value to rename
     * @param name the new value label
     */
    public final void renameAttributeValue(int att, int val, String name) {
        M5Attribute edited = (M5Attribute) attribute(att).copy();
        M5Vector replacement = new M5Vector(numAttributes());
        edited.setValue(val, name);
        int pos = 0;
        while (pos < numAttributes()) {
            if (pos != att) {
                replacement.addElement(attribute(pos));
            } else {
                replacement.addElement(edited);
            }
            pos++;
        }
        m_Attributes = replacement;
    }

    /**
     * Renames a value of the given attribute, identified by its current label.
     *
     * @param att  the attribute
     * @param val  current label of the value
     * @param name the new value label
     * @throws IllegalArgumentException if {@code val} is not a value of {@code att}
     */
    public final void renameAttributeValue(M5Attribute att, String val, String name) {
        final int valueIndex = att.indexOfValue(val);
        if (valueIndex != -1) {
            renameAttributeValue(att.index(), valueIndex, name);
            return;
        }
        throw new IllegalArgumentException(val + " not found");
    }

    /**
     * Draws a bootstrap sample: a new dataset of the same size whose
     * instances are picked uniformly at random, with replacement, from this
     * dataset.
     *
     * @param random source of randomness
     * @return the resampled dataset
     */
    public final M5Instances resample(Random random) {
        M5Instances sample = new M5Instances(this, numInstances());
        for (; sample.numInstances() < numInstances(); ) {
            int pick = (int) (random.nextDouble() * (double) numInstances());
            sample.add(instance(pick));
        }
        return sample;
    }

    /**
     * Resamples according to the instances' current weights. When all weights
     * are (approximately) equal to the first one, a plain copy of this dataset
     * is returned instead of a resample.
     *
     * @param random source of randomness
     * @return the resampled dataset, or a copy if all weights are equal
     */
    public final M5Instances resampleWithWeights(Random random) {
        double[] weights = new double[numInstances()];
        boolean differing = false;
        int idx = 0;
        while (idx < weights.length) {
            weights[idx] = instance(idx).weight();
            if (!M5StaticUtils.eq(weights[idx], weights[0])) {
                differing = true;
            }
            idx++;
        }
        if (!differing) {
            return new M5Instances(this);
        }
        return resampleWithWeights(random, weights);
    }

    /**
     * Draws a sample of the same size as this dataset, using the supplied
     * per-instance weights. Every sampled instance gets weight 1.0.
     *
     * @param random  source of randomness
     * @param weights one weight per instance, in instance order
     * @return the resampled dataset
     * @throws IllegalArgumentException if {@code weights.length} differs from
     *         the number of instances, or a negative weight is encountered
     *         during the walk (zero is accepted despite the message wording)
     */
    public final M5Instances resampleWithWeights(Random random, double[] weights) {
        if (weights.length != this.numInstances()) {
            throw new IllegalArgumentException("weights.length != numInstances.");
        }
        M5Instances newData = new M5Instances(this, this.numInstances());
        double[] probabilities = new double[this.numInstances()];
        double sumProbs = 0.0;
        double sumOfWeights = M5StaticUtils.sum(weights);
        // Build an increasing sequence of thresholds from cumulative random draws.
        for (int i = 0; i < this.numInstances(); ++i) {
            probabilities[i] = sumProbs += random.nextDouble();
        }
        // NOTE(review): presumably normalize divides each entry by its second
        // argument, rescaling the thresholds to end near sumOfWeights -- confirm.
        M5StaticUtils.normalize(probabilities, sumProbs / sumOfWeights);
        // Pin the last threshold to the total weight.
        // NOTE(review): with zero instances this index is -1 and would throw
        // ArrayIndexOutOfBoundsException if this line is reached.
        probabilities[this.numInstances() - 1] = sumOfWeights;
        int k = 0;
        sumProbs = 0.0;
        // Walk instances (l) and thresholds (k) together: instance l is emitted
        // once for every threshold not exceeding the running weight total.
        for (int l = 0; k < this.numInstances() && l < this.numInstances(); ++l) {
            if (weights[l] < 0.0) {
                throw new IllegalArgumentException("Weights have to be positive.");
            }
            sumProbs += weights[l];
            while (k < this.numInstances() && probabilities[k] <= sumProbs) {
                newData.add(this.instance(l));
                newData.instance(k).setWeight(1.0);
                ++k;
            }
        }
        return newData;
    }

    /**
     * Makes the given attribute the class attribute by storing its index.
     * No range check is performed here.
     *
     * @param att the attribute to use as class
     */
    public final void setClass(M5Attribute att) {
        this.m_ClassIndex = att.index();
    }

    /**
     * Sets the class index. Only the upper bound is checked, so negative
     * values (meaning "not set") are accepted.
     *
     * @param classIndex the new class index
     * @throws IllegalArgumentException if {@code classIndex} is not smaller
     *         than the number of attributes
     */
    public final void setClassIndex(int classIndex) {
        if (classIndex < numAttributes()) {
            m_ClassIndex = classIndex;
            return;
        }
        throw new IllegalArgumentException("Invalid class index: " + classIndex);
    }

    /**
     * Replaces the relation name.
     *
     * @param newName the new relation name
     */
    public final void setRelationName(String newName) {
        this.m_RelationName = newName;
    }

    /**
     * Sorts the instances on the given attribute. Instances whose value for
     * that attribute is missing are first moved to the tail; the remaining
     * prefix is then handed to {@code quickSort}.
     *
     * @param attIndex index of the attribute to sort on
     */
    public final void sort(int attIndex) {
        // j: last position not yet known to hold a missing value; i: scan position.
        int j = this.numInstances() - 1;
        int i = 0;
        while (i <= j) {
            if (this.instance(j).isMissing(attIndex)) {
                // Tail element is already in the right place; shrink the range.
                --j;
                continue;
            }
            if (this.instance(i).isMissing(attIndex)) {
                // Move the missing-value instance to the tail, where j held a non-missing one.
                this.swap(i, j);
                --j;
            }
            ++i;
        }
        // Positions 0..j now hold only non-missing values.
        this.quickSort(attIndex, 0, j);
    }

    /**
     * Sorts the instances on the given attribute; delegates to the
     * index-based overload using {@code att.index()}.
     *
     * @param att the attribute to sort on
     */
    public final void sort(M5Attribute att) {
        this.sort(att.index());
    }

    /**
     * Reorders the instances for stratified cross-validation. Only has an
     * effect when the class attribute is nominal: instances with the same
     * class value (or with both class values missing) are first grouped
     * together, then {@code stratStep(numFolds)} is applied.
     * <p>
     * The guard rejects only non-positive fold counts; the exception message
     * previously claimed the count had to be "greater than 1", which did not
     * match the check and has been corrected. The accepted range is unchanged.
     *
     * @param numFolds number of folds; must be greater than 0
     * @throws IllegalArgumentException if {@code numFolds} is not positive
     * @throws Exception if the class index is negative (not set)
     */
    public final void stratify(int numFolds) throws Exception {
        if (numFolds <= 0) {
            throw new IllegalArgumentException("Number of folds must be greater than 0");
        }
        if (this.m_ClassIndex < 0) {
            throw new Exception("Class index is negative (not set)!");
        }
        if (this.classAttribute().isNominal()) {
            // Grouping pass: 'index' marks the next free slot behind the run that
            // matches instance1. It is advanced inside the inner loop as well as
            // by the outer loop, so statement order here is significant.
            for (int index = 1; index < this.numInstances(); ++index) {
                M5Instance instance1 = this.instance(index - 1);
                for (int j = index; j < this.numInstances(); ++j) {
                    M5Instance instance2 = this.instance(j);
                    // Skip unless the class values match or both are missing.
                    if (instance1.classValue() != instance2.classValue() && (!instance1.classIsMissing() || !instance2.classIsMissing())) continue;
                    this.swap(index, j);
                    ++index;
                }
            }
            this.stratStep(numFolds);
        }
    }

    /**
     * Adds up the weights of all instances, in instance order.
     *
     * @return the total weight (0 for an empty dataset)
     */
    public final double sumOfWeights() {
        double total = 0.0;
        int row = 0;
        while (row < numInstances()) {
            total += instance(row).weight();
            row++;
        }
        return total;
    }

    /**
     * Returns the test set for one fold of a cross-validation. The dataset is
     * cut into {@code numFolds} consecutive slices; the first
     * {@code numInstances() % numFolds} slices hold one extra instance.
     * {@code numFold} itself is not range-checked here.
     *
     * @param numFolds total number of folds (at least 2, at most the number of instances)
     * @param numFold  0-based fold whose slice is returned
     * @return a dataset containing the instances of that fold
     * @throws IllegalArgumentException if {@code numFolds} is below 2 or
     *         exceeds the number of instances
     */
    public M5Instances testCV(int numFolds, int numFold) {
        if (numFolds < 2) {
            throw new IllegalArgumentException("Number of folds must be at least 2!");
        }
        if (numFolds > numInstances()) {
            throw new IllegalArgumentException("Can't have more folds than instances!");
        }
        final int baseSize = numInstances() / numFolds;
        final int remainder = numInstances() % numFolds;
        // Folds before 'remainder' are one instance larger than the rest.
        final boolean hasExtra = numFold < remainder;
        final int foldSize = hasExtra ? baseSize + 1 : baseSize;
        final int shift = hasExtra ? numFold : remainder;
        M5Instances test = new M5Instances(this, foldSize);
        final int start = numFold * baseSize + shift;
        copyInstances(start, test, foldSize);
        return test;
    }

    /**
     * Renders the dataset as text: an {@code @relation} line with the quoted
     * relation name, one line per attribute, an {@code @data} marker and one
     * line per instance (no trailing newline after the last instance).
     *
     * @return the textual form of this dataset
     */
    public final String toString() {
        StringBuilder out = new StringBuilder();
        out.append("@relation ").append(M5StaticUtils.quote(m_RelationName)).append("\n\n");
        for (int col = 0; col < numAttributes(); col++) {
            out.append(attribute(col)).append("\n");
        }
        out.append("\n@data\n");
        for (int row = 0; row < numInstances(); row++) {
            // Newlines act as separators, so none follows the final instance.
            if (row > 0) {
                out.append('\n');
            }
            out.append(instance(row));
        }
        return out.toString();
    }

    /**
     * Returns the training set for one fold of a cross-validation: every
     * instance except those in the slice belonging to {@code numFold}. The
     * slices are laid out as consecutive ranges, with the first
     * {@code numInstances() % numFolds} slices holding one extra instance.
     * {@code numFold} itself is not range-checked here.
     *
     * @param numFolds total number of folds (at least 2, at most the number of instances)
     * @param numFold  0-based fold to leave out
     * @return a dataset containing all instances outside that fold
     * @throws IllegalArgumentException if {@code numFolds} is below 2 or
     *         exceeds the number of instances
     */
    public M5Instances trainCV(int numFolds, int numFold) {
        if (numFolds < 2) {
            throw new IllegalArgumentException("Number of folds must be at least 2!");
        }
        if (numFolds > numInstances()) {
            throw new IllegalArgumentException("Can't have more folds than instances!");
        }
        final int baseSize = numInstances() / numFolds;
        final int remainder = numInstances() % numFolds;
        // Folds before 'remainder' are one instance larger than the rest.
        final boolean hasExtra = numFold < remainder;
        final int foldSize = hasExtra ? baseSize + 1 : baseSize;
        final int shift = hasExtra ? numFold : remainder;
        M5Instances train = new M5Instances(this, numInstances() - foldSize);
        final int start = numFold * baseSize + shift;
        // Copy what lies before the held-out slice, then what lies after it.
        copyInstances(0, train, start);
        copyInstances(start + foldSize, train, numInstances() - start - foldSize);
        return train;
    }

    /**
     * Computes the weighted sample variance of a numeric attribute, ignoring
     * missing values. Returns 0 when the total weight of the non-missing
     * instances is at most 1.
     *
     * @param attIndex index of the attribute
     * @return the variance
     * @throws IllegalArgumentException if the attribute is not numeric
     */
    public final double variance(int attIndex) {
        double weightedSum = 0.0;
        double weightedSquares = 0.0;
        double totalWeight = 0.0;
        if (!attribute(attIndex).isNumeric()) {
            throw new IllegalArgumentException("Can't compute variance because attribute is not numeric!");
        }
        int row = 0;
        while (row < numInstances()) {
            if (!instance(row).isMissing(attIndex)) {
                weightedSum += instance(row).weight() * instance(row).value(attIndex);
                weightedSquares += instance(row).weight() * instance(row).value(attIndex) * instance(row).value(attIndex);
                totalWeight += instance(row).weight();
            }
            row++;
        }
        if (M5StaticUtils.smOrEq(totalWeight, 1.0)) {
            return 0.0;
        }
        final double numerator = weightedSquares - weightedSum * weightedSum / totalWeight;
        return numerator / (totalWeight - 1.0);
    }

    /**
     * Computes the variance of the given attribute; delegates to the
     * index-based overload using {@code att.index()}.
     *
     * @param att the attribute
     * @return the variance
     */
    public final double variance(M5Attribute att) {
        return this.variance(att.index());
    }

    /**
     * Gathers summary statistics for one attribute: total count, missing
     * count, and one {@code addDistinct} call per run of equal values found
     * when walking the instances in sorted order.
     *
     * @param index index of the attribute
     * @return a freshly populated statistics object
     */
    public M5AttrStats attributeStats(int index) {
        M5AttrStats result = new M5AttrStats();
        if (this.attribute(index).isNominal()) {
            result.nominalCounts = new int[this.attribute(index).numValues()];
        }
        if (this.attribute(index).isNumeric()) {
            result.numericStats = new SimpleStatistics();
        }
        result.totalCount = this.numInstances();
        double[] attVals = this.attributeToDoubleArray(index);
        int[] sorted = M5StaticUtils.sort(attVals);
        int currentCount = 0;
        // Start with the missing-value marker so the first real value opens a new run.
        double prev = M5Instance.missingValue();
        for (int j = 0; j < this.numInstances(); ++j) {
            M5Instance current = this.instance(sorted[j]);
            if (current.isMissing(index)) {
                // Everything from the first missing value onwards is counted as missing.
                result.missingCount = this.numInstances() - j;
                break;
            }
            if (M5StaticUtils.eq(current.value(index), prev)) {
                ++currentCount;
                continue;
            }
            // Value changed: flush the finished run and start a new one.
            result.addDistinct(prev, currentCount);
            currentCount = 1;
            prev = current.value(index);
        }
        // Flush the last open run.
        result.addDistinct(prev, currentCount);
        // NOTE(review): presumably compensates for the addDistinct call made
        // with the initial missing-value placeholder -- confirm in M5AttrStats.
        --result.distinctCount;
        return result;
    }

    /**
     * Extracts one attribute's values for all instances, in instance order.
     *
     * @param index index of the attribute
     * @return a new array with one entry per instance
     */
    public double[] attributeToDoubleArray(int index) {
        final double[] column = new double[numInstances()];
        int row = 0;
        while (row < column.length) {
            column[row] = instance(row).value(index);
            row++;
        }
        return column;
    }

    /**
     * Builds a human-readable summary table: relation name, instance and
     * attribute counts, followed by one row per attribute showing its type,
     * percentage columns derived from {@code attributeStats}, and the
     * missing, unique and distinct counts.
     *
     * @return the formatted summary
     */
    public String toSummaryString() {
        StringBuffer out = new StringBuffer();
        out.append("Relation Name:  ").append(relationName()).append('\n');
        out.append("Num Instances:  ").append(numInstances()).append('\n');
        out.append("Num Attributes: ").append(numAttributes()).append('\n');
        out.append('\n');
        out.append(M5StaticUtils.padLeft("", 5));
        out.append(M5StaticUtils.padRight("Name", 25));
        out.append(M5StaticUtils.padLeft("Type", 5));
        out.append(M5StaticUtils.padLeft("Nom", 5));
        out.append(M5StaticUtils.padLeft("Int", 5));
        out.append(M5StaticUtils.padLeft("Real", 5));
        out.append(M5StaticUtils.padLeft("Missing", 12));
        out.append(M5StaticUtils.padLeft("Unique", 12));
        out.append(M5StaticUtils.padLeft("Dist", 6)).append('\n');
        for (int col = 0; col < numAttributes(); col++) {
            M5Attribute attr = attribute(col);
            M5AttrStats stats = attributeStats(col);
            out.append(M5StaticUtils.padLeft("" + (col + 1), 4)).append(' ');
            out.append(M5StaticUtils.padRight(attr.name(), 25)).append(' ');

            // Types 1 and 2 print the int percentage before the fixed "0" cell;
            // type 0 and unknown types print the fixed "0" cell first.
            String label;
            boolean intCellFirst;
            switch (attr.type()) {
                case 1:
                    label = "Nom";
                    intCellFirst = true;
                    break;
                case 0:
                    label = "Num";
                    intCellFirst = false;
                    break;
                case 2:
                    label = "Str";
                    intCellFirst = true;
                    break;
                default:
                    label = "???";
                    intCellFirst = false;
                    break;
            }
            long intPercent = Math.round(100.0 * (double) stats.intCount / (double) stats.totalCount);
            long realPercent = Math.round(100.0 * (double) stats.realCount / (double) stats.totalCount);
            String intCell = M5StaticUtils.padLeft("" + intPercent, 3);
            String zeroCell = M5StaticUtils.padLeft("0", 3);
            String realCell = M5StaticUtils.padLeft("" + realPercent, 3);

            out.append(M5StaticUtils.padLeft(label, 4)).append(' ');
            if (intCellFirst) {
                out.append(intCell).append("% ");
                out.append(zeroCell).append("% ");
            } else {
                out.append(zeroCell).append("% ");
                out.append(intCell).append("% ");
            }
            out.append(realCell).append("% ");

            long missingPercent = Math.round(100.0 * (double) stats.missingCount / (double) stats.totalCount);
            out.append(M5StaticUtils.padLeft("" + stats.missingCount, 5)).append(" /");
            out.append(M5StaticUtils.padLeft("" + missingPercent, 3)).append("% ");

            long uniquePercent = Math.round(100.0 * (double) stats.uniqueCount / (double) stats.totalCount);
            out.append(M5StaticUtils.padLeft("" + stats.uniqueCount, 5)).append(" /");
            out.append(M5StaticUtils.padLeft("" + uniquePercent, 3)).append("% ");

            out.append(M5StaticUtils.padLeft("" + stats.distinctCount, 5)).append(' ');
            out.append('\n');
        }
        return out.toString();
    }

    /**
     * Reads the next instance from the tokenizer and appends it to this
     * dataset. An opening brace as the first token selects the sparse
     * parser; anything else is handled by the full parser.
     *
     * @param tokenizer the token source
     * @param flag      passed through to the sparse/full parser
     * @return {@code false} if end of input was reached, otherwise the result
     *         of the selected parser
     * @throws IOException if reading or parsing fails
     */
    protected boolean getInstance(StreamTokenizer tokenizer, boolean flag) throws IOException {
        if (m_Attributes.size() == 0) {
            errms(tokenizer, "no header information available");
        }
        getFirstToken(tokenizer);
        switch (tokenizer.ttype) {
            case StreamTokenizer.TT_EOF:
                return false;
            case '{':
                return getInstanceSparse(tokenizer, flag);
            default:
                return getInstanceFull(tokenizer, flag);
        }
    }

    /**
     * Parses one instance in sparse notation (index/value pairs up to a
     * closing brace) and appends it to this dataset as an
     * {@code M5SparseInstance} with weight 1.0. Pairs are collected in
     * {@code m_IndicesBuffer} / {@code m_ValueBuffer} and then copied into
     * right-sized arrays.
     * NOTE(review): the code after each {@code errms} call only makes sense if
     * {@code errms} throws -- confirm.
     *
     * @param tokenizer the token source
     * @param flag      if {@code true}, {@code getLastToken(tokenizer, true)}
     *                  is called after the closing brace
     * @return always {@code true}
     * @throws IOException if reading or parsing fails
     */
    protected boolean getInstanceSparse(StreamTokenizer tokenizer, boolean flag) throws IOException {
        int numValues = 0;
        int maxIndex = -1;
        while (true) {
            this.getIndex(tokenizer);
            // 125 is the character code of '}': end of the sparse instance.
            if (tokenizer.ttype == 125) break;
            try {
                this.m_IndicesBuffer[numValues] = Integer.valueOf(tokenizer.sval);
            }
            catch (NumberFormatException e) {
                this.errms(tokenizer, "index number expected");
            }
            // Indices must be strictly increasing and within the attribute range.
            if (this.m_IndicesBuffer[numValues] <= maxIndex) {
                this.errms(tokenizer, "indices have to be ordered");
            }
            if (this.m_IndicesBuffer[numValues] < 0 || this.m_IndicesBuffer[numValues] >= this.numAttributes()) {
                this.errms(tokenizer, "index out of bounds");
            }
            maxIndex = this.m_IndicesBuffer[numValues];
            this.getNextToken(tokenizer);
            // 63 is the character code of '?': a missing value.
            if (tokenizer.ttype == 63) {
                this.m_ValueBuffer[numValues] = M5Instance.missingValue();
            } else {
                // -3 is StreamTokenizer.TT_WORD.
                if (tokenizer.ttype != -3) {
                    this.errms(tokenizer, "not a valid value");
                }
                if (this.attribute(this.m_IndicesBuffer[numValues]).isNominal()) {
                    // Nominal values are stored as the index of their label.
                    int valIndex = this.attribute(this.m_IndicesBuffer[numValues]).indexOfValue(tokenizer.sval);
                    if (valIndex == -1) {
                        this.errms(tokenizer, "nominal value not declared in header");
                    }
                    this.m_ValueBuffer[numValues] = valIndex;
                } else if (this.attribute(this.m_IndicesBuffer[numValues]).isNumeric()) {
                    try {
                        this.m_ValueBuffer[numValues] = Double.valueOf(tokenizer.sval);
                    }
                    catch (NumberFormatException e) {
                        this.errms(tokenizer, "number expected");
                    }
                } else {
                    // Remaining attribute kinds: register the token via addStringValue
                    // and store the returned value.
                    this.m_ValueBuffer[numValues] = this.attribute(this.m_IndicesBuffer[numValues]).addStringValue(tokenizer.sval);
                }
            }
            ++numValues;
        }
        if (flag) {
            this.getLastToken(tokenizer, true);
        }
        // Copy only the filled part of the scratch buffers into the new instance.
        double[] tempValues = new double[numValues];
        int[] tempIndices = new int[numValues];
        System.arraycopy(this.m_ValueBuffer, 0, tempValues, 0, numValues);
        System.arraycopy(this.m_IndicesBuffer, 0, tempIndices, 0, numValues);
        this.add(new M5SparseInstance(1.0, tempValues, tempIndices, this.numAttributes()));
        return true;
    }

    /**
     * Parses one instance in dense notation (one token per attribute) and
     * appends it to this dataset with weight 1.0. The first token is expected
     * to be current in the tokenizer already; further tokens are fetched with
     * {@code getNextToken}.
     *
     * @param tokenizer the token source, positioned on the first value
     * @param flag      if {@code true}, {@code getLastToken(tokenizer, true)}
     *                  is called after the last value
     * @return always {@code true}
     * @throws IOException if reading or parsing fails
     */
    protected boolean getInstanceFull(StreamTokenizer tokenizer, boolean flag) throws IOException {
        double[] values = new double[numAttributes()];
        for (int col = 0; col < numAttributes(); col++) {
            if (col > 0) {
                getNextToken(tokenizer);
            }
            if (tokenizer.ttype == '?') {
                // A question mark denotes a missing value.
                values[col] = M5Instance.missingValue();
            } else {
                if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    errms(tokenizer, "not a valid value");
                }
                if (attribute(col).isNominal()) {
                    // Nominal values are stored as the index of their label.
                    int labelIndex = attribute(col).indexOfValue(tokenizer.sval);
                    if (labelIndex == -1) {
                        errms(tokenizer, "nominal value not declared in header");
                    }
                    values[col] = labelIndex;
                } else if (attribute(col).isNumeric()) {
                    try {
                        values[col] = Double.valueOf(tokenizer.sval);
                    } catch (NumberFormatException e) {
                        errms(tokenizer, "number expected");
                    }
                } else {
                    values[col] = attribute(col).addStringValue(tokenizer.sval);
                }
            }
        }
        if (flag) {
            getLastToken(tokenizer, true);
        }
        add(new M5Instance(1.0, values));
        return true;
    }

    /**
     * Parses the dataset header: the {@code @relation} line, every
     * {@code @attribute} declaration, and any further tokens up to the
     * {@code @data} keyword.
     *
     * <p>Attribute types {@code real}, {@code integer} and {@code numeric} create
     * numeric attributes, {@code string} creates a string attribute, and a
     * brace-delimited list creates a nominal attribute. The last textual token
     * seen between the attribute declarations and {@code @data} (if any) is
     * stored in {@code m_NameClassIndex}. Finally the value and index buffers
     * are allocated with one slot per attribute.
     *
     * <p>Malformed headers are reported as {@link IOException}s. This includes a
     * stream that ends inside a nominal enumeration or before {@code @data}.
     *
     * @param tokenizer the tokenizer to read the header from
     * @throws IOException if the header is malformed or ends prematurely
     */
    protected void readHeader(StreamTokenizer tokenizer) throws IOException {
        String output = "";
        this.m_NameClassIndex = "";

        // --- @relation <name> ---
        this.getFirstToken(tokenizer);
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            this.errms(tokenizer, "premature end of file");
        }
        // Literal-first comparisons: sval is null for brace tokens, which must
        // yield a parse error rather than a NullPointerException.
        if ("@relation".equalsIgnoreCase(tokenizer.sval)) {
            this.getNextToken(tokenizer);
            this.m_RelationName = tokenizer.sval;
            this.getLastToken(tokenizer, false);
        } else {
            this.errms(tokenizer, "keyword @relation expected");
        }

        // --- @attribute <name> <type | {v1, v2, ...}> ---
        this.m_Attributes = new M5Vector();
        this.getFirstToken(tokenizer);
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            this.errms(tokenizer, "premature end of file");
        }
        while ("@attribute".equalsIgnoreCase(tokenizer.sval)) {
            this.getNextToken(tokenizer);
            String attributeName = tokenizer.sval;
            this.getNextToken(tokenizer);
            if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
                String typeName = tokenizer.sval;
                if (typeName.equalsIgnoreCase("real") || typeName.equalsIgnoreCase("integer") || typeName.equalsIgnoreCase("numeric")) {
                    this.m_Attributes.addElement(new M5Attribute(attributeName, this.numAttributes()));
                    // Anything after the type keyword on the same line is skipped.
                    this.readTillEOL(tokenizer);
                } else if (typeName.equalsIgnoreCase("string")) {
                    this.m_Attributes.addElement(new M5Attribute(attributeName, null, this.numAttributes()));
                    this.readTillEOL(tokenizer);
                } else {
                    this.errms(tokenizer, "no valid attribute type or invalid enumeration");
                }
            } else {
                M5Vector attributeValues = new M5Vector();
                tokenizer.pushBack();
                if (tokenizer.nextToken() != '{') {
                    this.errms(tokenizer, "{ expected at beginning of enumeration");
                }
                while (tokenizer.nextToken() != '}') {
                    // The tokenizer keeps returning TT_EOF once the stream is
                    // exhausted, so EOF has to end the loop as well as EOL.
                    if (tokenizer.ttype == StreamTokenizer.TT_EOL || tokenizer.ttype == StreamTokenizer.TT_EOF) {
                        this.errms(tokenizer, "} expected at end of enumeration");
                    }
                    attributeValues.addElement(tokenizer.sval);
                }
                if (attributeValues.size() == 0) {
                    this.errms(tokenizer, "no nominal values found");
                }
                this.m_Attributes.addElement(new M5Attribute(attributeName, attributeValues, this.numAttributes()));
            }
            this.getLastToken(tokenizer, false);
            this.getFirstToken(tokenizer);
            if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
                this.errms(tokenizer, "premature end of file");
            }
        }

        // --- skip everything up to @data, remembering the last textual token ---
        while (!"@data".equalsIgnoreCase(tokenizer.sval)) {
            if (tokenizer.sval != null) {
                output = tokenizer.sval;
            }
            this.getFirstToken(tokenizer);
            if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
                // Without this check a header lacking @data dereferenced a null sval.
                this.errms(tokenizer, "premature end of file");
            }
        }
        if (output.length() > 0) {
            this.m_NameClassIndex = output;
        }
        if (this.m_Attributes.size() == 0) {
            this.errms(tokenizer, "no attributes declared");
        }
        this.m_ValueBuffer = new double[this.numAttributes()];
        this.m_IndicesBuffer = new int[this.numAttributes()];
    }

    /**
     * Adds a run of consecutive instances from this dataset to another dataset.
     *
     * @param from position of the first instance to copy
     * @param dest the dataset the instances are added to
     * @param num the number of instances to copy
     */
    private void copyInstances(int from, M5Instances dest, int num) {
        int copied = 0;
        while (copied < num) {
            dest.add(this.instance(from + copied));
            ++copied;
        }
    }

    /**
     * Reports a parse error. Always throws.
     *
     * @param tokenizer the tokenizer whose current state is appended to the message
     * @param theMsg the description of the problem
     * @throws IOException always, with the message followed by the tokenizer state
     */
    private void errms(StreamTokenizer tokenizer, String theMsg) throws IOException {
        String description = theMsg + ", read " + tokenizer.toString();
        throw new IOException(description);
    }

    /**
     * Replaces the attribute vector of this dataset with the result of calling
     * {@code copyElements()} on it.
     *
     * <p>NOTE(review): presumably this gives the dataset its own copies of the
     * attribute objects so later modifications do not affect other datasets
     * sharing the same header; confirm against {@code M5Vector.copyElements()}.
     */
    private void freshAttributeInfo() {
        this.m_Attributes = (M5Vector)this.m_Attributes.copyElements();
    }

    /**
     * Advances to the first token of the next non-empty line, skipping any
     * end-of-line tokens, and normalizes its type.
     *
     * <p>Quoted strings are reported as plain words, and the words {@code ?} and
     * {@code <null>} are reported with token type {@code '?'}.
     *
     * @param tokenizer the tokenizer to read from
     * @throws IOException if reading from the underlying stream fails
     */
    private void getFirstToken(StreamTokenizer tokenizer) throws IOException {
        int type;
        do {
            type = tokenizer.nextToken();
        } while (type == StreamTokenizer.TT_EOL);

        if (type == '\'' || type == '"') {
            tokenizer.ttype = StreamTokenizer.TT_WORD;
            return;
        }
        if (type == StreamTokenizer.TT_WORD) {
            String word = tokenizer.sval;
            if (word.equals("?") || word.equals("<null>")) {
                tokenizer.ttype = '?';
            }
        }
    }

    /**
     * Reads the next token and fails if it is an end of line or the end of the
     * file. The token itself is left in the tokenizer for the caller to inspect.
     *
     * @param tokenizer the tokenizer to read from
     * @throws IOException if the line or the file ends where a token is expected
     */
    private void getIndex(StreamTokenizer tokenizer) throws IOException {
        switch (tokenizer.nextToken()) {
            case StreamTokenizer.TT_EOL:
                this.errms(tokenizer, "premature end of line1");
                break;
            case StreamTokenizer.TT_EOF:
                this.errms(tokenizer, "premature end of file");
                break;
            default:
                break;
        }
    }

    /**
     * Consumes the token that must terminate the current line and verifies it.
     *
     * <p>Exactly one token is read. It has to be an end of line, or, when
     * {@code endOfFileOk} is set, the end of the file. Reading a second token
     * to test for end of file would let a stray trailing token on the last line
     * pass unnoticed and would make the error message describe the wrong token.
     *
     * @param tokenizer the tokenizer to read from
     * @param endOfFileOk whether the end of the file is accepted in place of an
     *        end of line
     * @throws IOException if the next token does not terminate the line
     */
    private void getLastToken(StreamTokenizer tokenizer, boolean endOfFileOk) throws IOException {
        int type = tokenizer.nextToken();
        boolean atEndOfLine = type == StreamTokenizer.TT_EOL;
        boolean atAcceptedEndOfFile = endOfFileOk && type == StreamTokenizer.TT_EOF;
        if (!atEndOfLine && !atAcceptedEndOfFile) {
            this.errms(tokenizer, "end of line expected");
        }
    }

    /**
     * Reads the next token on the current line and normalizes its type.
     *
     * <p>An end of line or end of file at this point is an error. Quoted strings
     * are reported as plain words, and the words {@code ?} and {@code <null>}
     * are reported with token type {@code '?'}.
     *
     * @param tokenizer the tokenizer to read from
     * @throws IOException if the line or the file ends where a token is expected
     */
    private void getNextToken(StreamTokenizer tokenizer) throws IOException {
        int type = tokenizer.nextToken();
        if (type == StreamTokenizer.TT_EOL) {
            this.errms(tokenizer, "premature end of line2");
        }
        switch (tokenizer.ttype) {
            case StreamTokenizer.TT_EOF:
                this.errms(tokenizer, "premature end of file");
                break;
            case '\'':
            case '"':
                tokenizer.ttype = StreamTokenizer.TT_WORD;
                break;
            case StreamTokenizer.TT_WORD:
                if (tokenizer.sval.equals("?") || tokenizer.sval.equals("<null>")) {
                    tokenizer.ttype = '?';
                }
                break;
            default:
                break;
        }
    }

    /**
     * Configures the tokenizer syntax used for reading dataset files.
     *
     * <p>Control characters, blanks and commas separate tokens; {@code %} starts
     * a comment; single and double quotes delimit strings; braces are returned
     * as individual tokens; and line ends are reported as tokens.
     *
     * @param tokenizer the tokenizer to configure
     */
    private void initTokenizer(StreamTokenizer tokenizer) {
        tokenizer.resetSyntax();
        // Everything up to and including the blank is whitespace ...
        tokenizer.whitespaceChars(0, ' ');
        // ... and everything from '!' to 255 is a word character by default.
        tokenizer.wordChars('!', 255);
        // Exceptions carved out of the word range:
        tokenizer.whitespaceChars(',', ',');
        tokenizer.commentChar('%');
        tokenizer.quoteChar('"');
        tokenizer.quoteChar('\'');
        tokenizer.ordinaryChar('{');
        tokenizer.ordinaryChar('}');
        tokenizer.eolIsSignificant(true);
    }

    /**
     * Builds a listing with one line per instance, each consisting of the
     * instance's string form, a blank and its weight. Lines are separated by
     * a newline; there is no trailing newline.
     *
     * @return the listing of all instances with their weights
     */
    private String instancesAndWeights() {
        StringBuilder lines = new StringBuilder();
        for (int row = 0; row < this.numInstances(); ++row) {
            if (row > 0) {
                lines.append("\n");
            }
            lines.append(this.instance(row)).append(" ").append(this.instance(row).weight());
        }
        return lines.toString();
    }

    /**
     * Recursively sorts the instances in positions {@code lo0..hi0} (inclusive)
     * in ascending order of the given attribute's value.
     *
     * <p>The pivot is the value of the instance in the middle of the range.
     * Values within 1.0E-6 of the pivot are treated as equal to it during
     * partitioning.
     *
     * @param attIndex index of the attribute to sort by
     * @param lo0 first position of the range to sort
     * @param hi0 last position of the range to sort
     */
    private void quickSort(int attIndex, int lo0, int hi0) {
        if (hi0 <= lo0) {
            return;
        }
        double pivot = this.instance((lo0 + hi0) / 2).value(attIndex);
        double lowerBound = pivot - 1.0E-6;
        double upperBound = pivot + 1.0E-6;
        int left = lo0;
        int right = hi0;
        while (left <= right) {
            // Skip over elements already on the correct side of the pivot.
            while (this.instance(left).value(attIndex) < lowerBound && left < hi0) {
                ++left;
            }
            while (this.instance(right).value(attIndex) > upperBound && right > lo0) {
                --right;
            }
            if (left <= right) {
                this.swap(left, right);
                ++left;
                --right;
            }
        }
        if (lo0 < right) {
            this.quickSort(attIndex, lo0, right);
        }
        if (left < hi0) {
            this.quickSort(attIndex, left, hi0);
        }
    }

    /**
     * Discards the remaining tokens of the current line. The terminating token
     * is pushed back, so the next read returns it again.
     *
     * <p>The end of the file also stops the scan. The tokenizer returns
     * {@code TT_EOF} on every call once the stream is exhausted, so waiting for
     * an end-of-line token alone would never terminate on a file whose last
     * line has no trailing newline.
     *
     * @param tokenizer the tokenizer to read from
     * @throws IOException if reading from the underlying stream fails
     */
    private void readTillEOL(StreamTokenizer tokenizer) throws IOException {
        int type;
        do {
            type = tokenizer.nextToken();
        } while (type != StreamTokenizer.TT_EOL && type != StreamTokenizer.TT_EOF);
        tokenizer.pushBack();
    }

    /**
     * Reorders the instances by taking every {@code numFolds}-th instance,
     * starting at offset 0, then at offset 1, and so on until all instances
     * have been placed. The instance vector is then replaced by the reordered
     * one.
     *
     * @param numFolds the step width between consecutively picked instances
     */
    private void stratStep(int numFolds) {
        M5Vector reordered = new M5Vector(this.m_Instances.capacity());
        for (int offset = 0; reordered.size() < this.numInstances(); ++offset) {
            int position = offset;
            while (position < this.numInstances()) {
                reordered.addElement(this.instance(position));
                position += numFolds;
            }
        }
        this.m_Instances = reordered;
    }

    /**
     * Swaps two entries of the instance vector by delegating to
     * {@code m_Instances.swap(i, j)}.
     *
     * @param i position of the first instance
     * @param j position of the second instance
     */
    private void swap(int i, int j) {
        this.m_Instances.swap(i, j);
    }

    /**
     * Merges two datasets of equal size side by side.
     *
     * <p>The result lists the attributes of {@code first} followed by those of
     * {@code second}, is named after both relation names joined by an
     * underscore, and contains for every position the merge of the two
     * instances found at that position.
     *
     * @param first the dataset providing the leading attributes
     * @param second the dataset providing the trailing attributes
     * @return the merged dataset
     * @throws IllegalArgumentException if the datasets differ in their number
     *         of instances
     */
    public static M5Instances mergeInstances(M5Instances first, M5Instances second) {
        int rowCount = first.numInstances();
        if (rowCount != second.numInstances()) {
            throw new IllegalArgumentException("Instance sets must be of the same size");
        }

        M5Vector combinedAttributes = new M5Vector();
        int column = 0;
        while (column < first.numAttributes()) {
            combinedAttributes.addElement(first.attribute(column));
            ++column;
        }
        column = 0;
        while (column < second.numAttributes()) {
            combinedAttributes.addElement(second.attribute(column));
            ++column;
        }

        String mergedName = first.relationName() + '_' + second.relationName();
        M5Instances merged = new M5Instances(mergedName, combinedAttributes, rowCount);
        for (int row = 0; row < rowCount; ++row) {
            merged.add(first.instance(row).mergeInstance(second.instance(row)));
        }
        return merged;
    }

    /**
     * Demonstration driver that exercises this class and prints the results to
     * standard output.
     *
     * <p>It builds a small dataset from scratch and, if a file name is given,
     * loads that file instead (first incrementally, then in one go). It then
     * prints class information, assigns random weights, inserts and deletes an
     * attribute, creates an empty copy and a subset, produces train/test folds
     * for 3-fold cross-validation, and finally randomizes and sorts the data.
     *
     * <p>Both file readers are closed as soon as their data has been read,
     * including when reading fails. Any exception is caught and its stack
     * trace printed.
     *
     * @param argv either empty or a single element naming the dataset file
     */
    public static void test(String[] argv) {
        Random random = new Random(2L);
        try {
            int j;
            int i;
            if (argv.length > 1) {
                throw new Exception("Usage: M5Instances [<filename>]");
            }
            M5Vector testVals = new M5Vector(2);
            testVals.addElement("first_value");
            testVals.addElement("second_value");
            M5Vector testAtts = new M5Vector(2);
            testAtts.addElement(new M5Attribute("nominal_attribute", testVals));
            testAtts.addElement(new M5Attribute("numeric_attribute"));
            M5Instances instances = new M5Instances("test_set", testAtts, 10);
            instances.add(new M5Instance(instances.numAttributes()));
            instances.add(new M5Instance(instances.numAttributes()));
            instances.add(new M5Instance(instances.numAttributes()));
            instances.setClassIndex(0);
            System.out.println("\nSet of instances created from scratch:\n");
            System.out.println(instances);
            if (argv.length == 1) {
                String filename = argv[0];
                // Incremental read: header first, then up to five instances.
                FileReader reader = new FileReader(filename);
                try {
                    System.out.println("\nFirst five instances from file:\n");
                    instances = new M5Instances(reader, 1);
                    instances.setClassIndex(instances.numAttributes() - 1);
                    for (i = 0; i < 5 && instances.readInstance(reader); ++i) {
                    }
                    System.out.println(instances);
                } finally {
                    reader.close();
                }
                // Full read of the same file through a fresh reader.
                reader = new FileReader(filename);
                try {
                    instances = new M5Instances(reader);
                } finally {
                    reader.close();
                }
                instances.setClassIndex(instances.numAttributes() - 1);
                System.out.println("\nDataset:\n");
                System.out.println(instances);
                System.out.println("\nClass index: " + instances.classIndex());
            }
            System.out.println("\nClass name: " + instances.classAttribute().name());
            System.out.println("\nClass index: " + instances.classIndex());
            System.out.println("\nClass is nominal: " + instances.classAttribute().isNominal());
            System.out.println("\nClass is numeric: " + instances.classAttribute().isNumeric());
            System.out.println("\nClasses:\n");
            for (i = 0; i < instances.numClasses(); ++i) {
                System.out.println(instances.classAttribute().value(i));
            }
            System.out.println("\nClass values and labels of instances:\n");
            for (i = 0; i < instances.numInstances(); ++i) {
                M5Instance inst = instances.instance(i);
                System.out.print(inst.classValue() + "\t");
                System.out.print(inst.toString(inst.classIndex()));
                if (instances.instance(i).classIsMissing()) {
                    System.out.println("\tis missing");
                    continue;
                }
                System.out.println();
            }
            System.out.println("\nCreating random weights for instances.");
            for (i = 0; i < instances.numInstances(); ++i) {
                instances.instance(i).setWeight(random.nextDouble());
            }
            System.out.println("\nInstances and their weights:\n");
            System.out.println(instances.instancesAndWeights());
            System.out.print("\nSum of weights: ");
            System.out.println(instances.sumOfWeights());
            M5Instances secondInstances = new M5Instances(instances);
            M5Attribute testAtt = new M5Attribute("Inserted");
            secondInstances.insertAttributeAt(testAtt, 0);
            System.out.println("\nSet with inserted attribute:\n");
            System.out.println(secondInstances);
            System.out.println("\nClass name: " + secondInstances.classAttribute().name());
            secondInstances.deleteAttributeAt(0);
            System.out.println("\nSet with attribute deleted:\n");
            System.out.println(secondInstances);
            System.out.println("\nClass name: " + secondInstances.classAttribute().name());
            System.out.println("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n");
            System.out.println("\nData (internal values):\n");
            for (i = 0; i < instances.numInstances(); ++i) {
                for (j = 0; j < instances.numAttributes(); ++j) {
                    if (instances.instance(i).isMissing(j)) {
                        System.out.print("? ");
                        continue;
                    }
                    System.out.print(instances.instance(i).value(j) + " ");
                }
                System.out.println();
            }
            System.out.println("\nEmpty dataset:\n");
            M5Instances empty = new M5Instances(instances, 0);
            System.out.println(empty);
            System.out.println("\nClass name: " + empty.classAttribute().name());
            if (empty.classAttribute().isNominal()) {
                M5Instances copy = new M5Instances(empty, 0);
                copy.renameAttribute(copy.classAttribute(), "new_name");
                copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value(0), "new_val_name");
                System.out.println("\nDataset with names changed:\n" + copy);
                System.out.println("\nOriginal dataset:\n" + empty);
            }
            int start = instances.numInstances() / 4;
            int num = instances.numInstances() / 2;
            System.out.print("\nSubset of dataset: ");
            System.out.println(num + " instances from " + (start + 1) + ". instance");
            secondInstances = new M5Instances(instances, start, num);
            System.out.println("\nClass name: " + secondInstances.classAttribute().name());
            System.out.println("\nInstances and their weights:\n");
            System.out.println(secondInstances.instancesAndWeights());
            System.out.print("\nSum of weights: ");
            System.out.println(secondInstances.sumOfWeights());
            System.out.println("\nTrain and test folds for 3-fold CV:");
            if (instances.classAttribute().isNominal()) {
                instances.stratify(3);
            }
            for (j = 0; j < 3; ++j) {
                M5Instances train = instances.trainCV(3, j);
                M5Instances test = instances.testCV(3, j);
                System.out.println("\nTrain: ");
                System.out.println("\nInstances and their weights:\n");
                System.out.println(train.instancesAndWeights());
                System.out.print("\nSum of weights: ");
                System.out.println(train.sumOfWeights());
                System.out.println("\nClass name: " + train.classAttribute().name());
                System.out.println("\nTest: ");
                System.out.println("\nInstances and their weights:\n");
                System.out.println(test.instancesAndWeights());
                System.out.print("\nSum of weights: ");
                System.out.println(test.sumOfWeights());
                System.out.println("\nClass name: " + test.classAttribute().name());
            }
            System.out.println("\nRandomized dataset:");
            instances.randomize(random);
            System.out.println("\nInstances and their weights:\n");
            System.out.println(instances.instancesAndWeights());
            System.out.print("\nSum of weights: ");
            System.out.println(instances.sumOfWeights());
            System.out.print("\nInstances sorted according to first attribute:\n ");
            instances.sort(0);
            System.out.println("\nInstances and their weights:\n");
            System.out.println(instances.instancesAndWeights());
            System.out.print("\nSum of weights: ");
            System.out.println(instances.sumOfWeights());
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

