Skip to content

Commit

Permalink
add infer type and related it
Browse files Browse the repository at this point in the history
  • Loading branch information
Cpaulyz committed May 24, 2024
1 parent 7af89d1 commit 0e3ba33
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@

package org.apache.iotdb.db.it.schema;

import org.apache.iotdb.db.conf.IoTDBDescriptor;
import org.apache.iotdb.db.queryengine.common.header.ColumnHeaderConstant;
import org.apache.iotdb.it.env.EnvFactory;
import org.apache.iotdb.itbase.category.ClusterIT;
import org.apache.iotdb.itbase.category.LocalStandaloneIT;
import org.apache.iotdb.itbase.constant.TestConstant;
import org.apache.iotdb.util.AbstractSchemaIT;

import org.apache.tsfile.enums.TSDataType;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
Expand All @@ -41,6 +43,8 @@
import java.util.HashSet;
import java.util.Set;

import static org.junit.Assert.assertEquals;

/**
* Note that every test whose name begins with "IoTDB" is an integration test. Any test that starts
* the IoTDB server should be defined as an integration test.
Expand Down Expand Up @@ -224,4 +228,57 @@ public void testInsertAutoCreate3() throws SQLException {
}
}
}

/**
 * Inserts a single row into a device whose database and timeseries do not exist yet and verifies
 * the data type recorded for each auto-created series: a boolean literal (BOOLEAN), an integer
 * literal (DOUBLE), now() (INT64), a hex literal (BLOB), two short quoted strings (STRING) and a
 * quoted string one character longer than the configured infer-string max length (TEXT).
 */
@Test
public void testAutoCreateDataType() throws SQLException {
  // One character past the configured limit, so s7 is expected to be TEXT rather than STRING.
  int textLen = IoTDBDescriptor.getInstance().getConfig().getInferStringMaxLength() + 1;
  StringBuilder longText = new StringBuilder(textLen);
  for (int i = 0; i < textLen; i++) {
    longText.append('a');
  }
  String sql =
      "INSERT INTO root.sg0.d1(time,s1,s2,s3,s4,s5,s6,s7) values(1,true,1,now(),X'cafe',\"string\",\"2024-01-01\", \""
          + longText
          + "\")";
  // Paths whose data type was actually asserted; guards against the loop silently matching nothing.
  Set<String> checkedSeries = new HashSet<>();
  try (Connection connection = EnvFactory.getEnv().getConnection();
      Statement statement = connection.createStatement()) {
    statement.execute(sql);
    try (ResultSet resultSet = statement.executeQuery("show timeseries")) {
      while (resultSet.next()) {
        String path = resultSet.getString(ColumnHeaderConstant.TIMESERIES);
        String actualType = resultSet.getString(ColumnHeaderConstant.DATATYPE);
        switch (path) {
          case "root.sg0.d1.s1":
            assertEquals(TSDataType.BOOLEAN.toString(), actualType);
            break;
          case "root.sg0.d1.s2":
            assertEquals(TSDataType.DOUBLE.toString(), actualType);
            break;
          case "root.sg0.d1.s3":
            assertEquals(TSDataType.INT64.toString(), actualType);
            break;
          case "root.sg0.d1.s4":
            assertEquals(TSDataType.BLOB.toString(), actualType);
            break;
          case "root.sg0.d1.s5":
            assertEquals(TSDataType.STRING.toString(), actualType);
            break;
          case "root.sg0.d1.s6":
            assertEquals(TSDataType.STRING.toString(), actualType);
            break;
          case "root.sg0.d1.s7":
            assertEquals(TSDataType.TEXT.toString(), actualType);
            break;
          default:
            // Series not created by this test's insert; nothing to verify.
            continue;
        }
        checkedSeries.add(path);
      }
    }
  }
  // All seven measurements must have been auto-created and checked.
  assertEquals(7, checkedSeries.size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,8 @@ public class IoTDBConfig {
/** The type a time series is registered as when a floating-point number string such as "6.7" is received. */
private TSDataType floatingStringInferType = TSDataType.DOUBLE;

/**
 * Maximum encoded byte length of a string value for which type inference picks STRING; longer
 * values are inferred as TEXT (see TypeInferenceUtils).
 */
private int inferStringMaxLength = 512;

/**
* register time series as which type when receiving the Literal NaN. Values can be DOUBLE, FLOAT
* or TEXT
Expand Down Expand Up @@ -2284,6 +2286,10 @@ public TSDataType getFloatingStringInferType() {
return floatingStringInferType;
}

/** Returns the maximum length a string value may have to still be inferred as STRING. */
public int getInferStringMaxLength() {
  return this.inferStringMaxLength;
}

public void setFloatingStringInferType(TSDataType floatingNumberStringInferType) {
if (floatingNumberStringInferType != TSDataType.DOUBLE
&& floatingNumberStringInferType != TSDataType.FLOAT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1911,7 +1911,11 @@ private void parseInsertValuesSpec(
List<ConstantContext> values = row.constant();
for (int j = 0, columnCount = values.size(); j < columnCount; j++) {
if (j != timeIndex) {
if (values.get(j).STRING_LITERAL() != null) {
if (values.get(j).dateExpression() != null) {
valueList.add(
parseDateExpression(
values.get(j).dateExpression(), CommonDateTimeUtils.currentTime()));
} else if (values.get(j).STRING_LITERAL() != null) {
valueList.add(parseStringLiteralInInsertValue(values.get(j).getText()));
} else {
valueList.add(values.get(j).getText());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.iotdb.db.utils.constant.SqlConstant;

import org.apache.commons.lang3.StringUtils;
import org.apache.tsfile.common.conf.TSFileConfig;
import org.apache.tsfile.enums.TSDataType;

import java.util.Collections;
Expand All @@ -52,12 +53,22 @@ public class TypeInferenceUtils {
private static final TSDataType nanStringInferType =
IoTDBDescriptor.getInstance().getConfig().getNanStringInferType();

// Upper bound on a string's encoded byte length for it to be inferred as STRING (see isString).
// Read from the configuration once, when this class is initialized.
private static final int inferStringMaxLength =
IoTDBDescriptor.getInstance().getConfig().getInferStringMaxLength();

/** Private so that the class cannot be instantiated from outside. */
private TypeInferenceUtils() {
  // intentionally empty
}

/**
 * Tells whether {@code s} has the shape of a hex blob literal, i.e. it starts with {@code X'} and
 * ends with {@code '}.
 */
private static boolean isBlob(String s) {
  // Fewer than three characters cannot hold the X' prefix plus a separate closing quote.
  if (s.length() < 3) {
    return false;
  }
  boolean hasPrefix = s.startsWith("X'");
  return hasPrefix && s.endsWith("'");
}

/**
 * Tells whether {@code s} is short enough to be inferred as STRING: its length in bytes, encoded
 * with {@code TSFileConfig.STRING_CHARSET}, must not exceed {@code inferStringMaxLength}.
 */
private static boolean isString(String s) {
  int encodedLength = s.getBytes(TSFileConfig.STRING_CHARSET).length;
  return encodedLength <= inferStringMaxLength;
}

static boolean isNumber(String s) {
if (s == null || s.equals("NaN")) {
return false;
Expand Down Expand Up @@ -122,8 +133,9 @@ public static TSDataType getPredictedDataType(Object value, boolean inferType) {
return nanStringInferType;
} else if (isBlob(strValue)) {
return TSDataType.BLOB;
} else if (isString(strValue)) {
return TSDataType.STRING;
} else {
// TODO: use string as default data type
return TSDataType.TEXT;
}
}
Expand Down

0 comments on commit 0e3ba33

Please sign in to comment.