machinelearning/src/Microsoft.ML.Data/Transforms/doc.xml at master · devhttps/machinelearning

113 lines (109 loc) · 5.19 KB
﻿<?xml version="1.0" encoding="utf-8" ?>
  <members>
    <member name="NAFilter">
      <summary>
        Removes rows that contain missing values in the specified input columns.
      </summary>
      <remarks>
        This transform removes the entire row if any of the input columns have a missing value in that row.
        This preprocessing is required for many ML algorithms that cannot work with missing values.
        Useful if any missing entry invalidates the entire row.
        If <see cref="P:Microsoft.ML.Transforms.MissingValuesRowDropper.Complement"/> is set to true, this transform does the exact opposite:
        it keeps only the rows that have missing values.
      </remarks>
    </member>
    <example name="NAFilter">
      <example>
        <code language="csharp">
          pipeline.Add(new MissingValuesRowDropper(&quot;Column1&quot;));
        </code>
      </example>
    </example>
    <member name="TextToKey">
      <summary>
        Converts input values (words, numbers, etc.) to an index in a dictionary.
      </summary>
      <remarks>
        The TextToKeyConverter transform builds up term vocabularies (dictionaries).
        The TextToKeyConverter and the <see cref="T:Microsoft.ML.Transforms.HashConverter"/> are the two primary mechanisms by which raw input is transformed into keys.
        If multiple columns are used, each column builds/uses exactly one vocabulary.
        The output columns are KeyType-valued.
        The Key value is the one-based index of the item in the dictionary.
        If the key is not found in the dictionary, it is assigned the missing value indicator.
        This dictionary mapping values to keys is most commonly learnt from the unique values in input data,
        but can be defined through other means: either with the mapping defined directly on the command line, or as loaded from an external file.
      </remarks>
      <seealso cref="T:Microsoft.ML.Transforms.HashConverter"/>
      <seealso cref="T:Microsoft.ML.Transforms.KeyToTextConverter"/>
    </member>
    <example name="TextToKey">
      <example>
        <code language="csharp">
          pipeline.Add(new TextToKeyConverter((&quot;Column&quot;, &quot;OutColumn&quot;))
          {
              Sort = TermTransformSortOrder.Occurrence
          });
        </code>
      </example>
    </example>
    <member name="ValueToKeyMappingEstimator">
      <summary>
        Converts input values (words, numbers, etc.) to an index in a dictionary.
      </summary>
      <remarks>
        The ValueToKeyMappingEstimator builds up term vocabularies (dictionaries).
        If multiple columns are used, each column builds/uses exactly one vocabulary.
        The output columns are KeyType-valued.
        The Key value is the one-based index of the item in the dictionary.
        If the key is not found in the dictionary, it is assigned the missing value indicator.
        This dictionary mapping values to keys is most commonly learnt from the unique values in input data,
        but can be defined through other means: either with the mapping defined directly on the command line, or as loaded from an external file.
      </remarks>
    </member>
    <member name="NAHandle">
      <summary>
        Handles missing values by replacing them with either the default value or the indicated value.
      </summary>
      <remarks>
        This transform handles missing values in the input columns. For each input column, it creates an output column
        where the missing values are replaced by one of these specified values:
        <list type='bullet'>
          <item>
            <description>The default value of the appropriate type.</description>
          </item>
          <item>
            <description>The mean value of the appropriate type.</description>
          </item>
          <item>
            <description>The max value of the appropriate type.</description>
          </item>
          <item>
            <description>The min value of the appropriate type.</description>
          </item>
        </list>
        <para>The last three work only for numeric/TimeSpan/DateTime kind columns.</para>
        <para>
          The output column can also optionally include an indicator vector for which slots were missing in the input column.
          This can be done only when the indicator vector type can be converted to the input column type, i.e. only for numeric columns.
        </para>
        <para>
          When computing the mean/max/min value, there is also an option to compute it over the whole column instead of per slot.
          This option has a default value of true for variable length vectors, and false for known length vectors.
          It can be changed to true for known length vectors, but it results in an error if changed to false for variable length vectors.
        </para>
      </remarks>
      <seealso cref="T:Microsoft.ML.Data.DataKind"/>
    </member>
    <example name="NAHandle">
      <example>
        <code language="csharp">
          pipeline.Add(new MissingValueHandler(&quot;FeatureCol&quot;, &quot;CleanFeatureCol&quot;)
          {
              ReplaceWith = NAHandleTransformReplacementKind.Mean
          });
        </code>
      </example>
    </example>
  </members>
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

doc.xml

Latest commit

History

doc.xml

File metadata and controls