taxonomy_update_7005

7 taxonomy.install taxonomy_update_7005(&$sandbox)

Migrate {taxonomy_term_node} table to field storage.

@todo: This function can possibly be made much faster by wrapping a transaction around all the inserts.

File

modules/taxonomy/taxonomy.install, line 526
Install, update and uninstall functions for the taxonomy module.

Code

/**
 * Migrate {taxonomy_term_node} table to field storage.
 *
 * This is a multi-pass update. The first call only initializes $sandbox
 * (row total and the vocabulary/node type to field name map) and migrates
 * nothing. Each later call reads up to 1000 term-node rows and, for each one,
 * inserts a row into the matching field revision table; rows belonging to a
 * node's current revision are also inserted into the field data table and
 * {taxonomy_index}. Once every counted row has been processed, the legacy
 * tables are dropped and the unused parts of the 'taxonomyextra' overflow
 * field are removed.
 *
 * @param $sandbox
 *   State carried from one call to the next, passed by reference. The keys
 *   used are described at the top of the function body. $sandbox['#finished']
 *   is set to FALSE while rows remain and to TRUE when the migration is done.
 *
 * @todo This function can possibly be made much faster by wrapping a
 *   transaction around all the inserts.
 */
function taxonomy_update_7005(&$sandbox) {
  // $sandbox contents:
  // - total: The total number of term_node relationships to migrate.
  // - count: The number of term_node relationships that have been
  //   migrated so far.
  // - last: The db_query_range() offset to use when querying
  //   term_node; this field is incremented in quantities of $batch
  //   (1000) but at the end of each call to this function, last and
  //   count are the same.
  // - vocabularies: An associative array mapping vocabulary id and node
  //   type to field name. If a voc id/node type pair does not appear
  //   in this array but a term_node relationship exists mapping a
  //   term in voc id to node of that type, the relationship is
  //   assigned to the taxonomyextra field which allows terms of all
  //   vocabularies.
  // - cursor[values], cursor[deltas]: The contents of $values and
  //   $deltas at the end of the previous call to this function. These
  //   need to be preserved across calls because a single batch of
  //   1000 rows from term_node may end in the middle of the terms for
  //   a single node revision.
  //
  // $values is the array of values about to be/most recently inserted
  // into the SQL data table for the taxonomy_term_reference
  // field. Before $values is constructed for each record, the
  // $values from the previous insert is checked to see if the two
  // records are for the same node revision id; this enables knowing
  // when to reset the delta counters which are incremented across all
  // terms for a single field on a single revision, but reset for each
  // new field and revision.
  //
  // $deltas is an associative array mapping field name to the number
  // of term references stored so far for the current revision, which
  // provides the delta value for each term reference data insert. The
  // deltas are reset for each new revision.

  // Load every non-deleted taxonomy_term_reference field definition. The
  // result is indexed below by field name ($field_info[$field_name]).
  $conditions = array(
    'type' => 'taxonomy_term_reference', 
    'deleted' => 0,
  );
  $field_info = _update_7000_field_read_fields($conditions, 'field_name');

  // This is a multi-pass update. On the first call we need to initialize some
  // variables. No rows are migrated during this first call.
  if (!isset($sandbox['total'])) {
    $sandbox['last'] = 0;
    $sandbox['count'] = 0;

    // Run the same joins as the query that is used later to retrieve the
    // term_node data, this ensures that bad records in that table - for
    // tids which aren't in taxonomy_term_data or nids which aren't in {node}
    // are not included in the count.
    $sandbox['total'] = db_query('SELECT COUNT(*) FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid INNER JOIN {node} n ON tn.nid = n.nid LEFT JOIN {node} n2 ON tn.vid = n2.vid')->fetchField();

    // Use an inline version of Drupal 6 taxonomy_get_vocabularies() here since
    // we can no longer rely on $vocabulary->nodes from the API function.
    $result = db_query('SELECT v.vid, v.machine_name, n.type FROM {taxonomy_vocabulary} v INNER JOIN {taxonomy_vocabulary_node_type} n ON v.vid = n.vid');
    $vocabularies = array();
    foreach ($result as $record) {

      // The query above uses an INNER JOIN, so a vocabulary with no
      // associated node types yields no row at all. The isset() check only
      // guards against a NULL type value.
      // NOTE(review): this comment previously described a LEFT JOIN; confirm
      // whether the join type or the guard is the intended behavior.
      if (isset($record->type)) {
        $vocabularies[$record->vid][$record->type] = 'taxonomy_' . $record->machine_name;
      }
    }

    // The 'vocabularies' key is only set when at least one mapping exists;
    // later lookups use isset() and fall back to 'taxonomyextra'.
    if (!empty($vocabularies)) {
      $sandbox['vocabularies'] = $vocabularies;
    }
  }
  else {
    // We do each pass in batches of 1000.
    $batch = 1000;

    // Query selects all revisions at once and processes them in revision and
    // term weight order. Join types:
    //
    // - INNER JOIN term_node ON tn.tid: We are migrating term-node
    //   relationships. If there are none for a term, we do not need the
    //   term_data row.
    // - INNER JOIN {node} n ON n.nid: If a term-node relationship exists for a
    //   nid that does not exist, we cannot migrate it as we have no node to
    //   relate it to; thus we do not need that row from term_node.
    // - LEFT JOIN {node} n2 ON n2.vid: If the current term-node relationship
    //   is for the current revision of the node, this left join will match and
    //   is_current will be non-NULL (we also get the current sticky and
    //   created in this case). This tells us whether to insert into the
    //   current data tables in addition to the revision data tables.
    //
    // This query must return a consistent ordering across multiple calls.  We
    // need them ordered by node vid (since we use that to decide when to reset
    // the delta counters) and by term weight so they appear within each node
    // in weight order. However, tn.vid,td.weight is not guaranteed to be
    // unique, so we add tn.tid as an additional sort key because tn.tid,tn.vid
    // is the primary key of the D6 term_node table and so is guaranteed
    // unique. Unfortunately it also happens to be in the wrong order which is
    // less efficient, but c'est la vie.
    $query = 'SELECT td.vid AS vocab_id, td.tid, tn.nid, tn.vid, n.type, n2.created, n2.sticky, n2.nid AS is_current FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid INNER JOIN {node} n ON tn.nid = n.nid LEFT JOIN {node} n2 ON tn.vid = n2.vid ORDER BY tn.vid, td.weight ASC, tn.tid';
    $result = db_query_range($query, $sandbox['last'], $batch);

    // Resume from the state saved by the previous pass, if any. On the first
    // migrating pass there is no cursor, so $values stays unset until the
    // first row is processed.
    if (isset($sandbox['cursor'])) {
      $values = $sandbox['cursor']['values'];
      $deltas = $sandbox['cursor']['deltas'];
    }
    else {
      $deltas = array();
    }
    foreach ($result as $record) {
      $sandbox['count'] += 1;

      // Use the valid field for this vocabulary and node type or use the
      // overflow vocabulary if there is no valid field.
      $field_name = isset($sandbox['vocabularies'][$record->vocab_id][$record->type]) ? $sandbox['vocabularies'][$record->vocab_id][$record->type] : 'taxonomyextra';
      $field = $field_info[$field_name];

      // Start deltas from 0, and increment by one for each term attached to a
      // node.
      if (!isset($deltas[$field_name])) {
        $deltas[$field_name] = 0;
      }

      if (isset($values)) {

        // If the last inserted revision_id is the same as the current record,
        // use the previous deltas to calculate the next delta. $values[2] is
        // the revision_id slot of the previously inserted row (see the
        // $columns/$values arrays built below).
        if ($record->vid == $values[2]) {

          // For limited cardinality fields, the delta must not be allowed to
          // exceed the cardinality during the update. So ensure that the
          // delta about to be inserted is within this limit.
          // @see field_default_validate().
          if ($field['cardinality'] != FIELD_CARDINALITY_UNLIMITED && ($deltas[$field_name] + 1) > $field['cardinality']) {

            // For excess values of a single-term vocabulary, switch over to
            // the overflow field.
            $field_name = 'taxonomyextra';
            $field = $field_info[$field_name];
            if (!isset($deltas[$field_name])) {
              $deltas[$field_name] = 0;
            }
          }
        }
        else {

          // When the record is a new revision, empty the deltas array.
          $deltas = array($field_name => 0);
        }
      }

      // Table and column names are built directly from the field name rather
      // than looked up in the field's storage details. During upgrades, it's
      // always SQL.
      $table_name = "field_data_{$field_name}";
      $revision_name = "field_revision_{$field_name}";
      $value_column = $field_name . '_tid';

      // Column names and values in field storage are the same for current and
      // revision. The delta is post-incremented: the current counter value is
      // stored in this row and the counter then advances for the next term of
      // the same field and revision.
      $columns = array('entity_type', 'entity_id', 'revision_id', 'bundle', 'language', 'delta', $value_column);
      $values = array('node', $record->nid, $record->vid, $record->type, LANGUAGE_NONE, $deltas[$field_name]++, $record->tid);

      // Insert rows into the revision table.
      db_insert($revision_name)->fields($columns)->values($values)->execute();

      // is_current column is a node ID if this revision is also current
      // (it is n2.nid from the LEFT JOIN on tn.vid = n2.vid, NULL otherwise).
      if ($record->is_current) {
        db_insert($table_name)->fields($columns)->values($values)->execute();

        // Update the {taxonomy_index} table.
        db_insert('taxonomy_index')
          ->fields(array('nid', 'tid', 'sticky', 'created'))
          ->values(array($record->nid, $record->tid, $record->sticky, $record->created))
          ->execute();
      }
    }

    // Store the set of inserted values and the current revision's deltas in the
    // sandbox.
    $sandbox['cursor'] = array(
      'values' => $values, 
      'deltas' => $deltas,
    );

    // Advance the range offset by a full batch for the next pass.
    $sandbox['last'] += $batch;
  }

  // More counted rows remain: flag the update as not finished.
  if ($sandbox['count'] < $sandbox['total']) {
    $sandbox['#finished'] = FALSE;
  }
  else {
    // All counted rows have been processed, so the legacy tables can go.
    db_drop_table('taxonomy_vocabulary_node_type');
    db_drop_table('taxonomy_term_node');

    // The migration is complete. This branch is also reached on the very first
    // call when the count query found nothing to migrate (total is 0).
    $sandbox['#finished'] = TRUE;

    // Determine necessity of the taxonomyextra field: collect the distinct
    // bundles that actually have rows in its revision table.
    $field = $field_info['taxonomyextra'];
    $revision_name = 'field_revision_' . $field['field_name'];
    $node_types = db_select($revision_name)->distinct()->fields($revision_name, array('bundle'))
      ->execute()->fetchCol();

    if (empty($node_types)) {
      // Delete the overflow field if there are no rows in the revision table.
      _update_7000_field_delete_field('taxonomyextra');
    }
    else {
      // Remove instances which are not actually used, i.e. bundles that have
      // a taxonomyextra instance but no rows in its revision table.
      $bundles = db_query('SELECT bundle FROM {field_config_instance} WHERE field_name = :field_name', array(':field_name' => 'taxonomyextra'))->fetchCol();
      $bundles = array_diff($bundles, $node_types);
      foreach ($bundles as $bundle) {
        _update_7000_field_delete_instance('taxonomyextra', 'node', $bundle);
      }
    }
  }
}
Login or register to post comments