Setup purge config to always leave the most recent report (per type)

Per the discussion started here, we attempted to ensure that the newest record is always omitted during purge by implementing the following:

module.exports = {
  run_every_days: 7,
  cron: '*/5 * * * *',
  fn: function (userCtx = {}, contact = {}, reports = [], messages, chtScriptApi, permissions) {
    const NOW = Date.now();
    const monthsAgo = months => NOW - 1000 * 60 * 60 * 24 * 31 * months;

    const householdCOPC = 'copc-hhscreening';
    const householdCSharp = 'csharp-householdconsentandquestionnaire';
    const TEST_FORM = 'YYYZ';
    const individualCOPC = 'copc-individualhealthcaretasks';
    const individualCSharp = 'csharp-individualhhconsentedquestionnaire';
    const individualDeath = 'death_report';
    
    const PRESERVE_ALL = -1;
    const PRESERVE_NONE = 0;

    const CONF = Object.freeze({
      'team_lead': {
        'reportTypes': {
          [householdCOPC]: 1,
          [householdCSharp]: 1,
          [individualCOPC]: 1,
          [individualCSharp]: 1,
          [individualDeath]: 1,
          [TEST_FORM]: PRESERVE_NONE,
        },
      }
    });

    const role = userCtx && userCtx.roles && userCtx.roles.length >= 1 ? userCtx.roles[0] : false;

    if (role && role in CONF) {
      console.info(`Purge running for "${role}" on contact "${contact._id} - ${contact.contact_type}" with report count "${reports.length}"`);
      const reportTypes = CONF[role]['reportTypes'];
      const contactTypes = CONF[role]['contactTypes'];

      const shouldPurge = (doc, conf) => {
        if (typeof conf === 'object') {
          return (conf['appliesIf'] ? conf['appliesIf'](doc) : true) && doc.reported_date <= monthsAgo(conf['preserve']);
        }
        else if (typeof conf === 'number') {
          return conf !== PRESERVE_NONE ? conf !== PRESERVE_ALL ? doc.reported_date <= monthsAgo(conf) : false : true;
        }

        return false;
      };

      const purgeContact = (contactTypes && contact.contact_type in contactTypes && shouldPurge(contact, contactTypes[contact.contact_type]) ? [contact._id] : []).filter((value) => value);

      const splitByType = reports.reduce((accumulator, doc) => {
        if ( doc.form in accumulator) {
          accumulator[doc.form].push(doc);
        }

        return accumulator;
      },
        Object.assign({}, ...Object.keys(reportTypes).map(value => ({ [value]: [] })))
      );

      const reportsToPurge = [];
      for (type in splitByType) {
        console.info(`Processing ${splitByType[type].length} records of type ${type}`);
        const results = splitByType[type].filter((doc) => doc.form in reportTypes && shouldPurge(doc, reportTypes[doc.form])).map(r => r._id).filter((v) => v);
        if(results === 0){
          continue;
        }
        else if(results.length === splitByType[type].length){
          console.info("Purge reports within period except latest");
          const latestItem = splitByType[type].reduce((latest, item) => 
            item.reported_date > (latest?.reported_date || 0) ? item : latest, null) ?? splitByType[type][splitByType[type].length-1];
          reportsToPurge.push(...splitByType[type].filter(item => item._id !== latestItem._id));
        }
        else {
          console.log('Purge reports within period');
          reportsToPurge.push(...results);
        }
      }

      // Purging of messages are not needed as we don't use the feature
      return [
        ...purgeContact,
        ...reportsToPurge
      ];
    }

    console.info('Purge completed without any purge');
    return [];
  }
};

Questions/Concerns:

  1. You’ll notice that if the latest item cannot be found by the reducer, it falls back to the last item in the report array. Are we correct in assuming that the last report will always be the newest?
  2. Unsure if the reports are in descending order, we decided to perform the “latest record check” at the end, rather than sorting the reports ahead of time. This means that for each report type, only if we’re about to purge all records, we grab the latest report and omit it from the purge list, rather than sorting all reports to identify the latest upfront. If necessary, do you see a more efficient way of doing this?
  3. Is there an appetite to build up the “split by report type” before passing in the reports to the purge function?

Thank you, @diana, for pointing out that the purge function receives a context similar to the contact-summary. While it loops through ‘one doc at a time,’ this refers to one contact along with all its associated reports, allowing us to compare the reports of that contact against one another.

Are we correct in assuming that the last report will always be the newest?

You have access to the reported_date property on the report and should use that to sort the reports as you desire. You should not assume that reports are sorted in any significant way.

Is there an appetite to build up the “split by report type” before passing in the reports to the purge function?

Since you can easily achieve this in the purge function, I doubt this will be implemented.

For interest’s sake, what we settled on for now:

module.exports = {
  run_every_days: 7,
  cron: '0 23 * * 5',
  fn: function (userCtx = {}, contact = {}, reports = [], messages, chtScriptApi, permissions) {
    if (!contact || contact._deleted) {
      console.warn(`[PURGE] Skipping purge for invalid or deleted contact: ${contact._id || 'unknown'}`);
      return [];
    }

    const NOW = Date.now();
    const monthsAgo = months => NOW - 1000 * 60 * 60 * 24 * 31 * months;

    const householdCOPC = 'copc-hhscreening';
    const householdCSharp = 'csharp-householdconsentandquestionnaire';
    const TEST_FORM = 'YYYZ';
    const individualCOPC = 'copc-individualhealthcaretasks';
    const individualCSharp = 'csharp-individualhhconsentedquestionnaire';
    const individualDeath = 'death_report';
    
    const PRESERVE_ALL = -1;
    const PRESERVE_NONE = 0;

    const CONF = Object.freeze({
      'dho': {
        'reportTypes': {
          [householdCOPC]: 1,
          [householdCSharp]: 1,
          [individualCOPC]: 1,
          [individualCSharp]: 1,
          [individualDeath]: 1,
          [TEST_FORM]: PRESERVE_NONE,
        },
      },
      'team_lead': {
        'reportTypes': {
          [householdCOPC]: 1,
          [householdCSharp]: 1,
          [individualCOPC]: 1,
          [individualCSharp]: 1,
          [individualDeath]: 1,
          [TEST_FORM]: PRESERVE_NONE,
        },
      }
    });

    const role = userCtx && userCtx.roles && userCtx.roles.length >= 1 ? userCtx.roles[0] : false;

    if (role && role in CONF) {
      console.info(`[PURGE] Running for "${role}" on contact "${contact._id} - ${contact.contact_type}" with report count "${reports.length}"`);
      const reportTypes = CONF[role]['reportTypes'];
      const contactTypes = CONF[role]['contactTypes'];

      const getReportedDate = (doc) => {
        const date = doc.reported_date;
        return typeof date === 'number' && !isNaN(date) ? date : 0;
      };

      const shouldPurge = (doc, conf) => {
        if (typeof conf === 'object') {
          return (conf['appliesIf'] ? conf['appliesIf'](doc) : true) && getReportedDate(doc) <= monthsAgo(conf['preserve']);
        }
        else if (typeof conf === 'number') {
          return conf !== PRESERVE_NONE ? conf !== PRESERVE_ALL ? getReportedDate(doc) <= monthsAgo(conf) : false : true;
        }

        return false;
      };

      const purgeContact = (contactTypes && contact.contact_type in contactTypes && shouldPurge(contact, contactTypes[contact.contact_type]) ? [contact._id] : []).filter((value) => value);

      const splitByType = reports.reduce((accumulator, doc) => {
        const form = doc.form;
        if (form in reportTypes) {
          accumulator[form] = accumulator[form] || [];
          accumulator[form].push(doc);
        }
        return accumulator;
      }, {});

      const reportsToPurge = [];
      Object.entries(splitByType).forEach(([type, reports]) => {
        console.info(`[PURGE] Processing ${reports.length} report(s) of type ${type}`);

        const results = [];
        let latestItem = undefined;
        for (const doc of reports) {
          if (shouldPurge(doc, reportTypes[doc.form])) {
            results.push(doc._id);
          }

          if (!latestItem || getReportedDate(latestItem) < getReportedDate(doc)) {
            latestItem = doc;
          }
        }

        if (results.length === reports.length && reports.length > 0) {
          if (!latestItem) {
            console.error(`[PURGE] ERROR: Unable to determine the latest report for type ${type}. Skipping.`);
            return;
          }

          console.info('[PURGE] Latest report omitted with id: ', latestItem._id);
          reportsToPurge.push(...results.filter(id => id !== latestItem._id));
        } else {
          reportsToPurge.push(...results);
        }
      });

      console.debug('[PURGE] Report ids: ', reportsToPurge);
      return [
        ...purgeContact,
        ...reportsToPurge
      ];
    }

    console.warn(`[PURGE] No configuration found for role: ${role}`);
    return [];
  }
};
2 Likes