Data Expression Average Use by Day of Week

533
7
Jump to solution
11-23-2021 12:46 PM
Labels (1)
erica_tefft
Regular Contributor

Hi,

I have a Data Expression that I am using in my Dashboard to help me create a graph that shows average use by day of week for three separate locations. I am unsure if this is actually providing me with the correct values, and was hoping someone can look this over to see if what I am doing makes sense / is actually right.

Any help refining this would also be really helpful, as I'm sure this is not elegant. 

 

var portal = Portal("https://www.arcgis.com");
// Group the features by the BLA 

var BLA = Filter(FeatureSetByPortalItem(portal,"xxx",0,['BLA_Used', 'Survey_Date', 'Name'],false), "NOT Name = 'Rental%'");

var groupBLA = GroupBy(BLA,
  ["Survey_Date"],
  [
    { name: "BoatLaunchArea", expression: "BLA_Used", statistic: "MIN" },
    { name: "cntBoats", expression: "BLA_Used", statistic: "COUNT" },
    { name: "date", expression: "Survey_Date", statistic: "MIN" },
  ]
);

var combinedDict1 = {
  fields: [
    { name: "sdate", type: "esriFieldTypeDate" },
    { name: "dow", type: "esriFieldTypeInteger" },
    { name: "BLA", type: "esriFieldTypeString" },
    { name: "cntSurveyScans", type: "esriFieldTypeInteger" },
  ],
  geometryType: "",
  features: [],
};

var i = 0;

for (var m in groupBLA) {
  combinedDict1.features[i] = {
    attributes: {
      BLA: m["BoatLaunchArea"],
      cntSurveyScans: (Floor(m["cntBoats"] / 2)),
      sdate: DateDiff(m["Survey_Date"], Date(1970, 0, 1), "MM/DD/yyyy"),
      dow: Weekday(m["Survey_Date"]),
    },
  };
  i++;
}

var dict1 = FeatureSet(Text(combinedDict1));

var groupDOW = GroupBy(dict1,
  [
    { name: "dow", expression: "dow" },
    { name: "BLA", expression: "BLA" }
  ],
  [
    // { name: "BLAName", expression: "BLA", statistic: "MIN" },
    // { name: "surveydate", expression: "sdate", statistic: "MIN" },
    { name: "countBoats", expression: "BLA", statistic: "COUNT" },
  ]
);

var combinedDict2 = {
  fields: [
    { name: "BoatArea", type: "esriFieldTypeString" },
    { name: "wday", type: "esriFieldTypeInteger" },
    { name: "avgBoats", type: "esriFieldTypeInteger" },
  ],
  geometryType: "",
  features: [],
};

var x = 0;

for (var n in groupDOW) {
  combinedDict2.features[x] = {
    attributes: {
      BoatArea: n["BLA"],
      wday: n["dow"],
      avgBoats: (Floor(Average(n["countBoats"] / 2))),
    },
  };
  x++;
}

return FeatureSet(Text(combinedDict2));

 

One additional note...towards the bottom for 'avgBoats', I have divided by 2 in addition to using 'Average' because each boat is scanned twice - once when launching and once when coming off water. I wanted to divide by 2 so each boat is only counted once, and then Average what was left. 

erica_tefft_0-1637700499212.png

Here is a sample of the output:

erica_tefft_0-1637700290493.png

Any help would be appreciated!

 

Erica
0 Kudos
1 Solution

Accepted Solutions
erica_tefft
Regular Contributor

I was able to resolve this - there were two issues.

First, within my data expression to get the Average Private Launches per Day of Week, there were two things wrong. 

  1. The filter expression in line 4 was incorrect and I needed to update it to say "NOT Name like 'Rental%'" instead of "Not Name = 'Rental%'".
  2.  The count from line 16 is really the count of total survey scans - I need to drill this down to the number of boats. Since each boat has 2 surveys completed each day (for launch/retrieval), I needed to divide this value by 2 in line 31. 

 

var portal = Portal("https://www.arcgis.com");
// Group the features by the BLA 

var BLA = Filter(FeatureSetByPortalItem(portal,"###",0,['BLA_Used', 'Survey_Date', 'Name'],false), "NOT Name like 'Rental%'");

// group by boat area and date, return count of launches i.e., boat count
var groupBLA = GroupBy(
    BLA,
    [
        {name: "the_year", expression: "EXTRACT(YEAR FROM Survey_Date)"},
        {name: "the_month", expression: "EXTRACT(MONTH FROM Survey_Date)"},
        {name: "the_day", expression: "EXTRACT(DAY FROM Survey_Date)"},
        {name: "BLA", expression: "BLA_Used"}
    ],
    [
        { name: "survey_count", expression: "BLA_Used", statistic: "COUNT" }
    ]
);

// empty feature array 
var features = [];


// iterate over grouped featureset to populate array
for (var g in groupBLA){
    var wday = Weekday(Date(g['the_year'], g['the_month']-1, g['the_day']))
    var feat = {
        attributes: {
            BoatArea: g['BLA'],
            wday: wday,
            boats: g['survey_count']/2,
            wday_label: Decode(wday, 0, 'Sunday', 1, 'Monday', 2, 'Tuesday', 3, 'Wednesday', 4, 'Thursday', 5, 'Friday', 6, 'Saturday', '')
        }
    }
    Push(features, feat)
};

// pass feature array into dict
var outDict = {
    fields: [
        { name: "BoatArea", type: "esriFieldTypeString" },
        { name: "wday", type: "esriFieldTypeInteger" },
        { name: "boats", type: "esriFieldTypeInteger" },
        { name: "wday_label", type: "esriFieldTypeString" },
    ],
    geometryType: "",
    features: features
};

// convert dict to FeatureSet
return FeatureSet(Text(outDict))

 

 

Next, within the data expression to get the Count of Private Launches by Day of Week, I needed to correct the Filter expression in line 5 the same way I did above. I also commented out some redundant lines within the GroupBy statement on line 7. 

 

var portal = Portal("https://www.arcgis.com");
// Group the features by the BLA 

var BLA = Filter(FeatureSetByPortalItem(portal,"###",0,['BLA_Used', 'Survey_Date', 'Name'],false), "NOT Name like 'Rental%'");

var groupBLA = GroupBy(BLA,
  ["Survey_Date", "BLA_Used"],
  [
    //{ name: "BoatLaunchArea", expression: "BLA_Used", statistic: "MIN" },
    { name: "cntBoats", expression: "BLA_Used", statistic: "COUNT" },
    //{ name: "date", expression: "Survey_Date", statistic: "MIN" },
  ]
);

var combinedDict = {
  fields: [
    { name: "sdate", type: "esriFieldTypeDate" },
    { name: "dow", type: "esriFieldTypeInteger" },
    { name: "BLA", type: "esriFieldTypeString" },
    { name: "cntSurveyScans", type: "esriFieldTypeDouble" },
  ],
  geometryType: "",
  features: [],
};

var i = 0;

for (var m in groupBLA) {
  combinedDict.features[i] = {
    attributes: {
      BLA: m["BLA_Used"],
      cntSurveyScans: (m["cntBoats"] / 2),
      sdate: DateDiff(m["Survey_Date"], Date(1970, 0, 1), "MM/DD/yyyy"),
      dow: Weekday(m["Survey_Date"]),
    },
  };
  i++;
}

return FeatureSet(Text(combinedDict));

 

 

This resulted in consistent and verifiable results when comparing data between the data expression and Excel:

BLA 1 – Private Launches, Saturday

  • AVG serial chart = 18
  • AVG Excel = 17.88
Erica

View solution in original post

0 Kudos
7 Replies
jcarlson
MVP Honored Contributor

Hard to say if it's giving you the "right" values without seeing the actual data. That said, we can probably refine that code a bit. Since the intended output is simply average boats by area by weekday, we can probably get this done in fewer steps. We can almost get away with using the GroupBy output directly, but that pesky weekday isn't available through an SQL expression.

First of all, if we're grouping both by date as well as by boat launch area, we can include both fields in our initial function. The grouping fields are included in the output, and do not need to be part of the statistics array.

Secondly, I should point out that after the first grouping, re-using the COUNT statistic is going to mess up your values. In the code you shared, look at line 50. The countboats field in your FeatureSet was already a count of boats from the initial grouping by date. Using COUNT on this field is counting the number of unique dates in the group. You probably want to use SUM here instead.

But the point may be moot. Since we've gotten a grouped Featureset in our first step that has already grouped by date and launch area, that's everything we need except for the weekday. Keep in mind that the chart widget in a dashboard can do its own aggregations and statistics, too. As long as we have the right fields in the output FeatureSet, certain things can be managed there instead.

Final point: with recent updates to Arcade, we can use Push with an array object, rather than using an index variable. Works the same, but I think it's a little cleaner.

 

 

var groupBLA = GroupBy(
    BLA,
    [
        {name: "the_date", expression: "Survey_Date"},
        {name: "BLA", expression: "BLA_Used"
    ],
    [
        { name: "cntBoats", expression: "BLA_Used", statistic: "COUNT" }
    ]
);


var features = [];
    
for (var g in groupBLA){
    var feat = {
        attributes: {
            BoatArea: g['BLA'],
            wday: Weekday(g['the_date']),
            boats: Floor(g['boat_count']/2)
        }
    }
    Push(features, feat)
};

var outDict = {
    fields: [
        { name: "BoatArea", type: "esriFieldTypeString" },
        { name: "wday", type: "esriFieldTypeInteger" },
        { name: "boats", type: "esriFieldTypeInteger" }
    ],
    geometryType: "",
    features: features
};

return FeatureSet(Text(outDict))

 

 

Once you have this in your chart, you can set it up as follows:

  • Categories from: Grouped values
  • Category field: wday
  • Split by field: BoatArea
  • Statistic: Average
  • Field: Boats
  • Sort by: wday

jcarlson_0-1637708125002.png

I used a sample dataset of my own with a date and category field to replicate this.

Final thoughts: what is the "sdate" field doing in your first dict? I couldn't tell so I omitted it. Also, you may wish to add a 4th field for the textual representation of the weekday so that something like "Sun" or "Fri" displays, but can still be sorted by the wday field. (I'd suggest using Decode to populate such a field, if you want to do that.)

 

- Josh Carlson
Kendall County GIS
0 Kudos
erica_tefft
Regular Contributor

Hi @jcarlson - 

Thank you. So something in the code you suggest is not working properly for me once I get to the actual graphing portion. I was having this issue last week in my testing with trying to get this data expression to work:

erica_tefft_0-1637761079372.png

You can see the value (y) axis is not correct - I end up with decimal values of 0.1 for everything. I do not really understand why this is happening. 

Also, to answer your question about the "sdate" field in my original script - this came from a separate data expression that I wrote so I could graph the count of boats per day of week through the entire boating season (April - October) so that we could have a sense of overall use trends throughout the boating season. I figured I'd use that as a basis for this "average use by weekday" data expression you are helping me with now.  The "sdate" field allowed me to include the date in the featureSet, and then I used that to calculate Weekday for each row in my featureSet.

The graph output of the other data expression (where "sdate" originated) is:

erica_tefft_1-1637761458627.png

If you have the time or inclination...here is a View of the data I am working with if you are able to test this with the real data...https://mass-eoeea.maps.arcgis.com/home/item.html?id=faadbf5e0ef44523a8eb0d37adab33aa#overview 

Thank you,

Erica
0 Kudos
jcarlson
MVP Honored Contributor

Thank you! Always best to work with the real data. So, it looks like I made a couple mistakes above.

Many "dates" have a boat count of just 1, since grouping by a datetime will consider Monday at 12:30 different from Monday at 12:32. So we need to change the GroupBy expression to ignore the time. When it groups by date and time, those single-boat counts, divided by 2 and taking the Floor, is going to yield a 0. And then averaging a bunch of weekdays with 0s, you're going to get a small decimal.

So to get around that, we'll extract the year, month, and day in our GroupBy, in order to group per date regardless of time. We'll then use those values to cobble together a date for the Weekday function. And as long as we're at it, let's include a Decode function!

 

// get feature layer
var BLA = FeatureSetByPortalItem(
    Portal("https://arcgis.com"),
    'faadbf5e0ef44523a8eb0d37adab33aa',
    '0',
    [
        'Survey_Date',
        'BLA_Used'
    ],
    false
);

// group by boat area and date, return count of launches i.e., boat count
var groupBLA = GroupBy(
    BLA,
    [
        {name: "the_year", expression: "EXTRACT(YEAR FROM Survey_Date)"},
        {name: "the_month", expression: "EXTRACT(MONTH FROM Survey_Date)"},
        {name: "the_day", expression: "EXTRACT(DAY FROM Survey_Date)"},
        {name: "BLA", expression: "BLA_Used"}
    ],
    [
        { name: "boat_count", expression: "BLA_Used", statistic: "COUNT" }
    ]
);

// empty feature array 
var features = [];


// iterate over grouped featureset to populate array
for (var g in groupBLA){
    var wday = Weekday(Date(g['the_year'], g['the_month'], g['the_day']))
    var feat = {
        attributes: {
            BoatArea: g['BLA'],
            wday: wday,
            boats: g['boat_count'],
            wday_label: Decode(wday, 0, 'Sun', 1, 'Mon', 2, 'Tue', 3, 'Wed', 4, 'Thu', 5, 'Fri', 6, 'Sat', '')
        }
    }
    Push(features, feat)
};

// pass feature array into dict
var outDict = {
    fields: [
        { name: "BoatArea", type: "esriFieldTypeString" },
        { name: "wday", type: "esriFieldTypeInteger" },
        { name: "boats", type: "esriFieldTypeInteger" },
        { name: "wday_label", type: "esriFieldTypeString" }
    ],
    geometryType: "",
    features: features
};

// convert dict to FeatureSet
return FeatureSet(Text(outDict))

 

jcarlson_0-1637767463763.png

 

- Josh Carlson
Kendall County GIS
0 Kudos
erica_tefft
Regular Contributor

Hi @jcarlson 

This is awesome! It worked perfectly. I do have a follow-up question however relating to what you mentioned above about time stamps. 

Here is another data expression - this is the one that gives me a count of boat usage by day of week for the season. I am wondering if the fact that I did not strip away the time stamps here will result in an incorrect output. I'd be interested in your thoughts. This uses the same data as above. 

The reason I ask...I took the graph which is generated by the script below and filtered for just "BLA1". I took all of the values provided for one day of the week (Saturday) and used the average function in Excel to see if the average was the same as what was provided by the graph I created using the script above - it was not (27.3 vs ~18 in Excel). Theoretically, I could add up all of the Saturday counts for BLA1, average them in Excel and get the same value as what the above script provides. 

var portal = Portal("https://www.arcgis.com");
// Group the features by the BLA 

var BLA = FeatureSetByPortalItem(portal,"faadbf5e0ef44523a8eb0d37adab33aa",0,['BLA_Used', 'Survey_Date'],false);

var groupBLA = GroupBy(BLA,
  ["Survey_Date"],
  [
    { name: "BoatLaunchArea", expression: "BLA_Used", statistic: "MIN" },
    { name: "cntBoats", expression: "BLA_Used", statistic: "COUNT" },
    { name: "date", expression: "Survey_Date", statistic: "MIN" },
  ]
);

var combinedDict = {
  fields: [
    { name: "sdate", type: "esriFieldTypeDate" },
    { name: "dow", type: "esriFieldTypeInteger" },
    { name: "BLA", type: "esriFieldTypeString" },
    { name: "cntSurveyScans", type: "esriFieldTypeInteger" },
  ],
  geometryType: "",
  features: [],
};

var i = 0;

for (var m in groupBLA) {
  combinedDict.features[i] = {
    attributes: {
      BLA: m["BoatLaunchArea"],
      cntSurveyScans: (Floor(m["cntBoats"] / 2)),
      sdate: DateDiff(m["Survey_Date"], Date(1970, 0, 1), "MM/DD/yyyy"),
      dow: Weekday(m["Survey_Date"]),
    },
  };
  i++;
}

return FeatureSet(Text(combinedDict));

The chart is configured like this:

 

erica_tefft_0-1637772891836.png

Thank you for your help!

Erica
0 Kudos
jcarlson
MVP Honored Contributor

Can you try looking at other metrics as well, like the count, max, etc? Are those also inconsistent between Excel and the Dashboard?

- Josh Carlson
Kendall County GIS
0 Kudos
erica_tefft
Regular Contributor

Hi @jcarlson -

I looked at sum, min, max and those are also inconsistent. Here is how I am getting these inconsistent values... 

1. First, I updated my data expression from that last post on Wednesday to remove the Floor - this was good because I was eliminating many 0.5 values which should not happen. That data expression powers the "Private Launches per DOW" and "Rental Launches per DOW" charts which you can see at the Dashboard I have linked to below (these charts show all data, but can be filtered using the 'Select Boat Launch Area' option at top). 

2. Filter the chart that results from that data expression by day of week and take all values presented and enter into Excel. Calculate average, sum, max, min. 

3. Open serial chart config for the "average private launches per DOW" and change statistic to compare results for specific BLA and DOW to see how they compare. Here is one example:

For Saturday BLA1 - 

  • AVG - 42 (Excel) vs. 27 (serial chart)
  • SUM - 1,145 (Excel) vs. 654 (serial chart)
  • MAX - 88 (Excel) vs. 64 (serial chart)
  • MIN - 4 (Excel) vs. 2 (serial chart)

You can see a public version of my current Dashboard here - https://mass-eoeea.maps.arcgis.com/apps/dashboards/0773a2c3a27b4690b8632b082e33690c 

I am mystified by these differences. Also, I would expect that Saturday & Sunday would have a much higher average use per DOW as these are the two most popular boating days. 

I would be interested in your thoughts on this.

Thank you,

Erica
0 Kudos
erica_tefft
Regular Contributor

I was able to resolve this - there were two issues.

First, within my data expression to get the Average Private Launches per Day of Week, there were two things wrong. 

  1. The filter expression in line 4 was incorrect and I needed to update it to say "NOT Name like 'Rental%'" instead of "Not Name = 'Rental%'".
  2.  The count from line 16 is really the count of total survey scans - I need to drill this down to the number of boats. Since each boat has 2 surveys completed each day (for launch/retrieval), I needed to divide this value by 2 in line 31. 

 

var portal = Portal("https://www.arcgis.com");
// Group the features by the BLA 

var BLA = Filter(FeatureSetByPortalItem(portal,"###",0,['BLA_Used', 'Survey_Date', 'Name'],false), "NOT Name like 'Rental%'");

// group by boat area and date, return count of launches i.e., boat count
var groupBLA = GroupBy(
    BLA,
    [
        {name: "the_year", expression: "EXTRACT(YEAR FROM Survey_Date)"},
        {name: "the_month", expression: "EXTRACT(MONTH FROM Survey_Date)"},
        {name: "the_day", expression: "EXTRACT(DAY FROM Survey_Date)"},
        {name: "BLA", expression: "BLA_Used"}
    ],
    [
        { name: "survey_count", expression: "BLA_Used", statistic: "COUNT" }
    ]
);

// empty feature array 
var features = [];


// iterate over grouped featureset to populate array
for (var g in groupBLA){
    var wday = Weekday(Date(g['the_year'], g['the_month']-1, g['the_day']))
    var feat = {
        attributes: {
            BoatArea: g['BLA'],
            wday: wday,
            boats: g['survey_count']/2,
            wday_label: Decode(wday, 0, 'Sunday', 1, 'Monday', 2, 'Tuesday', 3, 'Wednesday', 4, 'Thursday', 5, 'Friday', 6, 'Saturday', '')
        }
    }
    Push(features, feat)
};

// pass feature array into dict
var outDict = {
    fields: [
        { name: "BoatArea", type: "esriFieldTypeString" },
        { name: "wday", type: "esriFieldTypeInteger" },
        { name: "boats", type: "esriFieldTypeInteger" },
        { name: "wday_label", type: "esriFieldTypeString" },
    ],
    geometryType: "",
    features: features
};

// convert dict to FeatureSet
return FeatureSet(Text(outDict))

 

 

Next, within the data expression to get the Count of Private Launches by Day of Week, I needed to correct the Filter expression in line 5 the same way I did above. I also commented out some redundant lines within the GroupBy statement on line 7. 

 

var portal = Portal("https://www.arcgis.com");
// Group the features by the BLA 

var BLA = Filter(FeatureSetByPortalItem(portal,"###",0,['BLA_Used', 'Survey_Date', 'Name'],false), "NOT Name like 'Rental%'");

var groupBLA = GroupBy(BLA,
  ["Survey_Date", "BLA_Used"],
  [
    //{ name: "BoatLaunchArea", expression: "BLA_Used", statistic: "MIN" },
    { name: "cntBoats", expression: "BLA_Used", statistic: "COUNT" },
    //{ name: "date", expression: "Survey_Date", statistic: "MIN" },
  ]
);

var combinedDict = {
  fields: [
    { name: "sdate", type: "esriFieldTypeDate" },
    { name: "dow", type: "esriFieldTypeInteger" },
    { name: "BLA", type: "esriFieldTypeString" },
    { name: "cntSurveyScans", type: "esriFieldTypeDouble" },
  ],
  geometryType: "",
  features: [],
};

var i = 0;

for (var m in groupBLA) {
  combinedDict.features[i] = {
    attributes: {
      BLA: m["BLA_Used"],
      cntSurveyScans: (m["cntBoats"] / 2),
      sdate: DateDiff(m["Survey_Date"], Date(1970, 0, 1), "MM/DD/yyyy"),
      dow: Weekday(m["Survey_Date"]),
    },
  };
  i++;
}

return FeatureSet(Text(combinedDict));

 

 

This resulted in consistent and verifiable results when comparing data between the data expression and Excel:

BLA 1 – Private Launches, Saturday

  • AVG serial chart = 18
  • AVG Excel = 17.88
Erica
0 Kudos