People Analytics

An exploratory analysis for a HR dataset

Read Data

data = read_csv('https://raw.githubusercontent.com/ariannalangwang/Analyze-HR-Data/master/HRDataProject/HRDataset_v13.csv') %>% 
   clean_names()
data
## # A tibble: 310 × 35
##    employee_name      emp_id married_id marital_status_… gender_id emp_status_id
##    <chr>               <dbl>      <dbl>            <dbl>     <dbl>         <dbl>
##  1 Brown, Mia         1.10e9          1                1         0             1
##  2 LaRotonda, William 1.11e9          0                2         1             1
##  3 Steans, Tyrone     1.30e9          0                0         1             1
##  4 Howard, Estelle    1.21e9          1                1         0             1
##  5 Singh, Nan         1.31e9          0                0         0             1
##  6 Smith, Leigh Ann   7.11e8          1                1         0             5
##  7 Bunbury, Jessica   1.50e9          1                1         0             5
##  8 Carter, Michelle   1.40e9          0                0         0             1
##  9 Dietrich, Jenna    1.41e9          0                0         0             1
## 10 Digitale, Alfred   1.31e9          1                1         1             1
## # … with 300 more rows, and 29 more variables: dept_id <dbl>,
## #   perf_score_id <dbl>, from_diversity_job_fair_id <dbl>, pay_rate <dbl>,
## #   termd <dbl>, position_id <dbl>, position <chr>, state <chr>, zip <chr>,
## #   dob <chr>, sex <chr>, marital_desc <chr>, citizen_desc <chr>,
## #   hispanic_latino <chr>, race_desc <chr>, dateof_hire <chr>,
## #   dateof_termination <chr>, term_reason <chr>, employment_status <chr>,
## #   department <chr>, manager_name <chr>, manager_id <dbl>, …

Explore Data

skimr::skim(data) #summary
Table 1: Data summary
Name data
Number of rows 310
Number of columns 35
_______________________
Column type frequency:
character 19
numeric 16
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
employee_name 0 1.00 8 23 0 310 0
position 0 1.00 3 28 0 31 0
state 0 1.00 2 2 0 28 0
zip 0 1.00 5 5 0 158 0
dob 0 1.00 8 8 0 306 0
sex 0 1.00 1 1 0 2 0
marital_desc 0 1.00 6 9 0 5 0
citizen_desc 0 1.00 10 19 0 3 0
hispanic_latino 0 1.00 2 3 0 4 0
race_desc 0 1.00 5 32 0 6 0
dateof_hire 0 1.00 8 10 0 99 0
dateof_termination 207 0.33 8 8 0 93 0
term_reason 1 1.00 5 32 0 17 0
employment_status 0 1.00 6 22 0 5 0
department 0 1.00 5 20 0 6 0
manager_name 0 1.00 8 18 0 21 0
recruitment_source 0 1.00 5 38 0 23 0
performance_score 0 1.00 3 17 0 4 0
last_performance_review_date 103 0.67 8 9 0 42 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
emp_id 0 1.00 1.199745e+09 1.8296e+08 602000312.00 1.101024e+09 1.203032e+09 1.378814e+09 1988299991 ▁▇▇▁▁
married_id 0 1.00 4.000000e-01 4.9000e-01 0.00 0.000000e+00 0.000000e+00 1.000000e+00 1 ▇▁▁▁▅
marital_status_id 0 1.00 8.100000e-01 9.4000e-01 0.00 0.000000e+00 1.000000e+00 1.000000e+00 4 ▇▇▂▁▁
gender_id 0 1.00 4.300000e-01 5.0000e-01 0.00 0.000000e+00 0.000000e+00 1.000000e+00 1 ▇▁▁▁▆
emp_status_id 0 1.00 2.400000e+00 1.8000e+00 1.00 1.000000e+00 1.000000e+00 5.000000e+00 5 ▇▁▁▁▃
dept_id 0 1.00 4.610000e+00 1.0800e+00 1.00 5.000000e+00 5.000000e+00 5.000000e+00 6 ▁▂▁▇▁
perf_score_id 0 1.00 2.980000e+00 5.8000e-01 1.00 3.000000e+00 3.000000e+00 3.000000e+00 4 ▁▁▁▇▁
from_diversity_job_fair_id 0 1.00 9.000000e-02 2.9000e-01 0.00 0.000000e+00 0.000000e+00 0.000000e+00 1 ▇▁▁▁▁
pay_rate 0 1.00 3.128000e+01 1.5380e+01 14.00 2.000000e+01 2.400000e+01 4.531000e+01 80 ▇▂▂▂▁
termd 0 1.00 3.300000e-01 4.7000e-01 0.00 0.000000e+00 0.000000e+00 1.000000e+00 1 ▇▁▁▁▃
position_id 0 1.00 1.684000e+01 6.2300e+00 1.00 1.800000e+01 1.900000e+01 2.000000e+01 30 ▂▁▁▇▁
manager_id 8 0.97 1.458000e+01 8.0900e+00 1.00 1.000000e+01 1.500000e+01 1.900000e+01 39 ▅▇▇▁▁
engagement_survey 0 1.00 3.330000e+00 1.2900e+00 1.03 2.080000e+00 3.470000e+00 4.520000e+00 5 ▅▅▃▅▇
emp_satisfaction 0 1.00 3.890000e+00 9.1000e-01 1.00 3.000000e+00 4.000000e+00 5.000000e+00 5 ▁▁▇▇▇
special_projects_count 0 1.00 1.210000e+00 2.3500e+00 0.00 0.000000e+00 0.000000e+00 0.000000e+00 8 ▇▁▁▂▁
days_late_last30 103 0.67 0.000000e+00 0.0000e+00 0.00 0.000000e+00 0.000000e+00 0.000000e+00 0 ▁▁▇▁▁
DataExplorer::plot_bar(data)

Pivot Table

rpivotTable::rpivotTable(data,
    height = 1500)

  • employee_name
  • emp_id
  • married_id
  • marital_status_id
  • gender_id
  • emp_status_id
  • dept_id
  • perf_score_id
  • from_diversity_job_fair_id
  • pay_rate
  • termd
  • position_id
  • position
  • state
  • zip
  • dob
  • sex
  • marital_desc
  • citizen_desc
  • hispanic_latino
  • race_desc
  • dateof_hire
  • dateof_termination
  • term_reason
  • employment_status
  • department
  • manager_name
  • manager_id
  • recruitment_source
  • performance_score
  • engagement_survey
  • emp_satisfaction
  • special_projects_count
  • last_performance_review_date
  • days_late_last30
  • Totals310

    Check for duplicates

    (nrow(get_dupes(data)) == 0) #duplicates?
    ## [1] TRUE

    Check reduntant information

    data %>% distinct(hispanic_latino)
    ## # A tibble: 4 × 1
    ##   hispanic_latino
    ##   <chr>          
    ## 1 No             
    ## 2 Yes            
    ## 3 yes            
    ## 4 no
    data <- data %>% mutate(
        hispanic_latino = as_factor(hispanic_latino),
        hispanic_latino = recode(data$hispanic_latino,
        yes = 'Yes', no = 'No'))

    Missing data

    #missing data?
    data %>% miss_var_summary()
    ## # A tibble: 35 × 3
    ##    variable                     n_miss pct_miss
    ##    <chr>                         <int>    <dbl>
    ##  1 dateof_termination              207   66.8  
    ##  2 last_performance_review_date    103   33.2  
    ##  3 days_late_last30                103   33.2  
    ##  4 manager_id                        8    2.58 
    ##  5 term_reason                       1    0.323
    ##  6 employee_name                     0    0    
    ##  7 emp_id                            0    0    
    ##  8 married_id                        0    0    
    ##  9 marital_status_id                 0    0    
    ## 10 gender_id                         0    0    
    ## # … with 25 more rows
    data %>% gg_miss_upset()

    # plot missing data (using raw data)
    DataExplorer::plot_missing(
      title   = "% of Missing Data",
      data    = data,
      ggtheme = tidyquant::theme_tq(), 
      missing_only = TRUE) 

    #Term Reason column
    data %>% select(dateof_termination, term_reason) %>% count(term_reason,sort=T) 
    ## # A tibble: 18 × 2
    ##    term_reason                          n
    ##    <chr>                            <int>
    ##  1 N/A - still employed               196
    ##  2 Another position                    20
    ##  3 unhappy                             14
    ##  4 more money                          11
    ##  5 N/A - Has not started yet           11
    ##  6 career change                        9
    ##  7 hours                                9
    ##  8 attendance                           7
    ##  9 relocation out of area               5
    ## 10 return to school                     5
    ## 11 military                             4
    ## 12 performance                          4
    ## 13 retiring                             4
    ## 14 maternity leave - did not return     3
    ## 15 medical issues                       3
    ## 16 no-call, no-show                     3
    ## 17 gross misconduct                     1
    ## 18 <NA>                                 1
    #it seems there is a missing value here, we drop the row
    data <- data %>% drop_na(term_reason)
    
    #Manager ID
    #Count how many employees has each manager
    data %>% select(manager_name, manager_id) %>% count(manager_name,sort=T) 
    ## # A tibble: 21 × 2
    ##    manager_name       n
    ##    <chr>          <int>
    ##  1 Elijiah Gray      22
    ##  2 Kelley Spirea     22
    ##  3 Kissy Sullivan    22
    ##  4 Michael Albert    22
    ##  5 Amy Dunn          21
    ##  6 Brannon Miller    21
    ##  7 David Stanley     21
    ##  8 Ketsia Liebig     21
    ##  9 Webster Butler    21
    ## 10 Janet King        19
    ## # … with 11 more rows
    #Which manager has not id values?
    data %>% select(manager_name, manager_id) %>% filter(is.na(manager_id))
    ## # A tibble: 8 × 2
    ##   manager_name   manager_id
    ##   <chr>               <dbl>
    ## 1 Webster Butler         NA
    ## 2 Webster Butler         NA
    ## 3 Webster Butler         NA
    ## 4 Webster Butler         NA
    ## 5 Webster Butler         NA
    ## 6 Webster Butler         NA
    ## 7 Webster Butler         NA
    ## 8 Webster Butler         NA
    # Webster Butler has not id, let'see if it applies for other rows.
    data %>% select(manager_name, manager_id) %>% filter(manager_name == 'Webster Butler')
    ## # A tibble: 21 × 2
    ##    manager_name   manager_id
    ##    <chr>               <dbl>
    ##  1 Webster Butler         39
    ##  2 Webster Butler         NA
    ##  3 Webster Butler         NA
    ##  4 Webster Butler         39
    ##  5 Webster Butler         39
    ##  6 Webster Butler         39
    ##  7 Webster Butler         39
    ##  8 Webster Butler         39
    ##  9 Webster Butler         39
    ## 10 Webster Butler         39
    ## # … with 11 more rows
    # As expected just few rows did not reported his id, so we impute with his reported id of 39
    #fill with 39 where is.na == TRUE
    data$manager_id[is.na(data$manager_id)] <- 39
    
    data %>% select(dateof_termination,
                                    last_performance_review_date,
                                    days_late_last30)
    ## # A tibble: 309 × 3
    ##    dateof_termination last_performance_review_date days_late_last30
    ##    <chr>              <chr>                                   <dbl>
    ##  1 <NA>               1/15/2019                                   0
    ##  2 <NA>               1/17/2019                                   0
    ##  3 <NA>               1/18/2019                                   0
    ##  4 <NA>               1/15/2019                                   0
    ##  5 09/25/13           <NA>                                       NA
    ##  6 08/02/14           <NA>                                       NA
    ##  7 <NA>               1/21/2019                                   0
    ##  8 <NA>               1/29/2019                                   0
    ##  9 <NA>               1/30/2019                                   0
    ## 10 <NA>               1/17/2019                                   0
    ## # … with 299 more rows
    unique(data$days_late_last30)
    ## [1]  0 NA
    data %>% select(-days_late_last30)
    ## # A tibble: 309 × 34
    ##    employee_name      emp_id married_id marital_status_… gender_id emp_status_id
    ##    <chr>               <dbl>      <dbl>            <dbl>     <dbl>         <dbl>
    ##  1 Brown, Mia         1.10e9          1                1         0             1
    ##  2 LaRotonda, William 1.11e9          0                2         1             1
    ##  3 Steans, Tyrone     1.30e9          0                0         1             1
    ##  4 Singh, Nan         1.31e9          0                0         0             1
    ##  5 Smith, Leigh Ann   7.11e8          1                1         0             5
    ##  6 Bunbury, Jessica   1.50e9          1                1         0             5
    ##  7 Carter, Michelle   1.40e9          0                0         0             1
    ##  8 Dietrich, Jenna    1.41e9          0                0         0             1
    ##  9 Digitale, Alfred   1.31e9          1                1         1             1
    ## 10 Friedman, Gerry    1.20e9          0                0         1             1
    ## # … with 299 more rows, and 28 more variables: dept_id <dbl>,
    ## #   perf_score_id <dbl>, from_diversity_job_fair_id <dbl>, pay_rate <dbl>,
    ## #   termd <dbl>, position_id <dbl>, position <chr>, state <chr>, zip <chr>,
    ## #   dob <chr>, sex <chr>, marital_desc <chr>, citizen_desc <chr>,
    ## #   hispanic_latino <chr>, race_desc <chr>, dateof_hire <chr>,
    ## #   dateof_termination <chr>, term_reason <chr>, employment_status <chr>,
    ## #   department <chr>, manager_name <chr>, manager_id <dbl>, …

    It seems that no employees came late in the last 30 days as the column days_late_last30 presents only 0 values and NA for the rows wherein the employee has already left the company. So I will drop the column.

    unique(data[,c("dateof_termination","term_reason")])
    ## # A tibble: 103 × 2
    ##    dateof_termination term_reason              
    ##    <chr>              <chr>                    
    ##  1 <NA>               N/A - still employed     
    ##  2 09/25/13           career change            
    ##  3 08/02/14           Another position         
    ##  4 09/05/15           attendance               
    ##  5 10/31/14           relocation out of area   
    ##  6 <NA>               N/A - Has not started yet
    ##  7 09/12/15           performance              
    ##  8 03/15/15           no-call, no-show         
    ##  9 02/22/15           no-call, no-show         
    ## 10 05/01/16           performance              
    ## # … with 93 more rows
    employees <- data %>% filter(
            is.na(dateof_termination)) %>% 
        select(-dateof_termination)
    
    term_employees <- data %>% filter(
            is.na(dateof_termination) == F)

    Data Visualization

    Gender Gap

    employees %>% select(sex, pay_rate, 
                        dateof_hire, position) %>% mutate(
            position = as.factor(position),
            position = fct_lump(position, 5),
            dateof_hire=anytime::anydate(dateof_hire),
            sex = as_factor(sex),
            year = year(dateof_hire)) %>% 
    group_by(year,sex, position) %>% summarise(
        avg = mean(pay_rate)) %>% 
        ggplot(aes(x=year, y=avg, color =sex)) + geom_line() + facet_wrap(~position) + ggthemes::theme_gdocs() + theme(
            axis.text.x = element_text(angle = 35)
        )

    Barplot

    data %>%
      filter(term_reason=="N/A - still employed") %>%
      group_by(sex,position) %>%
      summarise(Total=n()) %>% 
      hchart(type="column",
            hcaes(x=position,y=Total,group=sex)) %>%
          hc_add_theme(hc_theme_economist())
    Created with Highcharts 8.1.2positionTotalFMAccountant IAdministrative AssistantArea Sales ManagerBI DeveloperCIOData AnalystData ArchitectDatabase AdministratorDirector of OperationsDirector of SalesIT SupportNetwork EngineerPresident & CEOProduction ManagerProduction Technician IProduction Technician IISales ManagerSenior BI DeveloperSoftware EngineerSr. AccountantSr. Network EngineerBI DirectorIT DirectorIT Manager - DBIT Manager - InfraIT Manager - SupportShared Services ManagerSoftware Engineering Manager0102030405060
    data %>% group_by(position) %>% summarise(pay_rate = round(mean(pay_rate),2)) %>% 
        hchart("column", hcaes(x = position, y = pay_rate)) %>%
        hc_title(text = "Company Pay Rates") %>%
          hc_subtitle(text = "Average pay rate by job position") %>%
          hc_add_theme(hc_theme_economist())
    Created with Highcharts 8.1.2positionpay_rateCompany Pay RatesAverage pay rate by job positionAccountant IAdministrative AssistantArea Sales ManagerBI DeveloperBI DirectorCIOData AnalystData ArchitectDatabase AdministratorDirector of OperationsDirector of SalesEnterprise ArchitectIT DirectorIT Manager - DBIT Manager - InfraIT Manager - SupportIT SupportNetwork EngineerPresident & CEOPrincipal Data ArchitectProduction ManagerProduction Technician IProduction Technician IISales ManagerSenior BI DeveloperShared Services ManagerSoftware EngineerSoftware Engineering ManagerSr. AccountantSr. DBASr. Network Engineer020406080100

    Boxplot

    highchart() %>%
      hc_xAxis(type ="category")%>%
      hc_add_series_list(data_to_boxplot(data,
                                    pay_rate, race_desc,
                                    group_var = sex,
                                    add_outliers = F)) %>% 
      hc_legend(enabled= F) %>%
          hc_add_theme(hc_theme_economist())
    Created with Highcharts 8.1.2FM10203040506070

    Pie Charts

    race_pie <- data %>% count(race_desc) %>% 
        hchart('pie', hcaes(race_desc,n)) %>% hc_add_theme(hc_theme_economist()) 
    
    gender_pie <- data %>% count(sex) %>% 
        hchart('pie', hcaes(sex,n)) %>% 
        hc_add_theme(hc_theme_economist())
    
    department_pie <- data %>% count(department) %>% 
        hchart('pie', hcaes(department, n)) %>% 
        hc_add_theme(hc_theme_economist())
    
    managers_pie <- data %>% count(manager_name) %>% 
        hchart('pie', hcaes(manager_name, n)) %>% 
        hc_add_theme(hc_theme_economist())
    
    hw_grid(race_pie, gender_pie, 
                    department_pie, managers_pie,
                    ncol = 2, rowheight = 350)
    Created with Highcharts 8.1.2American Indian or Alaska NativeAmerican Indian or Alaska NativeAsianAsianBlack or African AmericanBlack or African AmericanHispanicHispanicTwo or more racesTwo or more racesWhiteWhite
    Created with Highcharts 8.1.2FFMM
    Created with Highcharts 8.1.2Admin OfficesAdmin OfficesExecutive OfficeExecutive OfficeIT/ISIT/ISProductionProductionSalesSalesSoftware EngineeringSoftware Engineering
    Created with Highcharts 8.1.2Alex SweetwaterAlex SweetwaterAmy DunnAmy DunnBoard of DirectorsBoard of DirectorsBrandon R. LeBlancBrandon R. LeBlancBrannon MillerBrannon MillerBrian ChampaigneBrian ChampaigneDavid StanleyDavid StanleyDebra HoulihanDebra HoulihanElijiah GrayElijiah GrayEric DougallEric DougallJanet KingJanet KingJennifer ZamoraJennifer ZamoraJohn SmithJohn SmithKelley SpireaKelley SpireaKetsia LiebigKetsia LiebigKissy SullivanKissy SullivanLynn DaneaultLynn DaneaultMichael AlbertMichael AlbertPeter MonroePeter MonroeSimon RoupSimon RoupWebster ButlerWebster Butler

    Waffle Plot

    pacman::p_load(waffle)
    
    View(employees %>% select(marital_desc,performance_score) %>% 
                group_by(marital_desc,performance_score) %>% 
        summarise(n= n()) %>% mutate(freq = round(n/sum(n),2)))
      
    waffle(c('Exceeds = 15%' = 15,
                     'Fully Meets = 78%' =78,
                     'Needs Improvement = 4%' = 4,
                     'PIP = 3%' = 3), 
                 rows = 8, size = 0.2,
                 title = 'Performance Score For Single Employees') + 
        theme(title = element_text(
            family = 'Arial',
            size = 1, face = 'bold'
        ))

    Ridgeline Plots

    library(ggridges)
    ggplot(data, aes(
        y=as_factor(race_desc),
        x=pay_rate,
        fill=sex
    )) + geom_density_ridges(alpha=0.7) + theme_minimal()

    Treemap

    data$manager_name <- as_factor(data$manager_name)
    
    data %>% mutate(
        manager_name = fct_lump(manager_name,15)) %>%
        group_by(manager_name) %>% count() %>% 
    treemap::treemap(
        index = 'manager_name',
        vSize = 'n',
        title = "Treemap: Managers names by number of employees")

    Chicklet chart

    term_employees %>% head() %>% 
        miss_var_summary() 
    ## # A tibble: 35 × 3
    ##    variable                     n_miss pct_miss
    ##    <chr>                         <int>    <dbl>
    ##  1 last_performance_review_date      6      100
    ##  2 days_late_last30                  6      100
    ##  3 employee_name                     0        0
    ##  4 emp_id                            0        0
    ##  5 married_id                        0        0
    ##  6 marital_status_id                 0        0
    ##  7 gender_id                         0        0
    ##  8 emp_status_id                     0        0
    ##  9 dept_id                           0        0
    ## 10 perf_score_id                     0        0
    ## # … with 25 more rows
    term_employees %<>% select(-last_performance_review_date,
                                                         -days_late_last30)
    
    term_employees %>% select(dateof_hire, dateof_termination)
    ## # A tibble: 102 × 2
    ##    dateof_hire dateof_termination
    ##    <chr>       <chr>             
    ##  1 9/26/2011   09/25/13          
    ##  2 8/15/2011   08/02/14          
    ##  3 7/7/2014    09/05/15          
    ##  4 3/7/2011    10/31/14          
    ##  5 7/7/2014    09/12/15          
    ##  6 2/16/2015   03/15/15          
    ##  7 2/16/2015   02/22/15          
    ##  8 12/1/2014   05/01/16          
    ##  9 1/5/2015    10/31/15          
    ## 10 1/9/2012    11/04/15          
    ## # … with 92 more rows
    library(lubridate)
    term_employees %<>% mutate(
        dateof_hire=anytime::anydate(dateof_hire),
        dateof_termination=as.Date(dateof_termination,
                                                             format("%m/%d/%y")),
        worktime = difftime(dateof_termination, dateof_hire)
    )
    
    
    term_employees %>% select(position, worktime) %>% 
        mutate(position=as.factor(term_employees$position),
                     worktime = as.integer(worktime)) %>% 
        group_by(position) %>% summarise(
            avg = round(mean(worktime),0)) %>% 
        arrange(desc(avg)) %>% 
        ggplot(aes(x=reorder(position,avg), y=avg)) +
        ggchicklet::geom_chicklet(aes(fill=avg),
                    width = 0.8, radius = grid::unit(5, "pt")) + coord_flip() + scale_fill_gradient(low = '#4B3F72', 
                                                 high = '#7C9EB2') +    
        coord_flip() + 
        scale_y_discrete(expand = c(0, 0)) +
        labs(title = "Number of days before quitting job",
                 subtitle = "By position",
                 caption = "Data: Kaggle",
                 fill = "Days") +
        theme(
            aspect.ratio=4/3,
            plot.title = element_text(face = "bold",
                                                                size = 13,
                                                                family = "Times",
                                                                colour = "black"),
            plot.subtitle = element_text(size = 11.7,
                                                                     family = "Times",
                                                                     colour = "grey45"),
            plot.caption = element_text(size = 10,
                                                                    family = "Times",
                                                                    colour = "grey45"),
            axis.text.y = element_text(vjust = 0.5,
                                                                 family  = "Times"),
            axis.title.x = element_blank(),
            axis.title.y = element_blank(),
            axis.ticks = element_blank(),
            axis.line = element_blank(),
            panel.grid.major.y = element_blank(),
            panel.background = element_blank(),
            panel.grid = element_blank(),
            panel.border = element_blank(),
            legend.justification= c(1,0),
            legend.key.size = unit(15, "pt"),
            legend.text = element_text(size = 10,
                                                                 family = "Times"),
            legend.position = c(1,0))

    Time series of people quitting

    term_employees %>% select(dateof_hire,
        dateof_termination, worktime, 
        employee_name,term_reason) %>% 
        arrange(worktime) %>% filter( 
        worktime < quantile(worktime, 0.05) |
        worktime > quantile(worktime, 0.95)) %>% 
        mutate(term_reason = str_to_title(term_reason))
    ## # A tibble: 12 × 5
    ##    dateof_hire dateof_termination worktime  employee_name      term_reason      
    ##    <date>      <date>             <drtn>    <chr>              <chr>            
    ##  1 2012-09-24  2012-09-26            2 days MacLennan, Samuel  Hours            
    ##  2 2011-01-10  2011-01-12            2 days Baczenski, Rachael Another Position 
    ##  3 2015-02-16  2015-02-22            6 days Hernandez, Daniff  No-Call, No-Show 
    ##  4 2014-02-17  2014-02-25            8 days Evensen, April     No-Call, No-Show 
    ##  5 2011-11-07  2011-11-15            8 days Gerke, Melisa      Hours            
    ##  6 2011-05-16  2011-06-04           19 days Power, Morissa     Another Position 
    ##  7 2011-01-10  2016-01-26         1842 days Robinson, Alain    Attendance       
    ##  8 2011-01-10  2016-04-01         1908 days Jung, Judy         Unhappy          
    ##  9 2011-01-10  2016-05-17         1954 days Robinson, Cherly   Attendance       
    ## 10 2009-10-26  2015-04-08         1990 days Sloan, Constance   Maternity Leave …
    ## 11 2010-10-25  2016-05-18         2032 days Peterson, Ebonee   Another Position 
    ## 12 2008-09-02  2015-09-29         2583 days Ybarra, Catherine  Another Position

    Network Visualization

    library(igraph)
    library(networkD3)
    employees %>% select(manager_name, employee_name) %>% head(30) %>% simpleNetwork(
        fontSize = 13, charge = -15,
        linkDistance = 65, zoom = T,
        nodeColour = 'red',
    )
    Brandon R. LeBlancBrian ChampaigneBrown, MiaBuck, EdwardCarter, MichelleCosta, LatiaDietrich, JennaDigitale, AlfredFraval, MarukFriedman, GerryGivens, MyriamGonzales, RicardoJeremy PraterJohn SmithKhemmich, BartholemewLaRotonda, WilliamLeruth, GiovanniLynn DaneaultMartins, JosephMcKinzie, JacMullaney, HowardNguyen, DheepaOnque, JasmineOzark, TravisPotts, XanaRachael, MaggieRiordan, MichaelSingh, NanSmith, JasonSteans, TyroneStrong, CaitrinTerry, SharleneValentin,JackieVillanueva, Noah