*# dat # Alternative to @1hidata1@2duplicates@3 - reports number of {it:unique} records, and can list info from duplicate records # help
*! 9jan2008

// dups: report how many distinct records occur once, twice, three times, ...
//
// Syntax:  dups [varlist] [if] [in] [, List(string)]
//
//   varlist  variables that define a record; all variables when omitted
//   list()   optional "varlist [if] [in] [, list_options]" handed to the
//            list command for the duplicated records. The token #C inside
//            list() stands for the number of copies of each record, so
//            list(id if #C>2) shows records occurring more than twice.
//            When no if qualifier is given, records with more than one
//            copy are listed.
//            NOTE: an if expression that itself contains a comma, such as
//            inlist(x,1,2), must be followed by an explicit comma, because
//            the presence of a comma is used to detect list options.
//
// Returns: r(min) and r(max), the smallest and largest number of copies.
// The data in memory are left untouched (preserve with automatic restore).
program dups, rclass
    version 8.2
    syntax [varlist(default=none)] [if] [in] , [ List(string) ]

    if "`varlist'" == "" {
        unab varlist : *
        local vartext "{txt} all variables"
    }
    else local vartext "{res} `varlist'"

    preserve
    // Compound quotes: the if qualifier may contain string literals.
    if `"`if'`in'"' != "" qui keep `if' `in'
    qui count
    if r(N) == 0 error 2000

    tempvar Ngroup freq unique c
    // Ngroup = copies of this record; c flags the first copy of each record
    bysort `varlist' : gen long `Ngroup' = _N
    by `varlist': gen `c' = _n == 1
    // freq = observations sharing this copy count; unique = distinct records
    bysort `Ngroup' : gen long `freq' = _N
    by `Ngroup': gen long `unique' = _N / `Ngroup'
    label var `Ngroup' "copies"
    label var `freq' "total"
    label var `unique' "unique"

    di _n "{p 0 4}{txt}Duplicates in terms of `vartext'{p_end}"
    tabdisp `Ngroup', cell(`unique' `freq')
    // Totals row: number of distinct records and number of observations
    sum `c', mean
    display "{res:{ralign 22:`r(sum)'}{ralign 12:`c(N)'}}"

    if `"`list'"' != "" {
        // Macro-level substitution: no expression-length truncation and
        // safe when list() contains double quotes.
        local list : subinstr local list "#C" "`Ngroup'", all
        // Append the comma that introduces options when the user gave none
        local nocomma : subinstr local list "," "", count(local ncomma)
        if `ncomma' == 0 local list `list',
        Sublist `Ngroup' `list' duplist(`varlist')
    }

    sum `Ngroup', mean
    return scalar min = r(min)
    return scalar max = r(max)
end

// Sublist: list the duplicated records on behalf of dups.
//
// The first variable in varlist is the copies-count variable created by
// dups; the remaining ones are the variables to show. duplist() holds the
// variables that define a record and is used as the leading sort key.
// Any other options are passed through to the list command.
program Sublist
    syntax varlist [if] [in], duplist(varlist) *
    gettoken Ngroup varlist : varlist
    // Default selection: records that occur more than once
    if `"`if'"' == "" local if if `Ngroup'>1
    // Never show the internal temporary variables, whose names start with __
    unab temps : __*
    local varlist : list varlist - temps
    sort `duplist' `varlist'
    // Without this fallback an empty varlist would make list print every
    // variable, temporary ones included.
    if "`varlist'" == "" {
        unab varlist : _all
        local varlist : list varlist - temps
    }
    // noobs: observation numbers refer to the sorted, possibly subsetted
    // working copy of the data and would be misleading.
    list `varlist' `if' `in', `options' noobs
end