% Publication list: comparative genomics and visualization papers.
% Conventions: lowercase entry types and field names, brace-delimited values,
% one field per line, bare month macros, bare DOIs next to the original URLs.
% Citation keys are kept exactly as they were so existing cite commands still resolve.

% Corporate author: the extra braces keep the consortium name as one unit
% instead of letting it be split into first and last names.
% The issue number is the one embedded in the URL (n6982).
@article{RGSPC04rat,
  author    = {{Rat Genome Sequencing Project Consortium}},
  title     = {Genome sequence of the {Brown Norway} rat yields insights into mammalian evolution},
  journal   = {Nature},
  year      = 2004,
  volume    = 428,
  number    = 6982,
  pages     = {493--521},
  publisher = {Nature Publishing Group},
  url       = {http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v428/n6982/full/nature02426_fs.html},
  abstract  = {The laboratory rat (Rattus norvegicus) is an indispensable tool in experimental medicine and drug development, having made inestimable contributions to human health. We report here the genome sequence of the Brown Norway (BN) rat strain. The sequence represents a high-quality 'draft' covering over 90% of the genome. The BN rat sequence is the third complete mammalian genome to be deciphered, and three-way comparisons with the human and mouse genomes resolve details of mammalian evolution. This first comprehensive analysis includes genes and proteins and their relation to human disease, repeated sequences, comparative genome-wide studies of mammalian orthologous chromosomal regions and rearrangement breakpoints, reconstruction of ancestral karyotypes and the events leading to existing species, rates of variation, and lineage-specific and lineage-independent evolutionary events such as expansion of gene families, orthology relations and protein evolution.},
}

@article{yang04patterns,
  author    = {Yang, Shan and Smit, Arian F. and Schwartz, Scott and Chiaromonte, Francesca and Roskin, Krishna M. and Haussler, David and Miller, Webb and Hardison, Ross C.},
  title     = {Patterns of Insertions and Their Covariation With Substitutions in the Rat, Mouse, and Human Genomes},
  journal   = {Genome Research},
  year      = 2004,
  volume    = 14,
  number    = 4,
  pages     = {517--527},
  publisher = {Cold Spring Harbor Laboratory Press},
  url       = {http://www.genome.org/cgi/content/abstract/14/4/517},
  abstract  = {The rates at which human genomic DNA changes by neutral substitution and insertion of certain families of transposable elements covary in large, megabase-sized segments. We used the rat, mouse, and human genomic DNA sequences to examine these processes in more detail in comparisons over both shorter (rat-mouse) and longer (rodent-primate) times, and demonstrated the generality of the covariation. Different families of transposable elements show distinctive insertion preferences and patterns of variation with substitution rates. SINEs are more abundant in GC-rich DNA, but the regional GC preference for insertion (monitored in young SINEs) differs between rodents and humans. In contrast, insertions in the rodent genomes are predominantly LINEs, which prefer to insert into AT-rich DNA in all three mammals. The insertion frequency of repeats other than SINEs correlates strongly positively with the frequency of substitutions in all species. However, correlations with SINEs show the opposite effects. The correlations are explained only in part by the GC content, indicating that other factors also contribute to the inherent tendency of DNA segments to change over evolutionary time.},
}

@article{JensenSeaman04recombination,
  author    = {Jensen-Seaman, Michael I. and Furey, Terrence S. and Payseur, Bret A. and Lu, Yontao and Roskin, Krishna M. and Chen, Chin-Fu and Thomas, Michael A. and Haussler, David and Jacob, Howard J.},
  title     = {Comparative Recombination Rates in the Rat, Mouse, and Human Genomes},
  journal   = {Genome Research},
  year      = 2004,
  volume    = 14,
  number    = 4,
  pages     = {528--538},
  publisher = {Cold Spring Harbor Laboratory Press},
  url       = {http://www.genome.org/cgi/content/abstract/14/4/528},
  abstract  = {Levels of recombination vary among species, among chromosomes within species, and among regions within chromosomes in mammals. This heterogeneity may affect levels of diversity, efficiency of selection, and genome composition, as well as have practical consequences for the genetic mapping of traits. We compared the genetic maps to the genome sequence assemblies of rat, mouse, and human to estimate local recombination rates across these genomes. Humans have greater overall levels of recombination, as well as greater variance. In rat and mouse, the size of the chromosome and proximity to telomere have less effect on local recombination rate than in human. At the chromosome level, rat and mouse X chromosomes have the lowest recombination rates, whereas human chromosome X does not show the same pattern. In all species, local recombination rate is significantly correlated with several sequence variables, including GC%, CpG density, repetitive elements, and the neutral mutation rate, with some pronounced differences between species. Recombination rate in one species is not strongly correlated with the rate in another, when comparing homologous syntenic blocks of the genome. This comparative approach provides additional insight into the causes and consequences of genomic heterogeneity in recombination.},
}

% The abstract contains literal double quotes, so it must stay brace-delimited.
@article{blanchette04tba,
  author    = {Blanchette, Mathieu and Kent, W. James and Riemer, Cathy and Elnitski, Laura and Smit, Arian F. A. and Roskin, Krishna M. and Baertsch, Robert and Rosenbloom, Kate and Clawson, Hiram and Green, Eric D. and Haussler, David and Miller, Webb},
  title     = {Aligning Multiple Genomic Sequences With the {Threaded Blockset Aligner}},
  journal   = {Genome Research},
  year      = 2004,
  volume    = 14,
  number    = 4,
  pages     = {708--715},
  publisher = {Cold Spring Harbor Laboratory Press},
  url       = {http://www.genome.org/cgi/content/abstract/14/4/708},
  abstract  = {We define a "threaded blockset," which is a novel generalization of the classic notion of a multiple alignment. A new computer program called TBA (for "threaded blockset aligner") builds a threaded blockset under the assumption that all matching segments occur in the same order and orientation in the given sequences; inversions and duplications are not addressed. TBA is designed to be appropriate for aligning many, but by no means all, megabase-sized regions of multiple mammalian genomes. The output of TBA can be projected onto any genome chosen as a reference, thus guaranteeing that different projections present consistent predictions of which genomic positions are orthologous. This capability is illustrated using a new visualization tool to view TBA-generated alignments of vertebrate Hox clusters from both the mammalian and fish perspectives. Experimental evaluation of alignment quality, using a program that simulates evolutionary change in genomic sequences, indicates that TBA is more accurate than earlier programs. To perform the dynamic-programming alignment step, TBA runs a stand-alone program called MULTIZ, which can be used to align highly rearranged or incompletely sequenced genomes. We describe our use of MULTIZ to produce the whole-genome multiple alignments at the Santa Cruz Genome Browser.},
}

% The doi field is the identifier carried by the resolver URL; the URL is kept as given.
@inproceedings{roskin03scoring,
  author    = {Krishna M. Roskin and Mark Diekhans and David Haussler},
  title     = {Scoring Two-Species Local Alignments to Try to Statistically Separate Neutrally Evolving from Selected {DNA} Segments},
  booktitle = {Proceedings of the seventh annual international conference on Computational molecular biology},
  year      = 2003,
  pages     = {257--266},
  location  = {Berlin, Germany},
  month     = apr,
  publisher = {ACM Press},
  isbn      = {1-58113-635-8},
  doi       = {10.1145/640075.640109},
  url       = {http://doi.acm.org/10.1145/640075.640109},
  abstract  = {We construct several score functions for use in locating unusually conserved regions in a genome-wide search of aligned DNA from two species. We test these functions on regions of the human genome aligned to the mouse genome. These score functions are derived from properties of neutrally evolving sites on the mouse and human genome, and can be adjusted to the local background rate of conservation. The aim of these functions is to try to identify regions of the human genome that are conserved by evolutionary selection, because they have an important function, rather than by chance. We use them to get a very rough estimate of the amount of DNA in the human genome that is under selection.},
  keywords  = {neutral evolution, evolutionary models, ancestral repeat, comparative genomics, mouse-human alignments, dinucleotide dependence, mutual information, CpG effect, context-dependent base substitutions, fraction of human genome under selection},
}

% Volume 31, number 1 is the January 2003 database issue, hence month = jan.
% NOTE(review): month set from the citation of record, not from this file; confirm if in doubt.
@article{karolchik03database,
  author    = {D. Karolchik and R. Baertsch and M. Diekhans and T. S. Furey and A. Hinrichs and Y. T. Lu and K. M. Roskin and M. Schwartz and C. W. Sugnet and D. J. Thomas and R. J. Weber and D. Haussler and W. J. Kent},
  title     = {The {UCSC} Genome Browser Database},
  journal   = {Nucleic Acids Research},
  year      = 2003,
  volume    = 31,
  number    = 1,
  pages     = {51--54},
  month     = jan,
  publisher = {Oxford University Press},
  url       = {http://nar.oupjournals.org/cgi/content/abstract/31/1/51},
  abstract  = {The University of California Santa Cruz (UCSC) Genome Browser Database is an up to date source for genome sequence data integrated with a large collection of related annotations. The database is optimized to support fast interactive performance with the web-based UCSC Genome Browser, a tool built on top of the database for rapid visualization and querying of the data at many levels. The annotations for a given genome are displayed in the browser as a series of tracks aligned with the genomic sequence. Sequence data and annotations may also be viewed in a text-based tabular format or downloaded as tab-delimited flat files. The Genome Browser Database, browsing tools and downloadable data files can all be found on the UCSC Genome Bioinformatics website (http://genome.ucsc.edu), which also contains links to documentation and related technical information.},
}

% Corporate author: braced so it is treated as a single name.
% The doi field is the identifier carried by the doifinder URL.
@article{MGSC02mouse,
  author    = {{Mouse Genome Sequencing Consortium}},
  title     = {Initial sequencing and comparative analysis of the mouse genome},
  journal   = {Nature},
  year      = 2002,
  volume    = 420,
  number    = 6915,
  pages     = {520--562},
  month     = dec,
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature01262},
  url       = {http://www.nature.com/doifinder/10.1038/nature01262},
  abstract  = {The sequence of the mouse genome is a key informational tool for understanding the contents of the human genome and a key experimental tool for biomedical research. Here, we report the results of an international collaboration to produce a high-quality draft sequence of the mouse genome. We also present an initial comparative analysis of the mouse and human genomes, describing some of the insights that can be gleaned from the two sequences. We discuss topics including the analysis of the evolutionary forces shaping the size, structure and sequence of the genomes; the conservation of large-scale synteny across most of the genomes; the much lower extent of sequence orthology covering less than half of the genomes; the proportions of the genomes under selection; the number of protein-coding genes; the expansion of gene families related to reproduction and immunity; the evolution of proteins; and the identification of intraspecies polymorphism.},
}

@techreport{roskin02functions,
  author      = {Krishna M. Roskin and Mark Diekhans and W. James Kent and David Haussler},
  title       = {Score Functions for Assessing Conservation in Locally Aligned Regions of {DNA} from Two Species},
  institution = {University of California--Santa Cruz},
  year        = 2002,
  number      = {UCSC-CRL-02-30},
  type        = {Technical Report},
  address     = {Santa Cruz, CA, USA},
  month       = sep,
  url         = {http://www.soe.ucsc.edu/research/reports/ucsc-crl-02-30.pdf},
  abstract    = {We construct several score functions for use in locating unusually conserved regions in genome-wide search of aligned DNA from two species. We test these functions on regions of the human genome aligned to mouse. These score functions are derived from properties of neutrally evolving sites on the mouse and human genome, and can be adjusted to the local background rate of conservation. The aim of these functions is to identify regions of the human genome that are conserved by evolutionary selection, because they have an important function, rather than by chance. We use them to get a very rough estimate of the amount of DNA in the human genome that is under selection.},
}

% 1088-9051 is a serial number for the journal, so it is stored as issn
% (the trailing /03 on the old value was a fee-code suffix, not part of the number).
% The abstract contains literal double quotes, so it must stay brace-delimited.
@article{hardison03covariation,
  author    = {Ross C. Hardison and Krishna M. Roskin and Shan Yang and Mark Diekhans and W. James Kent and Ryan Weber and Laura Elnitski and Jia Li and Michael O'Connor and Diana Kolbe and Scott Schwartz and Terrence S. Furey and Simon Whelan and Nick Goldman and Arian Smit and Webb Miller and Francesca Chiaromonte and David Haussler},
  title     = {Covariation in Frequencies of Substitution, Deletion, Transposition, and Recombination During Eutherian Evolution},
  journal   = {Genome Research},
  year      = 2003,
  volume    = 13,
  number    = 1,
  pages     = {13--26},
  month     = jan,
  publisher = {Cold Spring Harbor Laboratory Press},
  issn      = {1088-9051},
  url       = {http://www.genome.org/cgi/content/abstract/13/1/13},
  abstract  = {Six measures of evolutionary change in the human genome were studied, three derived from the aligned human and mouse genomes in conjunction with the Mouse Genome Sequencing Consortium, consisting of (1) nucleotide substitution per fourfold degenerate site in coding regions, (2) nucleotide substitution per site in relics of transposable elements active only before the human-mouse speciation, and (3) the nonaligning fraction of human DNA that is nonrepetitive or in ancestral repeats; and three derived from human genome data alone, consisting of (4) SNP density, (5) frequency of insertion of transposable elements, and (6) rate of recombination. Features 1 and 2 are measures of nucleotide substitutions at two classes of "neutral" sites, whereas 4 is a measure of recent mutations. Feature 3 is a measure dominated by deletions in mouse, whereas 5 represents insertions in human. It was found that all six vary significantly in megabase-sized regions genome-wide, and many vary together. This indicates that some regions of a genome change slowly by all processes that alter DNA, and others change faster. Regional variation in all processes is correlated with, but not completely accounted for, by GC content in human and the difference between GC content in human and mouse.},
}

% Same journal as the entry above: the serial number is an issn, not an isbn.
@article{kent02browser,
  author    = {W. James Kent and Charles W. Sugnet and Terrence S. Furey and Krishna M. Roskin and Tom H. Pringle and Alan M. Zahler and David Haussler},
  title     = {The Human Genome Browser at {UCSC}},
  journal   = {Genome Research},
  year      = 2002,
  volume    = 12,
  number    = 6,
  pages     = {996--1006},
  month     = jun,
  publisher = {Cold Spring Harbor Laboratory Press},
  issn      = {1088-9051},
  url       = {http://www.genome.org/cgi/content/abstract/12/6/996},
  abstract  = {As vertebrate genome sequences near completion and research refocuses to their analysis, the issue of effective genome annotation display becomes critical. A mature web tool for rapid and reliable display of any requested portion of the genome at any scale, together with several dozen aligned annotation tracks, is provided at http://genome.ucsc.edu. This browser displays assembly contigs and gaps, mRNA and expressed sequence tag alignments, multiple gene predictions, cross-species homologies, single nucleotide polymorphisms, sequence-tagged sites, radiation hybrid data, transposon repeats, and more as a stack of coregistered tracks. Text and sequence-based searches provide quick and precise access to any region of specific interest. Secondary links from individual features lead to sequence details and supplementary off-site databases. One-half of the annotation tracks are computed at the University of California, Santa Cruz from publicly available sequence data; collaborators worldwide provide the rest. Users can stably add their own custom tracks to the browser for educational or research purposes. The conceptual and technical framework of the browser, its underlying MYSQL database, and overall use are described. The web site currently serves over 50,000 pages per day to over 3000 different users.},
}

@inproceedings{lodha01sounds,
  author       = {Suresh K. Lodha and Ellen Venable and David Marsh and Nguyet Manh and Doanna Meads and Casey Robinson and Krishna M. Roskin},
  title        = {Use of Natural Sounds and Metaphors for Data Mapping},
  booktitle    = {Proceedings of the SPIE Conference on Visual Data Exploration and Analysis},
  year         = 2001,
  volume       = 4302,
  series       = {Proceedings of SPIE},
  month        = jan,
  organization = {International Society for Optical Engineering},
  publisher    = {International Society for Optical Engineering},
}

% 2D is braced so sentence-casing styles do not turn it into 2d.
% The doi field is the identifier carried by the resolver URL.
@inproceedings{lodha00topology,
  author    = {Suresh K. Lodha and Jose C. Renteria and Krishna M. Roskin},
  title     = {Topology Preserving Compression of {2D} Vector Fields},
  booktitle = {Proceedings of the conference on Visualization '00},
  location  = {Salt Lake City, Utah, USA},
  year      = 2000,
  pages     = {343--350},
  publisher = {IEEE Computer Society Press},
  isbn      = {1-58113-309-X},
  doi       = {10.1145/375213.375266},
  url       = {http://doi.acm.org/10.1145/375213.375266},
  abstract  = {We present an algorithm for compressing 2D vector fields that preserves topology. Our approach is to simplify the given data set using constrained clustering. We employ different types of global and local error metrics including the earth mover's distance metric to measure the degradation in topology as well as weighted magnitude and angular errors. As a result, we obtain precise error bounds in the compressed vector fields. Experiments with both analytic and simulated data sets are presented. Results indicate that one can obtain significant compression with low errors without losing topology information.},
  keywords  = {compression, topology, vector fields, error metrics, clustering},
}