From f28481455a238c07fb1c2b3c935361a1abb44e32 Mon Sep 17 00:00:00 2001 From: pajanne Date: Wed, 19 Dec 2018 11:36:26 +0000 Subject: [PATCH 1/2] update ex 2.4.3 and add solution --- data/yeast_genes.txt | 2 +- python_basic_2_4.ipynb | 32 +++++++++---------- solutions/pybasic_ex2_4_3.py | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 19 deletions(-) create mode 100644 solutions/pybasic_ex2_4_3.py diff --git a/data/yeast_genes.txt b/data/yeast_genes.txt index ff82063..feeb104 100644 --- a/data/yeast_genes.txt +++ b/data/yeast_genes.txt @@ -1,4 +1,4 @@ -Systematic_name Standard_name Chromosome Start End +systematic_name standard_name chrom start end 21S_rRNA_4 21S_RRNA_4 chrmt 61868 62447 9S_rRNA_1 9S_RRNA_1 chrmt 1 11 9S_rRNA_5 9S_RRNA_5 chrmt 85290 85779 diff --git a/python_basic_2_4.ipynb b/python_basic_2_4.ipynb index d4983ce..aeb347f 100644 --- a/python_basic_2_4.ipynb +++ b/python_basic_2_4.ipynb @@ -9,7 +9,9 @@ "## Session 2.4: Delimited files\n", "\n", "- [Data formats](#Data-formats)\n", - "- [Exercises 2.4.1](#Exercises-2.4.1)" + "- [Exercises 2.4.1](#Exercises-2.4.1)\n", + "- [Exercises 2.4.2](#Exercises-2.4.2)\n", + "- [Exercises 2.4.3](#Exercises-2.4.3)" ] }, { @@ -217,7 +219,7 @@ "## Exercises 2.4.3 \n", "#### Real life example\n", "\n", - "You have a tab separated file which contains information about all the yeast (*S.cerevisiae*) gene `data/yeast_genes.txt`; \n", + "You have a tab separated file which contains information about all the yeast (*S.cerevisiae*) gene `data/yeast_genes.txt`:\n", "\n", "`Systematic_name\tStandard_name\tChromosome\tStart\t End\n", "YBR127C VMA2 chrII 491269 492822\n", @@ -225,25 +227,19 @@ "...\n", "`\n", "\n", - "\n", - "For every gene its location and coordinates are reported. \n", - "You should to read through the file and store the data in an appropriate structure.\n", + "For every gene, its location and coordinates are recorded. 
\n", + "You should read through the file and store the data into an appropriate structure.\n", "Then answer these questions:\n", "\n", - "* How many genes are there in *S.cerevisiae*?\n", - "\n", - "* Which is the longest and which is the shortest gene?\n", - "\n", - "* How many genes per chromosome? Return a list sorted by the number of genes\n", - "\n", - "* For each chromosome, what is the longest and what is the shortest gene?\n", - "\n", - "* For each chromosome, how many genes on the Watson strand and how many genes on the Crick strand?\n", + "- How many genes are there in *S.cerevisiae*?\n", + "- Which is the longest and which is the shortest gene?\n", + "- How many genes per chromosome? Print the number of genes per chromosome.\n", + "- For each chromosome, what is the longest and what is the shortest gene?\n", + "- For each chromosome, how many genes on the Watson strand and how many genes on the Crick strand?\n", "\n", - "###### bonus \n", + "**bonus** \n", "\n", - "* What is the chromosome with the highest gene density? \n", - " You can calculate the length of each chromosome assuming that they start at 1 and they end at the end (if on the Watson strand) or at the start (if on the Crick strand) of their last gene. Then you can calculate the length of all the genes on each chromosome and the ratio between coding vs. noncoding regions." + "- What is the chromosome with the highest gene density? You can calculate the length of each chromosome assuming that they start at 1 and they end at the end (if on the Watson strand) or at the start (if on the Crick strand) of their last gene. Then you can calculate the length of all the genes on each chromosome and the ratio between coding vs. noncoding regions." 
] }, { @@ -270,7 +266,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.6" + "version": "3.6.4" } }, "nbformat": 4,
diff --git a/solutions/pybasic_ex2_4_3.py b/solutions/pybasic_ex2_4_3.py
new file mode 100644
index 0000000..d5a7839
--- /dev/null
+++ b/solutions/pybasic_ex2_4_3.py
@@ -0,0 +1,60 @@
+"""Solution to exercise 2.4.3 (real life example).
+
+Reads data/yeast_genes.txt, a tab delimited file with one header line and 5
+columns (systematic_name, standard_name, chrom, start, end), then prints:
+- the total number of genes
+- the shortest and the longest gene
+- the number of genes per chromosome
+- the shortest and the longest gene of each chromosome
+"""
+from collections import defaultdict
+
+# Read a tab delimited file which has 5 columns: systematic_name, standard_name, chrom, start, end
+genes = []
+with open('data/yeast_genes.txt') as yeast_gene_file:
+    yeast_gene_file.readline()  # skip the header line
+    for line in yeast_gene_file:
+        line = line.strip()
+        if not line:
+            continue  # tolerate blank lines, e.g. at the end of the file
+        sys_name, std_name, chrom, start, end = line.split('\t')
+        start, end = int(start), int(end)
+        genes.append({'sys_name': sys_name,
+                      'std_name': std_name,  # NB. some genes do not have a standard name
+                      'chrom': chrom,
+                      'start': start,
+                      'end': end,
+                      # abs() keeps the length positive when start > end (reversed coordinates)
+                      'length': abs(end - start) + 1})
+
+# How many genes are there in *S.cerevisiae*?
+print("There are", len(genes), "genes in S.cerevisiae.")
+
+
+def gene_length(gene):
+    """Return the length of a gene record; used as the key for min() and max()."""
+    return gene['length']
+
+
+# Which is the longest and which is the shortest gene?
+if genes:  # min() and max() raise ValueError on an empty sequence
+    shortest = min(genes, key=gene_length)
+    longest = max(genes, key=gene_length)
+    print("The shortest gene is", shortest['sys_name'], "which is", shortest['length'], "bases long.")
+    print("The longest gene is", longest['sys_name'], "which is", longest['length'], "bases long.")
+
+# Group the genes by chromosome in one pass instead of re-scanning the whole list per chromosome
+genes_by_chrom = defaultdict(list)
+for g in genes:
+    genes_by_chrom[g['chrom']].append(g)
+
+# How many genes per chromosome? Print the number of genes per chromosome.
+# Chromosomes are sorted so that the output order is the same on every run
+for chrom in sorted(genes_by_chrom):
+    print(chrom, "has", len(genes_by_chrom[chrom]), "genes")
+
+# For each chromosome, what is the longest and what is the shortest gene?
+for chrom in sorted(genes_by_chrom):
+    shortest = min(genes_by_chrom[chrom], key=gene_length)
+    longest = max(genes_by_chrom[chrom], key=gene_length)
+    print("On chrom", chrom, "the shortest gene is", shortest['sys_name'], "(", shortest['length'], ")", "and the longest is", longest['sys_name'], "(", longest['length'], ")")
From b6eb729bdf0a77129f029cc22e6f5950fe9deada Mon Sep 17 00:00:00 2001 From: seb-mueller Date: Tue, 19 Feb 2019 09:53:32 +0000 Subject: [PATCH 2/2] Readme correction: python-basic corrected for archived python-intro repo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 95e1963..24febdb 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,8 @@ If you wish to run the course on your personal computer, here are the steps to f ## Clone this github project ```bash -git clone https://github.com/pycam/python-intro.git -cd python-intro +git clone https://github.com/pycam/python-basic.git +cd python-basic ``` ## Dependencies